You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@metamodel.apache.org by ka...@apache.org on 2017/05/10 03:54:09 UTC
[07/43] metamodel git commit: METAMODEL-250: Added support for EBCDIC files
METAMODEL-250: Added support for EBCDIC files
Closes #103
Project: http://git-wip-us.apache.org/repos/asf/metamodel/repo
Commit: http://git-wip-us.apache.org/repos/asf/metamodel/commit/a1b9ff7f
Tree: http://git-wip-us.apache.org/repos/asf/metamodel/tree/a1b9ff7f
Diff: http://git-wip-us.apache.org/repos/asf/metamodel/diff/a1b9ff7f
Branch: refs/heads/5.x
Commit: a1b9ff7fbc22cbebd8abda60dc8954fbf58981ce
Parents: 2392557
Author: Kasper Sørensen <i....@gmail.com>
Authored: Mon Aug 1 21:19:22 2016 -0700
Committer: Kasper Sørensen <i....@gmail.com>
Committed: Mon Aug 1 21:20:11 2016 -0700
----------------------------------------------------------------------
CHANGES.md | 1 +
.../fixedwidth/EbcdicConfiguration.java | 60 ++++
.../metamodel/fixedwidth/EbcdicReader.java | 75 +++++
.../fixedwidth/FixedWidthColumnSpec.java | 2 +-
.../fixedwidth/FixedWidthConfiguration.java | 199 +++++++------
.../FixedWidthConfigurationReader.java | 18 +-
.../fixedwidth/FixedWidthDataContext.java | 25 +-
.../metamodel/fixedwidth/FixedWidthDataSet.java | 3 +-
.../metamodel/fixedwidth/FixedWidthReader.java | 281 +++++++++++++++----
.../apache/metamodel/fixedwidth/EBCDICTest.java | 77 +++++
.../fixedwidth/FixedWidthConfigurationTest.java | 11 +-
.../fixedwidth/FixedWidthDataContextTest.java | 3 -
.../fixedwidth/FixedWidthReaderTest.java | 27 +-
.../test/resources/fixed-width-2-7-10-10.ebc | 1 +
14 files changed, 572 insertions(+), 211 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/metamodel/blob/a1b9ff7f/CHANGES.md
----------------------------------------------------------------------
diff --git a/CHANGES.md b/CHANGES.md
index f0264c6..c0b90cc 100644
--- a/CHANGES.md
+++ b/CHANGES.md
@@ -2,6 +2,7 @@
* [METAMODEL-1099] - Created a new DataContextFactory SPI and a extensible registry of implementations based on ServiceLoader.
* [METAMODEL-1099] - Implemented DataContextFactory SPI for connectors: JDBC, CSV, ElasticSearch
+ * [METAMODEL-250] - Added support for EBCDIC files (part of 'fixedwidth' module).
* [METAMODEL-1103] - Fixed a bug pertaining to anchoring of wildcards in LIKE operands.
* [METAMODEL-1088] - Add support for aliases in MongoDB.
* [METAMODEL-1086] - Fixed encoding issue when CsvDataContext is instantiated with InputStream.
http://git-wip-us.apache.org/repos/asf/metamodel/blob/a1b9ff7f/fixedwidth/src/main/java/org/apache/metamodel/fixedwidth/EbcdicConfiguration.java
----------------------------------------------------------------------
diff --git a/fixedwidth/src/main/java/org/apache/metamodel/fixedwidth/EbcdicConfiguration.java b/fixedwidth/src/main/java/org/apache/metamodel/fixedwidth/EbcdicConfiguration.java
new file mode 100644
index 0000000..389a4f8
--- /dev/null
+++ b/fixedwidth/src/main/java/org/apache/metamodel/fixedwidth/EbcdicConfiguration.java
@@ -0,0 +1,60 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.metamodel.fixedwidth;
+
+/**
+ * Special fixed-width configuration for EBCDIC files.
+ */
+public final class EbcdicConfiguration extends FixedWidthConfiguration {
+
+    private final boolean _skipEbcdicHeader;
+    private final boolean _eolPresent;
+
+    /**
+     * Creates an EBCDIC configuration in which every value has the same width.
+     *
+     * NOTE(review): this delegates to the super constructor that takes a single fixed value width;
+     * confirm that the reader created for EBCDIC input can work without an explicit widths array.
+     *
+     * @param columnNameLineNumber the line number (1 based) from which to get the names of the columns
+     * @param encoding the encoding to use for reading the file
+     * @param fixedValueWidth the width shared by all values
+     * @param failOnInconsistentLineWidth whether inconsistent line widths should cause a failure
+     * @param skipEbcdicHeader whether the input starts with a header that should be skipped
+     * @param eolPresent whether the input contains new line characters
+     */
+    public EbcdicConfiguration(int columnNameLineNumber, String encoding, int fixedValueWidth,
+            boolean failOnInconsistentLineWidth, boolean skipEbcdicHeader, boolean eolPresent) {
+        super(columnNameLineNumber, encoding, fixedValueWidth, failOnInconsistentLineWidth);
+        _skipEbcdicHeader = skipEbcdicHeader;
+        _eolPresent = eolPresent;
+    }
+
+    /**
+     * Creates an EBCDIC configuration with an individual width per column.
+     *
+     * @param columnNameLineNumber the line number (1 based) from which to get the names of the columns
+     * @param encoding the encoding to use for reading the file
+     * @param valueWidths the width of each column, in column order
+     * @param failOnInconsistentLineWidth whether inconsistent line widths should cause a failure
+     * @param skipEbcdicHeader whether the input starts with a header that should be skipped
+     * @param eolPresent whether the input contains new line characters
+     */
+    public EbcdicConfiguration(int columnNameLineNumber, String encoding, int[] valueWidths,
+            boolean failOnInconsistentLineWidth, boolean skipEbcdicHeader, boolean eolPresent) {
+        super(columnNameLineNumber, null, encoding, valueWidths, failOnInconsistentLineWidth);
+        _skipEbcdicHeader = skipEbcdicHeader;
+        _eolPresent = eolPresent;
+    }
+
+    /**
+     * Determines if the input file contains a header that should be skipped before reading records data.
+     *
+     * @return a boolean indicating whether or not to skip EBCDIC header.
+     */
+    public boolean isSkipEbcdicHeader() {
+        return _skipEbcdicHeader;
+    }
+
+    /**
+     * Determines if the input file contains new line characters.
+     *
+     * @return a boolean indicating whether or not the input contains new line characters.
+     */
+    public boolean isEolPresent() {
+        return _eolPresent;
+    }
+
+    /**
+     * Adds the EBCDIC-specific flags to the identifiers contributed by the super class, so that two
+     * configurations differing only in header/EOL handling are not treated as the same configuration.
+     *
+     * @param identifiers the list of identity-defining values to append to
+     */
+    @Override
+    protected void decorateIdentity(java.util.List<Object> identifiers) {
+        super.decorateIdentity(identifiers);
+        identifiers.add(_skipEbcdicHeader);
+        identifiers.add(_eolPresent);
+    }
+}
http://git-wip-us.apache.org/repos/asf/metamodel/blob/a1b9ff7f/fixedwidth/src/main/java/org/apache/metamodel/fixedwidth/EbcdicReader.java
----------------------------------------------------------------------
diff --git a/fixedwidth/src/main/java/org/apache/metamodel/fixedwidth/EbcdicReader.java b/fixedwidth/src/main/java/org/apache/metamodel/fixedwidth/EbcdicReader.java
new file mode 100644
index 0000000..a7639fc
--- /dev/null
+++ b/fixedwidth/src/main/java/org/apache/metamodel/fixedwidth/EbcdicReader.java
@@ -0,0 +1,75 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.metamodel.fixedwidth;
+
+import java.io.BufferedInputStream;
+import java.io.IOException;
+
+/**
+ * Fixed-width reader for EBCDIC input. Compared to {@link FixedWidthReader} it can skip a leading
+ * header record and, when the input has no end-of-line markers, it cuts records purely by byte count
+ * (the sum of the configured value widths).
+ */
+class EbcdicReader extends FixedWidthReader {
+
+    private final boolean _skipEbcdicHeader;
+    private final boolean _eolPresent;
+    // Set once the header has been consumed so that it is skipped only before the first record.
+    private boolean _headerSkipped;
+
+    /**
+     * @param stream the input to read records from
+     * @param charsetName name of the charset used to decode record bytes
+     * @param valueWidths the width of each column, in column order
+     * @param failOnInconsistentLineWidth whether inconsistent record widths should cause a failure
+     * @param skipEbcdicHeader whether a header record precedes the data and must be skipped
+     * @param eolPresent whether records are separated by new line characters
+     */
+    public EbcdicReader(BufferedInputStream stream, String charsetName, int[] valueWidths,
+            boolean failOnInconsistentLineWidth, boolean skipEbcdicHeader, boolean eolPresent) {
+        super(stream, charsetName, valueWidths, failOnInconsistentLineWidth);
+        _skipEbcdicHeader = skipEbcdicHeader;
+        _eolPresent = eolPresent;
+    }
+
+    /**
+     * Skips the header (once) before the first record is read.
+     *
+     * @throws IllegalStateException if the underlying stream fails while skipping
+     */
+    @Override
+    protected void beforeReadLine() {
+        if (shouldSkipHeader()) {
+            try {
+                skipHeader();
+            } catch (IOException e) {
+                throw new IllegalStateException("A problem occurred while skipping the input stream. ", e);
+            }
+        }
+    }
+
+    private boolean shouldSkipHeader() {
+        return (_skipEbcdicHeader && !_headerSkipped);
+    }
+
+    /**
+     * Consumes the header record. With end-of-line markers the whole header line (terminator
+     * included) is discarded; otherwise exactly one record length of bytes is skipped.
+     */
+    private void skipHeader() throws IOException {
+        _headerSkipped = true;
+
+        if (_eolPresent) {
+            // Skipping only the record bytes would leave the line terminator in the stream, and the
+            // next read would then see an empty line and report end of data.
+            super.readSingleRecordData();
+            return;
+        }
+
+        // InputStream.skip may skip fewer bytes than requested, so loop until done or end of stream.
+        long remaining = _expectedLineLength;
+        while (remaining > 0) {
+            final long skipped = _stream.skip(remaining);
+            if (skipped > 0) {
+                remaining -= skipped;
+            } else if (_stream.read() < 0) {
+                return; // end of stream reached before the full header was skipped
+            } else {
+                remaining--;
+            }
+        }
+    }
+
+    /**
+     * Reads the raw text of the next record.
+     *
+     * @return the decoded record, or null when no more data is available
+     * @throws IOException if reading or decoding fails
+     */
+    @Override
+    protected String readSingleRecordData() throws IOException {
+        if (_eolPresent) {
+            return super.readSingleRecordData();
+        }
+
+        final byte[] buffer = new byte[_expectedLineLength];
+        int filled = 0;
+
+        // A single read call may return fewer bytes than requested; keep reading until the record
+        // is complete or the stream ends.
+        while (filled < buffer.length) {
+            final int count = _stream.read(buffer, filled, buffer.length - filled);
+            if (count < 0) {
+                break;
+            }
+            filled += count;
+        }
+
+        if (filled == 0) {
+            return null;
+        }
+
+        // Decode only the bytes actually read so a truncated final record is not padded with NULs.
+        return new String(buffer, 0, filled, _charsetName);
+    }
+}
http://git-wip-us.apache.org/repos/asf/metamodel/blob/a1b9ff7f/fixedwidth/src/main/java/org/apache/metamodel/fixedwidth/FixedWidthColumnSpec.java
----------------------------------------------------------------------
diff --git a/fixedwidth/src/main/java/org/apache/metamodel/fixedwidth/FixedWidthColumnSpec.java b/fixedwidth/src/main/java/org/apache/metamodel/fixedwidth/FixedWidthColumnSpec.java
index 65ec219..dedfbcd 100644
--- a/fixedwidth/src/main/java/org/apache/metamodel/fixedwidth/FixedWidthColumnSpec.java
+++ b/fixedwidth/src/main/java/org/apache/metamodel/fixedwidth/FixedWidthColumnSpec.java
@@ -24,7 +24,7 @@ import org.apache.metamodel.util.HasName;
* Represents the specification of a single column for a
* {@link FixedWidthDataContext}.
*/
-public final class FixedWidthColumnSpec implements HasName {
+final class FixedWidthColumnSpec implements HasName {
private final String name;
private final int width;
http://git-wip-us.apache.org/repos/asf/metamodel/blob/a1b9ff7f/fixedwidth/src/main/java/org/apache/metamodel/fixedwidth/FixedWidthConfiguration.java
----------------------------------------------------------------------
diff --git a/fixedwidth/src/main/java/org/apache/metamodel/fixedwidth/FixedWidthConfiguration.java b/fixedwidth/src/main/java/org/apache/metamodel/fixedwidth/FixedWidthConfiguration.java
index 2b2cae5..c53ff16 100644
--- a/fixedwidth/src/main/java/org/apache/metamodel/fixedwidth/FixedWidthConfiguration.java
+++ b/fixedwidth/src/main/java/org/apache/metamodel/fixedwidth/FixedWidthConfiguration.java
@@ -31,32 +31,29 @@ import org.apache.metamodel.util.FileHelper;
import org.apache.metamodel.util.HasNameMapper;
/**
- * Configuration of metadata about a fixed width values datacontext.
+ * Configuration of metadata about a fixed width values data context.
*/
-public final class FixedWidthConfiguration extends BaseObject implements
- Serializable {
+public class FixedWidthConfiguration extends BaseObject implements Serializable {
- private static final long serialVersionUID = 1L;
+ private static final long serialVersionUID = 1L;
- public static final int NO_COLUMN_NAME_LINE = 0;
- public static final int DEFAULT_COLUMN_NAME_LINE = 1;
+ public static final int NO_COLUMN_NAME_LINE = 0;
+ public static final int DEFAULT_COLUMN_NAME_LINE = 1;
- private final String encoding;
- private final int fixedValueWidth;
- private final int[] valueWidths;
- private final int columnNameLineNumber;
- private final boolean failOnInconsistentLineWidth;
- private final ColumnNamingStrategy columnNamingStrategy;
+ private final String encoding;
+ private final int fixedValueWidth;
+ private final int[] valueWidths;
+ private final int columnNameLineNumber;
+ private final boolean failOnInconsistentLineWidth;
+ private final ColumnNamingStrategy columnNamingStrategy;
- public FixedWidthConfiguration(int fixedValueWidth) {
- this(DEFAULT_COLUMN_NAME_LINE, FileHelper.DEFAULT_ENCODING,
- fixedValueWidth);
- }
+ public FixedWidthConfiguration(int fixedValueWidth) {
+ this(DEFAULT_COLUMN_NAME_LINE, FileHelper.DEFAULT_ENCODING, fixedValueWidth);
+ }
- public FixedWidthConfiguration(int[] valueWidth) {
- this(DEFAULT_COLUMN_NAME_LINE, FileHelper.DEFAULT_ENCODING, valueWidth,
- false);
- }
+ public FixedWidthConfiguration(int[] valueWidth) {
+ this(DEFAULT_COLUMN_NAME_LINE, FileHelper.DEFAULT_ENCODING, valueWidth, false);
+ }
public FixedWidthConfiguration(int columnNameLineNumber, String encoding, int fixedValueWidth) {
this(columnNameLineNumber, encoding, fixedValueWidth, false);
@@ -72,11 +69,11 @@ public final class FixedWidthConfiguration extends BaseObject implements
this.valueWidths = new int[0];
}
- public FixedWidthConfiguration(int columnNameLineNumber, String encoding,
- int[] valueWidths, boolean failOnInconsistentLineWidth) {
+ public FixedWidthConfiguration(int columnNameLineNumber, String encoding, int[] valueWidths,
+ boolean failOnInconsistentLineWidth) {
this(columnNameLineNumber, null, encoding, valueWidths, failOnInconsistentLineWidth);
}
-
+
public FixedWidthConfiguration(int columnNameLineNumber, ColumnNamingStrategy columnNamingStrategy, String encoding,
int[] valueWidths, boolean failOnInconsistentLineWidth) {
this.encoding = encoding;
@@ -86,7 +83,7 @@ public final class FixedWidthConfiguration extends BaseObject implements
this.columnNamingStrategy = columnNamingStrategy;
this.valueWidths = valueWidths;
}
-
+
public FixedWidthConfiguration(String encoding, List<FixedWidthColumnSpec> columnSpecs) {
this(encoding, columnSpecs, false);
}
@@ -106,84 +103,84 @@ public final class FixedWidthConfiguration extends BaseObject implements
}
/**
- * The line number (1 based) from which to get the names of the columns.
- *
- * @return an int representing the line number of the column headers/names.
- */
- public int getColumnNameLineNumber() {
- return columnNameLineNumber;
- }
-
- /**
- * Gets a {@link ColumnNamingStrategy} to use if needed.
- * @return
- */
- public ColumnNamingStrategy getColumnNamingStrategy() {
- if (columnNamingStrategy == null) {
- return ColumnNamingStrategies.defaultStrategy();
- }
+ * The line number (1 based) from which to get the names of the columns.
+ *
+ * @return an int representing the line number of the column headers/names.
+ */
+ public int getColumnNameLineNumber() {
+ return columnNameLineNumber;
+ }
+
+ /**
+ * Gets a {@link ColumnNamingStrategy} to use if needed.
+ * @return column naming strategy
+ */
+ public ColumnNamingStrategy getColumnNamingStrategy() {
+ if (columnNamingStrategy == null) {
+ return ColumnNamingStrategies.defaultStrategy();
+ }
return columnNamingStrategy;
}
- /**
- * Gets the file encoding to use for reading the file.
- *
- * @return the text encoding to use for reading the file.
- */
- public String getEncoding() {
- return encoding;
- }
-
- /**
- * Gets the width of each value within the fixed width value file.
- *
- * @return the fixed width to use when parsing the file.
- */
- public int getFixedValueWidth() {
- return fixedValueWidth;
- }
-
- public int[] getValueWidths() {
- return valueWidths;
- }
-
- /**
- * Determines if the {@link DataSet#next()} should throw an exception in
- * case of inconsistent line width in the fixed width value file.
- *
- * @return a boolean indicating whether or not to fail on inconsistent line
- * widths.
- */
- public boolean isFailOnInconsistentLineWidth() {
- return failOnInconsistentLineWidth;
- }
-
- @Override
- protected void decorateIdentity(List<Object> identifiers) {
- identifiers.add(columnNameLineNumber);
- identifiers.add(encoding);
- identifiers.add(fixedValueWidth);
- identifiers.add(valueWidths);
- identifiers.add(failOnInconsistentLineWidth);
- }
-
- @Override
- public String toString() {
- return "FixedWidthConfiguration[encoding=" + encoding
- + ", fixedValueWidth=" + fixedValueWidth + ", valueWidths="
- + Arrays.toString(valueWidths) + ", columnNameLineNumber="
- + columnNameLineNumber + ", failOnInconsistentLineWidth="
- + failOnInconsistentLineWidth + "]";
- }
-
- public boolean isConstantValueWidth() {
- return fixedValueWidth != -1;
- }
-
- public int getValueWidth(int columnIndex) {
- if (isConstantValueWidth()) {
- return fixedValueWidth;
- }
- return valueWidths[columnIndex];
- }
+ /**
+ * Gets the file encoding to use for reading the file.
+ *
+ * @return the text encoding to use for reading the file.
+ */
+ public String getEncoding() {
+ return encoding;
+ }
+
+ /**
+ * Gets the width of each value within the fixed width value file.
+ *
+ * @return the fixed width to use when parsing the file.
+ */
+ public int getFixedValueWidth() {
+ return fixedValueWidth;
+ }
+
+ public int[] getValueWidths() {
+ return valueWidths;
+ }
+
+ /**
+ * Determines if the {@link DataSet#next()} should throw an exception in
+ * case of inconsistent line width in the fixed width value file.
+ *
+ * @return a boolean indicating whether or not to fail on inconsistent line
+ * widths.
+ */
+ public boolean isFailOnInconsistentLineWidth() {
+ return failOnInconsistentLineWidth;
+ }
+
+ @Override
+ protected void decorateIdentity(List<Object> identifiers) {
+ identifiers.add(columnNameLineNumber);
+ identifiers.add(encoding);
+ identifiers.add(fixedValueWidth);
+ identifiers.add(valueWidths);
+ identifiers.add(failOnInconsistentLineWidth);
+ }
+
+ @Override
+ public String toString() {
+ return "FixedWidthConfiguration[encoding=" + encoding
+ + ", fixedValueWidth=" + fixedValueWidth + ", valueWidths="
+ + Arrays.toString(valueWidths) + ", columnNameLineNumber="
+ + columnNameLineNumber + ", failOnInconsistentLineWidth="
+ + failOnInconsistentLineWidth + "]";
+ }
+
+ public boolean isConstantValueWidth() {
+ return fixedValueWidth != -1;
+ }
+
+ public int getValueWidth(int columnIndex) {
+ if (isConstantValueWidth()) {
+ return fixedValueWidth;
+ }
+ return valueWidths[columnIndex];
+ }
}
http://git-wip-us.apache.org/repos/asf/metamodel/blob/a1b9ff7f/fixedwidth/src/main/java/org/apache/metamodel/fixedwidth/FixedWidthConfigurationReader.java
----------------------------------------------------------------------
diff --git a/fixedwidth/src/main/java/org/apache/metamodel/fixedwidth/FixedWidthConfigurationReader.java b/fixedwidth/src/main/java/org/apache/metamodel/fixedwidth/FixedWidthConfigurationReader.java
index 9154e5e..264287f 100644
--- a/fixedwidth/src/main/java/org/apache/metamodel/fixedwidth/FixedWidthConfigurationReader.java
+++ b/fixedwidth/src/main/java/org/apache/metamodel/fixedwidth/FixedWidthConfigurationReader.java
@@ -60,10 +60,9 @@ public class FixedWidthConfigurationReader {
* "http://support.sas.com/documentation/cdl/en/etlug/67323/HTML/default/viewer.htm#p0h03yig7fp1qan1arghp3lwjqi6.htm">
* described here</a>.
*
- * @param encoding
- * @param resource
- * the format file resource
- * @param failOnInconsistentLineWidth
+ * @param encoding the format file encoding
+ * @param resource the format file resource
+ * @param failOnInconsistentLineWidth flag specifying whether inconsistent line should stop processing or not
* @return a {@link FixedWidthConfiguration} object to use
*/
public FixedWidthConfiguration readFromSasFormatFile(String encoding, Resource resource,
@@ -88,13 +87,11 @@ public class FixedWidthConfigurationReader {
/**
* Reads a {@link FixedWidthConfiguration} based on a SAS INPUT declaration.
- * The reader method also optionally will look for a LABEL defintion for
- * column naming.
+ * The reader method also optionally will look for a LABEL definition for column naming.
*
- * @param encoding
- * @param resource
- * the format file resource
- * @param failOnInconsistentLineWidth
+ * @param encoding the format file encoding
+ * @param resource the format file resource
+ * @param failOnInconsistentLineWidth flag specifying whether inconsistent line should stop processing or not
* @return a {@link FixedWidthConfiguration} object to use
*/
public FixedWidthConfiguration readFromSasInputDefinition(String encoding, Resource resource,
@@ -176,5 +173,4 @@ public class FixedWidthConfigurationReader {
return new FixedWidthConfiguration(encoding, columnSpecs, failOnInconsistentLineWidth);
}
-
}
http://git-wip-us.apache.org/repos/asf/metamodel/blob/a1b9ff7f/fixedwidth/src/main/java/org/apache/metamodel/fixedwidth/FixedWidthDataContext.java
----------------------------------------------------------------------
diff --git a/fixedwidth/src/main/java/org/apache/metamodel/fixedwidth/FixedWidthDataContext.java b/fixedwidth/src/main/java/org/apache/metamodel/fixedwidth/FixedWidthDataContext.java
index d28a0b2..027cdab 100644
--- a/fixedwidth/src/main/java/org/apache/metamodel/fixedwidth/FixedWidthDataContext.java
+++ b/fixedwidth/src/main/java/org/apache/metamodel/fixedwidth/FixedWidthDataContext.java
@@ -18,9 +18,9 @@
*/
package org.apache.metamodel.fixedwidth;
+import java.io.BufferedInputStream;
import java.io.File;
import java.io.InputStream;
-import java.io.Reader;
import org.apache.metamodel.MetaModelException;
import org.apache.metamodel.QueryPostprocessDataContext;
@@ -106,7 +106,7 @@ public class FixedWidthDataContext extends QueryPostprocessDataContext {
/**
* Gets the resource being read
*
- * @return
+ * @return a {@link Resource} object
*/
public Resource getResource() {
return _resource;
@@ -184,16 +184,23 @@ public class FixedWidthDataContext extends QueryPostprocessDataContext {
private FixedWidthReader createReader() {
final InputStream inputStream = _resource.read();
- final Reader fileReader = FileHelper.getReader(inputStream, _configuration.getEncoding());
final FixedWidthReader reader;
- if (_configuration.isConstantValueWidth()) {
- reader = new FixedWidthReader(fileReader, _configuration.getFixedValueWidth(), _configuration
- .isFailOnInconsistentLineWidth());
+
+ // Pick the reader implementation from the configuration type: EBCDIC configurations get the
+ // specialized reader, everything else the plain fixed-width reader.
+ if (_configuration instanceof EbcdicConfiguration) {
+     final EbcdicConfiguration ebcdicConfiguration = (EbcdicConfiguration) _configuration;
+     // The resource stream is only known to be an InputStream here; wrap it instead of casting
+     // blindly, which would fail with a ClassCastException for other stream types.
+     final BufferedInputStream bufferedStream = (inputStream instanceof BufferedInputStream)
+             ? (BufferedInputStream) inputStream
+             : new BufferedInputStream(inputStream);
+     reader = new EbcdicReader(bufferedStream, ebcdicConfiguration.getEncoding(),
+             ebcdicConfiguration.getValueWidths(), ebcdicConfiguration.isFailOnInconsistentLineWidth(),
+             ebcdicConfiguration.isSkipEbcdicHeader(), ebcdicConfiguration.isEolPresent());
} else {
- reader = new FixedWidthReader(fileReader, _configuration.getValueWidths(), _configuration
- .isFailOnInconsistentLineWidth());
+     if (_configuration.isConstantValueWidth()) {
+         reader = new FixedWidthReader(inputStream, _configuration.getEncoding(),
+                 _configuration.getFixedValueWidth(), _configuration.isFailOnInconsistentLineWidth());
+     } else {
+         reader = new FixedWidthReader(inputStream, _configuration.getEncoding(),
+                 _configuration.getValueWidths(), _configuration.isFailOnInconsistentLineWidth());
+     }
}
+
return reader;
}
-
}
http://git-wip-us.apache.org/repos/asf/metamodel/blob/a1b9ff7f/fixedwidth/src/main/java/org/apache/metamodel/fixedwidth/FixedWidthDataSet.java
----------------------------------------------------------------------
diff --git a/fixedwidth/src/main/java/org/apache/metamodel/fixedwidth/FixedWidthDataSet.java b/fixedwidth/src/main/java/org/apache/metamodel/fixedwidth/FixedWidthDataSet.java
index 44ce808..4f78bab 100644
--- a/fixedwidth/src/main/java/org/apache/metamodel/fixedwidth/FixedWidthDataSet.java
+++ b/fixedwidth/src/main/java/org/apache/metamodel/fixedwidth/FixedWidthDataSet.java
@@ -98,8 +98,7 @@ class FixedWidthDataSet extends AbstractDataSet {
if (columnNumber < stringValues.length) {
rowValues[i] = stringValues[columnNumber];
} else {
- // Ticket #125: Missing values should be enterpreted as
- // null.
+ // Ticket #125: Missing values should be interpreted as null.
rowValues[i] = null;
}
}
http://git-wip-us.apache.org/repos/asf/metamodel/blob/a1b9ff7f/fixedwidth/src/main/java/org/apache/metamodel/fixedwidth/FixedWidthReader.java
----------------------------------------------------------------------
diff --git a/fixedwidth/src/main/java/org/apache/metamodel/fixedwidth/FixedWidthReader.java b/fixedwidth/src/main/java/org/apache/metamodel/fixedwidth/FixedWidthReader.java
index d7a18cf..da17ff1 100644
--- a/fixedwidth/src/main/java/org/apache/metamodel/fixedwidth/FixedWidthReader.java
+++ b/fixedwidth/src/main/java/org/apache/metamodel/fixedwidth/FixedWidthReader.java
@@ -18,78 +18,235 @@
*/
package org.apache.metamodel.fixedwidth;
-import java.io.BufferedReader;
+import java.io.BufferedInputStream;
import java.io.Closeable;
import java.io.IOException;
-import java.io.Reader;
+import java.io.InputStream;
+import java.text.CharacterIterator;
+import java.text.StringCharacterIterator;
+import java.util.ArrayList;
+import java.util.List;
/**
* Reader capable of separating values based on a fixed width setting.
*/
-final public class FixedWidthReader implements Closeable {
-
- private final BufferedReader _reader;
- private final FixedWidthLineParser _parser;
-
- public FixedWidthReader(Reader reader, int fixedValueWidth,
- boolean failOnInconsistentLineWidth) {
- this(new BufferedReader(reader), fixedValueWidth,
- failOnInconsistentLineWidth);
- }
-
- public FixedWidthReader(BufferedReader reader, int fixedValueWidth,
- boolean failOnInconsistentLineWidth) {
- _reader = reader;
- final FixedWidthConfiguration fixedWidthConfiguration = new FixedWidthConfiguration(
- FixedWidthConfiguration.NO_COLUMN_NAME_LINE, null, fixedValueWidth, failOnInconsistentLineWidth);
- _parser = new FixedWidthLineParser(fixedWidthConfiguration, -1, 0);
- }
-
- public FixedWidthReader(Reader reader, int[] valueWidths,
- boolean failOnInconsistentLineWidth) {
- this(new BufferedReader(reader), valueWidths,
- failOnInconsistentLineWidth);
- }
-
- public FixedWidthReader(BufferedReader reader, int[] valueWidths,
- boolean failOnInconsistentLineWidth) {
- _reader = reader;
- int fixedValueWidth = -1;
- int expectedLineLength = 0;
- if (fixedValueWidth == -1) {
- for (int i = 0; i < valueWidths.length; i++) {
- expectedLineLength += valueWidths[i];
- }
- }
- final FixedWidthConfiguration fixedWidthConfiguration = new FixedWidthConfiguration(
- FixedWidthConfiguration.NO_COLUMN_NAME_LINE, null, valueWidths, failOnInconsistentLineWidth);
- _parser = new FixedWidthLineParser(fixedWidthConfiguration, expectedLineLength, 0);
- }
-
-
- /***
- * Reads the next line in the file.
- *
- * @return an array of values in the next line, or null if the end of the
- * file has been reached.
- *
- * @throws IllegalStateException
- * if an exception occurs while reading the file.
- */
- public String[] readLine() throws IllegalStateException {
- String line;
+class FixedWidthReader implements Closeable {
+ // Value returned by InputStream.read() when no more bytes are available.
+ private static final int END_OF_STREAM = -1;
+ private static final int LINE_FEED = '\n';
+ private static final int CARRIAGE_RETURN = '\r';
+
+ // Name of the charset of the input; visible to subclasses.
+ protected final String _charsetName;
+ // Width shared by all values, or -1 when individual widths are used.
+ private final int _fixedValueWidth;
+ // Per-column widths, or null when a single fixed width is used.
+ private final int[] _valueWidths;
+ // Index of the column currently being filled while a record is split into values.
+ private int _valueIndex = 0;
+ private final boolean _failOnInconsistentLineWidth;
+ // True when the reader was created with a single fixed width.
+ private final boolean _constantWidth;
+ // Number of records requested so far; NOTE(review): incremented with ++, which is not atomic
+ // despite the volatile modifier.
+ private volatile int _rowNumber;
+ protected final BufferedInputStream _stream;
+ // Sum of the individual value widths, or -1 when a single fixed width is used.
+ protected final int _expectedLineLength;
+
+ /**
+ * Creates a reader in which every value has the same width. The stream is wrapped in a
+ * {@link BufferedInputStream}.
+ *
+ * @param stream the input to read from
+ * @param charsetName name of the charset of the input
+ * @param fixedValueWidth the width shared by all values
+ * @param failOnInconsistentLineWidth whether inconsistent line widths should cause a failure
+ */
+ public FixedWidthReader(InputStream stream, String charsetName, int fixedValueWidth,
+ boolean failOnInconsistentLineWidth) {
+ this(new BufferedInputStream(stream), charsetName, fixedValueWidth, failOnInconsistentLineWidth);
+ }
+
+    /**
+     * Initializes a constant-width reader on an already buffered stream. No per-column widths exist
+     * in this mode, so the expected line length is left undefined (-1).
+     */
+    private FixedWidthReader(BufferedInputStream stream, String charsetName, int fixedValueWidth,
+            boolean failOnInconsistentLineWidth) {
+        // input
+        _stream = stream;
+        _charsetName = charsetName;
+
+        // width settings: one width for all values
+        _constantWidth = true;
+        _fixedValueWidth = fixedValueWidth;
+        _valueWidths = null;
+        _expectedLineLength = -1;
+
+        // validation and bookkeeping
+        _failOnInconsistentLineWidth = failOnInconsistentLineWidth;
+        _rowNumber = 0;
+    }
+
+ /**
+ * Creates a reader with an individual width per column. The stream is wrapped in a
+ * {@link BufferedInputStream}.
+ *
+ * @param stream the input to read from
+ * @param charsetName name of the charset of the input
+ * @param valueWidths the width of each column, in column order
+ * @param failOnInconsistentLineWidth whether inconsistent line widths should cause a failure
+ */
+ public FixedWidthReader(InputStream stream, String charsetName, int[] valueWidths,
+ boolean failOnInconsistentLineWidth) {
+ this(new BufferedInputStream(stream), charsetName, valueWidths, failOnInconsistentLineWidth);
+ }
+
+    /**
+     * Initializes a reader with per-column widths on an already buffered stream. The expected line
+     * length is the sum of all column widths.
+     */
+    FixedWidthReader(BufferedInputStream stream, String charsetName, int[] valueWidths,
+            boolean failOnInconsistentLineWidth) {
+        _stream = stream;
+        _charsetName = charsetName;
+        _failOnInconsistentLineWidth = failOnInconsistentLineWidth;
+        _rowNumber = 0;
+
+        // individual widths: the single fixed width is marked as unused
+        _constantWidth = false;
+        _fixedValueWidth = -1;
+        _valueWidths = valueWidths;
+
+        int totalWidth = 0;
+        for (int column = 0; column < valueWidths.length; column++) {
+            totalWidth += valueWidths[column];
+        }
+        _expectedLineLength = totalWidth;
+    }
+
+ /**
+ * Reads and returns the next record from the input. Usually a record is a line, but when new line
+ * characters are not present the length of the content depends on the column-widths setting.
+ * {@link #beforeReadLine()} is invoked before every read and the row counter is advanced even
+ * when no record is returned.
+ *
+ * @return an array of values in the next record, or null if no record could be read (end of the
+ * input; the default record reading also yields null for an empty line).
+ * @throws IllegalStateException if an exception occurs while reading the file.
+ */
+ public String[] readLine() throws IllegalStateException {
try {
- line = _reader.readLine();
- return _parser.parseLine(line);
+ beforeReadLine();
+ // Incremented before parsing so that validation errors report a 1-based row number.
+ _rowNumber++;
+ return getValues();
} catch (IOException e) {
throw new IllegalStateException(e);
}
- }
-
+ }
+
+    /**
+     * Extension point called at the start of every {@code readLine()} invocation. The default
+     * implementation does nothing; sub-classed readers override it to add special behavior.
+     */
+    protected void beforeReadLine() {
+        // intentionally empty
+    }
+
+    /**
+     * Reads one record and splits it into values.
+     *
+     * @return the values of the record, or null when no record could be read
+     * @throws IOException if reading the record fails
+     */
+    private String[] getValues() throws IOException {
+        final String recordData = readSingleRecordData();
+        if (recordData == null) {
+            return null;
+        }
+
+        final List<String> parsedValues = new ArrayList<>();
+        processSingleRecordData(recordData, parsedValues);
+        final int parsedCount = parsedValues.size();
+        String[] row = parsedValues.toArray(new String[parsedCount]);
+
+        // In lenient mode with per-column widths the row is resized to the configured column count.
+        final boolean lenientWithColumnWidths = !(_failOnInconsistentLineWidth || _constantWidth);
+        if (lenientWithColumnWidths) {
+            row = correctResult(row);
+        }
+
+        validateConsistentValue(recordData, row, parsedCount);
+        return row;
+    }
+
+    /**
+     * In strict mode, verifies that the record has the expected width.
+     *
+     * @param recordData the raw text of the record
+     * @param result the values extracted from the record
+     * @param valuesSize the number of values that were parsed
+     * @throws InconsistentValueWidthException if strict mode is on and the width does not match
+     */
+    private void validateConsistentValue(String recordData, String[] result, int valuesSize) {
+        if (!_failOnInconsistentLineWidth) {
+            return;
+        }
+
+        final boolean inconsistent;
+        if (_constantWidth) {
+            // every value must be complete: the record length is a multiple of the fixed width
+            inconsistent = recordData.length() % _fixedValueWidth != 0;
+        } else {
+            inconsistent = result.length != valuesSize || recordData.length() != _expectedLineLength;
+        }
+
+        if (inconsistent) {
+            throw new InconsistentValueWidthException(result, recordData, _rowNumber);
+        }
+    }
+
+    /**
+     * Splits the text of one record into values, character by character, appending them to
+     * {@code values}. A trailing, incomplete value is added as well where appropriate.
+     */
+    private void processSingleRecordData(final String singleRecordData, final List<String> values) {
+        _valueIndex = 0;
+        final StringBuilder pending = new StringBuilder();
+        final CharacterIterator chars = new StringCharacterIterator(singleRecordData);
+
+        char current = chars.first();
+        while (current != CharacterIterator.DONE) {
+            processCharacter(current, pending, values, singleRecordData);
+            current = chars.next();
+        }
+
+        // whatever is left did not fill a complete value
+        if (pending.length() > 0) {
+            addNewValueIfAppropriate(values, pending);
+        }
+    }
+
+    /**
+     * Reads the bytes of the next line (up to a carriage return, line feed or the end of the stream)
+     * and decodes them with the configured charset. A line feed directly following a carriage return
+     * is consumed as part of the same line terminator.
+     *
+     * NOTE(review): an empty line is indistinguishable from the end of the stream here (both yield
+     * null), so a blank line in the middle of the input ends reading for callers that stop on null.
+     *
+     * @return the decoded line, or null when no content was read
+     * @throws IOException if reading fails or the charset name is not supported
+     */
+    String readSingleRecordData() throws IOException {
+        // Collect raw bytes first: casting each byte to a char would ignore the charset and break
+        // every character that is not encoded as a single Latin-1 byte.
+        final java.io.ByteArrayOutputStream lineBytes = new java.io.ByteArrayOutputStream();
+        int ch;
+
+        for (ch = _stream.read(); !isEndingCharacter(ch); ch = _stream.read()) {
+            lineBytes.write(ch);
+        }
+
+        if (ch == CARRIAGE_RETURN) {
+            readLineFeedIfFollows();
+        }
+
+        if (lineBytes.size() == 0) {
+            return null;
+        }
+
+        final byte[] bytes = lineBytes.toByteArray();
+        if (_charsetName == null) {
+            // Without a charset, keep the previous byte-to-char mapping (equivalent to ISO-8859-1).
+            return new String(bytes, java.nio.charset.StandardCharsets.ISO_8859_1);
+        }
+        return new String(bytes, _charsetName);
+    }
+
+    /**
+     * Consumes the next byte if it is a line feed; otherwise the stream is rewound so that the byte
+     * is read again later.
+     */
+    private void readLineFeedIfFollows() throws IOException {
+        _stream.mark(1);
+        final int following = _stream.read();
+
+        if (following == LINE_FEED) {
+            return;
+        }
+
+        _stream.reset();
+    }
+
+    /**
+     * Tells whether the given read result terminates a line: a carriage return, a line feed or the
+     * end of the stream.
+     */
+    private boolean isEndingCharacter(int ch) {
+        switch (ch) {
+        case CARRIAGE_RETURN:
+        case LINE_FEED:
+        case END_OF_STREAM:
+            return true;
+        default:
+            return false;
+        }
+    }
+
+    /**
+     * Appends one character to the value being built and, once the value has reached the width of
+     * the current column, stores it and moves on to the next column.
+     */
+    private void processCharacter(char c, StringBuilder nextValue, List<String> values, String recordData) {
+        nextValue.append(c);
+
+        final int targetWidth = getValueWidth(values, recordData);
+        if (nextValue.length() != targetWidth) {
+            return; // value not complete yet
+        }
+
+        addNewValueIfAppropriate(values, nextValue);
+        nextValue.setLength(0); // clear the buffer
+
+        if (_valueWidths == null) {
+            return;
+        }
+        _valueIndex = (_valueIndex + 1) % _valueWidths.length;
+    }
+
+    /**
+     * Determines the width of the value currently being read.
+     *
+     * @param values the values collected so far for the current record (used for error reporting)
+     * @param recordData the raw text of the current record (used for error reporting)
+     * @return the fixed width, the width of the current column, or -1 when there is no column left
+     *         and inconsistencies are to be ignored
+     * @throws InconsistentValueWidthException if there is no column left and the reader is strict
+     */
+    private int getValueWidth(List<String> values, String recordData) {
+        if (_constantWidth) {
+            return _fixedValueWidth;
+        }
+
+        if (_valueIndex >= _valueWidths.length) {
+            if (_failOnInconsistentLineWidth) {
+                final String[] result = values.toArray(new String[values.size()]);
+                // The row counter has already been advanced for the current record when this runs,
+                // so it is reported as is, matching the row number used by the width validation.
+                throw new InconsistentValueWidthException(result, recordData, _rowNumber);
+            }
+            return -1; // silently ignore the inconsistency
+        }
+
+        return _valueWidths[_valueIndex];
+    }
+
+    /**
+     * Stores the trimmed value unless per-column widths are configured and all columns already have
+     * a value.
+     */
+    private void addNewValueIfAppropriate(List<String> values, StringBuilder nextValue) {
+        final boolean allColumnsFilled = _valueWidths != null && values.size() >= _valueWidths.length;
+        if (allColumnsFilled) {
+            return;
+        }
+
+        values.add(nextValue.toString().trim());
+    }
+
+ /**
+ * Resizes the result to the configured number of columns: values beyond the column count are
+ * dropped and missing trailing values are left as null.
+ *
+ * @param result the values parsed from a record
+ * @return the same array if it already has one entry per column, otherwise a new array of that size
+ */
+ private String[] correctResult(String[] result) {
+ if (result.length != _valueWidths.length) {
+ String[] correctedResult = new String[_valueWidths.length];
+
+ // copy as many values as fit; remaining slots keep their default (null)
+ for (int i = 0; i < result.length && i < _valueWidths.length; i++) {
+ correctedResult[i] = result[i];
+ }
+
+ result = correctedResult;
+ }
- @Override
- public void close() throws IOException {
- _reader.close();
- }
+ return result;
+ }
+ /**
+ * Closes the underlying stream.
+ *
+ * @throws IOException if closing the stream fails
+ */
+ @Override
+ public void close() throws IOException {
+ _stream.close();
+ }
}
http://git-wip-us.apache.org/repos/asf/metamodel/blob/a1b9ff7f/fixedwidth/src/test/java/org/apache/metamodel/fixedwidth/EBCDICTest.java
----------------------------------------------------------------------
diff --git a/fixedwidth/src/test/java/org/apache/metamodel/fixedwidth/EBCDICTest.java b/fixedwidth/src/test/java/org/apache/metamodel/fixedwidth/EBCDICTest.java
new file mode 100644
index 0000000..ea19960
--- /dev/null
+++ b/fixedwidth/src/test/java/org/apache/metamodel/fixedwidth/EBCDICTest.java
@@ -0,0 +1,77 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.metamodel.fixedwidth;
+
+import java.io.File;
+
+import org.apache.metamodel.data.DataSet;
+import org.apache.metamodel.schema.Schema;
+import org.apache.metamodel.schema.Table;
+import org.junit.Test;
+
+import static org.junit.Assert.assertEquals;
+
+/**
+ * Verifies that an EBCDIC-encoded fixed-width file can be queried through a
+ * {@link FixedWidthDataContext} that is set up with an {@link EbcdicConfiguration}.
+ */
+public class EBCDICTest {
+    private static final int[] COLUMN_WIDTHS = new int[] { 2, 7, 10, 10 };
+    private static final long EXPECTED_ROWS_COUNT = 49; // 50 lines, the first of which is a header
+    private static final String ENCODING = "IBM500";
+    private static final String[] EXPECTED_ROWS = new String[] {
+            "Row[values=[01, name-01, surname-01, address-01]]",
+            "Row[values=[02, name-02, surname-02, address-02]]",
+            "Row[values=[03, name-03, surname-03, address-03]]",
+    };
+    private final FixedWidthDataContext _context;
+    private final Table _table;
+
+    /**
+     * Builds the data context over the EBCDIC test resource and looks up the table named after the file.
+     */
+    public EBCDICTest() {
+        String fileName = "fixed-width-2-7-10-10.ebc";
+        // NOTE(review): the three trailing boolean flags are positional; their meaning is defined by
+        // EbcdicConfiguration and should be confirmed there.
+        FixedWidthConfiguration configuration = new EbcdicConfiguration(FixedWidthConfiguration.NO_COLUMN_NAME_LINE,
+                ENCODING, COLUMN_WIDTHS, false, true, false);
+        _context = new FixedWidthDataContext(new File("src/test/resources/" + fileName), configuration);
+        Schema schema = _context.getDefaultSchema();
+        _table = schema.getTableByName(fileName);
+    }
+
+    /**
+     * Checks that a count query over the table reports {@link #EXPECTED_ROWS_COUNT} rows.
+     */
+    @Test
+    public void testRowsCount() throws Exception {
+        long rows = 0;
+
+        try (final DataSet dataSet = _context.query().from(_table).selectCount().execute()) {
+            if (dataSet.next()) {
+                // Go through Number so the test does not depend on the exact boxed type of the count.
+                final Object count = dataSet.getRow().getValues()[0];
+                rows = ((Number) count).longValue();
+            }
+        }
+
+        assertEquals(EXPECTED_ROWS_COUNT, rows);
+    }
+
+    /**
+     * Checks that the first rows of the table match {@link #EXPECTED_ROWS}, both in content and in number.
+     */
+    @Test
+    public void testFirstRows() throws Exception {
+        final int limit = EXPECTED_ROWS.length;
+        int rowIndex = 0;
+
+        try (final DataSet dataSet = _context.query().from(_table).selectAll().limit(limit).execute()) {
+            while (dataSet.next()) {
+                assertEquals(EXPECTED_ROWS[rowIndex], dataSet.getRow().toString());
+                rowIndex++;
+            }
+        }
+
+        // Without this check the test would pass vacuously if the query returned fewer rows than expected.
+        assertEquals(limit, rowIndex);
+    }
+}
http://git-wip-us.apache.org/repos/asf/metamodel/blob/a1b9ff7f/fixedwidth/src/test/java/org/apache/metamodel/fixedwidth/FixedWidthConfigurationTest.java
----------------------------------------------------------------------
diff --git a/fixedwidth/src/test/java/org/apache/metamodel/fixedwidth/FixedWidthConfigurationTest.java b/fixedwidth/src/test/java/org/apache/metamodel/fixedwidth/FixedWidthConfigurationTest.java
index 8225be0..f03d633 100644
--- a/fixedwidth/src/test/java/org/apache/metamodel/fixedwidth/FixedWidthConfigurationTest.java
+++ b/fixedwidth/src/test/java/org/apache/metamodel/fixedwidth/FixedWidthConfigurationTest.java
@@ -18,8 +18,6 @@
*/
package org.apache.metamodel.fixedwidth;
-import org.apache.metamodel.fixedwidth.FixedWidthConfiguration;
-
import junit.framework.TestCase;
public class FixedWidthConfigurationTest extends TestCase {
@@ -31,14 +29,11 @@ public class FixedWidthConfigurationTest extends TestCase {
}
public void testEquals() throws Exception {
- FixedWidthConfiguration conf1 = new FixedWidthConfiguration(1, "UTF8",
- 10, true);
- FixedWidthConfiguration conf2 = new FixedWidthConfiguration(1, "UTF8",
- 10, true);
+ FixedWidthConfiguration conf1 = new FixedWidthConfiguration(1, "UTF8", 10, true);
+ FixedWidthConfiguration conf2 = new FixedWidthConfiguration(1, "UTF8", 10, true);
assertEquals(conf1, conf2);
- FixedWidthConfiguration conf3 = new FixedWidthConfiguration(1, "UTF8",
- 10, false);
+ FixedWidthConfiguration conf3 = new FixedWidthConfiguration(1, "UTF8", 10, false);
assertFalse(conf1.equals(conf3));
}
}
http://git-wip-us.apache.org/repos/asf/metamodel/blob/a1b9ff7f/fixedwidth/src/test/java/org/apache/metamodel/fixedwidth/FixedWidthDataContextTest.java
----------------------------------------------------------------------
diff --git a/fixedwidth/src/test/java/org/apache/metamodel/fixedwidth/FixedWidthDataContextTest.java b/fixedwidth/src/test/java/org/apache/metamodel/fixedwidth/FixedWidthDataContextTest.java
index 2ac3680..7962cf6 100644
--- a/fixedwidth/src/test/java/org/apache/metamodel/fixedwidth/FixedWidthDataContextTest.java
+++ b/fixedwidth/src/test/java/org/apache/metamodel/fixedwidth/FixedWidthDataContextTest.java
@@ -25,9 +25,6 @@ import junit.framework.TestCase;
import org.apache.metamodel.DataContext;
import org.apache.metamodel.data.DataSet;
-import org.apache.metamodel.fixedwidth.FixedWidthConfiguration;
-import org.apache.metamodel.fixedwidth.FixedWidthDataContext;
-import org.apache.metamodel.fixedwidth.InconsistentValueWidthException;
import org.apache.metamodel.query.Query;
import org.apache.metamodel.schema.Schema;
import org.apache.metamodel.schema.Table;
http://git-wip-us.apache.org/repos/asf/metamodel/blob/a1b9ff7f/fixedwidth/src/test/java/org/apache/metamodel/fixedwidth/FixedWidthReaderTest.java
----------------------------------------------------------------------
diff --git a/fixedwidth/src/test/java/org/apache/metamodel/fixedwidth/FixedWidthReaderTest.java b/fixedwidth/src/test/java/org/apache/metamodel/fixedwidth/FixedWidthReaderTest.java
index 4d11f0e..8f40c1d 100644
--- a/fixedwidth/src/test/java/org/apache/metamodel/fixedwidth/FixedWidthReaderTest.java
+++ b/fixedwidth/src/test/java/org/apache/metamodel/fixedwidth/FixedWidthReaderTest.java
@@ -18,11 +18,9 @@
*/
package org.apache.metamodel.fixedwidth;
-import static org.junit.Assert.assertEquals;
-
-import java.io.BufferedReader;
+import java.io.BufferedInputStream;
import java.io.File;
-import java.io.FileReader;
+import java.io.FileInputStream;
import java.io.IOException;
import java.util.Arrays;
@@ -30,7 +28,10 @@ import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.ExpectedException;
+import static org.junit.Assert.assertEquals;
+
public class FixedWidthReaderTest {
+ private static final String CHARSET = "UTF-8";
@Rule
public final ExpectedException exception = ExpectedException.none();
@@ -38,9 +39,9 @@ public class FixedWidthReaderTest {
@Test
public void testBufferedReader1() throws IOException {
final File file = new File("src/test/resources/example_simple1.txt");
- final BufferedReader reader = new BufferedReader(new FileReader(file));
+ final BufferedInputStream stream = new BufferedInputStream(new FileInputStream(file));
int[] widths = new int[] { 8, 9 };
- try (final FixedWidthReader fixedWidthReader = new FixedWidthReader(reader, widths, false)) {
+ try (final FixedWidthReader fixedWidthReader = new FixedWidthReader(stream, CHARSET, widths, false)) {
final String[] line1 = fixedWidthReader.readLine();
assertEquals("[greeting, greeter]", Arrays.asList(line1).toString());
final String[] line2 = fixedWidthReader.readLine();
@@ -53,9 +54,9 @@ public class FixedWidthReaderTest {
@Test
public void testBufferedReader2() throws IOException {
final File file = new File("src/test/resources/example_simple2.txt");
- final BufferedReader reader = new BufferedReader(new FileReader(file));
+ final BufferedInputStream stream = new BufferedInputStream(new FileInputStream(file));
int[] widths = new int[] {1, 8, 9 };
- try (final FixedWidthReader fixedWidthReader = new FixedWidthReader(reader, widths, false)) {
+ try (final FixedWidthReader fixedWidthReader = new FixedWidthReader(stream, CHARSET, widths, false)) {
final String[] line1 = fixedWidthReader.readLine();
assertEquals("[i, greeting, greeter]", Arrays.asList(line1).toString());
final String[] line2 = fixedWidthReader.readLine();
@@ -68,8 +69,8 @@ public class FixedWidthReaderTest {
@Test
public void testBufferedReader3() throws IOException {
final File file = new File("src/test/resources/example_simple3.txt");
- final BufferedReader reader = new BufferedReader(new FileReader(file));
- try (final FixedWidthReader fixedWidthReader = new FixedWidthReader(reader, 5, false)) {
+ final BufferedInputStream stream = new BufferedInputStream(new FileInputStream(file));
+ try (final FixedWidthReader fixedWidthReader = new FixedWidthReader(stream, CHARSET, 5, false)) {
final String[] line1 = fixedWidthReader.readLine();
assertEquals("[hello]", Arrays.asList(line1).toString());
final String[] line2 = fixedWidthReader.readLine();
@@ -84,8 +85,8 @@ public class FixedWidthReaderTest {
@Test
public void testBufferedReaderFailOnInconsistentRows() throws IOException {
final File file = new File("src/test/resources/example_simple3.txt");
- final BufferedReader reader = new BufferedReader(new FileReader(file));
- try (final FixedWidthReader fixedWidthReader = new FixedWidthReader(reader, 5, true)) {
+ final BufferedInputStream stream = new BufferedInputStream(new FileInputStream(file));
+ try (final FixedWidthReader fixedWidthReader = new FixedWidthReader(stream, CHARSET, 5, true)) {
final String[] line1 = fixedWidthReader.readLine();
assertEquals("[hello]", Arrays.asList(line1).toString());
final String[] line2 = fixedWidthReader.readLine();
@@ -98,6 +99,4 @@ public class FixedWidthReaderTest {
final String[] line4 = fixedWidthReader.readLine();
}
}
-
-
}
http://git-wip-us.apache.org/repos/asf/metamodel/blob/a1b9ff7f/fixedwidth/src/test/resources/fixed-width-2-7-10-10.ebc
----------------------------------------------------------------------
diff --git a/fixedwidth/src/test/resources/fixed-width-2-7-10-10.ebc b/fixedwidth/src/test/resources/fixed-width-2-7-10-10.ebc
new file mode 100644
index 0000000..09fcc70
--- /dev/null
+++ b/fixedwidth/src/test/resources/fixed-width-2-7-10-10.ebc
@@ -0,0 +1 @@
+������`���������`���������`����`�����`�����`����`�����`�����`����`�����`�����`������`�������`�����`��������`���������`���������`��������`���������`���������`��������`���������`���������`��������`���������`���������`��������`���������`���������`����`�𢤙����`�������`����`�����`�����`����`�����`�����`����`�����`�����`������`�������`����
���`��������`���������`���������`��������`���������`���������`��������`���������`���������`��������`���������`���������`��������`���������`���������`����`�𢤙����`�������`����`�����`�����`����`�����`�����`����`�����`�����`������`�������`�����`��������`���������`���������`��������`���������`���������`��������`���������`���������`��������`���������
`���������`��������`���������`���������`����`�𢤙����`�������`����`�����`�����`����`�����`�����`����`�����`�����`������`�������`�����`��������`���������`���������`��������`���������`���������`��������`���������`���������`��������`���������`���������`��������`���������`���������`����`�𢤙����`�������`����`�����`�����`����`�����`�����`����`�
������`�����`������`�������`�����`��������`���������`���������`��������`���������`���������`��������`���������`���������`��������`���������`���������`��������`���������`���������`��
\ No newline at end of file