You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@metamodel.apache.org by ka...@apache.org on 2014/06/22 20:12:03 UTC
git commit: METAMODEL-62: Fixed fault-tolerant handling of malformed
CSV lines in single-line CSV mode
Repository: incubator-metamodel
Updated Branches:
refs/heads/master 1d7270c12 -> fa16e3741
METAMODEL-62: Fixed fault-tolerant handling of malformed CSV lines in
single-line CSV mode
Project: http://git-wip-us.apache.org/repos/asf/incubator-metamodel/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-metamodel/commit/fa16e374
Tree: http://git-wip-us.apache.org/repos/asf/incubator-metamodel/tree/fa16e374
Diff: http://git-wip-us.apache.org/repos/asf/incubator-metamodel/diff/fa16e374
Branch: refs/heads/master
Commit: fa16e374139bb31b1f651f3bfa47614ab24b6533
Parents: 1d7270c
Author: Kasper Sørensen <i....@gmail.com>
Authored: Thu Jun 19 23:48:32 2014 +0200
Committer: Kasper Sørensen <i....@gmail.com>
Committed: Thu Jun 19 23:48:32 2014 +0200
----------------------------------------------------------------------
CHANGES.txt | 1 +
.../apache/metamodel/csv/SingleLineCsvRow.java | 30 ++++++++++++++------
.../metamodel/csv/SingleLineCsvDataSetTest.java | 22 ++++++++++++--
csv/src/test/resources/csv_malformed_line.txt | 4 +++
4 files changed, 47 insertions(+), 10 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-metamodel/blob/fa16e374/CHANGES.txt
----------------------------------------------------------------------
diff --git a/CHANGES.txt b/CHANGES.txt
index 11b2f89..a7cb553 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -20,6 +20,7 @@ Apache MetaModel 4.1.0-incubating
* [METAMODEL-59] - Fixed a bug related to handling of date/time literals in MS SQL Server queries.
* [METAMODEL-60] - Fixed a bug related to DISTINCT and TOP keywords in MS SQL Server queries.
* [METAMODEL-45] - Improved and standardized way of handling integration test connection information towards external databases.
+ * [METAMODEL-62] - Fixed a bug related to fault-tolerant handling of malformed CSV lines when reading CSVs in single-line mode
Apache MetaModel 4.0.0-incubating
http://git-wip-us.apache.org/repos/asf/incubator-metamodel/blob/fa16e374/csv/src/main/java/org/apache/metamodel/csv/SingleLineCsvRow.java
----------------------------------------------------------------------
diff --git a/csv/src/main/java/org/apache/metamodel/csv/SingleLineCsvRow.java b/csv/src/main/java/org/apache/metamodel/csv/SingleLineCsvRow.java
index 0adf72f..bdd3beb 100644
--- a/csv/src/main/java/org/apache/metamodel/csv/SingleLineCsvRow.java
+++ b/csv/src/main/java/org/apache/metamodel/csv/SingleLineCsvRow.java
@@ -25,6 +25,8 @@ import org.apache.metamodel.data.AbstractRow;
import org.apache.metamodel.data.DataSetHeader;
import org.apache.metamodel.data.Style;
import org.apache.metamodel.schema.Column;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
import au.com.bytecode.opencsv.CSVParser;
@@ -35,6 +37,8 @@ final class SingleLineCsvRow extends AbstractRow {
private static final long serialVersionUID = 1L;
+ private static final Logger logger = LoggerFactory.getLogger(SingleLineCsvRow.class);
+
private final SingleLineCsvDataSet _dataSet;
private final String _line;
private final int _columnsInTable;
@@ -54,13 +58,7 @@ final class SingleLineCsvRow extends AbstractRow {
private String[] getValuesInternal() {
if (_values == null) {
- final CSVParser parser = _dataSet.getCsvParser();
- final String[] csvValues;
- try {
- csvValues = parser.parseLine(_line);
- } catch (IOException e) {
- throw new MetaModelException("Failed to parse CSV line no. " + _rowNumber + ": " + _line);
- }
+ final String[] csvValues = parseLine();
if (_failOnInconsistentRowLength) {
if (_columnsInTable != csvValues.length) {
@@ -81,7 +79,7 @@ final class SingleLineCsvRow extends AbstractRow {
if (columnNumber < csvValues.length) {
rowValues[i] = csvValues[columnNumber];
} else {
- // Ticket #125: Missing values should be enterpreted as
+ // Ticket #125: Missing values should be interpreted as
// null.
rowValues[i] = null;
}
@@ -92,6 +90,22 @@ final class SingleLineCsvRow extends AbstractRow {
return _values;
}
+ private String[] parseLine() {
+ try {
+ final CSVParser parser = _dataSet.getCsvParser();
+ return parser.parseLine(_line);
+ } catch (IOException e) {
+ if (_failOnInconsistentRowLength) {
+ throw new MetaModelException("Failed to parse CSV line no. " + _rowNumber + ": " + _line, e);
+ } else {
+ logger.warn("Encountered unparseable line no. {}, returning line as a single value with trailing nulls: {}", _rowNumber, _line);
+ String[] csvValues = new String[_columnsInTable];
+ csvValues[0] = _line;
+ return csvValues;
+ }
+ }
+ }
+
@Override
public Object getValue(int index) throws IndexOutOfBoundsException {
final String[] values = getValuesInternal();
http://git-wip-us.apache.org/repos/asf/incubator-metamodel/blob/fa16e374/csv/src/test/java/org/apache/metamodel/csv/SingleLineCsvDataSetTest.java
----------------------------------------------------------------------
diff --git a/csv/src/test/java/org/apache/metamodel/csv/SingleLineCsvDataSetTest.java b/csv/src/test/java/org/apache/metamodel/csv/SingleLineCsvDataSetTest.java
index e41db1b..1489016 100644
--- a/csv/src/test/java/org/apache/metamodel/csv/SingleLineCsvDataSetTest.java
+++ b/csv/src/test/java/org/apache/metamodel/csv/SingleLineCsvDataSetTest.java
@@ -19,11 +19,13 @@
package org.apache.metamodel.csv;
import java.io.File;
-
-import org.apache.metamodel.data.DataSet;
+import java.util.Arrays;
import junit.framework.TestCase;
+import org.apache.metamodel.data.DataSet;
+import org.apache.metamodel.schema.Table;
+
public class SingleLineCsvDataSetTest extends TestCase {
public void testGetValueInNonPhysicalOrder() throws Exception {
@@ -46,4 +48,20 @@ public class SingleLineCsvDataSetTest extends TestCase {
dataSet.close();
}
+
+ public void testMalformedLineParsing() throws Exception {
+ CsvConfiguration configuration = new CsvConfiguration(1, false, false);
+ CsvDataContext dc = new CsvDataContext(new File("src/test/resources/csv_malformed_line.txt"), configuration);
+
+ Table table = dc.getDefaultSchema().getTable(0);
+ DataSet ds = dc.query().from(table).selectAll().execute();
+ assertTrue(ds.next());
+ assertEquals("[foo, bar, baz]", Arrays.toString(ds.getRow().getValues()));
+ assertTrue(ds.next());
+ assertEquals("[\", null, null]", Arrays.toString(ds.getRow().getValues()));
+ assertTrue(ds.next());
+ assertEquals("[hello, there, world]", Arrays.toString(ds.getRow().getValues()));
+ assertFalse(ds.next());
+ ds.close();
+ }
}
http://git-wip-us.apache.org/repos/asf/incubator-metamodel/blob/fa16e374/csv/src/test/resources/csv_malformed_line.txt
----------------------------------------------------------------------
diff --git a/csv/src/test/resources/csv_malformed_line.txt b/csv/src/test/resources/csv_malformed_line.txt
new file mode 100644
index 0000000..68a2cc4
--- /dev/null
+++ b/csv/src/test/resources/csv_malformed_line.txt
@@ -0,0 +1,4 @@
+col1,col2,col3
+"foo","bar","baz"
+"
+hello,there,world
\ No newline at end of file