You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@metamodel.apache.org by ka...@apache.org on 2014/06/22 20:12:03 UTC

git commit: METAMODEL-62: Fixed fault-tolerant handling of malformed CSV lines in single-line CSV mode

Repository: incubator-metamodel
Updated Branches:
  refs/heads/master 1d7270c12 -> fa16e3741


METAMODEL-62: Fixed fault-tolerant handling of malformed CSV lines in
single-line CSV mode

Project: http://git-wip-us.apache.org/repos/asf/incubator-metamodel/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-metamodel/commit/fa16e374
Tree: http://git-wip-us.apache.org/repos/asf/incubator-metamodel/tree/fa16e374
Diff: http://git-wip-us.apache.org/repos/asf/incubator-metamodel/diff/fa16e374

Branch: refs/heads/master
Commit: fa16e374139bb31b1f651f3bfa47614ab24b6533
Parents: 1d7270c
Author: Kasper Sørensen <i....@gmail.com>
Authored: Thu Jun 19 23:48:32 2014 +0200
Committer: Kasper Sørensen <i....@gmail.com>
Committed: Thu Jun 19 23:48:32 2014 +0200

----------------------------------------------------------------------
 CHANGES.txt                                     |  1 +
 .../apache/metamodel/csv/SingleLineCsvRow.java  | 30 ++++++++++++++------
 .../metamodel/csv/SingleLineCsvDataSetTest.java | 22 ++++++++++++--
 csv/src/test/resources/csv_malformed_line.txt   |  4 +++
 4 files changed, 47 insertions(+), 10 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-metamodel/blob/fa16e374/CHANGES.txt
----------------------------------------------------------------------
diff --git a/CHANGES.txt b/CHANGES.txt
index 11b2f89..a7cb553 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -20,6 +20,7 @@ Apache MetaModel 4.1.0-incubating
  * [METAMODEL-59] - Fixed a bug related to handling of date/time literals in MS SQL Server queries.
  * [METAMODEL-60] - Fixed a bug related to DISTINCT and TOP keywords in MS SQL Server queries.
  * [METAMODEL-45] - Improved and standardized way of handling integration test connection information towards external databases.
+ * [METAMODEL-62] - Fixed a bug related to fault-tolerant handling of malformed CSV lines when reading CSVs in single-line mode.
 
 Apache MetaModel 4.0.0-incubating
 

http://git-wip-us.apache.org/repos/asf/incubator-metamodel/blob/fa16e374/csv/src/main/java/org/apache/metamodel/csv/SingleLineCsvRow.java
----------------------------------------------------------------------
diff --git a/csv/src/main/java/org/apache/metamodel/csv/SingleLineCsvRow.java b/csv/src/main/java/org/apache/metamodel/csv/SingleLineCsvRow.java
index 0adf72f..bdd3beb 100644
--- a/csv/src/main/java/org/apache/metamodel/csv/SingleLineCsvRow.java
+++ b/csv/src/main/java/org/apache/metamodel/csv/SingleLineCsvRow.java
@@ -25,6 +25,8 @@ import org.apache.metamodel.data.AbstractRow;
 import org.apache.metamodel.data.DataSetHeader;
 import org.apache.metamodel.data.Style;
 import org.apache.metamodel.schema.Column;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
 import au.com.bytecode.opencsv.CSVParser;
 
@@ -35,6 +37,8 @@ final class SingleLineCsvRow extends AbstractRow {
 
     private static final long serialVersionUID = 1L;
 
+    private static final Logger logger = LoggerFactory.getLogger(SingleLineCsvRow.class);
+    
     private final SingleLineCsvDataSet _dataSet;
     private final String _line;
     private final int _columnsInTable;
@@ -54,13 +58,7 @@ final class SingleLineCsvRow extends AbstractRow {
 
     private String[] getValuesInternal() {
         if (_values == null) {
-            final CSVParser parser = _dataSet.getCsvParser();
-            final String[] csvValues;
-            try {
-                csvValues = parser.parseLine(_line);
-            } catch (IOException e) {
-                throw new MetaModelException("Failed to parse CSV line no. " + _rowNumber + ": " + _line);
-            }
+            final String[] csvValues = parseLine();
 
             if (_failOnInconsistentRowLength) {
                 if (_columnsInTable != csvValues.length) {
@@ -81,7 +79,7 @@ final class SingleLineCsvRow extends AbstractRow {
                 if (columnNumber < csvValues.length) {
                     rowValues[i] = csvValues[columnNumber];
                 } else {
-                    // Ticket #125: Missing values should be enterpreted as
+                    // Ticket #125: Missing values should be interpreted as
                     // null.
                     rowValues[i] = null;
                 }
@@ -92,6 +90,22 @@ final class SingleLineCsvRow extends AbstractRow {
         return _values;
     }
 
+    private String[] parseLine() {
+        try {
+            final CSVParser parser = _dataSet.getCsvParser();
+            return parser.parseLine(_line);
+        } catch (IOException e) {
+            if (_failOnInconsistentRowLength) {
+                throw new MetaModelException("Failed to parse CSV line no. " + _rowNumber + ": " + _line, e);
+            } else {
+                logger.warn("Encountered unparseable line no. {}, returning line as a single value with trailing nulls: {}", _rowNumber, _line);
+                String[] csvValues = new String[_columnsInTable];
+                csvValues[0] = _line;
+                return csvValues;
+            }
+        }
+    }
+
     @Override
     public Object getValue(int index) throws IndexOutOfBoundsException {
         final String[] values = getValuesInternal();

http://git-wip-us.apache.org/repos/asf/incubator-metamodel/blob/fa16e374/csv/src/test/java/org/apache/metamodel/csv/SingleLineCsvDataSetTest.java
----------------------------------------------------------------------
diff --git a/csv/src/test/java/org/apache/metamodel/csv/SingleLineCsvDataSetTest.java b/csv/src/test/java/org/apache/metamodel/csv/SingleLineCsvDataSetTest.java
index e41db1b..1489016 100644
--- a/csv/src/test/java/org/apache/metamodel/csv/SingleLineCsvDataSetTest.java
+++ b/csv/src/test/java/org/apache/metamodel/csv/SingleLineCsvDataSetTest.java
@@ -19,11 +19,13 @@
 package org.apache.metamodel.csv;
 
 import java.io.File;
-
-import org.apache.metamodel.data.DataSet;
+import java.util.Arrays;
 
 import junit.framework.TestCase;
 
+import org.apache.metamodel.data.DataSet;
+import org.apache.metamodel.schema.Table;
+
 public class SingleLineCsvDataSetTest extends TestCase {
 
     public void testGetValueInNonPhysicalOrder() throws Exception {
@@ -46,4 +48,20 @@ public class SingleLineCsvDataSetTest extends TestCase {
 
         dataSet.close();
     }
+
+    public void testMalformedLineParsing() throws Exception {
+        CsvConfiguration configuration = new CsvConfiguration(1, false, false);
+        CsvDataContext dc = new CsvDataContext(new File("src/test/resources/csv_malformed_line.txt"), configuration);
+
+        Table table = dc.getDefaultSchema().getTable(0);
+        DataSet ds = dc.query().from(table).selectAll().execute();
+        assertTrue(ds.next());
+        assertEquals("[foo, bar, baz]", Arrays.toString(ds.getRow().getValues()));
+        assertTrue(ds.next());
+        assertEquals("[\", null, null]", Arrays.toString(ds.getRow().getValues()));
+        assertTrue(ds.next());
+        assertEquals("[hello, there, world]", Arrays.toString(ds.getRow().getValues()));
+        assertFalse(ds.next());
+        ds.close();
+    }
 }

http://git-wip-us.apache.org/repos/asf/incubator-metamodel/blob/fa16e374/csv/src/test/resources/csv_malformed_line.txt
----------------------------------------------------------------------
diff --git a/csv/src/test/resources/csv_malformed_line.txt b/csv/src/test/resources/csv_malformed_line.txt
new file mode 100644
index 0000000..68a2cc4
--- /dev/null
+++ b/csv/src/test/resources/csv_malformed_line.txt
@@ -0,0 +1,4 @@
+col1,col2,col3
+"foo","bar","baz"
+"
+hello,there,world
\ No newline at end of file