You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@commons.apache.org by gg...@apache.org on 2017/08/11 21:05:29 UTC

commons-csv git commit: [CSV-214] Adding a placeholder in the Lexer and CSV parser to store the end-of-line string. I applied the patch in spirit and made changes: there is no need to use a boolean to track the state of the EOL String (set vs. not set).

Repository: commons-csv
Updated Branches:
  refs/heads/master 4d0f22600 -> aae6f9044


[CSV-214] Adding a placeholder in the Lexer and CSV parser to store the
end-of-line string. I applied the patch in spirit and made changes:
there is no need to use a boolean to track the state of the EOL String
(set vs. not set). I also allowed for CR to be saved as an EOL string
since we allow that already.

Project: http://git-wip-us.apache.org/repos/asf/commons-csv/repo
Commit: http://git-wip-us.apache.org/repos/asf/commons-csv/commit/aae6f904
Tree: http://git-wip-us.apache.org/repos/asf/commons-csv/tree/aae6f904
Diff: http://git-wip-us.apache.org/repos/asf/commons-csv/diff/aae6f904

Branch: refs/heads/master
Commit: aae6f90442ca09e2461e766a987b33316d9fa6be
Parents: 4d0f226
Author: Gary Gregory <ga...@gmail.com>
Authored: Fri Aug 11 15:05:27 2017 -0600
Committer: Gary Gregory <ga...@gmail.com>
Committed: Fri Aug 11 15:05:27 2017 -0600

----------------------------------------------------------------------
 src/changes/changes.xml                         |  1 +
 .../java/org/apache/commons/csv/CSVParser.java  | 10 +++++++
 src/main/java/org/apache/commons/csv/Lexer.java | 21 ++++++++++++++
 .../org/apache/commons/csv/CSVParserTest.java   | 30 ++++++++++++++++++++
 4 files changed, 62 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/commons-csv/blob/aae6f904/src/changes/changes.xml
----------------------------------------------------------------------
diff --git a/src/changes/changes.xml b/src/changes/changes.xml
index 95ad0b5..744ffc8 100644
--- a/src/changes/changes.xml
+++ b/src/changes/changes.xml
@@ -51,6 +51,7 @@
       <action issue="CSV-192" type="add" dev="ggregory" due-to="Gary Gregory">Add convenience API CSVParser.parse(Path, Charset, CSVFormat)</action>
       <action issue="CSV-205" type="add" dev="ggregory" due-to="Gary Gregory">Add convenience API CSVFormat#printer() to print to System.out</action>
       <action issue="CSV-207" type="add" dev="ggregory" due-to="Gary Gregory">Provide a CSV Format for printing PostgreSQL CSV and Text formats.</action>
+      <action issue="CSV-214" type="add" dev="ggregory" due-to="Nitin Mahendru, Gary Gregory">Adding a placeholder in the Lexer and CSV parser to store the end-of-line string.</action>
     </release>
     <release version="1.4" date="2016-05-28" description="Feature and bug fix release">
       <action issue="CSV-181" type="update" dev="ggregory" due-to="Gary Gregory">Make CSVPrinter.print(Object) GC-free.</action>

http://git-wip-us.apache.org/repos/asf/commons-csv/blob/aae6f904/src/main/java/org/apache/commons/csv/CSVParser.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/csv/CSVParser.java b/src/main/java/org/apache/commons/csv/CSVParser.java
index efc0d86..2e4d662 100644
--- a/src/main/java/org/apache/commons/csv/CSVParser.java
+++ b/src/main/java/org/apache/commons/csv/CSVParser.java
@@ -397,6 +397,16 @@ public final class CSVParser implements Iterable<CSVRecord>, Closeable {
     }
 
     /**
+     * Gets the first end-of-line string encountered.
+     * 
+     * @return the first end-of-line string, or {@code null} if no end-of-line has been read yet
+     * @since 1.5  
+     */
+    public String getFirstEndOfLine() {
+        return lexer.getFirstEol();
+    }
+
+    /**
      * Returns a copy of the header map that iterates in column order.
      * <p>
      * The map keys are column names. The map values are 0-based indices.

http://git-wip-us.apache.org/repos/asf/commons-csv/blob/aae6f904/src/main/java/org/apache/commons/csv/Lexer.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/commons/csv/Lexer.java b/src/main/java/org/apache/commons/csv/Lexer.java
index 0329c35..027e41e 100644
--- a/src/main/java/org/apache/commons/csv/Lexer.java
+++ b/src/main/java/org/apache/commons/csv/Lexer.java
@@ -40,6 +40,9 @@ import java.io.IOException;
  */
 final class Lexer implements Closeable {
 
+    private static final String CR_STRING = Character.toString(Constants.CR);
+    private static final String LF_STRING = Character.toString(Constants.LF);
+
     /**
      * Constant char to use for disabling comments, escapes and encapsulation. The value -2 is used because it
      * won't be confused with an EOF signal (-1), and because the Unicode value {@code FFFE} would be encoded as two
@@ -57,7 +60,12 @@ final class Lexer implements Closeable {
 
     /** The input stream */
     private final ExtendedBufferedReader reader;
+    private String firstEol;
 
+    String getFirstEol(){
+        return firstEol;
+    }
+    
     Lexer(final CSVFormat format, final ExtendedBufferedReader reader) {
         this.reader = reader;
         this.delimiter = format.getDelimiter();
@@ -374,7 +382,20 @@ final class Lexer implements Closeable {
         if (ch == CR && reader.lookAhead() == LF) {
             // note: does not change ch outside of this method!
             ch = reader.read();
+            // Record CRLF as the first EOL if none has been recorded yet.
+            if (firstEol == null) {
+                this.firstEol = Constants.CRLF;
+            }
         }
+        // Record a lone LF or CR as the first EOL if none has been recorded yet.
+        if (firstEol == null) {
+            if (ch == LF) {
+                this.firstEol = LF_STRING;
+            } else if (ch == CR) {
+                this.firstEol = CR_STRING;
+            }
+        }
+
         return ch == LF || ch == CR;
     }
 

http://git-wip-us.apache.org/repos/asf/commons-csv/blob/aae6f904/src/test/java/org/apache/commons/csv/CSVParserTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/commons/csv/CSVParserTest.java b/src/test/java/org/apache/commons/csv/CSVParserTest.java
index c547b0d..cffd143 100644
--- a/src/test/java/org/apache/commons/csv/CSVParserTest.java
+++ b/src/test/java/org/apache/commons/csv/CSVParserTest.java
@@ -234,6 +234,36 @@ public class CSVParserTest {
             assertEquals(4, records.size());
         }
     }
+    
+    @Test
+    public void testFirstEndOfLineCrLf() throws IOException {
+        final String data = "foo\r\nbaar,\r\nhello,world\r\n,kanu";
+        try (final CSVParser parser = CSVParser.parse(data, CSVFormat.DEFAULT)) {
+            final List<CSVRecord> records = parser.getRecords();
+            assertEquals(4, records.size());
+            assertEquals("\r\n", parser.getFirstEndOfLine());
+        }
+    }
+    
+    @Test
+    public void testFirstEndOfLineLf() throws IOException {
+        final String data = "foo\nbaar,\nhello,world\n,kanu";
+        try (final CSVParser parser = CSVParser.parse(data, CSVFormat.DEFAULT)) {
+            final List<CSVRecord> records = parser.getRecords();
+            assertEquals(4, records.size());
+            assertEquals("\n", parser.getFirstEndOfLine());
+        }
+    }
+
+    @Test
+    public void testFirstEndOfLineCr() throws IOException {
+        final String data = "foo\rbaar,\rhello,world\r,kanu";
+        try (final CSVParser parser = CSVParser.parse(data, CSVFormat.DEFAULT)) {
+            final List<CSVRecord> records = parser.getRecords();
+            assertEquals(4, records.size());
+            assertEquals("\r", parser.getFirstEndOfLine());
+        }
+    }
 
     @Test(expected = NoSuchElementException.class)
     public void testClose() throws Exception {