You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@commons.apache.org by gg...@apache.org on 2020/05/28 12:10:23 UTC
[commons-csv] branch master updated: Move resources to component
specific folder.
This is an automated email from the ASF dual-hosted git repository.
ggregory pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/commons-csv.git
The following commit(s) were added to refs/heads/master by this push:
new 914f2c4 Move resources to component specific folder.
914f2c4 is described below
commit 914f2c41557bdd3dcde8d10d6f128fa3bb307b11
Author: Gary Gregory <ga...@gmail.com>
AuthorDate: Thu May 28 08:10:18 2020 -0400
Move resources to component specific folder.
---
pom.xml | 34 +-
.../org/apache/commons/csv/CSVFileParserTest.java | 4 +-
.../java/org/apache/commons/csv/CSVParserTest.java | 14 +-
.../org/apache/commons/csv/CSVPrinterTest.java | 2 +-
.../apache/commons/csv/issues/JiraCsv167Test.java | 2 +-
.../apache/commons/csv/issues/JiraCsv198Test.java | 2 +-
.../apache/commons/csv/issues/JiraCsv248Test.java | 2 +-
.../commons/csv}/CSV-198/optd_por_public.csv | 0
.../commons/csv}/CSV-213/999751170.patch.csv | 0
.../apache/commons/csv}/CSV-248/csvRecord.bin | Bin
.../apache/commons/csv}/CSV-259/sample.txt | 0
.../apache/commons/csv}/CSVFileParser/README.txt | 0
.../apache/commons/csv}/CSVFileParser/bom.csv | 0
.../apache/commons/csv}/CSVFileParser/test.csv | 32 +-
.../commons/csv}/CSVFileParser/testCSV246.csv | 0
.../testCSV246_checkWithNoComment.txt | 0
.../commons/csv}/CSVFileParser/testCSV85.csv | 16 +-
.../csv}/CSVFileParser/testCSV85_default.txt | 0
.../csv}/CSVFileParser/testCSV85_ignoreEmpty.txt | 0
.../commons/csv}/CSVFileParser/test_default.txt | 0
.../csv}/CSVFileParser/test_default_comment.txt | 0
.../commons/csv}/CSVFileParser/test_rfc4180.txt | 0
.../csv}/CSVFileParser/test_rfc4180_trim.txt | 0
.../org/apache/commons/csv/CSVFormat.java | 2330 ++++++++++++++++++++
.../org/apache/commons/csv/CSVParser.java | 715 ++++++
.../org/apache/commons/csv/CSVPrinter.java | 391 ++++
.../org/apache/commons/csv/CSVRecord.java | 329 +++
.../org/apache/commons/csv/Constants.java | 82 +
.../apache/commons/csv/ExtendedBufferedReader.java | 191 ++
.../resources/org/apache/commons/csv/IOUtils.java | 139 ++
.../resources/org/apache/commons/csv/Lexer.java | 461 ++++
.../org/apache/commons/csv/QuoteMode.java | 50 +
.../resources/org/apache/commons/csv/Token.java | 73 +
.../apache/commons/csv}/csv-167/sample1.csv | 0
.../org/apache/commons/csv/package-info.java | 82 +
.../apache/commons/csv}/perf/worldcitiespop.txt.gz | Bin
36 files changed, 4899 insertions(+), 52 deletions(-)
diff --git a/pom.xml b/pom.xml
index f04501c..bd5ec20 100644
--- a/pom.xml
+++ b/pom.xml
@@ -240,25 +240,25 @@
<configuration>
<excludes>
<!-- These files are used as test data and test result specifications. -->
- <exclude>src/test/resources/csv-167/sample1.csv</exclude>
- <exclude>src/test/resources/CSV-198/optd_por_public.csv</exclude>
- <exclude>src/test/resources/CSV-213/999751170.patch.csv</exclude>
- <exclude>src/test/resources/CSVFileParser/bom.csv</exclude>
- <exclude>src/test/resources/CSVFileParser/test.csv</exclude>
- <exclude>src/test/resources/CSVFileParser/test_default.txt</exclude>
- <exclude>src/test/resources/CSVFileParser/test_default_comment.txt</exclude>
- <exclude>src/test/resources/CSVFileParser/test_rfc4180.txt</exclude>
- <exclude>src/test/resources/CSVFileParser/test_rfc4180_trim.txt</exclude>
- <exclude>src/test/resources/CSVFileParser/testCSV85.csv</exclude>
- <exclude>src/test/resources/CSVFileParser/testCSV85_default.txt</exclude>
- <exclude>src/test/resources/CSVFileParser/testCSV85_ignoreEmpty.txt</exclude>
+ <exclude>src/test/resources/org/apache/commons/csv/csv-167/sample1.csv</exclude>
+ <exclude>src/test/resources/org/apache/commons/csv/CSV-198/optd_por_public.csv</exclude>
+ <exclude>src/test/resources/org/apache/commons/csv/CSV-213/999751170.patch.csv</exclude>
+ <exclude>src/test/resources/org/apache/commons/csv/CSVFileParser/bom.csv</exclude>
+ <exclude>src/test/resources/org/apache/commons/csv/CSVFileParser/test.csv</exclude>
+ <exclude>src/test/resources/org/apache/commons/csv/CSVFileParser/test_default.txt</exclude>
+ <exclude>src/test/resources/org/apache/commons/csv/CSVFileParser/test_default_comment.txt</exclude>
+ <exclude>src/test/resources/org/apache/commons/csv/CSVFileParser/test_rfc4180.txt</exclude>
+ <exclude>src/test/resources/org/apache/commons/csv/CSVFileParser/test_rfc4180_trim.txt</exclude>
+ <exclude>src/test/resources/org/apache/commons/csv/CSVFileParser/testCSV85.csv</exclude>
+ <exclude>src/test/resources/org/apache/commons/csv/CSVFileParser/testCSV85_default.txt</exclude>
+ <exclude>src/test/resources/org/apache/commons/csv/CSVFileParser/testCSV85_ignoreEmpty.txt</exclude>
<!-- The ferc.gov files are included discussion in https://issues.apache.org/jira/browse/LEGAL-175. -->
- <exclude>src/test/resources/ferc.gov/contract.txt</exclude>
- <exclude>src/test/resources/ferc.gov/transaction.txt</exclude>
+ <exclude>src/test/resources/org/apache/commons/csv/ferc.gov/contract.txt</exclude>
+ <exclude>src/test/resources/org/apache/commons/csv/ferc.gov/transaction.txt</exclude>
<exclude>src/test/resources/**/*.bin</exclude>
- <exclude>src/test/resources/CSV-259/sample.txt</exclude>
- <exclude>src/test/resources/CSVFileParser/testCSV246.csv</exclude>
- <exclude>src/test/resources/CSVFileParser/testCSV246_checkWithNoComment.txt</exclude>
+ <exclude>src/test/resources/org/apache/commons/csv/CSV-259/sample.txt</exclude>
+ <exclude>src/test/resources/org/apache/commons/csv/CSVFileParser/testCSV246.csv</exclude>
+ <exclude>src/test/resources/org/apache/commons/csv/CSVFileParser/testCSV246_checkWithNoComment.txt</exclude>
</excludes>
</configuration>
</plugin>
diff --git a/src/test/java/org/apache/commons/csv/CSVFileParserTest.java b/src/test/java/org/apache/commons/csv/CSVFileParserTest.java
index 413ef61..2628f52 100644
--- a/src/test/java/org/apache/commons/csv/CSVFileParserTest.java
+++ b/src/test/java/org/apache/commons/csv/CSVFileParserTest.java
@@ -40,7 +40,7 @@ import org.junit.jupiter.params.provider.MethodSource;
*/
public class CSVFileParserTest {
- private static final File BASE = new File("src/test/resources/CSVFileParser");
+ private static final File BASE = new File("src/test/resources/org/apache/commons/csv/CSVFileParser");
private String readTestData(final BufferedReader reader) throws IOException {
String line;
@@ -134,7 +134,7 @@ public class CSVFileParserTest {
assertEquals(line, format.toString(), testFile.getName() + " Expected format ");
// Now parse the file and compare against the expected results
- final URL resource = ClassLoader.getSystemResource("CSVFileParser/" + split[0]);
+ final URL resource = ClassLoader.getSystemResource("org/apache/commons/csv/CSVFileParser/" + split[0]);
try (final CSVParser parser = CSVParser.parse(resource, Charset.forName("UTF-8"), format)) {
for (final CSVRecord record : parser) {
String parsed = Arrays.toString(record.values());
diff --git a/src/test/java/org/apache/commons/csv/CSVParserTest.java b/src/test/java/org/apache/commons/csv/CSVParserTest.java
index 87e8a5a..d058cc4 100644
--- a/src/test/java/org/apache/commons/csv/CSVParserTest.java
+++ b/src/test/java/org/apache/commons/csv/CSVParserTest.java
@@ -178,7 +178,7 @@ public class CSVParserTest {
@Test
@Disabled("CSV-107")
public void testBOM() throws IOException {
- final URL url = ClassLoader.getSystemClassLoader().getResource("CSVFileParser/bom.csv");
+ final URL url = ClassLoader.getSystemClassLoader().getResource("org/apache/commons/csv/CSVFileParser/bom.csv");
try (final CSVParser parser = CSVParser.parse(url, Charset.forName(UTF_8_NAME), CSVFormat.EXCEL.withHeader())) {
for (final CSVRecord record : parser) {
final String string = record.get("Date");
@@ -190,7 +190,7 @@ public class CSVParserTest {
@Test
public void testBOMInputStream_ParserWithInputStream() throws IOException {
- try (final BOMInputStream inputStream = createBOMInputStream("CSVFileParser/bom.csv");
+ try (final BOMInputStream inputStream = createBOMInputStream("org/apache/commons/csv/CSVFileParser/bom.csv");
final CSVParser parser = CSVParser.parse(inputStream, UTF_8, CSVFormat.EXCEL.withHeader())) {
for (final CSVRecord record : parser) {
final String string = record.get("Date");
@@ -202,7 +202,9 @@ public class CSVParserTest {
@Test
public void testBOMInputStream_ParserWithReader() throws IOException {
- try (final Reader reader = new InputStreamReader(createBOMInputStream("CSVFileParser/bom.csv"), UTF_8_NAME);
+ try (
+ final Reader reader = new InputStreamReader(
+ createBOMInputStream("org/apache/commons/csv/CSVFileParser/bom.csv"), UTF_8_NAME);
final CSVParser parser = new CSVParser(reader, CSVFormat.EXCEL.withHeader())) {
for (final CSVRecord record : parser) {
final String string = record.get("Date");
@@ -214,7 +216,9 @@ public class CSVParserTest {
@Test
public void testBOMInputStream_parseWithReader() throws IOException {
- try (final Reader reader = new InputStreamReader(createBOMInputStream("CSVFileParser/bom.csv"), UTF_8_NAME);
+ try (
+ final Reader reader = new InputStreamReader(
+ createBOMInputStream("org/apache/commons/csv/CSVFileParser/bom.csv"), UTF_8_NAME);
final CSVParser parser = CSVParser.parse(reader, CSVFormat.EXCEL.withHeader())) {
for (final CSVRecord record : parser) {
final String string = record.get("Date");
@@ -941,7 +945,7 @@ public class CSVParserTest {
@Test
public void testParse() throws Exception {
final ClassLoader loader = ClassLoader.getSystemClassLoader();
- final URL url = loader.getResource("CSVFileParser/test.csv");
+ final URL url = loader.getResource("org/apache/commons/csv/CSVFileParser/test.csv");
final CSVFormat format = CSVFormat.DEFAULT.withHeader("A", "B", "C", "D");
final Charset charset = StandardCharsets.UTF_8;
diff --git a/src/test/java/org/apache/commons/csv/CSVPrinterTest.java b/src/test/java/org/apache/commons/csv/CSVPrinterTest.java
index d3c56ee..c7e305e 100644
--- a/src/test/java/org/apache/commons/csv/CSVPrinterTest.java
+++ b/src/test/java/org/apache/commons/csv/CSVPrinterTest.java
@@ -315,7 +315,7 @@ public class CSVPrinterTest {
@Test
public void testCSV259() throws IOException {
final StringWriter sw = new StringWriter();
- final Reader reader = new FileReader("src/test/resources/CSV-259/sample.txt");
+ final Reader reader = new FileReader("src/test/resources/org/apache/commons/csv/CSV-259/sample.txt");
try (final CSVPrinter printer = new CSVPrinter(sw, CSVFormat.DEFAULT.withEscape('!').withQuote(null))) {
printer.print(reader);
assertEquals("x!,y!,z", sw.toString());
diff --git a/src/test/java/org/apache/commons/csv/issues/JiraCsv167Test.java b/src/test/java/org/apache/commons/csv/issues/JiraCsv167Test.java
index 0e6e49f..5b8a20e 100644
--- a/src/test/java/org/apache/commons/csv/issues/JiraCsv167Test.java
+++ b/src/test/java/org/apache/commons/csv/issues/JiraCsv167Test.java
@@ -83,7 +83,7 @@ public class JiraCsv167Test {
}
private Reader getTestInput() {
- final InputStream is = ClassLoader.getSystemClassLoader().getResourceAsStream("csv-167/sample1.csv");
+ final InputStream is = ClassLoader.getSystemClassLoader().getResourceAsStream("org/apache/commons/csv/csv-167/sample1.csv");
return new InputStreamReader(is);
}
}
diff --git a/src/test/java/org/apache/commons/csv/issues/JiraCsv198Test.java b/src/test/java/org/apache/commons/csv/issues/JiraCsv198Test.java
index f97c48d..307610a 100644
--- a/src/test/java/org/apache/commons/csv/issues/JiraCsv198Test.java
+++ b/src/test/java/org/apache/commons/csv/issues/JiraCsv198Test.java
@@ -34,7 +34,7 @@ public class JiraCsv198Test {
@Test
public void test() throws UnsupportedEncodingException, IOException {
- final InputStream pointsOfReference = getClass().getResourceAsStream("/CSV-198/optd_por_public.csv");
+ final InputStream pointsOfReference = getClass().getResourceAsStream("/org/apache/commons/csv/CSV-198/optd_por_public.csv");
assertNotNull(pointsOfReference);
try (@SuppressWarnings("resource")
CSVParser parser = CSV_FORMAT.parse(new InputStreamReader(pointsOfReference, "UTF-8"))) {
diff --git a/src/test/java/org/apache/commons/csv/issues/JiraCsv248Test.java b/src/test/java/org/apache/commons/csv/issues/JiraCsv248Test.java
index bf640b6..1005e83 100644
--- a/src/test/java/org/apache/commons/csv/issues/JiraCsv248Test.java
+++ b/src/test/java/org/apache/commons/csv/issues/JiraCsv248Test.java
@@ -75,6 +75,6 @@ public class JiraCsv248Test {
}
private static InputStream getTestInput() {
- return ClassLoader.getSystemClassLoader().getResourceAsStream("CSV-248/csvRecord.bin");
+ return ClassLoader.getSystemClassLoader().getResourceAsStream("org/apache/commons/csv/CSV-248/csvRecord.bin");
}
}
diff --git a/src/test/resources/CSV-198/optd_por_public.csv b/src/test/resources/org/apache/commons/csv/CSV-198/optd_por_public.csv
similarity index 100%
rename from src/test/resources/CSV-198/optd_por_public.csv
rename to src/test/resources/org/apache/commons/csv/CSV-198/optd_por_public.csv
diff --git a/src/test/resources/CSV-213/999751170.patch.csv b/src/test/resources/org/apache/commons/csv/CSV-213/999751170.patch.csv
similarity index 100%
rename from src/test/resources/CSV-213/999751170.patch.csv
rename to src/test/resources/org/apache/commons/csv/CSV-213/999751170.patch.csv
diff --git a/src/test/resources/CSV-248/csvRecord.bin b/src/test/resources/org/apache/commons/csv/CSV-248/csvRecord.bin
similarity index 100%
rename from src/test/resources/CSV-248/csvRecord.bin
rename to src/test/resources/org/apache/commons/csv/CSV-248/csvRecord.bin
diff --git a/src/test/resources/CSV-259/sample.txt b/src/test/resources/org/apache/commons/csv/CSV-259/sample.txt
similarity index 100%
rename from src/test/resources/CSV-259/sample.txt
rename to src/test/resources/org/apache/commons/csv/CSV-259/sample.txt
diff --git a/src/test/resources/CSVFileParser/README.txt b/src/test/resources/org/apache/commons/csv/CSVFileParser/README.txt
similarity index 100%
rename from src/test/resources/CSVFileParser/README.txt
rename to src/test/resources/org/apache/commons/csv/CSVFileParser/README.txt
diff --git a/src/test/resources/CSVFileParser/bom.csv b/src/test/resources/org/apache/commons/csv/CSVFileParser/bom.csv
similarity index 100%
rename from src/test/resources/CSVFileParser/bom.csv
rename to src/test/resources/org/apache/commons/csv/CSVFileParser/bom.csv
diff --git a/src/test/resources/CSVFileParser/test.csv b/src/test/resources/org/apache/commons/csv/CSVFileParser/test.csv
similarity index 93%
rename from src/test/resources/CSVFileParser/test.csv
rename to src/test/resources/org/apache/commons/csv/CSVFileParser/test.csv
index ebdb952..93101ed 100644
--- a/src/test/resources/CSVFileParser/test.csv
+++ b/src/test/resources/org/apache/commons/csv/CSVFileParser/test.csv
@@ -1,16 +1,16 @@
-A,B,C,"D"
-# plain values
-a,b,c,d
-# spaces before and after
- e ,f , g,h
-# quoted: with spaces before and after
-" i ", " j " , " k "," l "
-# empty values
-,,,
-# empty quoted values
-"","","",""
-# 3 empty lines
-
-
-
-# EOF on next line
+A,B,C,"D"
+# plain values
+a,b,c,d
+# spaces before and after
+ e ,f , g,h
+# quoted: with spaces before and after
+" i ", " j " , " k "," l "
+# empty values
+,,,
+# empty quoted values
+"","","",""
+# 3 empty lines
+
+
+
+# EOF on next line
diff --git a/src/test/resources/CSVFileParser/testCSV246.csv b/src/test/resources/org/apache/commons/csv/CSVFileParser/testCSV246.csv
similarity index 100%
rename from src/test/resources/CSVFileParser/testCSV246.csv
rename to src/test/resources/org/apache/commons/csv/CSVFileParser/testCSV246.csv
diff --git a/src/test/resources/CSVFileParser/testCSV246_checkWithNoComment.txt b/src/test/resources/org/apache/commons/csv/CSVFileParser/testCSV246_checkWithNoComment.txt
similarity index 100%
rename from src/test/resources/CSVFileParser/testCSV246_checkWithNoComment.txt
rename to src/test/resources/org/apache/commons/csv/CSVFileParser/testCSV246_checkWithNoComment.txt
diff --git a/src/test/resources/CSVFileParser/testCSV85.csv b/src/test/resources/org/apache/commons/csv/CSVFileParser/testCSV85.csv
similarity index 82%
rename from src/test/resources/CSVFileParser/testCSV85.csv
rename to src/test/resources/org/apache/commons/csv/CSVFileParser/testCSV85.csv
index b1baab3..69bb80e 100644
--- a/src/test/resources/CSVFileParser/testCSV85.csv
+++ b/src/test/resources/org/apache/commons/csv/CSVFileParser/testCSV85.csv
@@ -1,9 +1,9 @@
-# Comment 1
-a,b,c,e,f
-# Very Long
-# Comment 2
-g,h,i,j,k
-# Very Long
-
-# Comment 3
+# Comment 1
+a,b,c,e,f
+# Very Long
+# Comment 2
+g,h,i,j,k
+# Very Long
+
+# Comment 3
l,m,n,o,p
\ No newline at end of file
diff --git a/src/test/resources/CSVFileParser/testCSV85_default.txt b/src/test/resources/org/apache/commons/csv/CSVFileParser/testCSV85_default.txt
similarity index 100%
rename from src/test/resources/CSVFileParser/testCSV85_default.txt
rename to src/test/resources/org/apache/commons/csv/CSVFileParser/testCSV85_default.txt
diff --git a/src/test/resources/CSVFileParser/testCSV85_ignoreEmpty.txt b/src/test/resources/org/apache/commons/csv/CSVFileParser/testCSV85_ignoreEmpty.txt
similarity index 100%
rename from src/test/resources/CSVFileParser/testCSV85_ignoreEmpty.txt
rename to src/test/resources/org/apache/commons/csv/CSVFileParser/testCSV85_ignoreEmpty.txt
diff --git a/src/test/resources/CSVFileParser/test_default.txt b/src/test/resources/org/apache/commons/csv/CSVFileParser/test_default.txt
similarity index 100%
rename from src/test/resources/CSVFileParser/test_default.txt
rename to src/test/resources/org/apache/commons/csv/CSVFileParser/test_default.txt
diff --git a/src/test/resources/CSVFileParser/test_default_comment.txt b/src/test/resources/org/apache/commons/csv/CSVFileParser/test_default_comment.txt
similarity index 100%
rename from src/test/resources/CSVFileParser/test_default_comment.txt
rename to src/test/resources/org/apache/commons/csv/CSVFileParser/test_default_comment.txt
diff --git a/src/test/resources/CSVFileParser/test_rfc4180.txt b/src/test/resources/org/apache/commons/csv/CSVFileParser/test_rfc4180.txt
similarity index 100%
rename from src/test/resources/CSVFileParser/test_rfc4180.txt
rename to src/test/resources/org/apache/commons/csv/CSVFileParser/test_rfc4180.txt
diff --git a/src/test/resources/CSVFileParser/test_rfc4180_trim.txt b/src/test/resources/org/apache/commons/csv/CSVFileParser/test_rfc4180_trim.txt
similarity index 100%
rename from src/test/resources/CSVFileParser/test_rfc4180_trim.txt
rename to src/test/resources/org/apache/commons/csv/CSVFileParser/test_rfc4180_trim.txt
diff --git a/src/test/resources/org/apache/commons/csv/CSVFormat.java b/src/test/resources/org/apache/commons/csv/CSVFormat.java
new file mode 100644
index 0000000..c00f993
--- /dev/null
+++ b/src/test/resources/org/apache/commons/csv/CSVFormat.java
@@ -0,0 +1,2330 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.commons.csv;
+
+import static org.apache.commons.csv.Constants.BACKSLASH;
+import static org.apache.commons.csv.Constants.COMMA;
+import static org.apache.commons.csv.Constants.COMMENT;
+import static org.apache.commons.csv.Constants.CR;
+import static org.apache.commons.csv.Constants.CRLF;
+import static org.apache.commons.csv.Constants.DOUBLE_QUOTE_CHAR;
+import static org.apache.commons.csv.Constants.EMPTY;
+import static org.apache.commons.csv.Constants.LF;
+import static org.apache.commons.csv.Constants.PIPE;
+import static org.apache.commons.csv.Constants.SP;
+import static org.apache.commons.csv.Constants.TAB;
+
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.OutputStreamWriter;
+import java.io.Reader;
+import java.io.Serializable;
+import java.io.StringWriter;
+import java.io.Writer;
+import java.nio.charset.Charset;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.sql.ResultSet;
+import java.sql.ResultSetMetaData;
+import java.sql.SQLException;
+import java.util.Arrays;
+import java.util.HashSet;
+import java.util.Set;
+
+/**
+ * Specifies the format of a CSV file and parses input.
+ *
+ * <h2>Using predefined formats</h2>
+ *
+ * <p>
+ * You can use one of the predefined formats:
+ * </p>
+ *
+ * <ul>
+ * <li>{@link #DEFAULT}</li>
+ * <li>{@link #EXCEL}</li>
+ * <li>{@link #INFORMIX_UNLOAD}</li>
+ * <li>{@link #INFORMIX_UNLOAD_CSV}</li>
+ * <li>{@link #MYSQL}</li>
+ * <li>{@link #RFC4180}</li>
+ * <li>{@link #ORACLE}</li>
+ * <li>{@link #POSTGRESQL_CSV}</li>
+ * <li>{@link #POSTGRESQL_TEXT}</li>
+ * <li>{@link #TDF}</li>
+ * </ul>
+ *
+ * <p>
+ * For example:
+ * </p>
+ *
+ * <pre>
+ * CSVParser parser = CSVFormat.EXCEL.parse(reader);
+ * </pre>
+ *
+ * <p>
+ * The {@link CSVParser} provides static methods to parse other input types, for example:
+ * </p>
+ *
+ * <pre>
+ * CSVParser parser = CSVParser.parse(file, StandardCharsets.US_ASCII, CSVFormat.EXCEL);
+ * </pre>
+ *
+ * <h2>Defining formats</h2>
+ *
+ * <p>
+ * You can extend a format by calling the {@code with} methods. For example:
+ * </p>
+ *
+ * <pre>
+ * CSVFormat.EXCEL.withNullString("N/A").withIgnoreSurroundingSpaces(true);
+ * </pre>
+ *
+ * <h2>Defining column names</h2>
+ *
+ * <p>
+ * To define the column names you want to use to access records, write:
+ * </p>
+ *
+ * <pre>
+ * CSVFormat.EXCEL.withHeader("Col1", "Col2", "Col3");
+ * </pre>
+ *
+ * <p>
+ * Calling {@link #withHeader(String...)} lets you use the given names to address values in a {@link CSVRecord}, and
+ * assumes that your CSV source does not contain a first record that also defines column names.
+ *
+ * If it does, then you are overriding this metadata with your names and you should skip the first record by calling
+ * {@link #withSkipHeaderRecord(boolean)} with {@code true}.
+ * </p>
+ *
+ * <h2>Parsing</h2>
+ *
+ * <p>
+ * You can use a format directly to parse a reader. For example, to parse an Excel file with columns header, write:
+ * </p>
+ *
+ * <pre>
+ * Reader in = ...;
+ * CSVFormat.EXCEL.withHeader("Col1", "Col2", "Col3").parse(in);
+ * </pre>
+ *
+ * <p>
+ * For other input types, like resources, files, and URLs, use the static methods on {@link CSVParser}.
+ * </p>
+ *
+ * <h2>Referencing columns safely</h2>
+ *
+ * <p>
+ * If your source contains a header record, you can simplify your code and safely reference columns, by using
+ * {@link #withHeader(String...)} with no arguments:
+ * </p>
+ *
+ * <pre>
+ * CSVFormat.EXCEL.withHeader();
+ * </pre>
+ *
+ * <p>
+ * This causes the parser to read the first record and use its values as column names.
+ *
+ * Then, call one of the {@link CSVRecord} get methods that take a String column name argument:
+ * </p>
+ *
+ * <pre>
+ * String value = record.get("Col1");
+ * </pre>
+ *
+ * <p>
+ * This makes your code impervious to changes in column order in the CSV file.
+ * </p>
+ *
+ * <h2>Notes</h2>
+ *
+ * <p>
+ * This class is immutable.
+ * </p>
+ */
+public final class CSVFormat implements Serializable {
+
+ /**
+ * Predefines formats.
+ *
+ * @since 1.2
+ */
+ public enum Predefined {
+
+ /**
+ * @see CSVFormat#DEFAULT
+ */
+ Default(CSVFormat.DEFAULT),
+
+ /**
+ * @see CSVFormat#EXCEL
+ */
+ Excel(CSVFormat.EXCEL),
+
+ /**
+ * @see CSVFormat#INFORMIX_UNLOAD
+ * @since 1.3
+ */
+ InformixUnload(CSVFormat.INFORMIX_UNLOAD),
+
+ /**
+ * @see CSVFormat#INFORMIX_UNLOAD_CSV
+ * @since 1.3
+ */
+ InformixUnloadCsv(CSVFormat.INFORMIX_UNLOAD_CSV),
+
+ /**
+ * @see CSVFormat#MONGODB_CSV
+ * @since 1.7
+ */
+ MongoDBCsv(CSVFormat.MONGODB_CSV),
+
+ /**
+ * @see CSVFormat#MONGODB_TSV
+ * @since 1.7
+ */
+ MongoDBTsv(CSVFormat.MONGODB_TSV),
+
+ /**
+ * @see CSVFormat#MYSQL
+ */
+ MySQL(CSVFormat.MYSQL),
+
+ /**
+ * @see CSVFormat#ORACLE
+ */
+ Oracle(CSVFormat.ORACLE),
+
+ /**
+ * @see CSVFormat#POSTGRESQL_CSV
+ * @since 1.5
+ */
+ PostgreSQLCsv(CSVFormat.POSTGRESQL_CSV),
+
+ /**
+ * @see CSVFormat#POSTGRESQL_TEXT
+ */
+ PostgreSQLText(CSVFormat.POSTGRESQL_TEXT),
+
+ /**
+ * @see CSVFormat#RFC4180
+ */
+ RFC4180(CSVFormat.RFC4180),
+
+ /**
+ * @see CSVFormat#TDF
+ */
+ TDF(CSVFormat.TDF);
+
+ private final CSVFormat format;
+
+ Predefined(final CSVFormat format) {
+ this.format = format;
+ }
+
+ /**
+ * Gets the format.
+ *
+ * @return the format.
+ */
+ public CSVFormat getFormat() {
+ return format;
+ }
+ }
+
+ /**
+ * Standard Comma Separated Value format, as for {@link #RFC4180} but allowing empty lines.
+ *
+ * <p>
+ * Settings are:
+ * </p>
+ * <ul>
+ * <li>{@code withDelimiter(',')}</li>
+ * <li>{@code withQuote('"')}</li>
+ * <li>{@code withRecordSeparator("\r\n")}</li>
+ * <li>{@code withIgnoreEmptyLines(true)}</li>
+ * <li>{@code withAllowDuplicateHeaderNames(true)}</li>
+ * </ul>
+ *
+ * @see Predefined#Default
+ */
+ public static final CSVFormat DEFAULT = new CSVFormat(COMMA, DOUBLE_QUOTE_CHAR, null, null, null, false, true, CRLF,
+ null, null, null, false, false, false, false, false, false, true);
+
+ /**
+ * Excel file format (using a comma as the value delimiter). Note that the actual value delimiter used by Excel is
+ * locale dependent, it might be necessary to customize this format to accommodate to your regional settings.
+ *
+ * <p>
+ * For example for parsing or generating a CSV file on a French system the following format will be used:
+ * </p>
+ *
+ * <pre>
+ * CSVFormat fmt = CSVFormat.EXCEL.withDelimiter(';');
+ * </pre>
+ *
+ * <p>
+ * Settings are:
+ * </p>
+ * <ul>
+ * <li>{@code withDelimiter(',')}</li>
+ * <li>{@code withQuote('"')}</li>
+ * <li>{@code withRecordSeparator("\r\n")}</li>
+ * <li>{@code withIgnoreEmptyLines(false)}</li>
+ * <li>{@code withAllowMissingColumnNames(true)}</li>
+ * <li>{@code withAllowDuplicateHeaderNames(true)}</li>
+ * </ul>
+ * <p>
+ * Note: This is currently like {@link #RFC4180} plus {@link #withAllowMissingColumnNames(boolean)
+ * withAllowMissingColumnNames(true)} and {@link #withIgnoreEmptyLines(boolean) withIgnoreEmptyLines(false)}.
+ * </p>
+ *
+ * @see Predefined#Excel
+ */
+ // @formatter:off
+ public static final CSVFormat EXCEL = DEFAULT
+ .withIgnoreEmptyLines(false)
+ .withAllowMissingColumnNames();
+ // @formatter:on
+
+ /**
+ * Default Informix CSV UNLOAD format used by the {@code UNLOAD TO file_name} operation.
+ *
+ * <p>
+ * This is a pipe-delimited format with a LF character as the line separator. Values are not quoted and special
+ * characters are escaped with {@code '\'}. The default NULL string is {@code "\\N"}.
+ * </p>
+ *
+ * <p>
+ * Settings are:
+ * </p>
+ * <ul>
+ * <li>{@code withDelimiter('|')}</li>
+ * <li>{@code withEscape('\\')}</li>
+ * <li>{@code withQuote('"')}</li>
+ * <li>{@code withRecordSeparator('\n')}</li>
+ * </ul>
+ *
+ * @see Predefined#InformixUnload
+ * @see <a href=
+ * "http://www.ibm.com/support/knowledgecenter/SSBJG3_2.5.0/com.ibm.gen_busug.doc/c_fgl_InOutSql_UNLOAD.htm">
+ * http://www.ibm.com/support/knowledgecenter/SSBJG3_2.5.0/com.ibm.gen_busug.doc/c_fgl_InOutSql_UNLOAD.htm</a>
+ * @since 1.3
+ */
+ // @formatter:off
+ public static final CSVFormat INFORMIX_UNLOAD = DEFAULT
+ .withDelimiter(PIPE)
+ .withEscape(BACKSLASH)
+ .withQuote(DOUBLE_QUOTE_CHAR)
+ .withRecordSeparator(LF);
+ // @formatter:on
+
+ /**
+ * Default Informix CSV UNLOAD format used by the {@code UNLOAD TO file_name} operation (escaping is disabled.)
+ *
+ * <p>
+ * This is a comma-delimited format with a LF character as the line separator. Values are not quoted and special
+ * characters are escaped with {@code '\'}. The default NULL string is {@code "\\N"}.
+ * </p>
+ *
+ * <p>
+ * Settings are:
+ * </p>
+ * <ul>
+ * <li>{@code withDelimiter(',')}</li>
+ * <li>{@code withQuote('"')}</li>
+ * <li>{@code withRecordSeparator('\n')}</li>
+ * </ul>
+ *
+ * @see Predefined#InformixUnloadCsv
+ * @see <a href=
+ * "http://www.ibm.com/support/knowledgecenter/SSBJG3_2.5.0/com.ibm.gen_busug.doc/c_fgl_InOutSql_UNLOAD.htm">
+ * http://www.ibm.com/support/knowledgecenter/SSBJG3_2.5.0/com.ibm.gen_busug.doc/c_fgl_InOutSql_UNLOAD.htm</a>
+ * @since 1.3
+ */
+ // @formatter:off
+ public static final CSVFormat INFORMIX_UNLOAD_CSV = DEFAULT
+ .withDelimiter(COMMA)
+ .withQuote(DOUBLE_QUOTE_CHAR)
+ .withRecordSeparator(LF);
+ // @formatter:on
+
+ /**
+ * Default MongoDB CSV format used by the {@code mongoexport} operation.
+ * <p>
+ * <b>Parsing is not supported yet.</b>
+ * </p>
+ *
+ * <p>
+ * This is a comma-delimited format. Values are double quoted only if needed and special characters are escaped with
+ * {@code '"'}. A header line with field names is expected.
+ * </p>
+ *
+ * <p>
+ * Settings are:
+ * </p>
+ * <ul>
+ * <li>{@code withDelimiter(',')}</li>
+ * <li>{@code withEscape('"')}</li>
+ * <li>{@code withQuote('"')}</li>
+ * <li>{@code withQuoteMode(QuoteMode.MINIMAL)}</li>
+ * <li>{@code withSkipHeaderRecord(false)}</li>
+ * </ul>
+ *
+ * @see Predefined#MongoDBCsv
+ * @see <a href="https://docs.mongodb.com/manual/reference/program/mongoexport/">MongoDB mongoexport command
+ * documentation</a>
+ * @since 1.7
+ */
+ // @formatter:off
+ public static final CSVFormat MONGODB_CSV = DEFAULT
+ .withDelimiter(COMMA)
+ .withEscape(DOUBLE_QUOTE_CHAR)
+ .withQuote(DOUBLE_QUOTE_CHAR)
+ .withQuoteMode(QuoteMode.MINIMAL)
+ .withSkipHeaderRecord(false);
+ // @formatter:on
+
+ /**
+ * Default MongoDB TSV format used by the {@code mongoexport} operation.
+ * <p>
+ * <b>Parsing is not supported yet.</b>
+ * </p>
+ *
+ * <p>
+ * This is a tab-delimited format. Values are double quoted only if needed and special
+ * characters are escaped with {@code '"'}. A header line with field names is expected.
+ * </p>
+ *
+ * <p>
+ * Settings are:
+ * </p>
+ * <ul>
+ * <li>{@code withDelimiter('\t')}</li>
+ * <li>{@code withEscape('"')}</li>
+ * <li>{@code withQuote('"')}</li>
+ * <li>{@code withQuoteMode(QuoteMode.MINIMAL)}</li>
+ * <li>{@code withSkipHeaderRecord(false)}</li>
+ * </ul>
+ *
+ * @see Predefined#MongoDBTsv
+ * @see <a href="https://docs.mongodb.com/manual/reference/program/mongoexport/">MongoDB mongoexport command
+ * documentation</a>
+ * @since 1.7
+ */
+ // @formatter:off
+ public static final CSVFormat MONGODB_TSV = DEFAULT
+ .withDelimiter(TAB)
+ .withEscape(DOUBLE_QUOTE_CHAR)
+ .withQuote(DOUBLE_QUOTE_CHAR)
+ .withQuoteMode(QuoteMode.MINIMAL)
+ .withSkipHeaderRecord(false);
+ // @formatter:on
+
+ /**
+ * Default MySQL format used by the {@code SELECT INTO OUTFILE} and {@code LOAD DATA INFILE} operations.
+ *
+ * <p>
+ * This is a tab-delimited format with a LF character as the line separator. Values are not quoted and special
+ * characters are escaped with {@code '\'}. The default NULL string is {@code "\\N"}.
+ * </p>
+ *
+ * <p>
+ * Settings are:
+ * </p>
+ * <ul>
+ * <li>{@code withDelimiter('\t')}</li>
+ * <li>{@code withEscape('\\')}</li>
+ * <li>{@code withIgnoreEmptyLines(false)}</li>
+ * <li>{@code withQuote(null)}</li>
+ * <li>{@code withRecordSeparator('\n')}</li>
+ * <li>{@code withNullString("\\N")}</li>
+ * <li>{@code withQuoteMode(QuoteMode.ALL_NON_NULL)}</li>
+ * </ul>
+ *
+ * @see Predefined#MySQL
+ * @see <a href="http://dev.mysql.com/doc/refman/5.1/en/load-data.html">MySQL LOAD DATA
+ * documentation</a>
+ */
+ // @formatter:off
+ public static final CSVFormat MYSQL = DEFAULT
+ .withDelimiter(TAB)
+ .withEscape(BACKSLASH)
+ .withIgnoreEmptyLines(false)
+ .withQuote(null)
+ .withRecordSeparator(LF)
+ .withNullString("\\N")
+ .withQuoteMode(QuoteMode.ALL_NON_NULL);
+ // @formatter:on
+
+ /**
+ * Default Oracle format used by the SQL*Loader utility.
+ *
+ * <p>
+ * This is a comma-delimited format with the system line separator character as the record separator. Values are
+ * double quoted when needed and special characters are escaped with {@code '\'}. The default NULL string is
+ * {@code "\\N"}. Values are trimmed.
+ * </p>
+ *
+ * <p>
+ * Settings are:
+ * </p>
+ * <ul>
+ * <li>{@code withDelimiter(',')} (the SQL*Loader default is {@code FIELDS TERMINATED BY ','})</li>
+ * <li>{@code withEscape('\\')}</li>
+ * <li>{@code withIgnoreEmptyLines(false)}</li>
+ * <li>{@code withQuote('"')} (the SQL*Loader default is {@code OPTIONALLY ENCLOSED BY '"'})</li>
+ * <li>{@code withNullString("\\N")}</li>
+ * <li>{@code withTrim()}</li>
+ * <li>{@code withSystemRecordSeparator()}</li>
+ * <li>{@code withQuoteMode(QuoteMode.MINIMAL)}</li>
+ * </ul>
+ *
+ * @see Predefined#Oracle
+ * @see <a href="https://s.apache.org/CGXG">Oracle CSV Format Specification</a>
+ * @since 1.6
+ */
+ // @formatter:off
+ public static final CSVFormat ORACLE = DEFAULT
+ .withDelimiter(COMMA)
+ .withEscape(BACKSLASH)
+ .withIgnoreEmptyLines(false)
+ .withQuote(DOUBLE_QUOTE_CHAR)
+ .withNullString("\\N")
+ .withTrim()
+ .withSystemRecordSeparator()
+ .withQuoteMode(QuoteMode.MINIMAL);
+ // @formatter:on
+
+ /**
+ * Default PostgreSQL CSV format used by the {@code COPY} operation.
+ *
+ * <p>
+ * This is a comma-delimited format with a LF character as the line separator. Values are double quoted and special
+ * characters are escaped with {@code '"'}. The default NULL string is {@code ""}.
+ * </p>
+ *
+ * <p>
+ * Settings are:
+ * </p>
+ * <ul>
+ * <li>{@code withDelimiter(',')}</li>
+ * <li>{@code withEscape('"')}</li>
+ * <li>{@code withIgnoreEmptyLines(false)}</li>
+ * <li>{@code withQuote('"')}</li>
+ * <li>{@code withRecordSeparator('\n')}</li>
+ * <li>{@code withNullString("")}</li>
+ * <li>{@code withQuoteMode(QuoteMode.ALL_NON_NULL)}</li>
+ * </ul>
+ *
+ * @see Predefined#PostgreSQLCsv
+ * @see <a href="https://www.postgresql.org/docs/current/static/sql-copy.html">PostgreSQL COPY command
+ * documentation</a>
+ * @since 1.5
+ */
+ // @formatter:off
+ public static final CSVFormat POSTGRESQL_CSV = DEFAULT
+ .withDelimiter(COMMA)
+ .withEscape(DOUBLE_QUOTE_CHAR)
+ .withIgnoreEmptyLines(false)
+ .withQuote(DOUBLE_QUOTE_CHAR)
+ .withRecordSeparator(LF)
+ .withNullString(EMPTY)
+ .withQuoteMode(QuoteMode.ALL_NON_NULL);
+ // @formatter:on
+
+ /**
+ * Default PostgreSQL text format used by the {@code COPY} operation.
+ *
+ * <p>
+ * This is a tab-delimited format with a LF character as the line separator. Values are double quoted and special
+ * characters are escaped with {@code '\'}. The default NULL string is {@code "\\N"}.
+ * </p>
+ *
+ * <p>
+ * Settings are:
+ * </p>
+ * <ul>
+ * <li>{@code withDelimiter('\t')}</li>
+ * <li>{@code withEscape('\\')}</li>
+ * <li>{@code withIgnoreEmptyLines(false)}</li>
+ * <li>{@code withQuote('"')}</li>
+ * <li>{@code withRecordSeparator('\n')}</li>
+ * <li>{@code withNullString("\\N")}</li>
+ * <li>{@code withQuoteMode(QuoteMode.ALL_NON_NULL)}</li>
+ * </ul>
+ *
+ * @see Predefined#PostgreSQLText
+ * @see <a href="https://www.postgresql.org/docs/current/static/sql-copy.html">PostgreSQL COPY command
+ * documentation</a>
+ * @since 1.5
+ */
+ // @formatter:off
+ public static final CSVFormat POSTGRESQL_TEXT = DEFAULT
+ .withDelimiter(TAB)
+ .withEscape(BACKSLASH)
+ .withIgnoreEmptyLines(false)
+ .withQuote(DOUBLE_QUOTE_CHAR)
+ .withRecordSeparator(LF)
+ .withNullString("\\N")
+ .withQuoteMode(QuoteMode.ALL_NON_NULL);
+ // @formatter:on
+
+ /**
+ * Comma separated format as defined by <a href="http://tools.ietf.org/html/rfc4180">RFC 4180</a>.
+ *
+ * <p>
+ * This format is derived from {@link #DEFAULT}; the only setting changed here is that empty lines are not ignored.
+ * </p>
+ *
+ * <p>
+ * Settings are:
+ * </p>
+ * <ul>
+ * <li>{@code withDelimiter(',')}</li>
+ * <li>{@code withQuote('"')}</li>
+ * <li>{@code withRecordSeparator("\r\n")}</li>
+ * <li>{@code withIgnoreEmptyLines(false)}</li>
+ * </ul>
+ *
+ * @see Predefined#RFC4180
+ */
+ public static final CSVFormat RFC4180 = DEFAULT.withIgnoreEmptyLines(false);
+
+ // Version identifier used by Java serialization (presumably this class is Serializable; declaration not visible here).
+ private static final long serialVersionUID = 1L;
+
+ /**
+ * Tab-delimited format.
+ *
+ * <p>
+ * This format is derived from {@link #DEFAULT} with the delimiter replaced by a tab and surrounding spaces ignored.
+ * </p>
+ *
+ * <p>
+ * Settings are:
+ * </p>
+ * <ul>
+ * <li>{@code withDelimiter('\t')}</li>
+ * <li>{@code withQuote('"')}</li>
+ * <li>{@code withRecordSeparator("\r\n")}</li>
+ * <li>{@code withIgnoreSurroundingSpaces(true)}</li>
+ * </ul>
+ *
+ * @see Predefined#TDF
+ */
+ // @formatter:off
+ public static final CSVFormat TDF = DEFAULT
+ .withDelimiter(TAB)
+ .withIgnoreSurroundingSpaces();
+ // @formatter:on
+
+ /**
+  * Tells whether a character terminates a line, i.e. is either {@code LF} or {@code CR}.
+  *
+  * @param c
+  *            the character to check
+  *
+  * @return true if {@code c} is a line break character
+  */
+ private static boolean isLineBreak(final char c) {
+     if (c == LF) {
+         return true;
+     }
+     return c == CR;
+ }
+
+ /**
+  * Null-safe variant of {@link #isLineBreak(char)} for boxed characters.
+  *
+  * @param c
+  *            the character to check, may be null
+  *
+  * @return true if {@code c} is not null and is a line break character
+  */
+ private static boolean isLineBreak(final Character c) {
+     if (c == null) {
+         return false;
+     }
+     return isLineBreak(c.charValue());
+ }
+
+ /**
+  * Creates a new CSV format with the specified delimiter.
+  *
+  * <p>
+  * Use this method if you want to create a CSVFormat from scratch. Every setting other than the delimiter starts out
+  * as {@code null} or {@code false}, with one exception: duplicate header names are allowed.
+  * </p>
+  *
+  * @param delimiter
+  *            the char used for value separation, must not be a line break character
+  * @return a new CSV format.
+  * @throws IllegalArgumentException
+  *             if the delimiter is a line break character
+  *
+  * @see #DEFAULT
+  * @see #RFC4180
+  * @see #MYSQL
+  * @see #EXCEL
+  * @see #TDF
+  */
+ public static CSVFormat newFormat(final char delimiter) {
+     return new CSVFormat(
+             delimiter,
+             null,   // quoteChar
+             null,   // quoteMode
+             null,   // commentStart
+             null,   // escape
+             false,  // ignoreSurroundingSpaces
+             false,  // ignoreEmptyLines
+             null,   // recordSeparator
+             null,   // nullString
+             null,   // headerComments
+             null,   // header
+             false,  // skipHeaderRecord
+             false,  // allowMissingColumnNames
+             false,  // ignoreHeaderCase
+             false,  // trim
+             false,  // trailingDelimiter
+             false,  // autoFlush
+             true);  // allowDuplicateHeaderNames
+ }
+
+ /**
+  * Looks up a predefined format by the name of its {@link CSVFormat.Predefined} constant.
+  *
+  * @param format
+  *            name of the predefined format
+  * @return one of the predefined formats
+  * @since 1.2
+  */
+ public static CSVFormat valueOf(final String format) {
+     final CSVFormat.Predefined predefined = CSVFormat.Predefined.valueOf(format);
+     return predefined.getFormat();
+ }
+
+ private final boolean allowDuplicateHeaderNames; // whether duplicate names are allowed in the headers
+
+ private final boolean allowMissingColumnNames; // whether missing column names are allowed when parsing the header line
+
+ private final boolean autoFlush; // whether to flush on close
+
+ private final Character commentMarker; // null if commenting is disabled
+
+ private final char delimiter; // separates values; validated not to be a line break
+
+ private final Character escapeCharacter; // null if escaping is disabled
+
+ private final String[] header; // array of header column names
+
+ private final String[] headerComments; // array of header comment lines
+
+ private final boolean ignoreEmptyLines; // whether empty lines between records are skipped when parsing
+
+ private final boolean ignoreHeaderCase; // should ignore header names case
+
+ private final boolean ignoreSurroundingSpaces; // Should leading/trailing spaces be ignored around values?
+
+ private final String nullString; // the string to be used for null values
+
+ private final Character quoteCharacter; // null if quoting is disabled
+
+ private final String quotedNullString; // quoteCharacter + nullString + quoteCharacter; printed for null when QuoteMode is ALL
+
+ private final QuoteMode quoteMode; // quoting policy for output; may be null
+
+ private final String recordSeparator; // for outputs
+
+ private final boolean skipHeaderRecord; // whether to skip the header record
+
+ private final boolean trailingDelimiter; // whether to add a trailing delimiter
+
+ private final boolean trim; // whether to trim leading and trailing blanks
+
+ /**
+  * Creates a customized CSV format, storing defensive copies of the array arguments and validating the
+  * combination of settings before returning.
+  *
+  * @param delimiter
+  *            the char used for value separation, must not be a line break character
+  * @param quoteChar
+  *            the Character used as value encapsulation marker, may be {@code null} to disable
+  * @param quoteMode
+  *            the quote mode
+  * @param commentStart
+  *            the Character used for comment identification, may be {@code null} to disable
+  * @param escape
+  *            the Character used to escape special characters in values, may be {@code null} to disable
+  * @param ignoreSurroundingSpaces
+  *            {@code true} when whitespaces enclosing values should be ignored
+  * @param ignoreEmptyLines
+  *            {@code true} when the parser should skip empty lines
+  * @param recordSeparator
+  *            the line separator to use for output
+  * @param nullString
+  *            the String to convert to and from {@code null}, may be {@code null} for no substitution
+  * @param headerComments
+  *            the comments to be printed by the Printer before the actual CSV data; each element is converted
+  *            with {@code toString()}
+  * @param header
+  *            the header; the array is cloned
+  * @param skipHeaderRecord
+  *            whether to skip the header record
+  * @param allowMissingColumnNames
+  *            whether missing column names are allowed when parsing the header line
+  * @param ignoreHeaderCase
+  *            whether header names are accessed ignoring case
+  * @param trim
+  *            whether to trim leading and trailing blanks
+  * @param trailingDelimiter
+  *            whether to add a trailing delimiter
+  * @param autoFlush
+  *            whether to flush on close
+  * @param allowDuplicateHeaderNames
+  *            whether duplicate names are allowed in the headers
+  * @throws IllegalArgumentException
+  *             if the delimiter is a line break character
+  */
+ private CSVFormat(final char delimiter, final Character quoteChar, final QuoteMode quoteMode,
+         final Character commentStart, final Character escape, final boolean ignoreSurroundingSpaces,
+         final boolean ignoreEmptyLines, final String recordSeparator, final String nullString,
+         final Object[] headerComments, final String[] header, final boolean skipHeaderRecord,
+         final boolean allowMissingColumnNames, final boolean ignoreHeaderCase, final boolean trim,
+         final boolean trailingDelimiter, final boolean autoFlush, final boolean allowDuplicateHeaderNames) {
+     // Special characters.
+     this.delimiter = delimiter;
+     this.quoteCharacter = quoteChar;
+     this.commentMarker = commentStart;
+     this.escapeCharacter = escape;
+
+     // Output policies and strings.
+     this.quoteMode = quoteMode;
+     this.recordSeparator = recordSeparator;
+     this.nullString = nullString;
+     this.quotedNullString = quoteChar + nullString + quoteChar;
+
+     // Header configuration; arrays are copied so later changes by the caller are not seen.
+     if (header == null) {
+         this.header = null;
+     } else {
+         this.header = header.clone();
+     }
+     this.headerComments = toStringArray(headerComments);
+     this.skipHeaderRecord = skipHeaderRecord;
+     this.allowMissingColumnNames = allowMissingColumnNames;
+     this.allowDuplicateHeaderNames = allowDuplicateHeaderNames;
+     this.ignoreHeaderCase = ignoreHeaderCase;
+
+     // Remaining flags.
+     this.ignoreSurroundingSpaces = ignoreSurroundingSpaces;
+     this.ignoreEmptyLines = ignoreEmptyLines;
+     this.trim = trim;
+     this.trailingDelimiter = trailingDelimiter;
+     this.autoFlush = autoFlush;
+
+     validate();
+ }
+
+ /**
+  * Compares this format with another object. Two formats are equal when they are of the same class and every
+  * setting, including the header and header comment arrays, has the same value.
+  *
+  * @param obj
+  *            the object to compare with, may be {@code null}
+  * @return {@code true} if {@code obj} is a format with identical settings
+  */
+ @Override
+ public boolean equals(final Object obj) {
+     if (this == obj) {
+         return true;
+     }
+     if (obj == null || getClass() != obj.getClass()) {
+         return false;
+     }
+     final CSVFormat that = (CSVFormat) obj;
+     return delimiter == that.delimiter
+             && trailingDelimiter == that.trailingDelimiter
+             && autoFlush == that.autoFlush
+             && trim == that.trim
+             && allowMissingColumnNames == that.allowMissingColumnNames
+             && allowDuplicateHeaderNames == that.allowDuplicateHeaderNames
+             && ignoreHeaderCase == that.ignoreHeaderCase
+             && ignoreSurroundingSpaces == that.ignoreSurroundingSpaces
+             && ignoreEmptyLines == that.ignoreEmptyLines
+             && skipHeaderRecord == that.skipHeaderRecord
+             && quoteMode == that.quoteMode
+             && (quoteCharacter == null ? that.quoteCharacter == null
+                     : quoteCharacter.equals(that.quoteCharacter))
+             && (commentMarker == null ? that.commentMarker == null
+                     : commentMarker.equals(that.commentMarker))
+             && (escapeCharacter == null ? that.escapeCharacter == null
+                     : escapeCharacter.equals(that.escapeCharacter))
+             && (nullString == null ? that.nullString == null
+                     : nullString.equals(that.nullString))
+             && (recordSeparator == null ? that.recordSeparator == null
+                     : recordSeparator.equals(that.recordSeparator))
+             && Arrays.equals(header, that.header)
+             && Arrays.equals(headerComments, that.headerComments);
+ }
+
+ /**
+  * Formats the specified values as a single record and returns it as a String. When a record separator is set,
+  * its length is cut from the end of the printed text.
+  *
+  * @param values
+  *            the values to format
+  * @return the formatted values
+  */
+ public String format(final Object... values) {
+     final StringWriter buffer = new StringWriter();
+     try (CSVPrinter printer = new CSVPrinter(buffer, this)) {
+         printer.printRecord(values);
+         final String record = buffer.toString();
+         int end = record.length();
+         if (recordSeparator != null) {
+             end -= recordSeparator.length();
+         }
+         return record.substring(0, end);
+     } catch (final IOException e) {
+         // should not happen because a StringWriter does not do IO.
+         throw new IllegalStateException(e);
+     }
+ }
+
+ /**
+ * Returns whether duplicate names are allowed in the headers.
+ *
+ * @return {@code true} if duplicate header names are allowed, {@code false} otherwise
+ * @since 1.7
+ */
+ public boolean getAllowDuplicateHeaderNames() {
+ return allowDuplicateHeaderNames;
+ }
+
+ /**
+ * Returns whether missing column names are allowed when parsing the header line.
+ *
+ * @return {@code true} if missing column names are allowed when parsing the header line, {@code false} to throw an
+ * {@link IllegalArgumentException}.
+ */
+ public boolean getAllowMissingColumnNames() {
+ return allowMissingColumnNames;
+ }
+
+ /**
+ * Returns whether to flush on close.
+ *
+ * @return {@code true} to flush on close, {@code false} otherwise
+ * @since 1.6
+ */
+ public boolean getAutoFlush() {
+ return autoFlush;
+ }
+
+ /**
+ * Returns the character marking the start of a line comment.
+ *
+ * @return the comment start marker, or {@code null} if commenting is disabled
+ * @see #isCommentMarkerSet()
+ */
+ public Character getCommentMarker() {
+ return commentMarker;
+ }
+
+ /**
+ * Returns the character delimiting the values (typically {@code ';'}, {@code ','} or {@code '\t'}).
+ *
+ * @return the delimiter character
+ */
+ public char getDelimiter() {
+ return delimiter;
+ }
+
+ /**
+ * Returns the escape character.
+ *
+ * @return the escape character, or {@code null} if escaping is disabled
+ * @see #isEscapeCharacterSet()
+ */
+ public Character getEscapeCharacter() {
+ return escapeCharacter;
+ }
+
+ /**
+  * Returns a copy of the header array, so callers cannot modify this format's own array.
+  *
+  * @return a copy of the header array; {@code null} if disabled, the empty array if to be read from the file
+  */
+ public String[] getHeader() {
+     if (header == null) {
+         return null;
+     }
+     return header.clone();
+ }
+
+ /**
+  * Returns a copy of the header comment array, so callers cannot modify this format's own array.
+  *
+  * @return a copy of the header comment array; {@code null} if disabled.
+  */
+ public String[] getHeaderComments() {
+     if (headerComments == null) {
+         return null;
+     }
+     return headerComments.clone();
+ }
+
+ /**
+ * Returns whether empty lines between records are ignored when parsing input.
+ *
+ * @return {@code true} if empty lines between records are ignored, {@code false} if they are turned into empty
+ * records.
+ */
+ public boolean getIgnoreEmptyLines() {
+ return ignoreEmptyLines;
+ }
+
+ /**
+ * Returns whether header names will be accessed ignoring case.
+ *
+ * @return {@code true} if header name case is ignored, {@code false} if header names are case sensitive.
+ * @since 1.3
+ */
+ public boolean getIgnoreHeaderCase() {
+ return ignoreHeaderCase;
+ }
+
+ /**
+ * Returns whether spaces around values are ignored when parsing input.
+ *
+ * @return {@code true} if spaces around values are ignored, {@code false} if they are treated as part of the value.
+ */
+ public boolean getIgnoreSurroundingSpaces() {
+ return ignoreSurroundingSpaces;
+ }
+
+ /**
+ * Gets the String to convert to and from {@code null}.
+ * <ul>
+ * <li><strong>Reading:</strong> Converts strings equal to the given {@code nullString} to {@code null} when reading
+ * records.</li>
+ * <li><strong>Writing:</strong> Writes {@code null} as the given {@code nullString} when writing records.</li>
+ * </ul>
+ *
+ * @return the String to convert to and from {@code null}. No substitution occurs if {@code null}
+ * @see #isNullStringSet()
+ */
+ public String getNullString() {
+ return nullString;
+ }
+
+ /**
+ * Returns the character used to encapsulate values containing special characters.
+ *
+ * @return the quote character, or {@code null} if quoting is disabled
+ * @see #isQuoteCharacterSet()
+ */
+ public Character getQuoteCharacter() {
+ return quoteCharacter;
+ }
+
+ /**
+ * Returns the quote policy applied to output fields.
+ *
+ * @return the quote policy, may be {@code null}
+ */
+ public QuoteMode getQuoteMode() {
+ return quoteMode;
+ }
+
+ /**
+ * Returns the record separator delimiting output records.
+ *
+ * @return the record separator, may be {@code null}
+ */
+ public String getRecordSeparator() {
+ return recordSeparator;
+ }
+
+ /**
+ * Returns whether to skip the header record.
+ *
+ * @return {@code true} to skip the header record, {@code false} otherwise
+ */
+ public boolean getSkipHeaderRecord() {
+ return skipHeaderRecord;
+ }
+
+ /**
+ * Returns whether to add a trailing delimiter.
+ * This is used by {@link #println(Appendable)}, which appends the delimiter before the record separator.
+ *
+ * @return {@code true} to add a trailing delimiter, {@code false} otherwise
+ * @since 1.3
+ */
+ public boolean getTrailingDelimiter() {
+ return trailingDelimiter;
+ }
+
+ /**
+ * Returns whether to trim leading and trailing blanks.
+ * This is used by {@link #print(Object, Appendable, boolean)}
+ * and also by {@link CSVParser#addRecordValue(boolean)}.
+ *
+ * @return {@code true} to trim leading and trailing blanks, {@code false} otherwise
+ */
+ public boolean getTrim() {
+ return trim;
+ }
+
+ /**
+  * Computes a hash code from the same settings that {@link #equals(Object)} compares.
+  *
+  * @return the hash code of this format
+  */
+ @Override
+ public int hashCode() {
+     final int prime = 31;
+     int hash = prime + delimiter;
+
+     // Nullable object settings contribute 0 when absent.
+     final Object[] nullableSettings = {quoteMode, quoteCharacter, commentMarker, escapeCharacter, nullString};
+     for (final Object setting : nullableSettings) {
+         hash = prime * hash + (setting == null ? 0 : setting.hashCode());
+     }
+
+     // Boolean settings use the same constants as Boolean.hashCode().
+     final boolean[] flags = {ignoreSurroundingSpaces, ignoreHeaderCase, ignoreEmptyLines, skipHeaderRecord,
+             allowDuplicateHeaderNames, trim, autoFlush, trailingDelimiter, allowMissingColumnNames};
+     for (final boolean flag : flags) {
+         hash = prime * hash + (flag ? 1231 : 1237);
+     }
+
+     hash = prime * hash + (recordSeparator == null ? 0 : recordSeparator.hashCode());
+     hash = prime * hash + Arrays.hashCode(header);
+     hash = prime * hash + Arrays.hashCode(headerComments);
+     return hash;
+ }
+
+ /**
+ * Returns whether comments are supported by this format, i.e. whether a comment marker has been set.
+ *
+ * Note that the comment introducer character is only recognized at the start of a line.
+ *
+ * @return {@code true} if comments are supported, {@code false} otherwise
+ */
+ public boolean isCommentMarkerSet() {
+ return commentMarker != null;
+ }
+
+ /**
+ * Returns whether escapes are being processed, i.e. whether an escape character has been set.
+ *
+ * @return {@code true} if escapes are processed
+ */
+ public boolean isEscapeCharacterSet() {
+ return escapeCharacter != null;
+ }
+
+ /**
+ * Returns whether a nullString has been defined.
+ *
+ * @return {@code true} if a nullString is defined, {@code false} if it is {@code null}
+ */
+ public boolean isNullStringSet() {
+ return nullString != null;
+ }
+
+ /**
+ * Returns whether a quote character has been defined.
+ *
+ * @return {@code true} if a quote character is defined, {@code false} if it is {@code null}
+ */
+ public boolean isQuoteCharacterSet() {
+ return quoteCharacter != null;
+ }
+
+ /**
+ * Parses the specified content.
+ *
+ * <p>
+ * See also the various static parse methods on {@link CSVParser}.
+ * </p>
+ *
+ * @param in
+ * the reader to parse; it is handed to a new {@link CSVParser} together with this format
+ * @return a parser over a stream of {@link CSVRecord}s.
+ * @throws IOException
+ * If an I/O error occurs
+ */
+ public CSVParser parse(final Reader in) throws IOException {
+ return new CSVParser(in, this);
+ }
+
+ /**
+ * Creates a printer that writes to the specified output using this format.
+ *
+ * <p>
+ * See also {@link CSVPrinter}.
+ * </p>
+ *
+ * @param out
+ * the output.
+ * @return a printer to an output.
+ * @throws IOException
+ * thrown if the optional header cannot be printed.
+ */
+ public CSVPrinter print(final Appendable out) throws IOException {
+ return new CSVPrinter(out, this);
+ }
+
+ /**
+  * Creates a printer that writes to the specified file using this format, returning a {@code CSVPrinter} which
+  * the caller MUST close.
+  *
+  * <p>
+  * See also {@link CSVPrinter}.
+  * </p>
+  *
+  * <p>
+  * If the printer cannot be created, the file stream opened by this method is closed before the exception is
+  * rethrown, so no file handle is leaked.
+  * </p>
+  *
+  * @param out
+  *            the output.
+  * @param charset
+  *            A charset.
+  * @return a printer to an output.
+  * @throws IOException
+  *             thrown if the file cannot be opened or the optional header cannot be printed.
+  * @since 1.5
+  */
+ @SuppressWarnings("resource")
+ public CSVPrinter print(final File out, final Charset charset) throws IOException {
+     final FileOutputStream stream = new FileOutputStream(out);
+     try {
+         // On success the writer (and the stream under it) is closed when close() is called on the printer.
+         return new CSVPrinter(new OutputStreamWriter(stream, charset), this);
+     } catch (final IOException | RuntimeException e) {
+         // Nobody else holds the stream at this point, so release it here.
+         try {
+             stream.close();
+         } catch (final IOException closeFailure) {
+             e.addSuppressed(closeFailure);
+         }
+         throw e;
+     }
+ }
+
+ /**
+  * Prints the {@code value} as the next value on the line to {@code out}. The value will be escaped or encapsulated
+  * as needed. Useful when one wants to avoid creating CSVPrinters.
+  * Trims the value if {@link #getTrim()} is true.
+  *
+  * <p>
+  * A {@code null} value is printed as the empty string when no null string is set; otherwise as the null string,
+  * or as the quoted null string when the quote mode is {@code QuoteMode.ALL}. A {@link Reader} value (that is not
+  * also a {@link CharSequence}) is copied from the reader instead of being converted with {@code toString()}.
+  * </p>
+  *
+  * @param value
+  *            value to output.
+  * @param out
+  *            where to print the value.
+  * @param newRecord
+  *            if this a new record.
+  * @throws IOException
+  *             If an I/O error occurs.
+  * @since 1.4
+  */
+ public void print(final Object value, final Appendable out, final boolean newRecord) throws IOException {
+     // Only call toString() when unavoidable, which helps GC-free use cases.
+     final CharSequence text;
+     if (value == null) {
+         // https://issues.apache.org/jira/browse/CSV-203
+         if (nullString == null) {
+             text = EMPTY;
+         } else {
+             text = QuoteMode.ALL == quoteMode ? quotedNullString : nullString;
+         }
+     } else if (value instanceof CharSequence) {
+         text = (CharSequence) value;
+     } else if (value instanceof Reader) {
+         print((Reader) value, out, newRecord);
+         return;
+     } else {
+         text = value.toString();
+     }
+     if (getTrim()) {
+         print(value, trim(text), out, newRecord);
+     } else {
+         print(value, text, out, newRecord);
+     }
+ }
+
+ /*
+  * Writes one value, preceded by the delimiter unless it starts a new record. A null object is written verbatim;
+  * otherwise quoting takes precedence over escaping, and with neither configured the value is written unchanged.
+  */
+ private void print(final Object object, final CharSequence value, final Appendable out, final boolean newRecord)
+         throws IOException {
+     final int length = value.length();
+     if (!newRecord) {
+         out.append(getDelimiter());
+     }
+     if (object == null) {
+         out.append(value);
+         return;
+     }
+     if (isQuoteCharacterSet()) {
+         // the original object is needed so can check for Number
+         printWithQuotes(object, value, out, newRecord);
+         return;
+     }
+     if (isEscapeCharacterSet()) {
+         printWithEscapes(value, out);
+         return;
+     }
+     out.append(value, 0, length);
+ }
+
+ /**
+  * Creates a printer that writes to the specified path using this format, returning a {@code CSVPrinter} which
+  * the caller MUST close.
+  *
+  * <p>
+  * See also {@link CSVPrinter}.
+  * </p>
+  *
+  * <p>
+  * If the printer cannot be created, the writer opened by this method is closed before the exception is
+  * rethrown, so no file handle is leaked.
+  * </p>
+  *
+  * @param out the output.
+  * @param charset A charset.
+  * @return a printer to an output.
+  * @throws IOException thrown if the file cannot be opened or the optional header cannot be printed.
+  * @since 1.5
+  */
+ @SuppressWarnings("resource")
+ public CSVPrinter print(final Path out, final Charset charset) throws IOException {
+     final Writer writer = Files.newBufferedWriter(out, charset);
+     try {
+         // On success the writer is closed when close() is called on the returned printer.
+         return print(writer);
+     } catch (final IOException | RuntimeException e) {
+         // Nobody else holds the writer at this point, so release it here.
+         try {
+             writer.close();
+         } catch (final IOException closeFailure) {
+             e.addSuppressed(closeFailure);
+         }
+         throw e;
+     }
+ }
+
+ /*
+  * Writes the content of a reader as one value, preceded by the delimiter unless it starts a new record.
+  * Quoting takes precedence over escaping; with neither configured the content is copied unchanged.
+  */
+ private void print(final Reader reader, final Appendable out, final boolean newRecord) throws IOException {
+     // Reader is never null
+     if (!newRecord) {
+         out.append(getDelimiter());
+     }
+     if (isQuoteCharacterSet()) {
+         printWithQuotes(reader, out);
+         return;
+     }
+     if (isEscapeCharacterSet()) {
+         printWithEscapes(reader, out);
+         return;
+     }
+     if (out instanceof Writer) {
+         IOUtils.copyLarge(reader, (Writer) out);
+         return;
+     }
+     IOUtils.copy(reader, out);
+ }
+
+ /**
+ * Creates a printer that writes to {@link System#out} using this format.
+ *
+ * <p>
+ * See also {@link CSVPrinter}.
+ * </p>
+ *
+ * @return a printer to {@link System#out}.
+ * @throws IOException
+ * thrown if the optional header cannot be printed.
+ * @since 1.5
+ */
+ public CSVPrinter printer() throws IOException {
+ return new CSVPrinter(System.out, this);
+ }
+
+ /**
+  * Ends a record: writes the delimiter when a trailing delimiter is configured, then the record separator when
+  * one is set.
+  *
+  * @param out
+  *            where to write
+  * @throws IOException
+  *             If an I/O error occurs
+  * @since 1.4
+  */
+ public void println(final Appendable out) throws IOException {
+     if (getTrailingDelimiter()) {
+         out.append(getDelimiter());
+     }
+     final String separator = recordSeparator;
+     if (separator == null) {
+         return;
+     }
+     out.append(separator);
+ }
+
+ /**
+ * Prints the given {@code values} to {@code out} as a single record of delimiter separated values followed by the
+ * record separator.
+ *
+ * <p>
+ * The values will be quoted if needed. Quotes and new-line characters will be escaped. This method adds the record
+ * separator to the output after printing the record, so there is no need to call {@link #println(Appendable)}.
+ * </p>
+ *
+ * @param out
+ * where to write.
+ * @param values
+ * values to output.
+ * @throws IOException
+ * If an I/O error occurs.
+ * @since 1.4
+ */
+ public void printRecord(final Appendable out, final Object... values) throws IOException {
+ for (int i = 0; i < values.length; i++) {
+ print(values[i], out, i == 0);
+ }
+ println(out);
+ }
+
+ /*
+  * Writes the value with CR, LF, the delimiter and the escape character each preceded by the escape character;
+  * LF is written as 'n' and CR as 'r' after the escape.
+  *
+  * Note: must only be called if escaping is enabled, otherwise will generate NPE
+  */
+ private void printWithEscapes(final CharSequence value, final Appendable out) throws IOException {
+     final int length = value.length();
+     final char delim = getDelimiter();
+     final char escape = getEscapeCharacter().charValue();
+
+     // Index of the first character not yet written.
+     int segmentStart = 0;
+     for (int index = 0; index < length; index++) {
+         final char current = value.charAt(index);
+         final boolean special = current == CR || current == LF || current == delim || current == escape;
+         if (!special) {
+             continue;
+         }
+         // write out the untouched segment preceding this char
+         if (index > segmentStart) {
+             out.append(value, segmentStart, index);
+         }
+         out.append(escape);
+         if (current == LF) {
+             out.append('n');
+         } else if (current == CR) {
+             out.append('r');
+         } else {
+             out.append(current);
+         }
+         segmentStart = index + 1;
+     }
+
+     // write last segment
+     if (length > segmentStart) {
+         out.append(value, segmentStart, length);
+     }
+ }
+
+ /*
+  * Reader variant of the escaping printer: CR, LF, the delimiter and the escape character are each written as
+  * the escape character followed by 'r', 'n' or the character itself. Runs of ordinary characters are collected
+  * and written in one piece when the next special character or the end of the reader is reached.
+  *
+  * Note: must only be called if escaping is enabled, otherwise will generate NPE
+  */
+ private void printWithEscapes(final Reader reader, final Appendable out) throws IOException {
+     final char delim = getDelimiter();
+     final char escape = getEscapeCharacter().charValue();
+     // Holds only the ordinary characters read since the last special character.
+     final StringBuilder pending = new StringBuilder(IOUtils.DEFAULT_BUFFER_SIZE);
+
+     int c;
+     while (-1 != (c = reader.read())) {
+         final boolean special = c == CR || c == LF || c == delim || c == escape;
+         if (!special) {
+             pending.append((char) c);
+             continue;
+         }
+         // write out segment up until this char
+         if (pending.length() > 0) {
+             out.append(pending.toString());
+             pending.setLength(0);
+         }
+         out.append(escape);
+         if (c == LF) {
+             out.append('n');
+         } else if (c == CR) {
+             out.append('r');
+         } else {
+             out.append((char) c);
+         }
+     }
+
+     // write last segment
+     if (pending.length() > 0) {
+         out.append(pending.toString());
+     }
+ }
+
+ /*
+ * Writes one value, deciding from the quote mode whether it must be enclosed in the quote character.
+ * When quoting, every quote or escape character inside the value is preceded by the escape character
+ * (the quote character itself when no escape character is set).
+ *
+ * Note: must only be called if quoting is enabled, otherwise will generate NPE
+ */
+ // the original object is needed so can check for Number
+ private void printWithQuotes(final Object object, final CharSequence value, final Appendable out,
+ final boolean newRecord) throws IOException {
+ boolean quote = false;
+ // start: first char not yet written; pos: scan position, carried over from the MINIMAL scan into the write loop
+ int start = 0;
+ int pos = 0;
+ final int len = value.length();
+ final int end = len;
+
+ final char delimChar = getDelimiter();
+ final char quoteChar = getQuoteCharacter().charValue();
+ // If escape char not specified, default to the quote char
+ // This avoids having to keep checking whether there is an escape character
+ // at the cost of checking against quote twice
+ final char escapeChar = isEscapeCharacterSet() ? getEscapeCharacter().charValue() : quoteChar;
+
+ // A missing quote mode is treated as MINIMAL.
+ QuoteMode quoteModePolicy = getQuoteMode();
+ if (quoteModePolicy == null) {
+ quoteModePolicy = QuoteMode.MINIMAL;
+ }
+ switch (quoteModePolicy) {
+ case ALL:
+ case ALL_NON_NULL:
+ // Both modes quote every value that reaches this method.
+ quote = true;
+ break;
+ case NON_NUMERIC:
+ // Quote everything except Number instances.
+ quote = !(object instanceof Number);
+ break;
+ case NONE:
+ // Use the existing escaping code
+ printWithEscapes(value, out);
+ return;
+ case MINIMAL:
+ if (len <= 0) {
+ // always quote an empty token that is the first
+ // on the line, as it may be the only thing on the
+ // line. If it were not quoted in that case,
+ // an empty line has no tokens.
+ if (newRecord) {
+ quote = true;
+ }
+ } else {
+ char c = value.charAt(pos);
+
+ if (c <= COMMENT) {
+ // Some other chars at the start of a value caused the parser to fail, so for now
+ // encapsulate if we start in anything less than '#'. We are being conservative
+ // by including the default comment char too.
+ quote = true;
+ } else {
+ // Scan for the first character that forces quoting; pos stays on it when found.
+ while (pos < end) {
+ c = value.charAt(pos);
+ if (c == LF || c == CR || c == quoteChar || c == delimChar || c == escapeChar) {
+ quote = true;
+ break;
+ }
+ pos++;
+ }
+
+ if (!quote) {
+ pos = end - 1;
+ c = value.charAt(pos);
+ // Some other chars at the end caused the parser to fail, so for now
+ // encapsulate if we end in anything less than ' '
+ if (c <= SP) {
+ quote = true;
+ }
+ }
+ }
+ }
+
+ if (!quote) {
+ // no encapsulation needed - write out the original value
+ out.append(value, start, end);
+ return;
+ }
+ break;
+ default:
+ throw new IllegalStateException("Unexpected Quote value: " + quoteModePolicy);
+ }
+
+ // Reached with quote == false only for NON_NUMERIC when the object is a Number.
+ if (!quote) {
+ // no encapsulation needed - write out the original value
+ out.append(value, start, end);
+ return;
+ }
+
+ // we hit something that needed encapsulation
+ out.append(quoteChar);
+
+ // Pick up where we left off: pos should be positioned on the first character that caused
+ // the need for encapsulation.
+ while (pos < end) {
+ final char c = value.charAt(pos);
+ if (c == quoteChar || c == escapeChar) {
+ // write out the chunk up until this point
+ out.append(value, start, pos);
+ out.append(escapeChar); // now output the escape
+ start = pos; // and restart with the matched char
+ }
+ pos++;
+ }
+
+ // write the last segment
+ out.append(value, start, pos);
+ out.append(quoteChar);
+ }
+
+ /**
+  * Writes the content of a reader enclosed in the quote character, doubling every quote character found in the
+  * content. Quotes are always used unless the quote mode is {@code QuoteMode.NONE} (in which case the escaping
+  * printer is used instead), so there is no need to look ahead.
+  *
+  * <p>
+  * Note: must only be called if quoting is enabled, otherwise will generate NPE.
+  * </p>
+  *
+  * @param reader
+  *            the content to write
+  * @param out
+  *            where to write
+  * @throws IOException
+  *             If an I/O error occurs
+  */
+ private void printWithQuotes(final Reader reader, final Appendable out) throws IOException {
+
+     if (getQuoteMode() == QuoteMode.NONE) {
+         printWithEscapes(reader, out);
+         return;
+     }
+
+     final char quote = getQuoteCharacter().charValue();
+     final int flushThreshold = IOUtils.DEFAULT_BUFFER_SIZE;
+     // Holds only ordinary characters. Quote characters are never stored here, so a quote at the start of the
+     // content or directly after another quote cannot be written a second time with the next segment.
+     final StringBuilder pending = new StringBuilder(flushThreshold);
+
+     out.append(quote);
+
+     int c;
+     while (-1 != (c = reader.read())) {
+         if (c == quote) {
+             // write out segment up until this char, then the doubled quote
+             if (pending.length() > 0) {
+                 out.append(pending.toString());
+                 pending.setLength(0);
+             }
+             out.append(quote);
+             out.append(quote);
+         } else {
+             pending.append((char) c);
+             // Keep memory bounded for large readers that contain no quote characters.
+             if (pending.length() >= flushThreshold) {
+                 out.append(pending.toString());
+                 pending.setLength(0);
+             }
+         }
+     }
+
+     // write last segment
+     if (pending.length() > 0) {
+         out.append(pending.toString());
+     }
+
+     out.append(quote);
+ }
+
+ /**
+  * Builds a human-readable summary of this format. The delimiter and the skip-header flag are always listed;
+  * other settings appear only when they are set or enabled.
+  *
+  * @return a description of the settings of this format
+  */
+ @Override
+ public String toString() {
+     final StringBuilder text = new StringBuilder();
+     text.append("Delimiter=<").append(delimiter).append('>');
+     if (isEscapeCharacterSet()) {
+         text.append(' ').append("Escape=<").append(escapeCharacter).append('>');
+     }
+     if (isQuoteCharacterSet()) {
+         text.append(' ').append("QuoteChar=<").append(quoteCharacter).append('>');
+     }
+     if (quoteMode != null) {
+         text.append(' ').append("QuoteMode=<").append(quoteMode).append('>');
+     }
+     if (isCommentMarkerSet()) {
+         text.append(' ').append("CommentStart=<").append(commentMarker).append('>');
+     }
+     if (isNullStringSet()) {
+         text.append(' ').append("NullString=<").append(nullString).append('>');
+     }
+     if (recordSeparator != null) {
+         text.append(' ').append("RecordSeparator=<").append(recordSeparator).append('>');
+     }
+     if (getIgnoreEmptyLines()) {
+         text.append(" EmptyLines:ignored");
+     }
+     if (getIgnoreSurroundingSpaces()) {
+         text.append(" SurroundingSpaces:ignored");
+     }
+     if (getIgnoreHeaderCase()) {
+         text.append(" IgnoreHeaderCase:ignored");
+     }
+     text.append(" SkipHeaderRecord:").append(skipHeaderRecord);
+     if (headerComments != null) {
+         text.append(' ').append("HeaderComments:").append(Arrays.toString(headerComments));
+     }
+     if (header != null) {
+         text.append(' ').append("Header:").append(Arrays.toString(header));
+     }
+     return text.toString();
+ }
+
+ /**
+  * Converts each element of {@code values} to its {@code toString()} form.
+  *
+  * @param values the objects to convert, may be {@code null}.
+  * @return a new array of the same length in which {@code null} elements stay {@code null}, or {@code null} when
+  *         {@code values} itself is {@code null}.
+  */
+ private String[] toStringArray(final Object[] values) {
+     if (values == null) {
+         return null;
+     }
+     final String[] result = new String[values.length];
+     int index = 0;
+     for (final Object element : values) {
+         // Slots are null by default, so only non-null elements need to be written.
+         if (element != null) {
+             result[index] = element.toString();
+         }
+         index++;
+     }
+     return result;
+ }
+
+ /**
+  * Strips leading and trailing characters whose code is at most {@code SP} from a character sequence.
+  * <p>
+  * A {@code String} is handled by {@link String#trim()}. Any other sequence is scanned from both ends and returned
+  * as is when nothing has to be removed.
+  * </p>
+  *
+  * @param charSequence the sequence to trim.
+  * @return the trimmed sequence, or {@code charSequence} itself when it is not a {@code String} and has nothing
+  *         to strip.
+  */
+ private CharSequence trim(final CharSequence charSequence) {
+     if (charSequence instanceof String) {
+         return ((String) charSequence).trim();
+     }
+     final int length = charSequence.length();
+     int begin = 0;
+     int end = length;
+
+     // Skip forward over leading blanks.
+     for (; begin < end && charSequence.charAt(begin) <= SP; begin++) {
+         // scanning only
+     }
+     // Skip backward over trailing blanks, never crossing the start position.
+     for (; begin < end && charSequence.charAt(end - 1) <= SP; end--) {
+         // scanning only
+     }
+     if (begin == 0 && end == length) {
+         return charSequence;
+     }
+     return charSequence.subSequence(begin, end);
+ }
+
+ /**
+  * Checks that the settings of this format do not contradict each other.
+  * <p>
+  * The checks run in this order: the delimiter is not a line break; the delimiter differs from the quote, escape
+  * and comment characters; the comment marker differs from the quote and escape characters; an escape character
+  * exists when the quote mode is {@link QuoteMode#NONE}; and, unless duplicates are allowed, the header holds no
+  * repeated name.
+  * </p>
+  *
+  * @throws IllegalArgumentException if any of the checks above fails.
+  */
+ private void validate() throws IllegalArgumentException {
+     if (isLineBreak(delimiter)) {
+         throw new IllegalArgumentException("The delimiter cannot be a line break");
+     }
+
+     final boolean hasQuote = quoteCharacter != null;
+     final boolean hasEscape = escapeCharacter != null;
+     final boolean hasComment = commentMarker != null;
+
+     if (hasQuote && quoteCharacter.charValue() == delimiter) {
+         throw new IllegalArgumentException(
+                 "The quoteChar character and the delimiter cannot be the same ('" + quoteCharacter + "')");
+     }
+
+     if (hasEscape && escapeCharacter.charValue() == delimiter) {
+         throw new IllegalArgumentException(
+                 "The escape character and the delimiter cannot be the same ('" + escapeCharacter + "')");
+     }
+
+     if (hasComment && commentMarker.charValue() == delimiter) {
+         throw new IllegalArgumentException(
+                 "The comment start character and the delimiter cannot be the same ('" + commentMarker + "')");
+     }
+
+     if (hasQuote && quoteCharacter.equals(commentMarker)) {
+         throw new IllegalArgumentException(
+                 "The comment start character and the quoteChar cannot be the same ('" + commentMarker + "')");
+     }
+
+     if (hasEscape && escapeCharacter.equals(commentMarker)) {
+         throw new IllegalArgumentException(
+                 "The comment start and the escape character cannot be the same ('" + commentMarker + "')");
+     }
+
+     if (!hasEscape && quoteMode == QuoteMode.NONE) {
+         throw new IllegalArgumentException("No quotes mode set but no escape character is set");
+     }
+
+     // validate header
+     if (header == null || allowDuplicateHeaderNames) {
+         return;
+     }
+     final Set<String> seen = new HashSet<>();
+     for (final String name : header) {
+         final boolean firstOccurrence = seen.add(name);
+         if (!firstOccurrence) {
+             throw new IllegalArgumentException(
+                     "The header contains a duplicate entry: '" + name + "' in " + Arrays.toString(header));
+         }
+     }
+ }
+
+ /**
+ * Returns a new {@code CSVFormat} that allows duplicate header names.
+ * <p>
+ * Equivalent to calling {@code withAllowDuplicateHeaderNames(true)}.
+ * </p>
+ *
+ * @return a new {@code CSVFormat} that allows duplicate header names
+ * @see #withAllowDuplicateHeaderNames(boolean)
+ * @since 1.7
+ */
+ public CSVFormat withAllowDuplicateHeaderNames() {
+ return withAllowDuplicateHeaderNames(true);
+ }
+
+ /**
+ * Returns a new {@code CSVFormat} with duplicate header names behavior set to the given value.
+ * <p>
+ * Every other setting is copied from this format, which is left unchanged.
+ * </p>
+ *
+ * @param allowDuplicateHeaderNames the duplicate header names behavior, true to allow, false to disallow.
+ * @return a new {@code CSVFormat} with duplicate header names behavior set to the given value.
+ * @since 1.7
+ */
+ public CSVFormat withAllowDuplicateHeaderNames(final boolean allowDuplicateHeaderNames) {
+ return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
+ ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header,
+ skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush,
+ allowDuplicateHeaderNames);
+ }
+
+ /**
+ * Returns a new {@code CSVFormat} with the missing column names behavior of the format set to {@code true}.
+ * <p>
+ * Equivalent to calling {@code withAllowMissingColumnNames(true)}.
+ * </p>
+ *
+ * @return A new CSVFormat that is equal to this but with the specified missing column names behavior.
+ * @see #withAllowMissingColumnNames(boolean)
+ * @since 1.1
+ */
+ public CSVFormat withAllowMissingColumnNames() {
+ return this.withAllowMissingColumnNames(true);
+ }
+
+ /**
+ * Returns a new {@code CSVFormat} with the missing column names behavior of the format set to the given value.
+ * <p>
+ * Every other setting is copied from this format, which is left unchanged.
+ * </p>
+ *
+ * @param allowMissingColumnNames
+ * the missing column names behavior, {@code true} to allow missing column names in the header line,
+ * {@code false} to cause an {@link IllegalArgumentException} to be thrown.
+ * @return A new CSVFormat that is equal to this but with the specified missing column names behavior.
+ */
+ public CSVFormat withAllowMissingColumnNames(final boolean allowMissingColumnNames) {
+ return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
+ ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header,
+ skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush,
+ allowDuplicateHeaderNames);
+ }
+
+ /**
+ * Returns a new {@code CSVFormat} with whether to flush on close.
+ * <p>
+ * Every other setting is copied from this format, which is left unchanged.
+ * </p>
+ *
+ * @param autoFlush
+ * whether to flush on close.
+ *
+ * @return A new CSVFormat that is equal to this but with the specified autoFlush setting.
+ * @since 1.6
+ */
+ public CSVFormat withAutoFlush(final boolean autoFlush) {
+ return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
+ ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header,
+ skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush,
+ allowDuplicateHeaderNames);
+ }
+
+ /**
+ * Returns a new {@code CSVFormat} with the comment start marker of the format set to the specified character.
+ *
+ * <p>
+ * Note that the comment start character is only recognized at the start of a line.
+ * </p>
+ * <p>
+ * The character is boxed and passed on to {@link #withCommentMarker(Character)}.
+ * </p>
+ *
+ * @param commentMarker
+ * the comment start marker
+ * @return A new CSVFormat that is equal to this one but with the specified character as the comment start marker
+ * @throws IllegalArgumentException
+ * thrown if the specified character is a line break
+ * @see #withCommentMarker(Character)
+ */
+ public CSVFormat withCommentMarker(final char commentMarker) {
+ return withCommentMarker(Character.valueOf(commentMarker));
+ }
+
+ /**
+ * Returns a new {@code CSVFormat} with the comment start marker of the format set to the specified character.
+ *
+ * <p>
+ * Note that the comment start character is only recognized at the start of a line.
+ * </p>
+ * <p>
+ * The line-break check runs before the new format is created. Every other setting is copied from this format,
+ * which is left unchanged.
+ * </p>
+ *
+ * @param commentMarker
+ * the comment start marker, use {@code null} to disable
+ * @return A new CSVFormat that is equal to this one but with the specified character as the comment start marker
+ * @throws IllegalArgumentException
+ * thrown if the specified character is a line break
+ */
+ public CSVFormat withCommentMarker(final Character commentMarker) {
+ if (isLineBreak(commentMarker)) {
+ throw new IllegalArgumentException("The comment start marker character cannot be a line break");
+ }
+ return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
+ ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header,
+ skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush,
+ allowDuplicateHeaderNames);
+ }
+
+ /**
+ * Returns a new {@code CSVFormat} with the delimiter of the format set to the specified character.
+ * <p>
+ * The line-break check runs before the new format is created. Every other setting is copied from this format,
+ * which is left unchanged.
+ * </p>
+ *
+ * @param delimiter
+ * the delimiter character
+ * @return A new CSVFormat that is equal to this with the specified character as delimiter
+ * @throws IllegalArgumentException
+ * thrown if the specified character is a line break
+ */
+ public CSVFormat withDelimiter(final char delimiter) {
+ if (isLineBreak(delimiter)) {
+ throw new IllegalArgumentException("The delimiter cannot be a line break");
+ }
+ return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
+ ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header,
+ skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush,
+ allowDuplicateHeaderNames);
+ }
+
+ /**
+ * Returns a new {@code CSVFormat} with the escape character of the format set to the specified character.
+ * <p>
+ * The character is boxed and passed on to {@link #withEscape(Character)}.
+ * </p>
+ *
+ * @param escape
+ * the escape character
+ * @return A new CSVFormat that is equal to this but with the specified character as the escape character
+ * @throws IllegalArgumentException
+ * thrown if the specified character is a line break
+ * @see #withEscape(Character)
+ */
+ public CSVFormat withEscape(final char escape) {
+ return withEscape(Character.valueOf(escape));
+ }
+
+ /**
+ * Returns a new {@code CSVFormat} with the escape character of the format set to the specified character.
+ * <p>
+ * The line-break check runs before the new format is created. Every other setting is copied from this format,
+ * which is left unchanged.
+ * </p>
+ *
+ * @param escape
+ * the escape character, use {@code null} to disable
+ * @return A new CSVFormat that is equal to this but with the specified character as the escape character
+ * @throws IllegalArgumentException
+ * thrown if the specified character is a line break
+ */
+ public CSVFormat withEscape(final Character escape) {
+ if (isLineBreak(escape)) {
+ throw new IllegalArgumentException("The escape character cannot be a line break");
+ }
+ return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escape, ignoreSurroundingSpaces,
+ ignoreEmptyLines, recordSeparator, nullString, headerComments, header, skipHeaderRecord,
+ allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush,
+ allowDuplicateHeaderNames);
+ }
+
+ /**
+ * Returns a new {@code CSVFormat} using the first record as header.
+ *
+ * <p>
+ * Calling this method is equivalent to calling:
+ * </p>
+ *
+ * <pre>
+ * CSVFormat format = aFormat.withHeader().withSkipHeaderRecord();
+ * </pre>
+ *
+ * <p>
+ * {@code withHeader()} is called without names, i.e. with an empty header, which
+ * {@link #withHeader(String...)} documents as "parsed automatically".
+ * </p>
+ *
+ * @return A new CSVFormat that is equal to this but using the first record as header.
+ * @see #withSkipHeaderRecord(boolean)
+ * @see #withHeader(String...)
+ * @since 1.3
+ */
+ public CSVFormat withFirstRecordAsHeader() {
+ return withHeader().withSkipHeaderRecord();
+ }
+
+ /**
+  * Returns a new {@code CSVFormat} whose header is made of the constant names of the given enum class, in
+  * declaration order.
+  *
+  * <p>
+  * Example:
+  * </p>
+  *
+  * <pre>
+  * public enum Header {
+  *     Name, Email, Phone
+  * }
+  *
+  * CSVFormat format = aformat.withHeader(Header.class);
+  * </pre>
+  * <p>
+  * The header is also used by the {@link CSVPrinter}.
+  * </p>
+  *
+  * @param headerEnum
+  *            the enum defining the header, {@code null} if disabled, empty if parsed automatically, user
+  *            specified otherwise.
+  *
+  * @return A new CSVFormat that is equal to this but with the specified header
+  * @see #withHeader(String...)
+  * @see #withSkipHeaderRecord(boolean)
+  * @since 1.3
+  */
+ public CSVFormat withHeader(final Class<? extends Enum<?>> headerEnum) {
+     if (headerEnum == null) {
+         // A null class maps to a null header; the cast selects the String... overload.
+         return withHeader((String[]) null);
+     }
+     final Enum<?>[] constants = headerEnum.getEnumConstants();
+     final String[] names = new String[constants.length];
+     int index = 0;
+     for (final Enum<?> constant : constants) {
+         names[index++] = constant.name();
+     }
+     return withHeader(names);
+ }
+
+ /**
+  * Returns a new {@code CSVFormat} whose header is taken from the metadata of the given result set. The header can
+  * either be parsed automatically from the input file with:
+  *
+  * <pre>
+  * CSVFormat format = aformat.withHeader();
+  * </pre>
+  *
+  * or specified manually with:
+  *
+  * <pre>
+  * CSVFormat format = aformat.withHeader(resultSet);
+  * </pre>
+  * <p>
+  * The header is also used by the {@link CSVPrinter}.
+  * </p>
+  *
+  * @param resultSet
+  *            the resultSet for the header, {@code null} if disabled, empty if parsed automatically, user
+  *            specified otherwise.
+  *
+  * @return A new CSVFormat that is equal to this but with the specified header
+  * @throws SQLException
+  *             SQLException if a database access error occurs or this method is called on a closed result set.
+  * @see #withHeader(ResultSetMetaData)
+  * @since 1.1
+  */
+ public CSVFormat withHeader(final ResultSet resultSet) throws SQLException {
+     if (resultSet == null) {
+         // No result set means no metadata; the cast selects the ResultSetMetaData overload.
+         return withHeader((ResultSetMetaData) null);
+     }
+     return withHeader(resultSet.getMetaData());
+ }
+
+ /**
+  * Returns a new {@code CSVFormat} whose header is made of the column labels of the given result set metadata.
+  * The header can either be parsed automatically from the input file with:
+  *
+  * <pre>
+  * CSVFormat format = aformat.withHeader();
+  * </pre>
+  *
+  * or specified manually with:
+  *
+  * <pre>
+  * CSVFormat format = aformat.withHeader(metaData);
+  * </pre>
+  * <p>
+  * The header is also used by the {@link CSVPrinter}.
+  * </p>
+  *
+  * @param metaData
+  *            the metaData for the header, {@code null} if disabled, empty if parsed automatically, user
+  *            specified otherwise.
+  *
+  * @return A new CSVFormat that is equal to this but with the specified header
+  * @throws SQLException
+  *             SQLException if a database access error occurs or this method is called on a closed result set.
+  * @since 1.1
+  */
+ public CSVFormat withHeader(final ResultSetMetaData metaData) throws SQLException {
+     if (metaData == null) {
+         // No metadata maps to a null header; the cast selects the String... overload.
+         return withHeader((String[]) null);
+     }
+     final int columnCount = metaData.getColumnCount();
+     final String[] labels = new String[columnCount];
+     // JDBC columns are numbered from 1, array slots from 0.
+     for (int column = 1; column <= columnCount; column++) {
+         labels[column - 1] = metaData.getColumnLabel(column);
+     }
+     return withHeader(labels);
+ }
+
+ /**
+ * Returns a new {@code CSVFormat} with the header of the format set to the given values. The header can either be
+ * parsed automatically from the input file with:
+ *
+ * <pre>
+ * CSVFormat format = aformat.withHeader();
+ * </pre>
+ *
+ * or specified manually with:
+ *
+ * <pre>
+ * CSVFormat format = aformat.withHeader("name", "email", "phone");
+ * </pre>
+ * <p>
+ * The header is also used by the {@link CSVPrinter}.
+ * </p>
+ * <p>
+ * Every other setting is copied from this format, which is left unchanged.
+ * </p>
+ *
+ * @param header
+ * the header, {@code null} if disabled, empty if parsed automatically, user specified otherwise.
+ *
+ * @return A new CSVFormat that is equal to this but with the specified header
+ * @see #withSkipHeaderRecord(boolean)
+ */
+ public CSVFormat withHeader(final String... header) {
+ return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
+ ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header,
+ skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush,
+ allowDuplicateHeaderNames);
+ }
+
+ /**
+ * Returns a new {@code CSVFormat} with the header comments of the format set to the given values. The comments will
+ * be printed first, before the headers. This setting is ignored by the parser.
+ *
+ * <pre>
+ * CSVFormat format = aformat.withHeaderComments("Generated by Apache Commons CSV 1.1.", new Date());
+ * </pre>
+ *
+ * <p>
+ * Every other setting is copied from this format, which is left unchanged.
+ * </p>
+ *
+ * @param headerComments
+ * the headerComments which will be printed by the Printer before the actual CSV data.
+ *
+ * @return A new CSVFormat that is equal to this but with the specified header comments
+ * @see #withSkipHeaderRecord(boolean)
+ * @since 1.1
+ */
+ public CSVFormat withHeaderComments(final Object... headerComments) {
+ return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
+ ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header,
+ skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush,
+ allowDuplicateHeaderNames);
+ }
+
+ /**
+ * Returns a new {@code CSVFormat} with the empty line skipping behavior of the format set to {@code true}.
+ * <p>
+ * Equivalent to calling {@code withIgnoreEmptyLines(true)}.
+ * </p>
+ *
+ * @return A new CSVFormat that is equal to this but with the specified empty line skipping behavior.
+ * @see #withIgnoreEmptyLines(boolean)
+ * @since 1.1
+ */
+ public CSVFormat withIgnoreEmptyLines() {
+ return this.withIgnoreEmptyLines(true);
+ }
+
+ /**
+ * Returns a new {@code CSVFormat} with the empty line skipping behavior of the format set to the given value.
+ * <p>
+ * Every other setting is copied from this format, which is left unchanged.
+ * </p>
+ *
+ * @param ignoreEmptyLines
+ * the empty line skipping behavior, {@code true} to ignore the empty lines between the records,
+ * {@code false} to translate empty lines to empty records.
+ * @return A new CSVFormat that is equal to this but with the specified empty line skipping behavior.
+ */
+ public CSVFormat withIgnoreEmptyLines(final boolean ignoreEmptyLines) {
+ return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
+ ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header,
+ skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush,
+ allowDuplicateHeaderNames);
+ }
+
+ /**
+ * Returns a new {@code CSVFormat} with the header ignore case behavior set to {@code true}.
+ * <p>
+ * Equivalent to calling {@code withIgnoreHeaderCase(true)}.
+ * </p>
+ *
+ * @return A new CSVFormat that will ignore the case of header names.
+ * @see #withIgnoreHeaderCase(boolean)
+ * @since 1.3
+ */
+ public CSVFormat withIgnoreHeaderCase() {
+ return this.withIgnoreHeaderCase(true);
+ }
+
+ /**
+ * Returns a new {@code CSVFormat} with whether header names should be accessed ignoring case.
+ * <p>
+ * Every other setting is copied from this format, which is left unchanged.
+ * </p>
+ *
+ * @param ignoreHeaderCase
+ * the case mapping behavior, {@code true} to access name/values ignoring the case of the header name,
+ * {@code false} to leave the mapping as is.
+ * @return A new CSVFormat that will ignore the case of header names if specified as {@code true}
+ * @since 1.3
+ */
+ public CSVFormat withIgnoreHeaderCase(final boolean ignoreHeaderCase) {
+ return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
+ ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header,
+ skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush,
+ allowDuplicateHeaderNames);
+ }
+
+ /**
+ * Returns a new {@code CSVFormat} with the parser trimming behavior of the format set to {@code true}.
+ * <p>
+ * Equivalent to calling {@code withIgnoreSurroundingSpaces(true)}.
+ * </p>
+ *
+ * @return A new CSVFormat that is equal to this but with the specified parser trimming behavior.
+ * @see #withIgnoreSurroundingSpaces(boolean)
+ * @since 1.1
+ */
+ public CSVFormat withIgnoreSurroundingSpaces() {
+ return this.withIgnoreSurroundingSpaces(true);
+ }
+
+ /**
+ * Returns a new {@code CSVFormat} with the parser trimming behavior of the format set to the given value.
+ * <p>
+ * Every other setting is copied from this format, which is left unchanged.
+ * </p>
+ *
+ * @param ignoreSurroundingSpaces the parser trimming behavior, {@code true} to remove the surrounding spaces,
+ * {@code false} to leave the spaces as is.
+ * @return A new CSVFormat that is equal to this but with the specified trimming behavior.
+ */
+ public CSVFormat withIgnoreSurroundingSpaces(final boolean ignoreSurroundingSpaces) {
+ return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
+ ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header,
+ skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush,
+ allowDuplicateHeaderNames);
+ }
+
+ /**
+ * Returns a new {@code CSVFormat} with conversions to and from null for strings on input and output.
+ * <ul>
+ * <li><strong>Reading:</strong> Converts strings equal to the given {@code nullString} to {@code null} when reading
+ * records.</li>
+ * <li><strong>Writing:</strong> Writes {@code null} as the given {@code nullString} when writing records.</li>
+ * </ul>
+ * <p>
+ * Every other setting is copied from this format, which is left unchanged.
+ * </p>
+ *
+ * @param nullString
+ * the String to convert to and from {@code null}. No substitution occurs if {@code null}
+ *
+ * @return A new CSVFormat that is equal to this but with the specified null conversion string.
+ */
+ public CSVFormat withNullString(final String nullString) {
+ return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
+ ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header,
+ skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush,
+ allowDuplicateHeaderNames);
+ }
+
+ /**
+ * Returns a new {@code CSVFormat} with the quoteChar of the format set to the specified character.
+ * <p>
+ * The character is boxed and passed on to {@link #withQuote(Character)}.
+ * </p>
+ *
+ * @param quoteChar
+ * the quoteChar character
+ * @return A new CSVFormat that is equal to this but with the specified character as quoteChar
+ * @throws IllegalArgumentException
+ * thrown if the specified character is a line break
+ * @see #withQuote(Character)
+ */
+ public CSVFormat withQuote(final char quoteChar) {
+ return withQuote(Character.valueOf(quoteChar));
+ }
+
+ /**
+ * Returns a new {@code CSVFormat} with the quoteChar of the format set to the specified character.
+ * <p>
+ * The line-break check runs before the new format is created. Every other setting is copied from this format,
+ * which is left unchanged.
+ * </p>
+ *
+ * @param quoteChar
+ * the quoteChar character, use {@code null} to disable
+ * @return A new CSVFormat that is equal to this but with the specified character as quoteChar
+ * @throws IllegalArgumentException
+ * thrown if the specified character is a line break
+ */
+ public CSVFormat withQuote(final Character quoteChar) {
+ if (isLineBreak(quoteChar)) {
+ throw new IllegalArgumentException("The quoteChar cannot be a line break");
+ }
+ return new CSVFormat(delimiter, quoteChar, quoteMode, commentMarker, escapeCharacter, ignoreSurroundingSpaces,
+ ignoreEmptyLines, recordSeparator, nullString, headerComments, header, skipHeaderRecord,
+ allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush,
+ allowDuplicateHeaderNames);
+ }
+
+ /**
+ * Returns a new {@code CSVFormat} with the output quote policy of the format set to the specified value.
+ * <p>
+ * Every other setting is copied from this format, which is left unchanged.
+ * </p>
+ *
+ * @param quoteModePolicy
+ * the quote policy to use for output.
+ *
+ * @return A new CSVFormat that is equal to this but with the specified quote policy
+ */
+ public CSVFormat withQuoteMode(final QuoteMode quoteModePolicy) {
+ return new CSVFormat(delimiter, quoteCharacter, quoteModePolicy, commentMarker, escapeCharacter,
+ ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header,
+ skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush,
+ allowDuplicateHeaderNames);
+ }
+
+ /**
+ * Returns a new {@code CSVFormat} with the record separator of the format set to the specified character.
+ *
+ * <p>
+ * <strong>Note:</strong> This setting is only used during printing and does not affect parsing. Parsing currently
+ * only works for inputs with '\n', '\r' and "\r\n".
+ * </p>
+ * <p>
+ * The character is converted to a one-character string and passed on to {@link #withRecordSeparator(String)}.
+ * </p>
+ *
+ * @param recordSeparator
+ * the record separator to use for output.
+ *
+ * @return A new CSVFormat that is equal to this but with the specified output record separator
+ * @see #withRecordSeparator(String)
+ */
+ public CSVFormat withRecordSeparator(final char recordSeparator) {
+ return withRecordSeparator(String.valueOf(recordSeparator));
+ }
+
+ /**
+ * Returns a new {@code CSVFormat} with the record separator of the format set to the specified String.
+ *
+ * <p>
+ * <strong>Note:</strong> This setting is only used during printing and does not affect parsing. Parsing currently
+ * only works for inputs with '\n', '\r' and "\r\n".
+ * </p>
+ * <p>
+ * Every other setting is copied from this format, which is left unchanged.
+ * </p>
+ *
+ * @param recordSeparator
+ * the record separator to use for output.
+ *
+ * @return A new CSVFormat that is equal to this but with the specified output record separator
+ * @throws IllegalArgumentException
+ * NOTE(review): documented as thrown if recordSeparator is none of CR, LF or CRLF, but this method
+ * performs no such check itself - confirm whether the constructor enforces it.
+ */
+ public CSVFormat withRecordSeparator(final String recordSeparator) {
+ return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
+ ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header,
+ skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush,
+ allowDuplicateHeaderNames);
+ }
+
+ /**
+ * Returns a new {@code CSVFormat} with skipping the header record set to {@code true}.
+ * <p>
+ * Equivalent to calling {@code withSkipHeaderRecord(true)}.
+ * </p>
+ *
+ * @return A new CSVFormat that is equal to this but with the specified skipHeaderRecord setting.
+ * @see #withSkipHeaderRecord(boolean)
+ * @see #withHeader(String...)
+ * @since 1.1
+ */
+ public CSVFormat withSkipHeaderRecord() {
+ return this.withSkipHeaderRecord(true);
+ }
+
+ /**
+ * Returns a new {@code CSVFormat} with whether to skip the header record.
+ * <p>
+ * Every other setting is copied from this format, which is left unchanged.
+ * </p>
+ *
+ * @param skipHeaderRecord
+ * whether to skip the header record.
+ *
+ * @return A new CSVFormat that is equal to this but with the specified skipHeaderRecord setting.
+ * @see #withHeader(String...)
+ */
+ public CSVFormat withSkipHeaderRecord(final boolean skipHeaderRecord) {
+ return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
+ ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header,
+ skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush,
+ allowDuplicateHeaderNames);
+ }
+
+ /**
+  * Returns a new {@code CSVFormat} with the record separator of the format set to the operating system's line
+  * separator string, typically CR+LF on Windows and LF on Linux.
+  *
+  * <p>
+  * <strong>Note:</strong> This setting is only used during printing and does not affect parsing. Parsing currently
+  * only works for inputs with '\n', '\r' and "\r\n".
+  * </p>
+  * <p>
+  * The separator is obtained from {@link System#lineSeparator()}, which never returns {@code null}, unlike a lookup
+  * of the {@code line.separator} system property.
+  * </p>
+  *
+  * @return A new CSVFormat that is equal to this but with the operating system's line separator string.
+  * @see #withRecordSeparator(String)
+  * @since 1.6
+  */
+ public CSVFormat withSystemRecordSeparator() {
+     return withRecordSeparator(System.lineSeparator());
+ }
+
+ /**
+ * Returns a new {@code CSVFormat} to add a trailing delimiter.
+ * <p>
+ * Equivalent to calling {@code withTrailingDelimiter(true)}.
+ * </p>
+ *
+ * @return A new CSVFormat that is equal to this but with the trailing delimiter setting.
+ * @see #withTrailingDelimiter(boolean)
+ * @since 1.3
+ */
+ public CSVFormat withTrailingDelimiter() {
+ return withTrailingDelimiter(true);
+ }
+
+ /**
+ * Returns a new {@code CSVFormat} with whether to add a trailing delimiter.
+ * <p>
+ * Every other setting is copied from this format, which is left unchanged.
+ * </p>
+ *
+ * @param trailingDelimiter
+ * whether to add a trailing delimiter.
+ *
+ * @return A new CSVFormat that is equal to this but with the specified trailing delimiter setting.
+ * @since 1.3
+ */
+ public CSVFormat withTrailingDelimiter(final boolean trailingDelimiter) {
+ return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
+ ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header,
+ skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush,
+ allowDuplicateHeaderNames);
+ }
+
+ /**
+ * Returns a new {@code CSVFormat} to trim leading and trailing blanks.
+ * See {@link #getTrim()} for details of where this is used.
+ * <p>
+ * Equivalent to calling {@code withTrim(true)}.
+ * </p>
+ *
+ * @return A new CSVFormat that is equal to this but with the trim setting on.
+ * @see #withTrim(boolean)
+ * @since 1.3
+ */
+ public CSVFormat withTrim() {
+ return withTrim(true);
+ }
+
+ /**
+ * Returns a new {@code CSVFormat} with whether to trim leading and trailing blanks.
+ * See {@link #getTrim()} for details of where this is used.
+ * <p>
+ * Every other setting is copied from this format, which is left unchanged.
+ * </p>
+ *
+ * @param trim
+ * whether to trim leading and trailing blanks.
+ *
+ * @return A new CSVFormat that is equal to this but with the specified trim setting.
+ * @since 1.3
+ */
+ public CSVFormat withTrim(final boolean trim) {
+ return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
+ ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header,
+ skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush,
+ allowDuplicateHeaderNames);
+ }
+}
diff --git a/src/test/resources/org/apache/commons/csv/CSVParser.java b/src/test/resources/org/apache/commons/csv/CSVParser.java
new file mode 100644
index 0000000..bf6eb6d
--- /dev/null
+++ b/src/test/resources/org/apache/commons/csv/CSVParser.java
@@ -0,0 +1,715 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.commons.csv;
+
+import static org.apache.commons.csv.Token.Type.TOKEN;
+
+import java.io.Closeable;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.io.Reader;
+import java.io.StringReader;
+import java.net.URL;
+import java.nio.charset.Charset;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.Iterator;
+import java.util.LinkedHashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.NoSuchElementException;
+import java.util.Objects;
+import java.util.TreeMap;
+
+/**
+ * Parses CSV files according to the specified format.
+ *
+ * Because CSV appears in many different dialects, the parser supports many formats by allowing the
+ * specification of a {@link CSVFormat}.
+ *
+ * The parser works record wise. It is not possible to go back, once a record has been parsed from the input stream.
+ *
+ * <h2>Creating instances</h2>
+ * <p>
+ * There are several static factory methods that can be used to create instances for various types of resources:
+ * </p>
+ * <ul>
+ * <li>{@link #parse(java.io.File, Charset, CSVFormat)}</li>
+ * <li>{@link #parse(String, CSVFormat)}</li>
+ * <li>{@link #parse(java.net.URL, java.nio.charset.Charset, CSVFormat)}</li>
+ * </ul>
+ * <p>
+ * Alternatively parsers can also be created by passing a {@link Reader} directly to one of the constructors.
+ *
+ * For those who like fluent APIs, parsers can be created using {@link CSVFormat#parse(java.io.Reader)} as a shortcut:
+ * </p>
+ * <pre>
+ * for(CSVRecord record : CSVFormat.EXCEL.parse(in)) {
+ * ...
+ * }
+ * </pre>
+ *
+ * <h2>Parsing record wise</h2>
+ * <p>
+ * To parse a CSV input from a file, you write:
+ * </p>
+ *
+ * <pre>
+ * File csvData = new File("/path/to/csv");
+ * CSVParser parser = CSVParser.parse(csvData, StandardCharsets.UTF_8, CSVFormat.RFC4180);
+ * for (CSVRecord csvRecord : parser) {
+ * ...
+ * }
+ * </pre>
+ *
+ * <p>
+ * This will read and parse the contents of the file using the
+ * <a href="http://tools.ietf.org/html/rfc4180" target="_blank">RFC 4180</a> format.
+ * </p>
+ *
+ * <p>
+ * To parse CSV input in a format like Excel, you write:
+ * </p>
+ *
+ * <pre>
+ * CSVParser parser = CSVParser.parse(csvData, StandardCharsets.UTF_8, CSVFormat.EXCEL);
+ * for (CSVRecord csvRecord : parser) {
+ * ...
+ * }
+ * </pre>
+ *
+ * <p>
+ * If the predefined formats don't match the format at hand, custom formats can be defined. More information about
+ * customising CSVFormats is available in {@link CSVFormat CSVFormat Javadoc}.
+ * </p>
+ *
+ * <h2>Parsing into memory</h2>
+ * <p>
+ * If parsing record wise is not desired, the contents of the input can be read completely into memory.
+ * </p>
+ *
+ * <pre>
+ * Reader in = new StringReader("a;b\nc;d");
+ * CSVParser parser = new CSVParser(in, CSVFormat.EXCEL);
+ * List&lt;CSVRecord&gt; list = parser.getRecords();
+ * </pre>
+ *
+ * <p>
+ * There are two constraints that have to be kept in mind:
+ * </p>
+ *
+ * <ol>
+ * <li>Parsing into memory starts at the current position of the parser. If you have already parsed records from
+ * the input, those records will not end up in the in memory representation of your CSV data.</li>
+ * <li>Parsing into memory may consume a lot of system resources depending on the input. For example if you're
+ * parsing a 150MB file of CSV data the contents will be read completely into memory.</li>
+ * </ol>
+ *
+ * <h2>Notes</h2>
+ * <p>
+ * Internal parser state is completely covered by the format and the reader-state.
+ * </p>
+ *
+ * @see <a href="package-summary.html">package documentation for more details</a>
+ */
+public final class CSVParser implements Iterable<CSVRecord>, Closeable {
+
+ class CSVRecordIterator implements Iterator<CSVRecord> {
+ private CSVRecord current;
+
+ private CSVRecord getNextRecord() {
+ try {
+ return CSVParser.this.nextRecord();
+ } catch (final IOException e) {
+ throw new IllegalStateException(
+ e.getClass().getSimpleName() + " reading next record: " + e.toString(), e);
+ }
+ }
+
+ @Override
+ public boolean hasNext() {
+ if (CSVParser.this.isClosed()) {
+ return false;
+ }
+ if (this.current == null) {
+ this.current = this.getNextRecord();
+ }
+
+ return this.current != null;
+ }
+
+ @Override
+ public CSVRecord next() {
+ if (CSVParser.this.isClosed()) {
+ throw new NoSuchElementException("CSVParser has been closed");
+ }
+ CSVRecord next = this.current;
+ this.current = null;
+
+ if (next == null) {
+ // hasNext() wasn't called before
+ next = this.getNextRecord();
+ if (next == null) {
+ throw new NoSuchElementException("No more CSV records available");
+ }
+ }
+
+ return next;
+ }
+
+ @Override
+ public void remove() {
+ throw new UnsupportedOperationException();
+ }
+ }
+
+ /**
+ * Header information based on name and position.
+ */
+ private static final class Headers {
+ /**
+ * Header column positions (0-based)
+ */
+ final Map<String, Integer> headerMap;
+
+ /**
+ * Header names in column order
+ */
+ final List<String> headerNames;
+
+ Headers(final Map<String, Integer> headerMap, final List<String> headerNames) {
+ this.headerMap = headerMap;
+ this.headerNames = headerNames;
+ }
+ }
+
+ /**
+ * Creates a parser for the given {@link File}.
+ *
+ * @param file
+ * a CSV file. Must not be null.
+ * @param charset
+ * The Charset to decode the given file.
+ * @param format
+ * the CSVFormat used for CSV parsing. Must not be null.
+ * @return a new parser
+ * @throws IllegalArgumentException
+ * If the parameters of the format are inconsistent or if either file or format are null.
+ * @throws IOException
+ * If an I/O error occurs
+ */
+ @SuppressWarnings("resource")
+ public static CSVParser parse(final File file, final Charset charset, final CSVFormat format) throws IOException {
+ Objects.requireNonNull(file, "file");
+ Objects.requireNonNull(format, "format");
+ return new CSVParser(new InputStreamReader(new FileInputStream(file), charset), format);
+ }
+
+ /**
+ * Creates a CSV parser using the given {@link CSVFormat}.
+ *
+ * <p>
+ * If you do not read all records from the given {@code inputStream}, you should call {@link #close()} on the
+ * parser, unless you close the {@code inputStream}.
+ * </p>
+ *
+ * @param inputStream
+ * an InputStream containing CSV-formatted input. Must not be null.
+ * @param charset
+ * The Charset to decode the given input stream.
+ * @param format
+ * the CSVFormat used for CSV parsing. Must not be null.
+ * @return a new CSVParser configured with the given reader and format.
+ * @throws IllegalArgumentException
+ * If the parameters of the format are inconsistent or if either inputStream or format are null.
+ * @throws IOException
+ * If there is a problem reading the header or skipping the first record
+ * @since 1.5
+ */
+ @SuppressWarnings("resource")
+ public static CSVParser parse(final InputStream inputStream, final Charset charset, final CSVFormat format)
+ throws IOException {
+ Objects.requireNonNull(inputStream, "inputStream");
+ Objects.requireNonNull(format, "format");
+ return parse(new InputStreamReader(inputStream, charset), format);
+ }
+
+ /**
+ * Creates and returns a parser for the given {@link Path}, which the caller MUST close.
+ *
+ * @param path
+ * a CSV file. Must not be null.
+ * @param charset
+ * The Charset to decode the given file.
+ * @param format
+ * the CSVFormat used for CSV parsing. Must not be null.
+ * @return a new parser
+ * @throws IllegalArgumentException
+ * If the parameters of the format are inconsistent or if either path or format are null.
+ * @throws IOException
+ * If an I/O error occurs
+ * @since 1.5
+ */
+ @SuppressWarnings("resource")
+ public static CSVParser parse(final Path path, final Charset charset, final CSVFormat format) throws IOException {
+ Objects.requireNonNull(path, "path");
+ Objects.requireNonNull(format, "format");
+ return parse(Files.newInputStream(path), charset, format);
+ }
+
+ /**
+ * Creates a CSV parser using the given {@link CSVFormat}
+ *
+ * <p>
+ * If you do not read all records from the given {@code reader}, you should call {@link #close()} on the parser,
+ * unless you close the {@code reader}.
+ * </p>
+ *
+ * @param reader
+ * a Reader containing CSV-formatted input. Must not be null.
+ * @param format
+ * the CSVFormat used for CSV parsing. Must not be null.
+ * @return a new CSVParser configured with the given reader and format.
+ * @throws IllegalArgumentException
+ * If the parameters of the format are inconsistent or if either reader or format are null.
+ * @throws IOException
+ * If there is a problem reading the header or skipping the first record
+ * @since 1.5
+ */
+ public static CSVParser parse(final Reader reader, final CSVFormat format) throws IOException {
+ return new CSVParser(reader, format);
+ }
+
+ // the following objects are shared to reduce garbage
+
+ /**
+ * Creates a parser for the given {@link String}.
+ *
+ * @param string
+ * a CSV string. Must not be null.
+ * @param format
+ * the CSVFormat used for CSV parsing. Must not be null.
+ * @return a new parser
+ * @throws IllegalArgumentException
+ * If the parameters of the format are inconsistent or if either string or format are null.
+ * @throws IOException
+ * If an I/O error occurs
+ */
+ public static CSVParser parse(final String string, final CSVFormat format) throws IOException {
+ Objects.requireNonNull(string, "string");
+ Objects.requireNonNull(format, "format");
+
+ return new CSVParser(new StringReader(string), format);
+ }
+
+ /**
+ * Creates and returns a parser for the given URL, which the caller MUST close.
+ *
+ * <p>
+ * If you do not read all records from the given {@code url}, you should call {@link #close()} on the parser, unless
+ * you close the {@code url}.
+ * </p>
+ *
+ * @param url
+ * a URL. Must not be null.
+ * @param charset
+ * the charset for the resource. Must not be null.
+ * @param format
+ * the CSVFormat used for CSV parsing. Must not be null.
+ * @return a new parser
+ * @throws IllegalArgumentException
+ * If the parameters of the format are inconsistent or if either url, charset or format are null.
+ * @throws IOException
+ * If an I/O error occurs
+ */
+ @SuppressWarnings("resource")
+ public static CSVParser parse(final URL url, final Charset charset, final CSVFormat format) throws IOException {
+ Objects.requireNonNull(url, "url");
+ Objects.requireNonNull(charset, "charset");
+ Objects.requireNonNull(format, "format");
+
+ return new CSVParser(new InputStreamReader(url.openStream(), charset), format);
+ }
+
+ private final CSVFormat format;
+
+ /** A mapping of column names to column indices */
+ private final Map<String, Integer> headerMap;
+
+ /** The column order to avoid re-computing it. */
+ private final List<String> headerNames;
+
+ private final Lexer lexer;
+
+ private final CSVRecordIterator csvRecordIterator;
+
+ /** A record buffer for nextRecord(). Grows as necessary and is reused. */
+ private final List<String> recordList = new ArrayList<>();
+
+ /**
+ * The next record number to assign.
+ */
+ private long recordNumber;
+
+ /**
+ * Lexer offset when the parser does not start parsing at the beginning of the source. Usually used in combination
+ * with {@link #recordNumber}.
+ */
+ private final long characterOffset;
+
+ private final Token reusableToken = new Token();
+
+ /**
+ * Customized CSV parser using the given {@link CSVFormat}
+ *
+ * <p>
+ * If you do not read all records from the given {@code reader}, you should call {@link #close()} on the parser,
+ * unless you close the {@code reader}.
+ * </p>
+ *
+ * @param reader
+ * a Reader containing CSV-formatted input. Must not be null.
+ * @param format
+ * the CSVFormat used for CSV parsing. Must not be null.
+ * @throws IllegalArgumentException
+ * If the parameters of the format are inconsistent or if either reader or format are null.
+ * @throws IOException
+ * If there is a problem reading the header or skipping the first record
+ */
+ public CSVParser(final Reader reader, final CSVFormat format) throws IOException {
+ this(reader, format, 0, 1);
+ }
+
+ /**
+ * Customized CSV parser using the given {@link CSVFormat}
+ *
+ * <p>
+ * If you do not read all records from the given {@code reader}, you should call {@link #close()} on the parser,
+ * unless you close the {@code reader}.
+ * </p>
+ *
+ * @param reader
+ * a Reader containing CSV-formatted input. Must not be null.
+ * @param format
+ * the CSVFormat used for CSV parsing. Must not be null.
+ * @param characterOffset
+ * Lexer offset when the parser does not start parsing at the beginning of the source.
+ * @param recordNumber
+ * The next record number to assign
+ * @throws IllegalArgumentException
+ * If the parameters of the format are inconsistent or if either reader or format are null.
+ * @throws IOException
+ * If there is a problem reading the header or skipping the first record
+ * @since 1.1
+ */
+ @SuppressWarnings("resource")
+ public CSVParser(final Reader reader, final CSVFormat format, final long characterOffset, final long recordNumber)
+ throws IOException {
+ Objects.requireNonNull(reader, "reader");
+ Objects.requireNonNull(format, "format");
+
+ this.format = format;
+ this.lexer = new Lexer(format, new ExtendedBufferedReader(reader));
+ this.csvRecordIterator = new CSVRecordIterator();
+ final Headers headers = createHeaders();
+ this.headerMap = headers.headerMap;
+ this.headerNames = headers.headerNames;
+ this.characterOffset = characterOffset;
+ this.recordNumber = recordNumber - 1;
+ }
+
+ private void addRecordValue(final boolean lastRecord) {
+ final String input = this.reusableToken.content.toString();
+ final String inputClean = this.format.getTrim() ? input.trim() : input;
+ if (lastRecord && inputClean.isEmpty() && this.format.getTrailingDelimiter()) {
+ return;
+ }
+ final String nullString = this.format.getNullString();
+ this.recordList.add(inputClean.equals(nullString) ? null : inputClean);
+ }
+
+ /**
+ * Closes resources.
+ *
+ * @throws IOException
+ * If an I/O error occurs
+ */
+ @Override
+ public void close() throws IOException {
+ if (this.lexer != null) {
+ this.lexer.close();
+ }
+ }
+
+ private Map<String, Integer> createEmptyHeaderMap() {
+ return this.format.getIgnoreHeaderCase() ?
+ new TreeMap<>(String.CASE_INSENSITIVE_ORDER) :
+ new LinkedHashMap<>();
+ }
+
+ /**
+ * Creates the name to index mapping if the format defines a header.
+ *
+ * @return the headers; the contained header map is {@code null} if the format has no header.
+ * @throws IOException if there is a problem reading the header or skipping the first record
+ */
+ private Headers createHeaders() throws IOException {
+ Map<String, Integer> hdrMap = null;
+ List<String> headerNames = null;
+ final String[] formatHeader = this.format.getHeader();
+ if (formatHeader != null) {
+ hdrMap = createEmptyHeaderMap();
+ String[] headerRecord = null;
+ if (formatHeader.length == 0) {
+ // read the header from the first line of the file
+ final CSVRecord nextRecord = this.nextRecord();
+ if (nextRecord != null) {
+ headerRecord = nextRecord.values();
+ }
+ } else {
+ if (this.format.getSkipHeaderRecord()) {
+ this.nextRecord();
+ }
+ headerRecord = formatHeader;
+ }
+
+ // build the name to index mappings
+ if (headerRecord != null) {
+ for (int i = 0; i < headerRecord.length; i++) {
+ final String header = headerRecord[i];
+ final boolean emptyHeader = header == null || header.trim().isEmpty();
+ if (emptyHeader && !this.format.getAllowMissingColumnNames()) {
+ throw new IllegalArgumentException(
+ "A header name is missing in " + Arrays.toString(headerRecord));
+ }
+ // Note: This will always allow a duplicate header if the header is empty
+ final boolean containsHeader = header != null && hdrMap.containsKey(header);
+ if (containsHeader && !emptyHeader && !this.format.getAllowDuplicateHeaderNames()) {
+ throw new IllegalArgumentException(
+ String.format(
+ "The header contains a duplicate name: \"%s\" in %s. If this is valid then use CSVFormat.withAllowDuplicateHeaderNames().",
+ header, Arrays.toString(headerRecord)));
+ }
+ if (header != null) {
+ hdrMap.put(header, Integer.valueOf(i));
+ if (headerNames == null) {
+ headerNames = new ArrayList<>(headerRecord.length);
+ }
+ headerNames.add(header);
+ }
+ }
+ }
+ }
+ if (headerNames == null) {
+ headerNames = Collections.emptyList(); //immutable
+ } else {
+ headerNames = Collections.unmodifiableList(headerNames);
+ }
+ return new Headers(hdrMap, headerNames);
+ }
+
+ /**
+ * Returns the current line number in the input stream.
+ *
+ * <p>
+ * <strong>ATTENTION:</strong> If your CSV input has multi-line values, the returned number does not correspond to
+ * the record number.
+ * </p>
+ *
+ * @return current line number
+ */
+ public long getCurrentLineNumber() {
+ return this.lexer.getCurrentLineNumber();
+ }
+
+ /**
+ * Gets the first end-of-line string encountered.
+ *
+ * @return the first end-of-line string
+ * @since 1.5
+ */
+ public String getFirstEndOfLine() {
+ return lexer.getFirstEol();
+ }
+
+ /**
+ * Returns a copy of the header map.
+ * <p>
+ * The map keys are column names. The map values are 0-based indices.
+ * </p>
+ * <p>
+ * Note: The map can only provide a one-to-one mapping when the format did not
+ * contain null or duplicate column names.
+ * </p>
+ *
+ * @return a copy of the header map, or {@code null} if the format defines no header.
+ */
+ public Map<String, Integer> getHeaderMap() {
+ if (this.headerMap == null) {
+ return null;
+ }
+ final Map<String, Integer> map = createEmptyHeaderMap();
+ map.putAll(this.headerMap);
+ return map;
+ }
+
+ /**
+ * Returns the header map.
+ *
+ * @return the header map.
+ */
+ Map<String, Integer> getHeaderMapRaw() {
+ return this.headerMap;
+ }
+
+ /**
+ * Returns a read-only list of header names that iterates in column order.
+ * <p>
+ * Note: The list provides strings that can be used as keys in the header map.
+ * The list will not contain null column names if they were present in the input
+ * format.
+ * </p>
+ *
+ * @return read-only list of header names that iterates in column order.
+ * @see #getHeaderMap()
+ * @since 1.7
+ */
+ public List<String> getHeaderNames() {
+ return headerNames;
+ }
+
+ /**
+ * Returns the current record number in the input stream.
+ *
+ * <p>
+ * <strong>ATTENTION:</strong> If your CSV input has multi-line values, the returned number does not correspond to
+ * the line number.
+ * </p>
+ *
+ * @return current record number
+ */
+ public long getRecordNumber() {
+ return this.recordNumber;
+ }
+
+ /**
+ * Parses the CSV input according to the given format and returns the content as a list of
+ * {@link CSVRecord CSVRecords}.
+ *
+ * <p>
+ * The returned content starts at the current parse-position in the stream.
+ * </p>
+ *
+ * @return list of {@link CSVRecord CSVRecords}, may be empty
+ * @throws IOException
+ * on parse error or input read-failure
+ */
+ public List<CSVRecord> getRecords() throws IOException {
+ CSVRecord rec;
+ final List<CSVRecord> records = new ArrayList<>();
+ while ((rec = this.nextRecord()) != null) {
+ records.add(rec);
+ }
+ return records;
+ }
+
+ /**
+ * Gets whether this parser is closed.
+ *
+ * @return whether this parser is closed.
+ */
+ public boolean isClosed() {
+ return this.lexer.isClosed();
+ }
+
+ /**
+ * Returns an iterator on the records.
+ *
+ * <p>
+ * An {@link IOException} caught during the iteration is re-thrown as an
+ * {@link IllegalStateException}.
+ * </p>
+ * <p>
+ * If the parser is closed a call to {@link Iterator#next()} will throw a
+ * {@link NoSuchElementException}.
+ * </p>
+ */
+ @Override
+ public Iterator<CSVRecord> iterator() {
+ return csvRecordIterator;
+ }
+
+ /**
+ * Parses the next record from the current point in the stream.
+ *
+ * @return the next record, or {@code null} if the end of the stream has been reached
+ * @throws IOException
+ * on parse error or input read-failure
+ */
+ CSVRecord nextRecord() throws IOException {
+ CSVRecord result = null;
+ this.recordList.clear();
+ StringBuilder sb = null;
+ final long startCharPosition = lexer.getCharacterPosition() + this.characterOffset;
+ do {
+ this.reusableToken.reset();
+ this.lexer.nextToken(this.reusableToken);
+ switch (this.reusableToken.type) {
+ case TOKEN:
+ this.addRecordValue(false);
+ break;
+ case EORECORD:
+ this.addRecordValue(true);
+ break;
+ case EOF:
+ if (this.reusableToken.isReady) {
+ this.addRecordValue(true);
+ }
+ break;
+ case INVALID:
+ throw new IOException("(line " + this.getCurrentLineNumber() + ") invalid parse sequence");
+ case COMMENT: // Ignored currently
+ if (sb == null) { // first comment for this record
+ sb = new StringBuilder();
+ } else {
+ sb.append(Constants.LF);
+ }
+ sb.append(this.reusableToken.content);
+ this.reusableToken.type = TOKEN; // Read another token
+ break;
+ default:
+ throw new IllegalStateException("Unexpected Token type: " + this.reusableToken.type);
+ }
+ } while (this.reusableToken.type == TOKEN);
+
+ if (!this.recordList.isEmpty()) {
+ this.recordNumber++;
+ final String comment = sb == null ? null : sb.toString();
+ result = new CSVRecord(this, this.recordList.toArray(new String[this.recordList.size()]),
+ comment, this.recordNumber, startCharPosition);
+ }
+ return result;
+ }
+
+}
diff --git a/src/test/resources/org/apache/commons/csv/CSVPrinter.java b/src/test/resources/org/apache/commons/csv/CSVPrinter.java
new file mode 100644
index 0000000..a0cc612
--- /dev/null
+++ b/src/test/resources/org/apache/commons/csv/CSVPrinter.java
@@ -0,0 +1,391 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.commons.csv;
+
+import static org.apache.commons.csv.Constants.CR;
+import static org.apache.commons.csv.Constants.LF;
+import static org.apache.commons.csv.Constants.SP;
+
+import java.io.Closeable;
+import java.io.Flushable;
+import java.io.IOException;
+import java.sql.Clob;
+import java.sql.ResultSet;
+import java.sql.SQLException;
+import java.util.Arrays;
+import java.util.Objects;
+
+/**
+ * Prints values in a {@link CSVFormat CSV format}.
+ *
+ * <p>Values can be appended to the output by calling the {@link #print(Object)} method.
+ * Values are printed according to {@link String#valueOf(Object)}.
+ * To complete a record the {@link #println()} method has to be called.
+ * Comments can be appended by calling {@link #printComment(String)}.
+ * However a comment will only be written to the output if the {@link CSVFormat} supports comments.
+ * </p>
+ *
+ * <p>The printer also supports appending a complete record at once by calling {@link #printRecord(Object...)}
+ * or {@link #printRecord(Iterable)}.
+ * Furthermore {@link #printRecords(Object...)}, {@link #printRecords(Iterable)} and {@link #printRecords(ResultSet)}
+ * methods can be used to print several records at once.
+ * </p>
+ *
+ * <p>Example:</p>
+ *
+ * <pre>
+ * try (CSVPrinter printer = new CSVPrinter(new FileWriter("csv.txt"), CSVFormat.EXCEL)) {
+ * printer.printRecord("id", "userName", "firstName", "lastName", "birthday");
+ * printer.printRecord(1, "john73", "John", "Doe", LocalDate.of(1973, 9, 15));
+ * printer.println();
+ * printer.printRecord(2, "mary", "Mary", "Meyer", LocalDate.of(1985, 3, 29));
+ * } catch (IOException ex) {
+ * ex.printStackTrace();
+ * }
+ * </pre>
+ *
+ * <p>This code will write the following to csv.txt:</p>
+ * <pre>
+ * id,userName,firstName,lastName,birthday
+ * 1,john73,John,Doe,1973-09-15
+ *
+ * 2,mary,Mary,Meyer,1985-03-29
+ * </pre>
+ */
+public final class CSVPrinter implements Flushable, Closeable {
+
+ /** The place that the values get written. */
+ private final Appendable out;
+ private final CSVFormat format;
+
+ /** True if we just began a new record. */
+ private boolean newRecord = true;
+
+ /**
+ * Creates a printer that will print values to the given stream following the CSVFormat.
+ * <p>
+ * Currently, only a pure encapsulation format or a pure escaping format is supported. Hybrid formats (encapsulation
+ * and escaping with a different character) are not supported.
+ * </p>
+ *
+ * @param out
+ * stream to which to print. Must not be null.
+ * @param format
+ * the CSV format. Must not be null.
+ * @throws IOException
+ * thrown if the optional header cannot be printed.
+ * @throws IllegalArgumentException
+ * thrown if the parameters of the format are inconsistent or if either out or format are null.
+ */
+ public CSVPrinter(final Appendable out, final CSVFormat format) throws IOException {
+ Objects.requireNonNull(out, "out");
+ Objects.requireNonNull(format, "format");
+
+ this.out = out;
+ this.format = format;
+ // TODO: Is it a good idea to do this here instead of on the first call to a print method?
+ // It seems a pain to have to track whether the header has already been printed or not.
+ if (format.getHeaderComments() != null) {
+ for (final String line : format.getHeaderComments()) {
+ if (line != null) {
+ this.printComment(line);
+ }
+ }
+ }
+ if (format.getHeader() != null && !format.getSkipHeaderRecord()) {
+ this.printRecord((Object[]) format.getHeader());
+ }
+ }
+
+ // ======================================================
+ // printing implementation
+ // ======================================================
+
+ @Override
+ public void close() throws IOException {
+ close(false);
+ }
+
+ /**
+ * Closes the underlying stream with an optional flush first.
+ * @param flush whether to flush before the actual close.
+ *
+ * @throws IOException
+ * If an I/O error occurs
+ * @since 1.6
+ */
+ public void close(final boolean flush) throws IOException {
+ if (flush || format.getAutoFlush()) {
+ flush();
+ }
+ if (out instanceof Closeable) {
+ ((Closeable) out).close();
+ }
+ }
+
+ /**
+ * Flushes the underlying stream.
+ *
+ * @throws IOException
+ * If an I/O error occurs
+ */
+ @Override
+ public void flush() throws IOException {
+ if (out instanceof Flushable) {
+ ((Flushable) out).flush();
+ }
+ }
+
+ /**
+ * Gets the target Appendable.
+ *
+ * @return the target Appendable.
+ */
+ public Appendable getOut() {
+ return this.out;
+ }
+
+ /**
+ * Prints the string as the next value on the line. The value will be escaped or encapsulated as needed.
+ *
+ * @param value
+ * value to be output.
+ * @throws IOException
+ * If an I/O error occurs
+ */
+ public void print(final Object value) throws IOException {
+ format.print(value, out, newRecord);
+ newRecord = false;
+ }
+
+ /**
+ * Prints a comment on a new line among the delimiter separated values.
+ *
+ * <p>
+ * Comments will always begin on a new line and occupy at least one full line. The character specified to start
+ * comments and a space will be inserted at the beginning of each new line in the comment.
+ * </p>
+ *
+ * <p>
+ * If comments are disabled in the current CSV format this method does nothing.
+ * </p>
+ *
+ * <p>This method detects line breaks inside the comment string and inserts {@link CSVFormat#getRecordSeparator()}
+ * to start a new line of the comment. Note that this might produce unexpected results for formats that do not use
+ * line breaks as record separator.</p>
+ *
+ * @param comment
+ * the comment to output
+ * @throws IOException
+ * If an I/O error occurs
+ */
+ public void printComment(final String comment) throws IOException {
+ if (!format.isCommentMarkerSet()) {
+ return;
+ }
+ if (!newRecord) {
+ println();
+ }
+ out.append(format.getCommentMarker().charValue());
+ out.append(SP);
+ for (int i = 0; i < comment.length(); i++) {
+ final char c = comment.charAt(i);
+ switch (c) {
+ case CR:
+ if (i + 1 < comment.length() && comment.charAt(i + 1) == LF) {
+ i++;
+ }
+ //$FALL-THROUGH$ break intentionally excluded.
+ case LF:
+ println();
+ out.append(format.getCommentMarker().charValue());
+ out.append(SP);
+ break;
+ default:
+ out.append(c);
+ break;
+ }
+ }
+ println();
+ }
+
+ /**
+ * Outputs the record separator.
+ *
+ * @throws IOException
+ * If an I/O error occurs
+ */
+ public void println() throws IOException {
+ format.println(out);
+ newRecord = true;
+ }
+
+ /**
+ * Prints the given values as a single record of delimiter separated values followed by the record separator.
+ *
+ * <p>
+ * The values will be quoted if needed. Quotes and newLine characters will be escaped. This method adds the record
+ * separator to the output after printing the record, so there is no need to call {@link #println()}.
+ * </p>
+ *
+ * @param values
+ * values to output.
+ * @throws IOException
+ * If an I/O error occurs
+ */
+ public void printRecord(final Iterable<?> values) throws IOException {
+ for (final Object value : values) {
+ print(value);
+ }
+ println();
+ }
+
+ /**
+ * Prints the given values as a single record of delimiter separated values followed by the record separator.
+ *
+ * <p>
+ * The values will be quoted if needed. Quotes and newLine characters will be escaped. This method adds the record
+ * separator to the output after printing the record, so there is no need to call {@link #println()}.
+ * </p>
+ *
+ * @param values
+ * values to output.
+ * @throws IOException
+ * If an I/O error occurs
+ */
+ public void printRecord(final Object... values) throws IOException {
+ format.printRecord(out, values);
+ newRecord = true;
+ }
+
+ /**
+ * Prints all the objects in the given collection handling nested collections/arrays as records.
+ *
+ * <p>
+ * If the given collection only contains simple objects, this method will print a single record like
+ * {@link #printRecord(Iterable)}. If the given collection contains nested collections/arrays those nested elements
+ * will each be printed as records using {@link #printRecord(Object...)}.
+ * </p>
+ *
+ * <p>
+ * Given the following data structure:
+ * </p>
+ *
+ * <pre>
+ * <code>
+ * List&lt;String[]&gt; data = ...
+ * data.add(new String[]{ "A", "B", "C" });
+ * data.add(new String[]{ "1", "2", "3" });
+ * data.add(new String[]{ "A1", "B2", "C3" });
+ * </code>
+ * </pre>
+ *
+ * <p>
+ * Calling this method will print:
+ * </p>
+ *
+ * <pre>
+ * <code>
+ * A, B, C
+ * 1, 2, 3
+ * A1, B2, C3
+ * </code>
+ * </pre>
+ *
+ * @param values
+ * the values to print.
+ * @throws IOException
+ * If an I/O error occurs
+ */
+ public void printRecords(final Iterable<?> values) throws IOException {
+ for (final Object value : values) {
+ if (value instanceof Object[]) {
+ this.printRecord((Object[]) value);
+ } else if (value instanceof Iterable) {
+ this.printRecord((Iterable<?>) value);
+ } else {
+ this.printRecord(value);
+ }
+ }
+ }
+
+ /**
+ * Prints all the objects in the given array handling nested collections/arrays as records.
+ *
+ * <p>
+ * If the given array only contains simple objects, this method will print a single record like
+ * {@link #printRecord(Object...)}. If the given array contains nested collections/arrays those nested
+ * elements will each be printed as records using {@link #printRecord(Object...)}.
+ * </p>
+ *
+ * <p>
+ * Given the following data structure:
+ * </p>
+ *
+ * <pre>
+ * <code>
+ * String[][] data = new String[3][];
+ * data[0] = new String[]{ "A", "B", "C" };
+ * data[1] = new String[]{ "1", "2", "3" };
+ * data[2] = new String[]{ "A1", "B2", "C3" };
+ * </code>
+ * </pre>
+ *
+ * <p>
+ * Calling this method will print:
+ * </p>
+ *
+ * <pre>
+ * <code>
+ * A, B, C
+ * 1, 2, 3
+ * A1, B2, C3
+ * </code>
+ * </pre>
+ *
+ * @param values
+ * the values to print.
+ * @throws IOException
+ * If an I/O error occurs
+ */
+ public void printRecords(final Object... values) throws IOException {
+ printRecords(Arrays.asList(values));
+ }
+
+ /**
+ * Prints all the objects in the given JDBC result set.
+ *
+ * @param resultSet
+ * the result set containing the values to print.
+ * @throws IOException
+ * If an I/O error occurs
+ * @throws SQLException
+ * if a database access error occurs
+ */
+ public void printRecords(final ResultSet resultSet) throws SQLException, IOException {
+ final int columnCount = resultSet.getMetaData().getColumnCount();
+ while (resultSet.next()) {
+ for (int i = 1; i <= columnCount; i++) {
+ final Object object = resultSet.getObject(i);
+ // TODO Who manages the Clob? The JDBC driver or must we close it? Is it driver-dependent?
+ print(object instanceof Clob ? ((Clob) object).getCharacterStream() : object);
+ }
+ println();
+ }
+ }
+}
diff --git a/src/test/resources/org/apache/commons/csv/CSVRecord.java b/src/test/resources/org/apache/commons/csv/CSVRecord.java
new file mode 100644
index 0000000..5181bc9
--- /dev/null
+++ b/src/test/resources/org/apache/commons/csv/CSVRecord.java
@@ -0,0 +1,329 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.commons.csv;
+
+import java.io.Serializable;
+import java.util.Arrays;
+import java.util.Iterator;
+import java.util.LinkedHashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Map.Entry;
+import java.util.Objects;
+
+/**
+ * A CSV record parsed from a CSV file.
+ *
+ * <p>
+ * Note: Support for {@link Serializable} is scheduled to be removed in version 2.0.
+ * In version 1.8 the mapping between the column header and the column index was
+ * removed from the serialised state. The class maintains serialization compatibility
+ * with versions pre-1.8 for the record values; these must be accessed by index
+ * following deserialization. There will be loss of any functionality linked to the header
+ * mapping when transferring serialised forms pre-1.8 to 1.8 and vice versa.
+ * </p>
+ */
+public final class CSVRecord implements Serializable, Iterable<String> {
+
+    private static final String[] EMPTY_STRING_ARRAY = new String[0];
+
+    private static final long serialVersionUID = 1L;
+
+    /** The start position of this record as a character position in the source stream. */
+    private final long characterPosition;
+
+    /** The accumulated comments (if any) */
+    private final String comment;
+
+    /** The record number. */
+    private final long recordNumber;
+
+    /** The values of the record */
+    private final String[] values;
+
+    /** The parser that originates this record. This is not serialized. */
+    private final transient CSVParser parser;
+
+    /**
+     * Creates a record. The {@code values} array is stored as is (not copied); {@code null} is replaced by an empty
+     * array.
+     *
+     * @param parser
+     *            the parser that produced this record, may be {@code null}
+     * @param values
+     *            the record values, may be {@code null}
+     * @param comment
+     *            the comment attached to this record, may be {@code null}
+     * @param recordNumber
+     *            the number of this record
+     * @param characterPosition
+     *            the character position at which this record starts
+     */
+    CSVRecord(final CSVParser parser, final String[] values, final String comment, final long recordNumber,
+            final long characterPosition) {
+        this.recordNumber = recordNumber;
+        this.values = values != null ? values : EMPTY_STRING_ARRAY;
+        this.parser = parser;
+        this.comment = comment;
+        this.characterPosition = characterPosition;
+    }
+
+    /**
+     * Returns a value by {@link Enum}. The enum's {@code toString()} is used as the column name.
+     *
+     * @param e
+     *            an enum
+     * @return the String at the given enum String
+     */
+    public String get(final Enum<?> e) {
+        return get(Objects.toString(e, null));
+    }
+
+    /**
+     * Returns a value by index.
+     *
+     * @param i
+     *            a column index (0-based)
+     * @return the String at the given index
+     */
+    public String get(final int i) {
+        return values[i];
+    }
+
+    /**
+     * Returns a value by name.
+     *
+     * <p>
+     * Note: This requires a field mapping obtained from the original parser.
+     * A check using {@link #isMapped(String)} should be used to determine if a
+     * mapping exists from the provided {@code name} to a field index. In this case an
+     * exception will only be thrown if the record does not contain a field corresponding
+     * to the mapping, that is the record length is not consistent with the mapping size.
+     * </p>
+     *
+     * @param name
+     *            the name of the column to be retrieved.
+     * @return the column value, maybe null depending on {@link CSVFormat#getNullString()}.
+     * @throws IllegalStateException
+     *             if no header mapping was provided
+     * @throws IllegalArgumentException
+     *             if {@code name} is not mapped or if the record is inconsistent
+     * @see #isMapped(String)
+     * @see #isConsistent()
+     * @see #getParser()
+     * @see CSVFormat#withNullString(String)
+     */
+    public String get(final String name) {
+        final Map<String, Integer> headerMap = getHeaderMapRaw();
+        if (headerMap == null) {
+            throw new IllegalStateException(
+                "No header mapping was specified, the record values can't be accessed by name");
+        }
+        final Integer index = headerMap.get(name);
+        if (index == null) {
+            throw new IllegalArgumentException(String.format("Mapping for %s not found, expected one of %s", name,
+                headerMap.keySet()));
+        }
+        // Check the bounds up front rather than catching ArrayIndexOutOfBoundsException and dropping its cause.
+        if (!isSet(index.intValue())) {
+            throw new IllegalArgumentException(String.format(
+                "Index for header '%s' is %d but CSVRecord only has %d values!", name, index,
+                Integer.valueOf(values.length)));
+        }
+        return values[index.intValue()];
+    }
+
+    /**
+     * Returns the start position of this record as a character position in the source stream. This may or may not
+     * correspond to the byte position depending on the character set.
+     *
+     * @return the position of this record in the source stream.
+     */
+    public long getCharacterPosition() {
+        return characterPosition;
+    }
+
+    /**
+     * Returns the comment for this record, if any.
+     * Note that comments are attached to the following record.
+     * If there is no following record (i.e. the comment is at EOF)
+     * the comment will be ignored.
+     *
+     * @return the comment for this record, or null if no comment for this record is available.
+     */
+    public String getComment() {
+        return comment;
+    }
+
+    /**
+     * Returns the parser's raw header map.
+     *
+     * @return the header map, or {@code null} if this record has no parser
+     */
+    private Map<String, Integer> getHeaderMapRaw() {
+        return parser == null ? null : parser.getHeaderMapRaw();
+    }
+
+    /**
+     * Returns the parser.
+     *
+     * <p>
+     * Note: The parser is not part of the serialized state of the record. A null check
+     * should be used when the record may have originated from a serialized form.
+     * </p>
+     *
+     * @return the parser.
+     * @since 1.7
+     */
+    public CSVParser getParser() {
+        return parser;
+    }
+
+    /**
+     * Returns the number of this record in the parsed CSV file.
+     *
+     * <p>
+     * <strong>ATTENTION:</strong> If your CSV input has multi-line values, the returned number does not correspond to
+     * the current line number of the parser that created this record.
+     * </p>
+     *
+     * @return the number of this record.
+     * @see CSVParser#getCurrentLineNumber()
+     */
+    public long getRecordNumber() {
+        return recordNumber;
+    }
+
+    /**
+     * Checks whether this record has a comment.
+     * Note that comments are attached to the following record.
+     * If there is no following record (i.e. the comment is at EOF)
+     * the comment will be ignored.
+     *
+     * @return true if this record has a comment, false otherwise
+     * @since 1.3
+     */
+    public boolean hasComment() {
+        return comment != null;
+    }
+
+    /**
+     * Tells whether the record size matches the header size.
+     *
+     * <p>
+     * Returns true if the sizes for this record match and false if not. Some programs can export files that fail this
+     * test but still produce parsable files. A record without a header map is always consistent.
+     * </p>
+     *
+     * @return true if this record is valid, false if not
+     */
+    public boolean isConsistent() {
+        final Map<String, Integer> headerMap = getHeaderMapRaw();
+        return headerMap == null || headerMap.size() == values.length;
+    }
+
+    /**
+     * Checks whether a given column is mapped, i.e. its name has been defined to the parser.
+     *
+     * @param name
+     *            the name of the column to be retrieved.
+     * @return whether a given column is mapped.
+     */
+    public boolean isMapped(final String name) {
+        final Map<String, Integer> headerMap = getHeaderMapRaw();
+        return headerMap != null && headerMap.containsKey(name);
+    }
+
+    /**
+     * Checks whether a column with given index has a value.
+     *
+     * @param index
+     *            a column index (0-based)
+     * @return whether a column with given index has a value
+     */
+    public boolean isSet(final int index) {
+        return 0 <= index && index < values.length;
+    }
+
+    /**
+     * Checks whether a given column is mapped and has a value.
+     *
+     * @param name
+     *            the name of the column to be retrieved.
+     * @return whether a given column is mapped and has a value
+     */
+    public boolean isSet(final String name) {
+        // One header-map lookup instead of containsKey + get.
+        final Map<String, Integer> headerMap = getHeaderMapRaw();
+        if (headerMap == null) {
+            return false;
+        }
+        final Integer index = headerMap.get(name);
+        return index != null && index.intValue() < values.length;
+    }
+
+    /**
+     * Returns an iterator over the values of this record.
+     *
+     * @return an iterator over the values of this record.
+     */
+    @Override
+    public Iterator<String> iterator() {
+        return toList().iterator();
+    }
+
+    /**
+     * Puts all values of this record into the given Map.
+     *
+     * <p>
+     * Header entries whose column index lies beyond the end of this record are skipped. The map is returned
+     * untouched when there is no header map.
+     * </p>
+     *
+     * @param <M>
+     *            the map type
+     * @param map
+     *            The Map to populate.
+     * @return the given map.
+     * @since 1.9
+     */
+    public <M extends Map<String, String>> M putIn(final M map) {
+        final Map<String, Integer> headerMap = getHeaderMapRaw();
+        if (headerMap == null) {
+            return map;
+        }
+        for (final Entry<String, Integer> entry : headerMap.entrySet()) {
+            final int col = entry.getValue().intValue();
+            if (col < values.length) {
+                map.put(entry.getKey(), values[col]);
+            }
+        }
+        return map;
+    }
+
+    /**
+     * Returns the number of values in this record.
+     *
+     * @return the number of values.
+     */
+    public int size() {
+        return values.length;
+    }
+
+    /**
+     * Converts the values to a List. The list is a fixed-size view backed by the internal values array.
+     *
+     * TODO: Maybe make this public?
+     *
+     * @return a List view of the values
+     */
+    private List<String> toList() {
+        return Arrays.asList(values);
+    }
+
+    /**
+     * Copies this record into a new Map of header name to record value.
+     *
+     * @return A new Map. The map is empty if the record has no headers.
+     */
+    public Map<String, String> toMap() {
+        return putIn(new LinkedHashMap<String, String>(values.length));
+    }
+
+    /**
+     * Returns a string representation of the contents of this record. The result is constructed from the comment,
+     * the recordNumber and by passing the internal values array to {@link Arrays#toString(Object[])}.
+     *
+     * @return a String representation of this record.
+     */
+    @Override
+    public String toString() {
+        return "CSVRecord [comment='" + comment + "', recordNumber=" + recordNumber + ", values=" +
+            Arrays.toString(values) + "]";
+    }
+
+    /**
+     * Returns the internal values array itself, not a copy.
+     *
+     * @return the values of this record
+     */
+    String[] values() {
+        return values;
+    }
+
+}
diff --git a/src/test/resources/org/apache/commons/csv/Constants.java b/src/test/resources/org/apache/commons/csv/Constants.java
new file mode 100644
index 0000000..b7dc770
--- /dev/null
+++ b/src/test/resources/org/apache/commons/csv/Constants.java
@@ -0,0 +1,82 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.commons.csv;
+
+/**
+ * Constants for this package. This class only holds constants and cannot be instantiated.
+ */
+final class Constants {
+
+    /** The backslash character. */
+    static final char BACKSLASH = '\\';
+
+    /** The backspace character. */
+    static final char BACKSPACE = '\b';
+
+    /** The comma character. */
+    static final char COMMA = ',';
+
+    /**
+     * Starts a comment, the remainder of the line is the comment.
+     */
+    static final char COMMENT = '#';
+
+    /** The carriage return character. */
+    static final char CR = '\r';
+
+    /** RFC 4180 defines line breaks as CRLF */
+    static final String CRLF = "\r\n";
+
+    /** The double quote character, boxed. */
+    static final Character DOUBLE_QUOTE_CHAR = Character.valueOf('"');
+
+    /** The empty string. */
+    static final String EMPTY = "";
+
+    /** The end of stream symbol */
+    static final int END_OF_STREAM = -1;
+
+    /** The form feed character. */
+    static final char FF = '\f';
+
+    /** The line feed character. */
+    static final char LF = '\n';
+
+    /**
+     * Unicode line separator.
+     */
+    static final String LINE_SEPARATOR = "\u2028";
+
+    /**
+     * Unicode next line.
+     */
+    static final String NEXT_LINE = "\u0085";
+
+    /**
+     * Unicode paragraph separator.
+     */
+    static final String PARAGRAPH_SEPARATOR = "\u2029";
+
+    /** The pipe character. */
+    static final char PIPE = '|';
+
+    /** ASCII record separator */
+    static final char RS = 30;
+
+    /** The space character. */
+    static final char SP = ' ';
+
+    /** The horizontal tab character. */
+    static final char TAB = '\t';
+
+    /** Undefined state for the lookahead char */
+    static final int UNDEFINED = -2;
+
+    /** ASCII unit separator */
+    static final char US = 31;
+
+    /** No instances. */
+    private Constants() {
+        // constants holder only
+    }
+
+}
diff --git a/src/test/resources/org/apache/commons/csv/ExtendedBufferedReader.java b/src/test/resources/org/apache/commons/csv/ExtendedBufferedReader.java
new file mode 100644
index 0000000..b9ca79d
--- /dev/null
+++ b/src/test/resources/org/apache/commons/csv/ExtendedBufferedReader.java
@@ -0,0 +1,191 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.commons.csv;
+
+import static org.apache.commons.csv.Constants.CR;
+import static org.apache.commons.csv.Constants.END_OF_STREAM;
+import static org.apache.commons.csv.Constants.LF;
+import static org.apache.commons.csv.Constants.UNDEFINED;
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.Reader;
+
+/**
+ * A special buffered reader which supports sophisticated read access.
+ * <p>
+ * In particular the reader supports a look-ahead option, which allows you to see the next char returned by
+ * {@link #read()}. This reader also tracks how many characters have been read with {@link #getPosition()}.
+ * </p>
+ */
+final class ExtendedBufferedReader extends BufferedReader {
+
+    /** The last char returned */
+    private int lastChar = UNDEFINED;
+
+    /** The count of EOLs (CR/LF/CRLF) seen so far */
+    private long eolCounter;
+
+    /** The position, which is number of characters read so far */
+    private long position;
+
+    /** Set to {@code true} once {@link #close()} has been called. */
+    private boolean closed;
+
+    /**
+     * Created extended buffered reader using default buffer-size
+     *
+     * @param reader
+     *            the reader to wrap
+     */
+    ExtendedBufferedReader(final Reader reader) {
+        super(reader);
+    }
+
+    /**
+     * Closes the stream.
+     *
+     * @throws IOException
+     *             If an I/O error occurs
+     */
+    @Override
+    public void close() throws IOException {
+        // Set ivars before calling super close() in case close() throws an IOException.
+        closed = true;
+        lastChar = END_OF_STREAM;
+        super.close();
+    }
+
+    /**
+     * Returns the current line number
+     *
+     * @return the current line number
+     */
+    long getCurrentLineNumber() {
+        // Check if we are at EOL or EOF or just starting
+        if (lastChar == CR || lastChar == LF || lastChar == UNDEFINED || lastChar == END_OF_STREAM) {
+            return eolCounter; // counter is accurate
+        }
+        return eolCounter + 1; // Allow for counter being incremented only at EOL
+    }
+
+    /**
+     * Returns the last character that was read as an integer (0 to 65535). This will be the last character returned by
+     * any of the read methods. This will not include a character read using the {@link #lookAhead()} method. If no
+     * character has been read then this will return {@link Constants#UNDEFINED}. If the end of the stream was reached
+     * on the last read then this will return {@link Constants#END_OF_STREAM}.
+     *
+     * @return the last character that was read
+     */
+    int getLastChar() {
+        return lastChar;
+    }
+
+    /**
+     * Gets the character position in the reader.
+     * <p>
+     * Only {@link #read()} and {@link #read(char[], int, int)} advance the position; {@link #readLine()} does not.
+     * </p>
+     *
+     * @return the current position in the reader (counting characters, not bytes since this is a Reader)
+     */
+    long getPosition() {
+        return this.position;
+    }
+
+    /**
+     * Tells whether {@link #close()} has been called on this reader.
+     *
+     * @return {@code true} if this reader has been closed
+     */
+    public boolean isClosed() {
+        return closed;
+    }
+
+    /**
+     * Returns the next character in the current reader without consuming it. So the next call to {@link #read()} will
+     * still return this value. Does not affect line number or last character.
+     *
+     * @return the next character
+     *
+     * @throws IOException
+     *             if there is an error in reading
+     */
+    int lookAhead() throws IOException {
+        // The super.* calls bypass this class's bookkeeping (lastChar, eolCounter, position).
+        super.mark(1);
+        final int c = super.read();
+        super.reset();
+
+        return c;
+    }
+
+    /**
+     * Reads a single character and updates the last char, the EOL count and the position.
+     * <p>
+     * A LF that directly follows a CR is not counted as another EOL. The position is advanced on every call, including
+     * the call that returns {@link Constants#END_OF_STREAM}.
+     * </p>
+     *
+     * @return the character read, or {@link Constants#END_OF_STREAM} at the end of the stream
+     * @throws IOException
+     *             If an I/O error occurs
+     */
+    @Override
+    public int read() throws IOException {
+        final int current = super.read();
+        if (current == CR || (current == LF && lastChar != CR)) {
+            eolCounter++;
+        }
+        lastChar = current;
+        this.position++;
+        return lastChar;
+    }
+
+    /**
+     * Reads characters into a portion of an array and updates the last char, the EOL count and the position.
+     *
+     * @param buf
+     *            the destination buffer
+     * @param offset
+     *            the index in {@code buf} at which to start storing characters
+     * @param length
+     *            the maximum number of characters to read
+     * @return the number of characters read, 0 if {@code length} is 0, or -1 at the end of the stream
+     * @throws IOException
+     *             If an I/O error occurs
+     */
+    @Override
+    public int read(final char[] buf, final int offset, final int length) throws IOException {
+        if (length == 0) {
+            return 0;
+        }
+
+        final int len = super.read(buf, offset, length);
+
+        if (len > 0) {
+
+            for (int i = offset; i < offset + len; i++) {
+                final char ch = buf[i];
+                if (ch == LF) {
+                    // A LF right after a CR belongs to a CRLF that has already been counted. For the first char of
+                    // this read the predecessor is lastChar; buf[offset - 1] was not written by this call.
+                    if (CR != (i > offset ? buf[i - 1] : lastChar)) {
+                        eolCounter++;
+                    }
+                } else if (ch == CR) {
+                    eolCounter++;
+                }
+            }
+
+            lastChar = buf[offset + len - 1];
+            position += len;
+
+        } else if (len == END_OF_STREAM) {
+            // Nothing was read: leave the position alone (adding -1 would move it backwards).
+            lastChar = END_OF_STREAM;
+        }
+
+        return len;
+    }
+
+    /**
+     * Calls {@link BufferedReader#readLine()} which drops the line terminator(s). This method should only be called
+     * when processing a comment, otherwise information can be lost.
+     * <p>
+     * Increments {@link #eolCounter}
+     * </p>
+     * <p>
+     * Sets {@link #lastChar} to {@link Constants#END_OF_STREAM} at EOF, otherwise to LF
+     * </p>
+     *
+     * @return the line that was read, or null if reached EOF.
+     * @throws IOException
+     *             If an I/O error occurs
+     */
+    @Override
+    public String readLine() throws IOException {
+        final String line = super.readLine();
+
+        if (line != null) {
+            lastChar = LF; // needed for detecting start of line
+            eolCounter++;
+        } else {
+            lastChar = END_OF_STREAM;
+        }
+
+        return line;
+    }
+
+}
diff --git a/src/test/resources/org/apache/commons/csv/IOUtils.java b/src/test/resources/org/apache/commons/csv/IOUtils.java
new file mode 100644
index 0000000..1771d4d
--- /dev/null
+++ b/src/test/resources/org/apache/commons/csv/IOUtils.java
@@ -0,0 +1,139 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.csv;
+
+import java.io.IOException;
+import java.io.Reader;
+import java.io.Writer;
+import java.nio.CharBuffer;
+
+/** Copied from Apache Commons IO. */
+class IOUtils {
+
+    /**
+     * <p>
+     * Copied from Apache Commons IO.
+     * </p>
+     * The default buffer size ({@value}).
+     */
+    static final int DEFAULT_BUFFER_SIZE = 1024 * 4;
+
+    /**
+     * <p>
+     * Copied from Apache Commons IO.
+     * </p>
+     * Represents the end-of-file (or stream).
+     * @since 2.5 (made public)
+     */
+    private static final int EOF = -1;
+
+    /** No instances. */
+    private IOUtils() {
+        // static helpers only
+    }
+
+    /**
+     * Copies chars from a large (over 2GB) {@code Reader} to an {@code Appendable}.
+     * <p>
+     * This method buffers the input internally, so there is no need to use a
+     * {@code BufferedReader}.
+     * </p>
+     * The buffer size is given by {@link #DEFAULT_BUFFER_SIZE}.
+     *
+     * @param input the {@code Reader} to read from
+     * @param output the {@code Appendable} to append to
+     * @return the number of characters copied
+     * @throws NullPointerException if the input or output is null
+     * @throws IOException if an I/O error occurs
+     * @since 2.7
+     */
+    static long copy(final Reader input, final Appendable output) throws IOException {
+        return copy(input, output, CharBuffer.allocate(DEFAULT_BUFFER_SIZE));
+    }
+
+    /**
+     * Copies chars from a large (over 2GB) {@code Reader} to an {@code Appendable}.
+     * <p>
+     * This method uses the provided buffer, so there is no need to use a
+     * {@code BufferedReader}.
+     * </p>
+     * <p>
+     * The buffer is cleared after every chunk, so a short read does not reduce the space available to later reads.
+     * </p>
+     *
+     * @param input the {@code Reader} to read from
+     * @param output the {@code Appendable} to write to
+     * @param buffer the buffer to be used for the copy
+     * @return the number of characters copied
+     * @throws NullPointerException if the input or output is null
+     * @throws IOException if an I/O error occurs
+     * @since 2.7
+     */
+    static long copy(final Reader input, final Appendable output, final CharBuffer buffer) throws IOException {
+        long count = 0;
+        int n;
+        while (EOF != (n = input.read(buffer))) {
+            buffer.flip();
+            output.append(buffer, 0, n);
+            count += n;
+            // flip() set limit = n; without clear() every later read would be capped at the shortest read so far.
+            buffer.clear();
+        }
+        return count;
+    }
+
+    /**
+     * <p>
+     * Copied from Apache Commons IO.
+     * </p>
+     * Copies chars from a large (over 2GB) {@code Reader} to a {@code Writer}.
+     * <p>
+     * This method buffers the input internally, so there is no need to use a
+     * {@code BufferedReader}.
+     * </p>
+     * <p>
+     * The buffer size is given by {@link #DEFAULT_BUFFER_SIZE}.
+     * </p>
+     *
+     * @param input the {@code Reader} to read from
+     * @param output the {@code Writer} to write to
+     * @return the number of characters copied
+     * @throws NullPointerException if the input or output is null
+     * @throws IOException if an I/O error occurs
+     * @since 1.3
+     */
+    static long copyLarge(final Reader input, final Writer output) throws IOException {
+        return copyLarge(input, output, new char[DEFAULT_BUFFER_SIZE]);
+    }
+
+    /**
+     * <p>
+     * Copied from Apache Commons IO.
+     * </p>
+     * Copies chars from a large (over 2GB) {@code Reader} to a {@code Writer}.
+     * <p>
+     * This method uses the provided buffer, so there is no need to use a
+     * {@code BufferedReader}.
+     * </p>
+     *
+     * @param input the {@code Reader} to read from
+     * @param output the {@code Writer} to write to
+     * @param buffer the buffer to be used for the copy
+     * @return the number of characters copied
+     * @throws NullPointerException if the input or output is null
+     * @throws IOException if an I/O error occurs
+     * @since 2.2
+     */
+    static long copyLarge(final Reader input, final Writer output, final char[] buffer) throws IOException {
+        long count = 0;
+        int n;
+        while (EOF != (n = input.read(buffer))) {
+            output.write(buffer, 0, n);
+            count += n;
+        }
+        return count;
+    }
+
+}
diff --git a/src/test/resources/org/apache/commons/csv/Lexer.java b/src/test/resources/org/apache/commons/csv/Lexer.java
new file mode 100644
index 0000000..2795ca2
--- /dev/null
+++ b/src/test/resources/org/apache/commons/csv/Lexer.java
@@ -0,0 +1,461 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.commons.csv;
+
+import static org.apache.commons.csv.Constants.BACKSPACE;
+import static org.apache.commons.csv.Constants.CR;
+import static org.apache.commons.csv.Constants.END_OF_STREAM;
+import static org.apache.commons.csv.Constants.FF;
+import static org.apache.commons.csv.Constants.LF;
+import static org.apache.commons.csv.Constants.TAB;
+import static org.apache.commons.csv.Constants.UNDEFINED;
+import static org.apache.commons.csv.Token.Type.COMMENT;
+import static org.apache.commons.csv.Token.Type.EOF;
+import static org.apache.commons.csv.Token.Type.EORECORD;
+import static org.apache.commons.csv.Token.Type.INVALID;
+import static org.apache.commons.csv.Token.Type.TOKEN;
+
+import java.io.Closeable;
+import java.io.IOException;
+
+/**
+ * Lexical analyzer.
+ */
+final class Lexer implements Closeable {
+
+ private static final String CR_STRING = Character.toString(CR);
+ private static final String LF_STRING = Character.toString(LF);
+
+ /**
+ * Constant char to use for disabling comments, escapes and encapsulation. The value {@code 0xFFFE} (that is,
+ * {@code (char) -2}) is used because it won't be confused with an EOF signal (-1), and because U+FFFE is a
+ * Unicode noncharacter that should not occur in real text, so there should never be a collision with a real text char.
+ */
+ private static final char DISABLED = '\ufffe';
+
+ private final char delimiter;
+ private final char escape;
+ private final char quoteChar;
+ private final char commentStart;
+
+ private final boolean ignoreSurroundingSpaces;
+ private final boolean ignoreEmptyLines;
+
+ /** The input stream */
+ private final ExtendedBufferedReader reader;
+ private String firstEol;
+
+ /**
+ * Creates a lexer that tokenizes the given reader according to the given format.
+ * <p>
+ * The delimiter and the two ignore flags are copied from the format unchanged. The escape, quote and comment
+ * characters may be {@code null} in the format; a {@code null} is stored as {@code DISABLED}.
+ * </p>
+ *
+ * @param format
+ * the format supplying the delimiter, escape, quote and comment characters and the ignore flags
+ * @param reader
+ * the reader to take the input from
+ */
+ Lexer(final CSVFormat format, final ExtendedBufferedReader reader) {
+ this.reader = reader;
+ this.delimiter = format.getDelimiter();
+ // null (feature switched off) is mapped to the DISABLED sentinel
+ this.escape = mapNullToDisabled(format.getEscapeCharacter());
+ this.quoteChar = mapNullToDisabled(format.getQuoteCharacter());
+ this.commentStart = mapNullToDisabled(format.getCommentMarker());
+ this.ignoreSurroundingSpaces = format.getIgnoreSurroundingSpaces();
+ this.ignoreEmptyLines = format.getIgnoreEmptyLines();
+ }
+
+ /**
+ * Closes resources, that is the underlying reader.
+ *
+ * @throws IOException
+ * If an I/O error occurs
+ */
+ @Override
+ public void close() throws IOException {
+ reader.close();
+ }
+
+ /**
+ * Returns the current character position, as reported by the underlying reader.
+ *
+ * @return the current character position
+ */
+ long getCharacterPosition() {
+ return reader.getPosition();
+ }
+
+ /**
+ * Returns the current line number, as reported by the underlying reader.
+ *
+ * @return the current line number
+ */
+ long getCurrentLineNumber() {
+ return reader.getCurrentLineNumber();
+ }
+
+ /**
+ * Returns the end-of-line string recorded in {@code firstEol}.
+ * <p>
+ * NOTE(review): presumably this is the first line terminator seen in the input and {@code null} until one has been
+ * read - confirm against the code that assigns {@code firstEol}.
+ * </p>
+ *
+ * @return the recorded end-of-line string, possibly {@code null}
+ */
+ String getFirstEol(){
+ return firstEol;
+ }
+
+ /**
+ * Tells whether the underlying reader has been closed.
+ *
+ * @return true if the underlying reader reports that it is closed
+ */
+ boolean isClosed() {
+ return reader.isClosed();
+ }
+
+ /**
+ * Tells whether the given character is the comment start character.
+ *
+ * @param ch the character to check
+ * @return true if the given character equals the comment start character
+ */
+ boolean isCommentStart(final int ch) {
+ return ch == commentStart;
+ }
+
+ /**
+ * Tells whether the given character is the delimiter.
+ *
+ * @param ch the character to check
+ * @return true if the given character equals the delimiter
+ */
+ boolean isDelimiter(final int ch) {
+ return ch == delimiter;
+ }
+
+ /**
+ * Tells whether the given character is the end-of-stream marker.
+ *
+ * @param ch the character to check
+ * @return true if the given character indicates end of file
+ */
+ boolean isEndOfFile(final int ch) {
+ return ch == END_OF_STREAM;
+ }
+
+ /**
+ * Tells whether the given character is the escape character.
+ *
+ * @param ch the character to check
+ * @return true if the given character equals the escape character
+ */
+ boolean isEscape(final int ch) {
+ return ch == escape;
+ }
+
+ /**
+  * Tells whether the given character has a special meaning for this lexer, that is whether it is the delimiter,
+  * the escape character, the quote character or the comment start character.
+  *
+  * @param ch the character to check
+  * @return true if the given character is one of the four special characters
+  */
+ private boolean isMetaChar(final int ch) {
+     if (ch == delimiter || ch == escape) {
+         return true;
+     }
+     return ch == quoteChar || ch == commentStart;
+ }
+
+ /**
+ * Tells whether the given character is the quote character.
+ *
+ * @param ch the character to check
+ * @return true if the given character equals the quote character
+ */
+ boolean isQuoteChar(final int ch) {
+ return ch == quoteChar;
+ }
+
+ /**
+  * Tells whether the given (previously read) character puts the lexer at the start of a line: it is a LF, a CR,
+  * or the undefined marker that stands for "nothing read yet".
+  *
+  * @param ch the character to check
+  * @return true if the character is at the start of a line.
+  */
+ boolean isStartOfLine(final int ch) {
+     switch (ch) {
+     case LF:
+     case CR:
+     case UNDEFINED:
+         return true;
+     default:
+         return false;
+     }
+ }
+
+ /**
+  * Tells whether the given character counts as whitespace for this lexer. The delimiter never counts as
+  * whitespace, even if it is a whitespace character such as a tab.
+  *
+  * @param ch the character to check
+  * @return true if the given char is a whitespace character
+  */
+ boolean isWhitespace(final int ch) {
+     if (isDelimiter(ch)) {
+         return false;
+     }
+     return Character.isWhitespace((char) ch);
+ }
+
+ /**
+  * Converts an optional character into a primitive, using {@code DISABLED} for "not set".
+  *
+  * @param c the character, may be {@code null}
+  * @return the primitive value of {@code c}, or {@code DISABLED} if {@code c} is {@code null}
+  */
+ private char mapNullToDisabled(final Character c) {
+     if (c == null) {
+         return DISABLED;
+     }
+     return c.charValue();
+ }
+
+ /**
+ * Returns the next token.
+ * <p>
+ * A token corresponds to a term, a record change or an end-of-file indicator.
+ * </p>
+ * <p>
+ * The steps are: read one character; optionally skip empty lines; report EOF; read a whole comment line if the
+ * character is the comment start at the start of a line; otherwise fill the token from a delimiter, an end of
+ * line, a quoted value or a simple value.
+ * </p>
+ *
+ * @param token
+ * an existing Token object to reuse. The caller is responsible to initialize the Token.
+ * @return the next token found
+ * @throws java.io.IOException
+ * on stream access error
+ */
+ Token nextToken(final Token token) throws IOException {
+
+ // get the last read char (required for empty line detection)
+ int lastChar = reader.getLastChar();
+
+ // read the next char and set eol
+ int c = reader.read();
+ /*
+ * Note: The following call will swallow LF if c == CR. But we don't need to know if the last char was CR or LF
+ * - they are equivalent here.
+ */
+ boolean eol = readEndOfLine(c);
+
+ // empty line detection: eol AND (last char was EOL or beginning)
+ if (ignoreEmptyLines) {
+ while (eol && isStartOfLine(lastChar)) {
+ // go one char ahead ...
+ lastChar = c;
+ c = reader.read();
+ eol = readEndOfLine(c);
+ // reached end of file without any content (empty line at the end)
+ if (isEndOfFile(c)) {
+ token.type = EOF;
+ // don't set token.isReady here because no content
+ return token;
+ }
+ }
+ }
+
+ // did we reach eof during the last iteration already ? EOF
+ // (if the previous char was a delimiter, an EOF now still has to yield a final, empty token below)
+ if (isEndOfFile(lastChar) || !isDelimiter(lastChar) && isEndOfFile(c)) {
+ token.type = EOF;
+ // don't set token.isReady here because no content
+ return token;
+ }
+
+ // a comment is only recognized when the comment start char is the first char of a line
+ if (isStartOfLine(lastChar) && isCommentStart(c)) {
+ final String line = reader.readLine();
+ if (line == null) {
+ token.type = EOF;
+ // don't set token.isReady here because no content
+ return token;
+ }
+ final String comment = line.trim();
+ token.content.append(comment);
+ token.type = COMMENT;
+ return token;
+ }
+
+ // important: make sure a new char gets consumed in each iteration
+ while (token.type == INVALID) {
+ // ignore whitespaces at beginning of a token
+ if (ignoreSurroundingSpaces) {
+ while (isWhitespace(c) && !eol) {
+ c = reader.read();
+ eol = readEndOfLine(c);
+ }
+ }
+
+ // ok, start of token reached: encapsulated, or token
+ if (isDelimiter(c)) {
+ // empty token return TOKEN("")
+ token.type = TOKEN;
+ } else if (eol) {
+ // empty token return EORECORD("")
+ // noop: token.content.append("");
+ token.type = EORECORD;
+ } else if (isQuoteChar(c)) {
+ // consume encapsulated token
+ parseEncapsulatedToken(token);
+ } else if (isEndOfFile(c)) {
+ // end of file return EOF()
+ // noop: token.content.append("");
+ token.type = EOF;
+ token.isReady = true; // there is data at EOF
+ } else {
+ // next token must be a simple token
+ // add removed blanks when not ignoring whitespace chars...
+ parseSimpleToken(token, c);
+ }
+ }
+ return token;
+ }
+
+ /**
+ * Parses an encapsulated token. The opening encapsulator has already been consumed by the caller.
+ * <p>
+ * Encapsulated tokens are surrounded by the given encapsulating-string. The encapsulator itself might be included
+ * in the token using a doubling syntax (as "", '') or using escaping (as in \", \'). Whitespaces before and after
+ * an encapsulated token are ignored. The token is finished when one of the following conditions become true:
+ * </p>
+ * <ul>
+ * <li>an unescaped encapsulator has been reached, and is followed by optional whitespace then:
+ * <ul>
+ * <li>delimiter (TOKEN)</li>
+ * <li>end of line (EORECORD)</li>
+ * <li>end of stream (EOF, with the token marked as ready)</li>
+ * </ul>
+ * </li>
+ * </ul>
+ * <p>
+ * Reaching the end of the stream before the closing encapsulator is an error.
+ * </p>
+ *
+ * @param token
+ * the current token
+ * @return a valid token object
+ * @throws IOException
+ * on invalid state: EOF before closing encapsulator or invalid character before delimiter or EOL
+ */
+ private Token parseEncapsulatedToken(final Token token) throws IOException {
+ // save current line number in case needed for IOE
+ final long startLineNumber = getCurrentLineNumber();
+ int c;
+ while (true) {
+ c = reader.read();
+
+ if (isEscape(c)) {
+ final int unescaped = readEscape();
+ if (unescaped == END_OF_STREAM) { // unexpected char after escape
+ // keep both the escape char and the char that followed it
+ token.content.append((char) c).append((char) reader.getLastChar());
+ } else {
+ token.content.append((char) unescaped);
+ }
+ } else if (isQuoteChar(c)) {
+ if (isQuoteChar(reader.lookAhead())) {
+ // double or escaped encapsulator -> add single encapsulator to token
+ c = reader.read();
+ token.content.append((char) c);
+ } else {
+ // token finish mark (encapsulator) reached: ignore whitespace till delimiter
+ while (true) {
+ c = reader.read();
+ if (isDelimiter(c)) {
+ token.type = TOKEN;
+ return token;
+ } else if (isEndOfFile(c)) {
+ token.type = EOF;
+ token.isReady = true; // There is data at EOF
+ return token;
+ } else if (readEndOfLine(c)) {
+ token.type = EORECORD;
+ return token;
+ } else if (!isWhitespace(c)) {
+ // error invalid char between token and next delimiter
+ throw new IOException("(line " + getCurrentLineNumber() +
+ ") invalid char between encapsulated token and delimiter");
+ }
+ }
+ }
+ } else if (isEndOfFile(c)) {
+ // error condition (end of file before end of token)
+ throw new IOException("(startline " + startLineNumber +
+ ") EOF reached before encapsulated token finished");
+ } else {
+ // consume character
+ token.content.append((char) c);
+ }
+ }
+ }
+
+ /**
+  * Parses a simple (non-encapsulated) token, starting with the character {@code ch}.
+  * <p>
+  * A simple token may contain escape sequences (such as \, or \;). Characters are collected until the first of:
+  * </p>
+  * <ul>
+  * <li>an end of line (EORECORD)</li>
+  * <li>the end of the stream (EOF, with the token flagged as ready)</li>
+  * <li>an unescaped delimiter (TOKEN)</li>
+  * </ul>
+  * <p>
+  * When {@code ignoreSurroundingSpaces} is set, trailing whitespace is removed from the collected content.
+  * </p>
+  *
+  * @param token
+  *            the current token
+  * @param ch
+  *            the current character
+  * @return the filled token
+  * @throws IOException
+  *             on stream access error
+  */
+ private Token parseSimpleToken(final Token token, int ch) throws IOException {
+     int current = ch;
+     for (;;) {
+         if (readEndOfLine(current)) {
+             token.type = EORECORD;
+             break;
+         }
+         if (isEndOfFile(current)) {
+             token.type = EOF;
+             token.isReady = true; // There is data at EOF
+             break;
+         }
+         if (isDelimiter(current)) {
+             token.type = TOKEN;
+             break;
+         }
+
+         if (isEscape(current)) {
+             final int decoded = readEscape();
+             if (decoded != END_OF_STREAM) {
+                 token.content.append((char) decoded);
+             } else {
+                 // unexpected char after escape: keep both characters as they were read
+                 token.content.append((char) current).append((char) reader.getLastChar());
+             }
+         } else {
+             token.content.append((char) current);
+         }
+         // both content branches advance to the next character
+         current = reader.read();
+     }
+
+     if (ignoreSurroundingSpaces) {
+         trimTrailingSpaces(token.content);
+     }
+     return token;
+ }
+
+ /**
+ * Greedily accepts \n, \r and \r\n This checker consumes silently the second control-character...
+ *
+ * @return true if the given or next character is a line-terminator
+ */
+ boolean readEndOfLine(int ch) throws IOException {
+ // check if we have \r\n...
+ if (ch == CR && reader.lookAhead() == LF) {
+ // note: does not change ch outside of this method!
+ ch = reader.read();
+ // Save the EOL state
+ if (firstEol == null) {
+ this.firstEol = Constants.CRLF;
+ }
+ }
+ // save EOL state here.
+ if (firstEol == null) {
+ if (ch == LF) {
+ this.firstEol = LF_STRING;
+ } else if (ch == CR) {
+ this.firstEol = CR_STRING;
+ }
+ }
+
+ return ch == LF || ch == CR;
+ }
+
+ // TODO escape handling needs more work
+ /**
+  * Decodes the escape sequence whose escape character has just been read.
+  * <p>
+  * Exactly one further character is read from the stream. Afterwards that character is available by calling
+  * {@link ExtendedBufferedReader#getLastChar()} on the reader.
+  * </p>
+  *
+  * @return the unescaped character (as an int) or {@link Constants#END_OF_STREAM} if the char following the
+  *         escape is invalid.
+  * @throws IOException
+  *             if there is a problem reading the stream or the end of stream is detected: the escape character is
+  *             not allowed at end of stream
+  */
+ int readEscape() throws IOException {
+     // the escape char itself has already been consumed (normally a backslash)
+     final int next = reader.read();
+     switch (next) {
+     case END_OF_STREAM:
+         throw new IOException("EOF whilst processing escape sequence");
+     case 'r':
+         return CR;
+     case 'n':
+         return LF;
+     case 't':
+         return TAB;
+     case 'b':
+         return BACKSPACE;
+     case 'f':
+         return FF;
+     case CR:
+     case LF:
+     case FF: // TODO is this correct?
+     case TAB: // TODO is this correct? Do tabs need to be escaped?
+     case BACKSPACE: // TODO is this correct?
+         // an escaped control character stands for itself
+         return next;
+     default:
+         // meta-characters stand for themselves; anything else is reported as unexpected,
+         // and the caller can fetch the offending char from reader.getLastChar()
+         return isMetaChar(next) ? next : END_OF_STREAM;
+     }
+ }
+
+ void trimTrailingSpaces(final StringBuilder buffer) {
+ int length = buffer.length();
+ while (length > 0 && Character.isWhitespace(buffer.charAt(length - 1))) {
+ length = length - 1;
+ }
+ if (length != buffer.length()) {
+ buffer.setLength(length);
+ }
+ }
+}
diff --git a/src/test/resources/org/apache/commons/csv/QuoteMode.java b/src/test/resources/org/apache/commons/csv/QuoteMode.java
new file mode 100644
index 0000000..272deb7
--- /dev/null
+++ b/src/test/resources/org/apache/commons/csv/QuoteMode.java
@@ -0,0 +1,50 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.csv;
+
+/**
+ * Defines quoting behavior when printing.
+ */
+public enum QuoteMode {
+
+ /**
+ * Quotes all fields.
+ */
+ ALL,
+
+ /**
+ * Quotes all non-null fields.
+ */
+ ALL_NON_NULL,
+
+ /**
+ * Quotes fields which contain special characters such as the field delimiter, quote character or any of the
+ * characters in the line separator string.
+ */
+ MINIMAL,
+
+ /**
+ * Quotes all non-numeric fields.
+ */
+ NON_NUMERIC,
+
+ /**
+ * Never quotes fields. When the delimiter occurs in data, the printer prefixes it with the escape character. If the
+ * escape character is not set, format validation throws an exception.
+ */
+ NONE
+}
diff --git a/src/test/resources/org/apache/commons/csv/Token.java b/src/test/resources/org/apache/commons/csv/Token.java
new file mode 100644
index 0000000..dff7d01
--- /dev/null
+++ b/src/test/resources/org/apache/commons/csv/Token.java
@@ -0,0 +1,73 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.commons.csv;
+
+import static org.apache.commons.csv.Token.Type.INVALID;
+
+/**
+ * Internal token representation.
+ * <p>
+ * It serves as the contract between the lexer and the parser.
+ * </p>
+ */
+final class Token {
+
+    enum Type {
+        /** Token has no valid content, i.e. is in its initialized state. */
+        INVALID,
+
+        /** Token with content, at beginning or in the middle of a line. */
+        TOKEN,
+
+        /** Token (which can have content) when the end of file is reached. */
+        EOF,
+
+        /** Token with content when the end of a line is reached. */
+        EORECORD,
+
+        /** Token is a comment line. */
+        COMMENT
+    }
+
+    /** Initial capacity of the token content buffer. */
+    private static final int INITIAL_TOKEN_LENGTH = 50;
+
+    /** Token type; starts out as (and is reset to) {@link Type#INVALID}. */
+    Token.Type type = INVALID;
+
+    /** The content buffer. */
+    final StringBuilder content = new StringBuilder(INITIAL_TOKEN_LENGTH);
+
+    /** Token ready flag: indicates a valid token with content (ready for the parser). */
+    boolean isReady;
+
+    /**
+     * Returns this token to its initial state: invalid type, not ready, empty content.
+     */
+    void reset() {
+        type = INVALID;
+        isReady = false;
+        content.setLength(0);
+    }
+
+    /**
+     * Eases IDE debugging.
+     *
+     * @return the type name followed by the current content in square brackets.
+     */
+    @Override
+    public String toString() {
+        final StringBuilder text = new StringBuilder(type.name());
+        text.append(" [").append(content).append("]");
+        return text.toString();
+    }
+}
diff --git a/src/test/resources/csv-167/sample1.csv b/src/test/resources/org/apache/commons/csv/csv-167/sample1.csv
similarity index 100%
rename from src/test/resources/csv-167/sample1.csv
rename to src/test/resources/org/apache/commons/csv/csv-167/sample1.csv
diff --git a/src/test/resources/org/apache/commons/csv/package-info.java b/src/test/resources/org/apache/commons/csv/package-info.java
new file mode 100644
index 0000000..29e7fef
--- /dev/null
+++ b/src/test/resources/org/apache/commons/csv/package-info.java
@@ -0,0 +1,82 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Apache Commons CSV Format Support.
+ *
+ * <p>CSV are widely used as interfaces to legacy systems or manual data-imports.
+ * CSV stands for "Comma Separated Values" (or sometimes "Character Separated
+ * Values"). The CSV data format is defined in
+ * <a href="http://tools.ietf.org/html/rfc4180" target="_blank">RFC 4180</a>
+ * but many dialects exist.</p>
+ *
+ * <p>Common to all file dialects is their basic structure: The CSV data-format
+ * is record oriented, where each record starts on a new textual line. A
+ * record is built from a list of values. Keep in mind that not all records
+ * must have an equal number of values:</p>
+ * <pre>
+ * csv := records*
+ * record := values*
+ * </pre>
+ *
+ * <p>The following list contains the CSV aspects the Commons CSV parser supports:</p>
+ * <dl>
+ * <dt>Separators (for lines)</dt>
+ * <dd>The record separators are hardcoded and cannot be changed. They must be '\r', '\n' or '\r\n'.</dd>
+ *
+ * <dt>Delimiter (for values)</dt>
+ * <dd>The delimiter for values is freely configurable (default ',').</dd>
+ *
+ * <dt>Comments</dt>
+ * <dd>Some CSV-dialects support a simple comment syntax. A comment is a record
+ * which must start with a designated character (the commentStarter). A record
+ * of this kind is treated as comment and gets removed from the input (default none)</dd>
+ *
+ * <dt>Encapsulator</dt>
+ * <dd>A pair of encapsulator characters (default '"') is used to enclose complex values (see below).</dd>
+ *
+ * <dt>Simple values</dt>
+ * <dd>A simple value consists of all characters (except the delimiter) until
+ * (but not including) the next delimiter or a record-terminator. Optionally
+ * all surrounding whitespaces of a simple value can be ignored (default: true).</dd>
+ *
+ * <dt>Complex values</dt>
+ * <dd>Complex values are encapsulated within a pair of the defined encapsulator characters.
+ * The encapsulator itself must be escaped or doubled when used inside complex values.
+ * Complex values preserve all kinds of formatting (including newlines, i.e. multiline values)</dd>
+ *
+ * <dt>Empty line skipping</dt>
+ * <dd>Optionally empty lines in CSV files can be skipped.
+ * Otherwise, empty lines will return a record with a single empty value.</dd>
+ * </dl>
+ *
+ * <p>In addition to individually defined dialects, two predefined dialects (strict-csv, and excel-csv)
+ * can be set directly.</p> <!-- TODO fix -->
+ *
+ * <p>Example usage:</p>
+ * <blockquote><pre>
+ * Reader in = new StringReader("a,b,c");
+ * for (CSVRecord record : CSVFormat.DEFAULT.parse(in)) {
+ * for (String field : record) {
+ * System.out.print("\"" + field + "\", ");
+ * }
+ * System.out.println();
+ * }
+ * </pre></blockquote>
+ */
+
+package org.apache.commons.csv;
diff --git a/src/test/resources/perf/worldcitiespop.txt.gz b/src/test/resources/org/apache/commons/csv/perf/worldcitiespop.txt.gz
similarity index 100%
rename from src/test/resources/perf/worldcitiespop.txt.gz
rename to src/test/resources/org/apache/commons/csv/perf/worldcitiespop.txt.gz