You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@commons.apache.org by yo...@apache.org on 2010/06/16 18:12:34 UTC
svn commit: r955284 - in /commons/sandbox/csv/trunk/src: java/org/apache/commons/csv/ test/org/apache/commons/csv/
Author: yonik
Date: Wed Jun 16 16:12:34 2010
New Revision: 955284
URL: http://svn.apache.org/viewvc?rev=955284&view=rev
Log:
SANDBOX-322: CSVPrinter overhaul
Modified:
commons/sandbox/csv/trunk/src/java/org/apache/commons/csv/CSVParser.java
commons/sandbox/csv/trunk/src/java/org/apache/commons/csv/CSVPrinter.java
commons/sandbox/csv/trunk/src/java/org/apache/commons/csv/CSVStrategy.java
commons/sandbox/csv/trunk/src/java/org/apache/commons/csv/CSVUtils.java
commons/sandbox/csv/trunk/src/test/org/apache/commons/csv/CSVParserTest.java
commons/sandbox/csv/trunk/src/test/org/apache/commons/csv/CSVPrinterTest.java
commons/sandbox/csv/trunk/src/test/org/apache/commons/csv/CSVStrategyTest.java
Modified: commons/sandbox/csv/trunk/src/java/org/apache/commons/csv/CSVParser.java
URL: http://svn.apache.org/viewvc/commons/sandbox/csv/trunk/src/java/org/apache/commons/csv/CSVParser.java?rev=955284&r1=955283&r2=955284&view=diff
==============================================================================
--- commons/sandbox/csv/trunk/src/java/org/apache/commons/csv/CSVParser.java (original)
+++ commons/sandbox/csv/trunk/src/java/org/apache/commons/csv/CSVParser.java Wed Jun 16 16:12:34 2010
@@ -70,8 +70,7 @@ public class CSVParser {
// the input stream
private final ExtendedBufferedReader in;
- // TODO: this can be made final if setStrategy is removed
- private CSVStrategy strategy;
+ private final CSVStrategy strategy;
// the following objects are shared to reduce garbage
/** A record buffer for getLine(). Grows as necessary and is reused. */
@@ -346,7 +345,7 @@ public class CSVParser {
// important: make sure a new char gets consumed in each iteration
while (!tkn.isReady) {
// ignore whitespaces at beginning of a token
- while (isWhitespace(c) && !eol) {
+ while (strategy.getIgnoreLeadingWhitespaces() && isWhitespace(c) && !eol) {
wsBuf.append((char) c);
c = in.read();
eol = isEndOfLine(c);
@@ -561,18 +560,7 @@ public class CSVParser {
// ======================================================
/**
- * Sets the specified CSV Strategy
- *
- * @return current instance of CSVParser to allow chained method calls
- * @deprecated the strategy should be set in the constructor {@link #CSVParser(Reader,CSVStrategy)}.
- */
- public CSVParser setStrategy(CSVStrategy strategy) {
- this.strategy = strategy;
- return this;
- }
-
- /**
- * Obtain the specified CSV Strategy
+ * Obtain the specified CSV Strategy. This should not be modified.
*
* @return strategy currently being used
*/
Modified: commons/sandbox/csv/trunk/src/java/org/apache/commons/csv/CSVPrinter.java
URL: http://svn.apache.org/viewvc/commons/sandbox/csv/trunk/src/java/org/apache/commons/csv/CSVPrinter.java?rev=955284&r1=955283&r2=955284&view=diff
==============================================================================
--- commons/sandbox/csv/trunk/src/java/org/apache/commons/csv/CSVPrinter.java (original)
+++ commons/sandbox/csv/trunk/src/java/org/apache/commons/csv/CSVPrinter.java Wed Jun 16 16:12:34 2010
@@ -16,6 +16,7 @@
*/
package org.apache.commons.csv;
+import java.io.IOException;
import java.io.OutputStream;
import java.io.PrintWriter;
import java.io.Writer;
@@ -26,63 +27,27 @@ import java.io.Writer;
public class CSVPrinter {
/** The place that the values get written. */
- protected PrintWriter out;
+ protected final Writer out;
+ protected final CSVStrategy strategy;
/** True if we just began a new line. */
protected boolean newLine = true;
- private CSVStrategy strategy = CSVStrategy.DEFAULT_STRATEGY;
+ protected char[] buf = new char[0]; // temporary buffer
/**
* Create a printer that will print values to the given
- * stream. Character to byte conversion is done using
- * the default character encoding. Comments will be
- * written using the default comment character '#'.
+ * stream following the CSVStrategy.
*
- * @param out stream to which to print.
- */
- public CSVPrinter(OutputStream out) {
- this.out = new PrintWriter(out);
- }
-
-
- /**
- * Create a printer that will print values to the given
- * stream. Comments will be
- * written using the default comment character '#'.
+ * Currently, only a pure encapsulation strategy or a pure escaping strategy
+ * is supported. Hybrid strategies (encapsulation and escaping with a different character) are not supported.
*
* @param out stream to which to print.
+ * @param strategy describes the CSV variation.
*/
- public CSVPrinter(Writer out) {
- if (out instanceof PrintWriter) {
- this.out = (PrintWriter) out;
- } else {
- this.out = new PrintWriter(out);
- }
- }
-
-
- // ======================================================
- // strategies
- // ======================================================
-
- /**
- * Sets the specified CSV Strategy
- *
- * @return current instance of CSVParser to allow chained method calls
- */
- public CSVPrinter setStrategy(CSVStrategy strategy) {
- this.strategy = strategy;
- return this;
- }
-
- /**
- * Obtain the specified CSV Strategy
- *
- * @return strategy currently being used
- */
- public CSVStrategy getStrategy() {
- return this.strategy;
+ public CSVPrinter(Writer out, CSVStrategy strategy) {
+ this.out = out;
+ this.strategy = strategy==null ? CSVStrategy.DEFAULT_STRATEGY : strategy;
}
// ======================================================
@@ -90,26 +55,15 @@ public class CSVPrinter {
// ======================================================
/**
- * Print the string as the last value on the line. The value
- * will be quoted if needed.
- *
- * @param value value to be outputted.
+ * Output a blank line
*/
- public void println(String value) {
- print(value);
- out.println();
- out.flush();
+ public void println() throws IOException {
+ out.write(strategy.getPrinterNewline());
newLine = true;
}
-
- /**
- * Output a blank line
- */
- public void println() {
- out.println();
+ public void flush() throws IOException {
out.flush();
- newLine = true;
}
@@ -120,32 +74,11 @@ public class CSVPrinter {
*
* @param values values to be outputted.
*/
- public void println(String[] values) {
+ public void println(String[] values) throws IOException {
for (int i = 0; i < values.length; i++) {
print(values[i]);
}
- out.println();
- out.flush();
- newLine = true;
- }
-
-
- /**
- * Print several lines of comma separated values.
- * The values will be quoted if needed. Quotes and
- * newLine characters will be escaped.
- *
- * @param values values to be outputted.
- */
- public void println(String[][] values) {
- for (int i = 0; i < values.length; i++) {
- println(values[i]);
- }
- if (values.length == 0) {
- out.println();
- }
- out.flush();
- newLine = true;
+ println();
}
@@ -158,15 +91,15 @@ public class CSVPrinter {
*
* @param comment the comment to output
*/
- public void printlnComment(String comment) {
+ public void printlnComment(String comment) throws IOException {
if(this.strategy.isCommentingDisabled()) {
return;
}
if (!newLine) {
- out.println();
+ println();
}
- out.print(this.strategy.getCommentStart());
- out.print(' ');
+ out.write(this.strategy.getCommentStart());
+ out.write(' ');
for (int i = 0; i < comment.length(); i++) {
char c = comment.charAt(i);
switch (c) {
@@ -176,120 +109,201 @@ public class CSVPrinter {
}
// break intentionally excluded.
case '\n' :
- out.println();
- out.print(this.strategy.getCommentStart());
- out.print(' ');
+ println();
+ out.write(this.strategy.getCommentStart());
+ out.write(' ');
break;
default :
- out.print(c);
+ out.write(c);
break;
}
}
- out.println();
- out.flush();
- newLine = true;
+ println();
}
- /**
- * Print the string as the next value on the line. The value
- * will be quoted if needed.
- *
- * @param value value to be outputted.
- */
- public void print(String value) {
- boolean quote = false;
- if (value.length() > 0) {
- char c = value.charAt(0);
- if (newLine
- && (c < '0'
- || (c > '9' && c < 'A')
- || (c > 'Z' && c < 'a')
- || (c > 'z'))) {
- quote = true;
- }
- if (c == ' ' || c == '\f' || c == '\t') {
- quote = true;
+ public void print(char[] value, int offset, int len, boolean checkForEscape) throws IOException {
+ if (!checkForEscape) {
+ if (newLine) {
+ newLine = false;
+ } else {
+ out.write(this.strategy.getDelimiter());
}
- for (int i = 0; i < value.length(); i++) {
- c = value.charAt(i);
- if (c == '"' || c == this.strategy.getDelimiter() || c == '\n' || c == '\r') {
- quote = true;
- c = value.charAt( value.length() - 1 );
- break;
+ out.write(value, offset, len);
+ return;
+ }
+
+ if (strategy.getEncapsulator() != (char)-2) {
+ printAndEncapsulate(value, offset, len);
+ } else if (strategy.getEscape() != (char)-2) {
+ printAndEscape(value, offset, len);
+ } else {
+ out.write(value, offset, len);
+ }
+ }
+
+ void printSep() throws IOException {
+ if (newLine) {
+ newLine = false;
+ } else {
+ out.write(this.strategy.getDelimiter());
+ }
+ }
+
+ void printAndEscape(char[] value, int offset, int len) throws IOException {
+ int start = offset;
+ int pos = offset;
+ int end = offset + len;
+
+ char delim = this.strategy.getDelimiter();
+ char escape = this.strategy.getEscape();
+
+ printSep();
+
+ while (pos < end) {
+ char c = value[pos];
+ if (c == '\r' || c=='\n' || c==delim || c==escape) {
+ // write out segment up until this char
+ int l = pos-start;
+ if (l>0) {
+ out.write(value, start, l);
}
+ if (c=='\n') c='n';
+ else if (c=='\r') c='r';
+
+ out.write(escape);
+ out.write(c);
+
+ start = pos+1; // start on the current char after this one
}
- if (c == ' ' || c == '\f' || c == '\t') {
- quote = true;
- }
- } else if (newLine) {
+
+ pos++;
+ }
+
+ // write last segment
+ int l = pos-start;
+ if (l>0) {
+ out.write(value, start, l);
+ }
+ }
+
+ void printAndEncapsulate(char[] value, int offset, int len) throws IOException {
+ boolean first = newLine; // is this the first value on this line?
+ boolean quote = false;
+ int start = offset;
+ int pos = offset;
+ int end = offset + len;
+
+ char delim = this.strategy.getDelimiter();
+ char encapsulator = this.strategy.getEncapsulator();
+
+ printSep();
+
+ if (len <= 0) {
// always quote an empty token that is the first
// on the line, as it may be the only thing on the
// line. If it were not quoted in that case,
// an empty line has no tokens.
- quote = true;
- }
- if (newLine) {
- newLine = false;
+ if (first) {
+ quote = true;
+ }
} else {
- out.print(this.strategy.getDelimiter());
+ char c = value[pos];
+
+ // Hmmm, where did this rule come from?
+ if (first
+ && (c < '0'
+ || (c > '9' && c < 'A')
+ || (c > 'Z' && c < 'a')
+ || (c > 'z'))) {
+ quote = true;
+ // } else if (c == ' ' || c == '\f' || c == '\t') {
+ } else if (c <= '#') {
+ // Some other chars at the start of a value caused the parser to fail, so for now
+ // encapsulate if we start in anything less than '#'. We are being conservative
+ // by including the default comment char too.
+ quote = true;
+ } else {
+ while (pos < end) {
+ c = value[pos];
+ if (c=='\n' || c=='\r' || c==encapsulator || c==delim) {
+ quote = true;
+ break;
+ }
+ pos++;
+ }
+
+ if (!quote) {
+ pos = end-1;
+ c = value[pos];
+ // if (c == ' ' || c == '\f' || c == '\t') {
+ // Some other chars at the end caused the parser to fail, so for now
+ // encapsulate if we end in anything less than ' '
+ if (c <= ' ') {
+ quote = true;
+ }
+ }
+ }
}
- if (quote) {
- out.print(escapeAndQuote(value));
- } else {
- out.print(value);
+
+ if (!quote) {
+ // no encapsulation needed - write out the original value
+ out.write(value, offset, len);
+ return;
+ }
+
+ // we hit something that needed encapsulation
+ out.write(encapsulator);
+
+ // Pick up where we left off: pos should be positioned on the first character that caused
+ // the need for encapsulation.
+ while (pos<end) {
+ char c = value[pos];
+ if (c==encapsulator) {
+ // write out the chunk up until this point
+
+ // add 1 to the length to write out the encapsulator also
+ out.write(value, start, pos-start+1);
+ // put the next starting position on the encapsulator so we will
+ // write it out again with the next string (effectively doubling it)
+ start = pos;
+ }
+ pos++;
}
- out.flush();
- }
+ // write the last segment
+ out.write(value, start, pos-start);
+ out.write(encapsulator);
+ }
/**
- * Enclose the value in quotes and escape the quote
- * and comma characters that are inside.
+ * Print the string as the next value on the line. The value
+ * will be escaped or encapsulated as needed if checkForEscape==true
*
- * @param value needs to be escaped and quoted
- * @return the value, escaped and quoted
+ * @param value value to be outputted.
*/
- private String escapeAndQuote(String value) {
- // the initial count is for the preceding and trailing quotes
- int count = 2;
- for (int i = 0; i < value.length(); i++) {
- switch (value.charAt(i)) {
- case '\"' :
- case '\n' :
- case '\r' :
- case '\\' :
- count++;
- break;
- default:
- break;
- }
+ public void print(String value, boolean checkForEscape) throws IOException {
+ if (!checkForEscape) {
+ // write directly from string
+ out.write(value);
+ return;
}
- StringBuffer sb = new StringBuffer(value.length() + count);
- sb.append(strategy.getEncapsulator());
- for (int i = 0; i < value.length(); i++) {
- char c = value.charAt(i);
-
- if (c == strategy.getEncapsulator()) {
- sb.append('\\').append(c);
- continue;
- }
- switch (c) {
- case '\n' :
- sb.append("\\n");
- break;
- case '\r' :
- sb.append("\\r");
- break;
- case '\\' :
- sb.append("\\\\");
- break;
- default :
- sb.append(c);
- }
+
+ if (buf.length < value.length()) {
+ buf = new char[value.length()];
}
- sb.append(strategy.getEncapsulator());
- return sb.toString();
+
+ value.getChars(0, value.length(), buf, 0);
+ print(buf, 0, value.length(), checkForEscape);
}
+ /**
+ * Print the string as the next value on the line. The value
+ * will be escaped or encapsulated as needed.
+ *
+ * @param value value to be outputted.
+ */
+ public void print(String value) throws IOException {
+ print(value, true);
+ }
}
Modified: commons/sandbox/csv/trunk/src/java/org/apache/commons/csv/CSVStrategy.java
URL: http://svn.apache.org/viewvc/commons/sandbox/csv/trunk/src/java/org/apache/commons/csv/CSVStrategy.java?rev=955284&r1=955283&r2=955284&view=diff
==============================================================================
--- commons/sandbox/csv/trunk/src/java/org/apache/commons/csv/CSVStrategy.java (original)
+++ commons/sandbox/csv/trunk/src/java/org/apache/commons/csv/CSVStrategy.java Wed Jun 16 16:12:34 2010
@@ -34,12 +34,16 @@ public class CSVStrategy implements Clon
private boolean interpretUnicodeEscapes;
private boolean ignoreEmptyLines;
+ // controls for output
+ private String printerNewline = "\n";
+
// -2 is used to signal disabled, because it won't be confused with
// an EOF signal (-1), and because \ufffe in UTF-16 would be
// encoded as two chars (using surrogates) and thus there should never
// be a collision with a real text char.
public static char COMMENTS_DISABLED = (char)-2;
public static char ESCAPE_DISABLED = (char)-2;
+ public static char ENCAPSULATOR_DISABLED = (char)-2;
public static CSVStrategy DEFAULT_STRATEGY = new CSVStrategy(',', '"', COMMENTS_DISABLED, ESCAPE_DISABLED, true,
true, false, true);
@@ -98,7 +102,6 @@ public class CSVStrategy implements Clon
true, interpretUnicodeEscapes, ignoreEmptyLines);
}
-
public void setDelimiter(char delimiter) { this.delimiter = delimiter; }
public char getDelimiter() { return this.delimiter; }
@@ -130,6 +133,13 @@ public class CSVStrategy implements Clon
public void setIgnoreEmptyLines(boolean ignoreEmptyLines) { this.ignoreEmptyLines = ignoreEmptyLines; }
public boolean getIgnoreEmptyLines() { return this.ignoreEmptyLines; }
+ public void setPrinterNewline(String newline) {
+ this.printerNewline = newline;
+ }
+ public String getPrinterNewline() {
+ return this.printerNewline;
+ }
+
public Object clone() {
try {
return super.clone();
Modified: commons/sandbox/csv/trunk/src/java/org/apache/commons/csv/CSVUtils.java
URL: http://svn.apache.org/viewvc/commons/sandbox/csv/trunk/src/java/org/apache/commons/csv/CSVUtils.java?rev=955284&r1=955283&r2=955284&view=diff
==============================================================================
--- commons/sandbox/csv/trunk/src/java/org/apache/commons/csv/CSVUtils.java (original)
+++ commons/sandbox/csv/trunk/src/java/org/apache/commons/csv/CSVUtils.java Wed Jun 16 16:12:34 2010
@@ -48,10 +48,10 @@ public class CSVUtils {
* @return the CSV string, will be an empty string if the length of the
* value array is 0
*/
- public static String printLine(String[] values) {
+ public static String printLine(String[] values, CSVStrategy strategy) {
// set up a CSVUtils
StringWriter stringWriter = new StringWriter();
- CSVPrinter csvPrinter = new CSVPrinter(stringWriter);
+ CSVPrinter csvPrinter = new CSVPrinter(stringWriter, strategy);
// check for null values an "null" as strings and convert them
// into the strings "null" and "\"null\""
@@ -64,8 +64,11 @@ public class CSVUtils {
}
// convert to CSV
- csvPrinter.println(values);
-
+ try {
+ csvPrinter.println(values);
+ } catch (IOException e) {
+ // should not happen with StringWriter
+ }
// as the resulting string has \r\n at the end, we will trim that away
return stringWriter.toString().trim();
}
Modified: commons/sandbox/csv/trunk/src/test/org/apache/commons/csv/CSVParserTest.java
URL: http://svn.apache.org/viewvc/commons/sandbox/csv/trunk/src/test/org/apache/commons/csv/CSVParserTest.java?rev=955284&r1=955283&r2=955284&view=diff
==============================================================================
--- commons/sandbox/csv/trunk/src/test/org/apache/commons/csv/CSVParserTest.java (original)
+++ commons/sandbox/csv/trunk/src/test/org/apache/commons/csv/CSVParserTest.java Wed Jun 16 16:12:34 2010
@@ -45,6 +45,10 @@ public class CSVParserTest extends TestC
TestCSVParser(Reader in) {
super(in);
}
+
+ TestCSVParser(Reader in, CSVStrategy strategy) {
+ super(in, strategy);
+ }
/**
* Calls super.nextToken() and prints out a String representation of token
* type and content.
@@ -65,7 +69,6 @@ public class CSVParserTest extends TestC
public void testNextToken1() throws IOException {
String code = "abc,def, hijk, lmnop, qrst,uv ,wxy ,z , ,";
TestCSVParser parser = new TestCSVParser(new StringReader(code));
- parser.setStrategy(CSVStrategy.DEFAULT_STRATEGY);
assertEquals(CSVParser.TT_TOKEN + ";abc;", parser.testNextToken());
assertEquals(CSVParser.TT_TOKEN + ";def;", parser.testNextToken());
assertEquals(CSVParser.TT_TOKEN + ";hijk;", parser.testNextToken());
@@ -88,10 +91,13 @@ public class CSVParserTest extends TestC
*
*/
String code = "1,2,3,\na,b x,c\n#foo\n\nd,e,\n\n";
- TestCSVParser parser = new TestCSVParser(new StringReader(code));
- parser.getStrategy().setIgnoreEmptyLines(false);
- parser.setStrategy(CSVStrategy.DEFAULT_STRATEGY);
- parser.getStrategy().setCommentStart('#');
+ CSVStrategy strategy = (CSVStrategy)CSVStrategy.DEFAULT_STRATEGY.clone();
+ // strategy.setIgnoreEmptyLines(false);
+ strategy.setCommentStart('#');
+
+ TestCSVParser parser = new TestCSVParser(new StringReader(code), strategy);
+
+
assertEquals(CSVParser.TT_TOKEN + ";1;", parser.testNextToken());
assertEquals(CSVParser.TT_TOKEN + ";2;", parser.testNextToken());
assertEquals(CSVParser.TT_TOKEN + ";3;", parser.testNextToken());
@@ -114,9 +120,10 @@ public class CSVParserTest extends TestC
* \,,
*/
String code = "a,\\,,b\n\\,,";
- TestCSVParser parser = new TestCSVParser(new StringReader(code));
- parser.setStrategy(CSVStrategy.DEFAULT_STRATEGY);
- parser.getStrategy().setCommentStart('#');
+ CSVStrategy strategy = (CSVStrategy)CSVStrategy.DEFAULT_STRATEGY.clone();
+ strategy.setCommentStart('#');
+ TestCSVParser parser = new TestCSVParser(new StringReader(code), strategy);
+
assertEquals(CSVParser.TT_TOKEN + ";a;", parser.testNextToken());
// an unquoted single backslash is not an escape char
assertEquals(CSVParser.TT_TOKEN + ";\\;", parser.testNextToken());
@@ -138,7 +145,6 @@ public class CSVParserTest extends TestC
String code =
"a,\"foo\",b\na, \" foo\",b\na,\"foo \" ,b\na, \" foo \" ,b";
TestCSVParser parser = new TestCSVParser(new StringReader(code));
- parser.setStrategy(CSVStrategy.DEFAULT_STRATEGY);
assertEquals(CSVParser.TT_TOKEN + ";a;", parser.testNextToken());
assertEquals(CSVParser.TT_TOKEN + ";foo;", parser.testNextToken());
assertEquals(CSVParser.TT_EORECORD + ";b;", parser.testNextToken());
@@ -159,7 +165,6 @@ public class CSVParserTest extends TestC
String code =
"a,\"foo\n\",b\n\"foo\n baar ,,,\"\n\"\n\t \n\"";
TestCSVParser parser = new TestCSVParser(new StringReader(code));
- parser.setStrategy(CSVStrategy.DEFAULT_STRATEGY);
assertEquals(CSVParser.TT_TOKEN + ";a;", parser.testNextToken());
assertEquals(CSVParser.TT_TOKEN + ";foo\n;", parser.testNextToken());
assertEquals(CSVParser.TT_EORECORD + ";b;", parser.testNextToken());
@@ -177,8 +182,7 @@ public class CSVParserTest extends TestC
* ;;
*/
String code = "a;'b and '' more\n'\n!comment;;;;\n;;";
- TestCSVParser parser = new TestCSVParser(new StringReader(code));
- parser.setStrategy( new CSVStrategy(';', '\'', '!') );
+ TestCSVParser parser = new TestCSVParser(new StringReader(code), new CSVStrategy(';', '\'', '!'));
assertEquals(CSVParser.TT_TOKEN + ";a;", parser.testNextToken());
assertEquals(
CSVParser.TT_EORECORD + ";b and ' more\n;",
@@ -265,8 +269,7 @@ public class CSVParserTest extends TestC
{""},
{"world", ""}
};
- CSVParser parser = new CSVParser(new StringReader(code));
- parser.setStrategy(CSVStrategy.EXCEL_STRATEGY);
+ CSVParser parser = new CSVParser(new StringReader(code), CSVStrategy.EXCEL_STRATEGY);
String[][] tmp = parser.getAllValues();
assertEquals(res.length, tmp.length);
assertTrue(tmp.length > 0);
@@ -294,8 +297,7 @@ public class CSVParserTest extends TestC
String code;
for (int codeIndex = 0; codeIndex < codes.length; codeIndex++) {
code = codes[codeIndex];
- CSVParser parser = new CSVParser(new StringReader(code));
- parser.setStrategy(CSVStrategy.EXCEL_STRATEGY);
+ CSVParser parser = new CSVParser(new StringReader(code), CSVStrategy.EXCEL_STRATEGY);
String[][] tmp = parser.getAllValues();
assertEquals(res.length, tmp.length);
assertTrue(tmp.length > 0);
@@ -324,7 +326,6 @@ public class CSVParserTest extends TestC
for (int codeIndex = 0; codeIndex < codes.length; codeIndex++) {
code = codes[codeIndex];
CSVParser parser = new CSVParser(new StringReader(code));
- parser.setStrategy(CSVStrategy.DEFAULT_STRATEGY);
String[][] tmp = parser.getAllValues();
assertEquals(res.length, tmp.length);
assertTrue(tmp.length > 0);
@@ -349,8 +350,7 @@ public class CSVParserTest extends TestC
String code;
for (int codeIndex = 0; codeIndex < codes.length; codeIndex++) {
code = codes[codeIndex];
- CSVParser parser = new CSVParser(new StringReader(code));
- parser.setStrategy(CSVStrategy.EXCEL_STRATEGY);
+ CSVParser parser = new CSVParser(new StringReader(code), CSVStrategy.EXCEL_STRATEGY);
String[][] tmp = parser.getAllValues();
assertEquals(res.length, tmp.length);
assertTrue(tmp.length > 0);
@@ -374,7 +374,6 @@ public class CSVParserTest extends TestC
for (int codeIndex = 0; codeIndex < codes.length; codeIndex++) {
code = codes[codeIndex];
CSVParser parser = new CSVParser(new StringReader(code));
- parser.setStrategy(CSVStrategy.DEFAULT_STRATEGY);
String[][] tmp = parser.getAllValues();
assertEquals(res.length, tmp.length);
assertTrue(tmp.length > 0);
@@ -457,6 +456,61 @@ public class CSVParserTest extends TestC
}
}
+ public void testBackslashEscaping2() throws IOException {
+
+ // To avoid confusion over the need for escaping chars in java code,
+ // We will test with a forward slash as the escape char, and a single
+ // quote as the encapsulator.
+
+ String code = ""
+ + " , , \n" // 1)
+ + " \t , , \n" // 2)
+ + " // , /, , /,\n" // 3)
+ + "";
+ String[][] res = {
+ { " ", " ", " " }, // 1
+ { " \t ", " ", " " }, // 2
+ { " / ", " , ", " ," }, //3
+ };
+
+
+ CSVStrategy strategy = new CSVStrategy(',',CSVStrategy.ENCAPSULATOR_DISABLED,CSVStrategy.COMMENTS_DISABLED,'/',false,false,true,true);
+
+ CSVParser parser = new CSVParser(new StringReader(code), strategy);
+ String[][] tmp = parser.getAllValues();
+ assertTrue(tmp.length > 0);
+
+ if (!CSVPrinterTest.equals(res, tmp)) {
+ assertTrue(false);
+ }
+
+ }
+
+
+ public void testDefaultStrategy() throws IOException {
+
+ String code = ""
+ + "a,b\n" // 1)
+ + "\"\n\",\" \"\n" // 2)
+ + "\"\",#\n" // 2)
+ ;
+ String[][] res = {
+ { "a", "b" },
+ { "\n", " " },
+ { "", "#" }, // WARNING: TODO: this causes a hang if comments are enabled
+ };
+
+ CSVStrategy strategy = CSVStrategy.DEFAULT_STRATEGY;
+ assertEquals(CSVStrategy.COMMENTS_DISABLED, strategy.getCommentStart());
+
+ CSVParser parser = new CSVParser(new StringReader(code), strategy);
+ String[][] tmp = parser.getAllValues();
+ assertTrue(tmp.length > 0);
+
+ if (!CSVPrinterTest.equals(res, tmp)) {
+ assertTrue(false);
+ }
+ }
public void testUnicodeEscape() throws IOException {
@@ -502,8 +556,7 @@ public class CSVParserTest extends TestC
// From SANDBOX-153
public void testDelimiterIsWhitespace() throws IOException {
String code = "one\ttwo\t\tfour \t five\t six";
- TestCSVParser parser = new TestCSVParser(new StringReader(code));
- parser.setStrategy(CSVStrategy.TDF_STRATEGY);
+ TestCSVParser parser = new TestCSVParser(new StringReader(code), CSVStrategy.TDF_STRATEGY);
assertEquals(CSVParser.TT_TOKEN + ";one;", parser.testNextToken());
assertEquals(CSVParser.TT_TOKEN + ";two;", parser.testNextToken());
assertEquals(CSVParser.TT_TOKEN + ";;", parser.testNextToken());
Modified: commons/sandbox/csv/trunk/src/test/org/apache/commons/csv/CSVPrinterTest.java
URL: http://svn.apache.org/viewvc/commons/sandbox/csv/trunk/src/test/org/apache/commons/csv/CSVPrinterTest.java?rev=955284&r1=955283&r2=955284&view=diff
==============================================================================
--- commons/sandbox/csv/trunk/src/test/org/apache/commons/csv/CSVPrinterTest.java (original)
+++ commons/sandbox/csv/trunk/src/test/org/apache/commons/csv/CSVPrinterTest.java Wed Jun 16 16:12:34 2010
@@ -16,7 +16,12 @@
*/
package org.apache.commons.csv;
+import java.io.IOException;
+import java.io.StringReader;
import java.io.StringWriter;
+import java.util.Arrays;
+import java.util.Random;
+
import junit.framework.Test;
import junit.framework.TestCase;
import junit.framework.TestSuite;
@@ -26,48 +31,161 @@ import junit.framework.TestSuite;
*/
public class CSVPrinterTest extends TestCase {
- String lineSeparator = System.getProperty("line.separator");
+ String lineSeparator = "\n";
- public void testPrinter1() {
+ public void testPrinter1() throws IOException {
StringWriter sw = new StringWriter();
- CSVPrinter printer = new CSVPrinter(sw);
+ CSVPrinter printer = new CSVPrinter(sw, CSVStrategy.DEFAULT_STRATEGY);
String[] line1 = {"a", "b"};
printer.println(line1);
assertEquals("a,b" + lineSeparator, sw.toString());
}
- public void testPrinter2() {
+ public void testPrinter2() throws IOException {
StringWriter sw = new StringWriter();
- CSVPrinter printer = new CSVPrinter(sw);
+ CSVPrinter printer = new CSVPrinter(sw, CSVStrategy.DEFAULT_STRATEGY);
String[] line1 = {"a,b", "b"};
printer.println(line1);
assertEquals("\"a,b\",b" + lineSeparator, sw.toString());
}
- public void testPrinter3() {
+ public void testPrinter3() throws IOException {
StringWriter sw = new StringWriter();
- CSVPrinter printer = new CSVPrinter(sw);
+ CSVPrinter printer = new CSVPrinter(sw, CSVStrategy.DEFAULT_STRATEGY);
String[] line1 = {"a, b", "b "};
printer.println(line1);
assertEquals("\"a, b\",\"b \"" + lineSeparator, sw.toString());
}
- public void testExcelPrinter1() {
+ public void testExcelPrinter1() throws IOException {
StringWriter sw = new StringWriter();
- CSVPrinter printer = new CSVPrinter(sw);
- printer.setStrategy(CSVStrategy.EXCEL_STRATEGY);
+ CSVPrinter printer = new CSVPrinter(sw, CSVStrategy.EXCEL_STRATEGY);
String[] line1 = {"a", "b"};
printer.println(line1);
assertEquals("a,b" + lineSeparator, sw.toString());
}
- public void testExcelPrinter2() {
+ public void testExcelPrinter2() throws IOException {
StringWriter sw = new StringWriter();
- CSVPrinter printer = new CSVPrinter(sw);
- printer.setStrategy(CSVStrategy.EXCEL_STRATEGY);
+ CSVPrinter printer = new CSVPrinter(sw, CSVStrategy.EXCEL_STRATEGY);
String[] line1 = {"a,b", "b"};
printer.println(line1);
assertEquals("\"a,b\",b" + lineSeparator, sw.toString());
}
+
+
+ public void testRandom() throws Exception {
+ int iter=10000;
+ strategy = CSVStrategy.DEFAULT_STRATEGY;
+ doRandom(iter);
+ strategy = CSVStrategy.EXCEL_STRATEGY;
+ doRandom(iter);
+
+ // Strategy for MySQL
+ strategy = new CSVStrategy('\t', CSVStrategy.ENCAPSULATOR_DISABLED, CSVStrategy.COMMENTS_DISABLED,'\\',false, false, false, false);
+ doRandom(iter);
+ }
+
+ Random r = new Random();
+ CSVStrategy strategy;
+
+ public void doRandom(int iter) throws Exception {
+ for (int i=0; i<iter; i++) {
+ doOneRandom();
+ }
+ }
+
+ public void doOneRandom() throws Exception {
+ int nLines = r.nextInt(4)+1;
+ int nCol = r.nextInt(3)+1;
+ // nLines=1;nCol=2;
+ String[][] lines = new String[nLines][];
+ for (int i=0; i<nLines; i++) {
+ String[] line = new String[nCol];
+ lines[i] = line;
+ for (int j=0; j<nCol; j++) {
+ line[j] = randStr();
+ }
+ }
+
+ StringWriter sw = new StringWriter();
+ CSVPrinter printer = new CSVPrinter(sw, strategy);
+
+ for (int i=0; i<nLines; i++) {
+ // for (int j=0; j<lines[i].length; j++) System.out.println("### VALUE=:" + printable(lines[i][j]));
+ printer.println(lines[i]);
+ }
+
+ printer.flush();
+ String result = sw.toString();
+ // System.out.println("### :" + printable(result));
+
+ StringReader reader = new StringReader(result);
+
+ CSVParser parser = new CSVParser(reader, strategy);
+ String[][] parseResult = parser.getAllValues();
+
+ if (!equals(lines, parseResult)) {
+ System.out.println("Printer output :" + printable(result));
+ assertTrue(false);
+ }
+ }
+
+ public static boolean equals(String[][] a, String[][] b) {
+ for (int i=0; i<a.length; i++) {
+ String[] linea = a[i];
+ String[] lineb = b[i];
+ for (int j=0; j<linea.length; j++) {
+ String aval = linea[j];
+ String bval = lineb[j];
+ if (!aval.equals(bval)) {
+ System.out.println("expected :" + printable(aval));
+ System.out.println("got :" + printable(bval));
+ return false;
+ }
+ }
+ }
+ return true;
+ }
+
+ public static String printable(String s) {
+ StringBuffer sb = new StringBuffer();
+ for (int i=0; i<s.length(); i++) {
+ char ch = s.charAt(i);
+ if (ch<=' ' || ch>=128) {
+ sb.append("(" + (int)ch + ")");
+ } else {
+ sb.append(ch);
+ }
+ }
+ return sb.toString();
+ }
+
+ public String randStr() {
+ int sz = r.nextInt(20);
+ // sz = r.nextInt(3);
+ char[] buf = new char[sz];
+ for (int i=0; i<sz; i++) {
+ // stick in special chars with greater frequency
+ char ch;
+ int what = r.nextInt(20);
+ switch (what) {
+ case 0: ch = '\r'; break;
+ case 1: ch = '\n'; break;
+ case 2: ch = '\t'; break;
+ case 3: ch = '\f'; break;
+ case 4: ch = ' '; break;
+ case 5: ch = ','; break;
+ case 6: ch = '"'; break;
+ case 7: ch = '\''; break;
+ case 8: ch = '\\'; break;
+ default: ch = (char)r.nextInt(300); break;
+ // default: ch = 'a'; break;
+ }
+ buf[i] = ch;
+ }
+ return new String(buf);
+ }
+
}
Modified: commons/sandbox/csv/trunk/src/test/org/apache/commons/csv/CSVStrategyTest.java
URL: http://svn.apache.org/viewvc/commons/sandbox/csv/trunk/src/test/org/apache/commons/csv/CSVStrategyTest.java?rev=955284&r1=955283&r2=955284&view=diff
==============================================================================
--- commons/sandbox/csv/trunk/src/test/org/apache/commons/csv/CSVStrategyTest.java (original)
+++ commons/sandbox/csv/trunk/src/test/org/apache/commons/csv/CSVStrategyTest.java Wed Jun 16 16:12:34 2010
@@ -35,8 +35,7 @@ public class CSVStrategyTest extends Tes
// getters / setters
// ======================================================
public void testGetSetCommentStart() {
- CSVParser parser = new CSVParser(new StringReader("hello world"));
- CSVStrategy strategy = parser.getStrategy();
+ CSVStrategy strategy = (CSVStrategy)CSVStrategy.DEFAULT_STRATEGY.clone();
strategy.setCommentStart('#');
assertEquals(strategy.getCommentStart(), '#');
strategy.setCommentStart('!');
@@ -44,8 +43,7 @@ public class CSVStrategyTest extends Tes
}
public void testGetSetEncapsulator() {
- CSVParser parser = new CSVParser(new StringReader("hello world"));
- CSVStrategy strategy = parser.getStrategy();
+ CSVStrategy strategy = (CSVStrategy)CSVStrategy.DEFAULT_STRATEGY.clone();
strategy.setEncapsulator('"');
assertEquals(strategy.getEncapsulator(), '"');
strategy.setEncapsulator('\'');
@@ -53,8 +51,7 @@ public class CSVStrategyTest extends Tes
}
public void testGetSetDelimiter() {
- CSVParser parser = new CSVParser(new StringReader("hello world"));
- CSVStrategy strategy = parser.getStrategy();
+ CSVStrategy strategy = (CSVStrategy)CSVStrategy.DEFAULT_STRATEGY.clone();
strategy.setDelimiter(';');
assertEquals(strategy.getDelimiter(), ';');
strategy.setDelimiter(',');
@@ -64,8 +61,7 @@ public class CSVStrategyTest extends Tes
}
public void testSetCSVStrategy() {
- CSVParser parser = new CSVParser(new StringReader("hello world"));
- CSVStrategy strategy = parser.getStrategy();
+ CSVStrategy strategy = CSVStrategy.DEFAULT_STRATEGY;
// default settings
assertEquals(strategy.getDelimiter(), ',');
assertEquals(strategy.getEncapsulator(), '"');
@@ -74,7 +70,6 @@ public class CSVStrategyTest extends Tes
assertEquals(false, strategy.getUnicodeEscapeInterpretation());
assertEquals(true, strategy.getIgnoreEmptyLines());
// explicit csv settings
- parser.setStrategy(CSVStrategy.DEFAULT_STRATEGY);
assertEquals(strategy.getDelimiter(), ',');
assertEquals(strategy.getEncapsulator(), '"');
assertEquals(strategy.getCommentStart(), CSVStrategy.COMMENTS_DISABLED);