You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@commons.apache.org by yo...@apache.org on 2010/06/16 18:12:34 UTC

svn commit: r955284 - in /commons/sandbox/csv/trunk/src: java/org/apache/commons/csv/ test/org/apache/commons/csv/

Author: yonik
Date: Wed Jun 16 16:12:34 2010
New Revision: 955284

URL: http://svn.apache.org/viewvc?rev=955284&view=rev
Log:
SANDBOX-322: CSVPrinter overhaul

Modified:
    commons/sandbox/csv/trunk/src/java/org/apache/commons/csv/CSVParser.java
    commons/sandbox/csv/trunk/src/java/org/apache/commons/csv/CSVPrinter.java
    commons/sandbox/csv/trunk/src/java/org/apache/commons/csv/CSVStrategy.java
    commons/sandbox/csv/trunk/src/java/org/apache/commons/csv/CSVUtils.java
    commons/sandbox/csv/trunk/src/test/org/apache/commons/csv/CSVParserTest.java
    commons/sandbox/csv/trunk/src/test/org/apache/commons/csv/CSVPrinterTest.java
    commons/sandbox/csv/trunk/src/test/org/apache/commons/csv/CSVStrategyTest.java

Modified: commons/sandbox/csv/trunk/src/java/org/apache/commons/csv/CSVParser.java
URL: http://svn.apache.org/viewvc/commons/sandbox/csv/trunk/src/java/org/apache/commons/csv/CSVParser.java?rev=955284&r1=955283&r2=955284&view=diff
==============================================================================
--- commons/sandbox/csv/trunk/src/java/org/apache/commons/csv/CSVParser.java (original)
+++ commons/sandbox/csv/trunk/src/java/org/apache/commons/csv/CSVParser.java Wed Jun 16 16:12:34 2010
@@ -70,8 +70,7 @@ public class CSVParser {
   // the input stream
   private final ExtendedBufferedReader in;
 
-  // TODO: this can be made final if setStrategy is removed
-  private CSVStrategy strategy;
+  private final CSVStrategy strategy;
   
   // the following objects are shared to reduce garbage 
   /** A record buffer for getLine(). Grows as necessary and is reused. */
@@ -346,7 +345,7 @@ public class CSVParser {
     //  important: make sure a new char gets consumed in each iteration
     while (!tkn.isReady) {
       // ignore whitespaces at beginning of a token
-      while (isWhitespace(c) && !eol) {
+      while (strategy.getIgnoreLeadingWhitespaces() && isWhitespace(c) && !eol) {
         wsBuf.append((char) c);
         c = in.read();
         eol = isEndOfLine(c);
@@ -561,18 +560,7 @@ public class CSVParser {
   // ======================================================
   
   /**
-   * Sets the specified CSV Strategy
-   *
-   * @return current instance of CSVParser to allow chained method calls
-   * @deprecated the strategy should be set in the constructor {@link #CSVParser(Reader,CSVStrategy)}.
-   */
-  public CSVParser setStrategy(CSVStrategy strategy) {
-    this.strategy = strategy;
-    return this;
-  }
-  
-  /**
-   * Obtain the specified CSV Strategy
+   * Obtain the specified CSV Strategy.  This should not be modified.
    * 
    * @return strategy currently being used
    */

Modified: commons/sandbox/csv/trunk/src/java/org/apache/commons/csv/CSVPrinter.java
URL: http://svn.apache.org/viewvc/commons/sandbox/csv/trunk/src/java/org/apache/commons/csv/CSVPrinter.java?rev=955284&r1=955283&r2=955284&view=diff
==============================================================================
--- commons/sandbox/csv/trunk/src/java/org/apache/commons/csv/CSVPrinter.java (original)
+++ commons/sandbox/csv/trunk/src/java/org/apache/commons/csv/CSVPrinter.java Wed Jun 16 16:12:34 2010
@@ -16,6 +16,7 @@
  */
 package org.apache.commons.csv;
 
+import java.io.IOException;
 import java.io.OutputStream;
 import java.io.PrintWriter;
 import java.io.Writer;
@@ -26,63 +27,27 @@ import java.io.Writer;
 public class CSVPrinter {
 
   /** The place that the values get written. */
-  protected PrintWriter out;
+  protected final Writer out;
+  protected final CSVStrategy strategy;
 
   /** True if we just began a new line. */
   protected boolean newLine = true;
 
-  private CSVStrategy strategy = CSVStrategy.DEFAULT_STRATEGY;
+  protected char[] buf = new char[0];  // temporary buffer
 
   /**
    * Create a printer that will print values to the given
-   * stream. Character to byte conversion is done using
-   * the default character encoding. Comments will be
-   * written using the default comment character '#'.
+   * stream following the CSVStrategy.
    *
-   * @param out stream to which to print.
-   */
-  public CSVPrinter(OutputStream out) {
-    this.out = new PrintWriter(out);
-  }
-
-
-  /**
-   * Create a printer that will print values to the given
-   * stream. Comments will be
-   * written using the default comment character '#'.
+   * Currently, only a pure encapsulation strategy or a pure escaping strategy
+   * is supported.  Hybrid strategies (encapsulation and escaping with a different character) are not supported.
    *
    * @param out stream to which to print.
+   * @param strategy describes the CSV variation.
    */
-  public CSVPrinter(Writer out) {
-    if (out instanceof PrintWriter) {
-      this.out = (PrintWriter) out;
-    } else {
-      this.out = new PrintWriter(out);
-    }
-  }
-
-
-  // ======================================================
-  //  strategies
-  // ======================================================
-  
-  /**
-   * Sets the specified CSV Strategy
-   *
-   * @return current instance of CSVParser to allow chained method calls
-   */
-  public CSVPrinter setStrategy(CSVStrategy strategy) {
-    this.strategy = strategy;
-    return this;
-  }
-  
-  /**
-   * Obtain the specified CSV Strategy
-   * 
-   * @return strategy currently being used
-   */
-  public CSVStrategy getStrategy() {
-    return this.strategy;
+  public CSVPrinter(Writer out, CSVStrategy strategy) {
+    this.out = out;
+    this.strategy = strategy==null ? CSVStrategy.DEFAULT_STRATEGY : strategy;
   }
   
   // ======================================================
@@ -90,26 +55,15 @@ public class CSVPrinter {
   // ======================================================
 
   /**
-   * Print the string as the last value on the line. The value
-   * will be quoted if needed.
-   *
-   * @param value value to be outputted.
+   * Output a blank line
    */
-  public void println(String value) {
-    print(value);
-    out.println();
-    out.flush();
+  public void println() throws IOException {
+    out.write(strategy.getPrinterNewline());
     newLine = true;
   }
 
-
-  /**
-   * Output a blank line
-   */
-  public void println() {
-    out.println();
+  public void flush() throws IOException {
     out.flush();
-    newLine = true;
   }
 
 
@@ -120,32 +74,11 @@ public class CSVPrinter {
    *
    * @param values values to be outputted.
    */
-  public void println(String[] values) {
+  public void println(String[] values) throws IOException {
     for (int i = 0; i < values.length; i++) {
       print(values[i]);
     }
-    out.println();
-    out.flush();
-    newLine = true;
-  }
-
-
-  /**
-   * Print several lines of comma separated values.
-   * The values will be quoted if needed.  Quotes and
-   * newLine characters will be escaped.
-   *
-   * @param values values to be outputted.
-   */
-  public void println(String[][] values) {
-    for (int i = 0; i < values.length; i++) {
-      println(values[i]);
-    }
-    if (values.length == 0) {
-      out.println();
-    }
-    out.flush();
-    newLine = true;
+    println();
   }
 
 
@@ -158,15 +91,15 @@ public class CSVPrinter {
    *
    * @param comment the comment to output
    */
-  public void printlnComment(String comment) {
+  public void printlnComment(String comment) throws IOException {
     if(this.strategy.isCommentingDisabled()) {
         return;
     }
     if (!newLine) {
-      out.println();
+      println();
     }
-    out.print(this.strategy.getCommentStart());
-    out.print(' ');
+    out.write(this.strategy.getCommentStart());
+    out.write(' ');
     for (int i = 0; i < comment.length(); i++) {
       char c = comment.charAt(i);
       switch (c) {
@@ -176,120 +109,201 @@ public class CSVPrinter {
           }
           // break intentionally excluded.
         case '\n' :
-          out.println();
-          out.print(this.strategy.getCommentStart());
-          out.print(' ');
+          println();
+          out.write(this.strategy.getCommentStart());
+          out.write(' ');
           break;
         default :
-          out.print(c);
+          out.write(c);
           break;
       }
     }
-    out.println();
-    out.flush();
-    newLine = true;
+    println();
   }
 
 
-  /**
-   * Print the string as the next value on the line. The value
-   * will be quoted if needed.
-   *
-   * @param value value to be outputted.
-   */
-  public void print(String value) {
-    boolean quote = false;
-    if (value.length() > 0) {
-      char c = value.charAt(0);
-      if (newLine
-        && (c < '0'
-          || (c > '9' && c < 'A')
-          || (c > 'Z' && c < 'a')
-          || (c > 'z'))) {
-        quote = true;
-      }
-      if (c == ' ' || c == '\f' || c == '\t') {
-        quote = true;
+  public void print(char[] value, int offset, int len, boolean checkForEscape) throws IOException {
+    if (!checkForEscape) {
+      if (newLine) {
+        newLine = false;
+      } else {
+        out.write(this.strategy.getDelimiter());
       }
-      for (int i = 0; i < value.length(); i++) {
-        c = value.charAt(i);
-        if (c == '"' || c == this.strategy.getDelimiter() || c == '\n' || c == '\r') {
-          quote = true;
-          c = value.charAt( value.length() - 1 );
-          break;
+      out.write(value, offset, len);
+      return;
+    }
+
+    if (strategy.getEncapsulator() != (char)-2) {
+      printAndEncapsulate(value, offset, len);
+    } else if (strategy.getEscape() != (char)-2) {
+      printAndEscape(value, offset, len);
+    } else {
+      out.write(value, offset, len);
+    }
+  }
+
+  void printSep() throws IOException {
+    if (newLine) {
+      newLine = false;
+    } else {
+      out.write(this.strategy.getDelimiter());
+    }
+  }
+
+  void printAndEscape(char[] value, int offset, int len) throws IOException {
+    int start = offset;
+    int pos = offset;
+    int end = offset + len;
+
+    char delim = this.strategy.getDelimiter();
+    char escape = this.strategy.getEscape();
+
+    printSep();
+
+    while (pos < end) {
+      char c = value[pos];
+      if (c == '\r' || c=='\n' || c==delim || c==escape) {
+        // write out segment up until this char
+        int l = pos-start;
+        if (l>0) {
+          out.write(value, start, l);
         }
+        if (c=='\n') c='n';
+        else if (c=='\r') c='r';
+
+        out.write(escape);
+        out.write(c);
+
+        start = pos+1; // start on the current char after this one
       }
-      if (c == ' ' || c == '\f' || c == '\t') {
-        quote = true;
-      }
-    } else if (newLine) {
+
+      pos++;
+    }
+
+    // write last segment
+    int l = pos-start;
+    if (l>0) {
+      out.write(value, start, l);      
+    }
+  }
+
+  void printAndEncapsulate(char[] value, int offset, int len) throws IOException {
+    boolean first = newLine;  // is this the first value on this line?
+    boolean quote = false;
+    int start = offset;
+    int pos = offset;
+    int end = offset + len;
+
+    char delim = this.strategy.getDelimiter();
+    char encapsulator = this.strategy.getEncapsulator();
+
+    printSep();
+
+    if (len <= 0) {
       // always quote an empty token that is the first
       // on the line, as it may be the only thing on the
       // line. If it were not quoted in that case,
       // an empty line has no tokens.
-      quote = true;
-    }
-    if (newLine) {
-      newLine = false;
+      if (first) {
+        quote = true;
+      }
     } else {
-      out.print(this.strategy.getDelimiter());
+      char c = value[pos];
+
+      // Hmmm, where did this rule come from?
+      if (first
+          && (c < '0'
+          || (c > '9' && c < 'A')
+          || (c > 'Z' && c < 'a')
+          || (c > 'z'))) {
+        quote = true;
+      // } else if (c == ' ' || c == '\f' || c == '\t') {
+      } else if (c <= '#') {
+        // Some other chars at the start of a value caused the parser to fail, so for now
+        // encapsulate if we start in anything less than '#'.  We are being conservative
+        // by including the default comment char too.
+        quote = true;
+      } else {
+        while (pos < end) {
+          c = value[pos];
+          if (c=='\n' || c=='\r' || c==encapsulator || c==delim) {
+            quote = true;
+            break;
+          }
+          pos++;
+        }
+
+        if (!quote) {
+          pos = end-1;
+          c = value[pos];
+          // if (c == ' ' || c == '\f' || c == '\t') {
+          // Some other chars at the end caused the parser to fail, so for now
+          // encapsulate if we end in anything less than ' '
+          if (c <= ' ') {
+            quote = true;
+          }
+        }
+      }
     }
-    if (quote) {
-      out.print(escapeAndQuote(value));
-    } else {
-      out.print(value);
+
+    if (!quote) {
+      // no encapsulation needed - write out the original value
+      out.write(value, offset, len);
+      return;
+    }
+
+    // we hit something that needed encapsulation
+    out.write(encapsulator);
+
+    // Pick up where we left off: pos should be positioned on the first character that caused
+    // the need for encapsulation.
+    while (pos<end) {
+      char c = value[pos];
+      if (c==encapsulator) {
+        // write out the chunk up until this point
+
+        // add 1 to the length to write out the encapsulator also
+        out.write(value, start, pos-start+1);
+        // put the next starting position on the encapsulator so we will
+        // write it out again with the next string (effectively doubling it)
+        start = pos;
+      }
+      pos++;
     }
-    out.flush();
-  }
 
+    // write the last segment
+    out.write(value, start, pos-start);
+    out.write(encapsulator);    
+  }
 
   /**
-   * Enclose the value in quotes and escape the quote
-   * and comma characters that are inside.
+   * Print the string as the next value on the line. The value
+   * will be escaped or encapsulated as needed if checkForEscape==true
    *
-   * @param value needs to be escaped and quoted
-   * @return the value, escaped and quoted
+   * @param value value to be outputted.
    */
-  private String escapeAndQuote(String value) {
-    // the initial count is for the preceding and trailing quotes
-    int count = 2;
-    for (int i = 0; i < value.length(); i++) {
-      switch (value.charAt(i)) {
-        case '\"' :
-        case '\n' :
-        case '\r' :
-        case '\\' :
-          count++;
-          break;
-        default:
-          break;
-      }
+  public void print(String value, boolean checkForEscape) throws IOException {
+    if (!checkForEscape) {
+      // write directly from string
+      out.write(value);
+      return;
     }
-    StringBuffer sb = new StringBuffer(value.length() + count);
-    sb.append(strategy.getEncapsulator());
-    for (int i = 0; i < value.length(); i++) {
-      char c = value.charAt(i);
-
-      if (c == strategy.getEncapsulator()) {
-        sb.append('\\').append(c);
-        continue;
-      }
-      switch (c) {
-        case '\n' :
-          sb.append("\\n");
-          break;
-        case '\r' :
-          sb.append("\\r");
-          break;
-        case '\\' :
-          sb.append("\\\\");
-          break;
-        default :
-          sb.append(c);
-      }
+
+    if (buf.length < value.length()) {
+      buf = new char[value.length()];
     }
-    sb.append(strategy.getEncapsulator());
-    return sb.toString();
+
+    value.getChars(0, value.length(), buf, 0);
+    print(buf, 0, value.length(), checkForEscape);
   }
 
+  /**
+   * Print the string as the next value on the line. The value
+   * will be escaped or encapsulated as needed.
+   *
+   * @param value value to be outputted.
+   */
+  public void print(String value) throws IOException {
+    print(value, true);   
+  }
 }

Modified: commons/sandbox/csv/trunk/src/java/org/apache/commons/csv/CSVStrategy.java
URL: http://svn.apache.org/viewvc/commons/sandbox/csv/trunk/src/java/org/apache/commons/csv/CSVStrategy.java?rev=955284&r1=955283&r2=955284&view=diff
==============================================================================
--- commons/sandbox/csv/trunk/src/java/org/apache/commons/csv/CSVStrategy.java (original)
+++ commons/sandbox/csv/trunk/src/java/org/apache/commons/csv/CSVStrategy.java Wed Jun 16 16:12:34 2010
@@ -34,12 +34,16 @@ public class CSVStrategy implements Clon
     private boolean interpretUnicodeEscapes;
     private boolean ignoreEmptyLines;
 
+    // controls for output
+    private String printerNewline = "\n";
+
     // -2 is used to signal disabled, because it won't be confused with
     // an EOF signal (-1), and because \ufffe in UTF-16 would be
     // encoded as two chars (using surrogates) and thus there should never
     // be a collision with a real text char.
     public static char COMMENTS_DISABLED       = (char)-2;
     public static char ESCAPE_DISABLED         = (char)-2;
+    public static char ENCAPSULATOR_DISABLED   = (char)-2;
 
     public static CSVStrategy DEFAULT_STRATEGY = new CSVStrategy(',', '"', COMMENTS_DISABLED, ESCAPE_DISABLED, true, 
                                                                  true, false, true);
@@ -98,7 +102,6 @@ public class CSVStrategy implements Clon
              true, interpretUnicodeEscapes, ignoreEmptyLines);
     }
 
-
     public void setDelimiter(char delimiter) { this.delimiter = delimiter; }
     public char getDelimiter() { return this.delimiter; }
 
@@ -130,6 +133,13 @@ public class CSVStrategy implements Clon
     public void setIgnoreEmptyLines(boolean ignoreEmptyLines) { this.ignoreEmptyLines = ignoreEmptyLines; }
     public boolean getIgnoreEmptyLines() { return this.ignoreEmptyLines; }
 
+    public void setPrinterNewline(String newline) {
+      this.printerNewline = newline;
+    }
+    public String getPrinterNewline() {
+      return this.printerNewline;
+    }
+
     public Object clone() {
       try {
         return super.clone();

Modified: commons/sandbox/csv/trunk/src/java/org/apache/commons/csv/CSVUtils.java
URL: http://svn.apache.org/viewvc/commons/sandbox/csv/trunk/src/java/org/apache/commons/csv/CSVUtils.java?rev=955284&r1=955283&r2=955284&view=diff
==============================================================================
--- commons/sandbox/csv/trunk/src/java/org/apache/commons/csv/CSVUtils.java (original)
+++ commons/sandbox/csv/trunk/src/java/org/apache/commons/csv/CSVUtils.java Wed Jun 16 16:12:34 2010
@@ -48,10 +48,10 @@ public class CSVUtils {
      * @return the CSV string, will be an empty string if the length of the
      * value array is 0
      */
-    public static String printLine(String[] values) {
+    public static String printLine(String[] values, CSVStrategy strategy) {
         // set up a CSVUtils
         StringWriter stringWriter = new StringWriter();
-        CSVPrinter csvPrinter = new CSVPrinter(stringWriter);
+        CSVPrinter csvPrinter = new CSVPrinter(stringWriter, strategy);
   
         // check for null values an "null" as strings and convert them
         // into the strings "null" and "\"null\""
@@ -64,8 +64,11 @@ public class CSVUtils {
         }
   
         // convert to CSV
-        csvPrinter.println(values);
-  
+        try {
+          csvPrinter.println(values);
+        } catch (IOException e) {
+          // should not happen with StringWriter
+        }
         // as the resulting string has \r\n at the end, we will trim that away
         return stringWriter.toString().trim();
     }

Modified: commons/sandbox/csv/trunk/src/test/org/apache/commons/csv/CSVParserTest.java
URL: http://svn.apache.org/viewvc/commons/sandbox/csv/trunk/src/test/org/apache/commons/csv/CSVParserTest.java?rev=955284&r1=955283&r2=955284&view=diff
==============================================================================
--- commons/sandbox/csv/trunk/src/test/org/apache/commons/csv/CSVParserTest.java (original)
+++ commons/sandbox/csv/trunk/src/test/org/apache/commons/csv/CSVParserTest.java Wed Jun 16 16:12:34 2010
@@ -45,6 +45,10 @@ public class CSVParserTest extends TestC
     TestCSVParser(Reader in) {
       super(in);
     }
+
+    TestCSVParser(Reader in, CSVStrategy strategy) {
+      super(in, strategy);
+    }
     /**
      * Calls super.nextToken() and prints out a String representation of token
      * type and content.
@@ -65,7 +69,6 @@ public class CSVParserTest extends TestC
   public void testNextToken1() throws IOException {
     String code = "abc,def, hijk,  lmnop,   qrst,uv ,wxy   ,z , ,";
     TestCSVParser parser = new TestCSVParser(new StringReader(code));
-    parser.setStrategy(CSVStrategy.DEFAULT_STRATEGY);
     assertEquals(CSVParser.TT_TOKEN + ";abc;", parser.testNextToken());
     assertEquals(CSVParser.TT_TOKEN + ";def;", parser.testNextToken());
     assertEquals(CSVParser.TT_TOKEN + ";hijk;", parser.testNextToken());
@@ -88,10 +91,13 @@ public class CSVParserTest extends TestC
      * 
      */
     String code = "1,2,3,\na,b x,c\n#foo\n\nd,e,\n\n";
-    TestCSVParser parser = new TestCSVParser(new StringReader(code));
-    parser.getStrategy().setIgnoreEmptyLines(false);
-    parser.setStrategy(CSVStrategy.DEFAULT_STRATEGY);
-    parser.getStrategy().setCommentStart('#');
+    CSVStrategy strategy = (CSVStrategy)CSVStrategy.DEFAULT_STRATEGY.clone();
+    // strategy.setIgnoreEmptyLines(false);
+    strategy.setCommentStart('#');
+
+    TestCSVParser parser = new TestCSVParser(new StringReader(code), strategy);
+
+
     assertEquals(CSVParser.TT_TOKEN + ";1;", parser.testNextToken());
     assertEquals(CSVParser.TT_TOKEN + ";2;", parser.testNextToken());
     assertEquals(CSVParser.TT_TOKEN + ";3;", parser.testNextToken());
@@ -114,9 +120,10 @@ public class CSVParserTest extends TestC
      *       \,,
      */
     String code = "a,\\,,b\n\\,,";
-    TestCSVParser parser = new TestCSVParser(new StringReader(code));
-    parser.setStrategy(CSVStrategy.DEFAULT_STRATEGY);
-    parser.getStrategy().setCommentStart('#');
+    CSVStrategy strategy = (CSVStrategy)CSVStrategy.DEFAULT_STRATEGY.clone();
+    strategy.setCommentStart('#');
+    TestCSVParser parser = new TestCSVParser(new StringReader(code), strategy);
+
     assertEquals(CSVParser.TT_TOKEN + ";a;", parser.testNextToken());
     // an unquoted single backslash is not an escape char
     assertEquals(CSVParser.TT_TOKEN + ";\\;", parser.testNextToken());
@@ -138,7 +145,6 @@ public class CSVParserTest extends TestC
      String code = 
       "a,\"foo\",b\na,   \" foo\",b\na,\"foo \"  ,b\na,  \" foo \"  ,b";
      TestCSVParser parser = new TestCSVParser(new StringReader(code));
-     parser.setStrategy(CSVStrategy.DEFAULT_STRATEGY);
      assertEquals(CSVParser.TT_TOKEN + ";a;", parser.testNextToken());
      assertEquals(CSVParser.TT_TOKEN + ";foo;", parser.testNextToken());
      assertEquals(CSVParser.TT_EORECORD + ";b;", parser.testNextToken());
@@ -159,7 +165,6 @@ public class CSVParserTest extends TestC
     String code = 
       "a,\"foo\n\",b\n\"foo\n  baar ,,,\"\n\"\n\t \n\"";
     TestCSVParser parser = new TestCSVParser(new StringReader(code));
-    parser.setStrategy(CSVStrategy.DEFAULT_STRATEGY);
     assertEquals(CSVParser.TT_TOKEN + ";a;", parser.testNextToken());
     assertEquals(CSVParser.TT_TOKEN + ";foo\n;", parser.testNextToken());
     assertEquals(CSVParser.TT_EORECORD + ";b;", parser.testNextToken());
@@ -177,8 +182,7 @@ public class CSVParserTest extends TestC
      *       ;;
      */
     String code = "a;'b and '' more\n'\n!comment;;;;\n;;";
-    TestCSVParser parser = new TestCSVParser(new StringReader(code));
-    parser.setStrategy( new CSVStrategy(';', '\'', '!') );
+    TestCSVParser parser = new TestCSVParser(new StringReader(code), new CSVStrategy(';', '\'', '!'));
     assertEquals(CSVParser.TT_TOKEN + ";a;", parser.testNextToken());
     assertEquals(
       CSVParser.TT_EORECORD + ";b and ' more\n;", 
@@ -265,8 +269,7 @@ public class CSVParserTest extends TestC
       {""},
       {"world", ""}
     };
-    CSVParser parser = new CSVParser(new StringReader(code));
-    parser.setStrategy(CSVStrategy.EXCEL_STRATEGY);
+    CSVParser parser = new CSVParser(new StringReader(code), CSVStrategy.EXCEL_STRATEGY);
     String[][] tmp = parser.getAllValues();
     assertEquals(res.length, tmp.length);
     assertTrue(tmp.length > 0);
@@ -294,8 +297,7 @@ public class CSVParserTest extends TestC
     String code;
     for (int codeIndex = 0; codeIndex < codes.length; codeIndex++) {
       code = codes[codeIndex];
-      CSVParser parser = new CSVParser(new StringReader(code));
-      parser.setStrategy(CSVStrategy.EXCEL_STRATEGY);
+      CSVParser parser = new CSVParser(new StringReader(code), CSVStrategy.EXCEL_STRATEGY);
       String[][] tmp = parser.getAllValues();
       assertEquals(res.length, tmp.length);
       assertTrue(tmp.length > 0);
@@ -324,7 +326,6 @@ public class CSVParserTest extends TestC
     for (int codeIndex = 0; codeIndex < codes.length; codeIndex++) {
       code = codes[codeIndex];
       CSVParser parser = new CSVParser(new StringReader(code));
-      parser.setStrategy(CSVStrategy.DEFAULT_STRATEGY);
       String[][] tmp = parser.getAllValues();
       assertEquals(res.length, tmp.length);
       assertTrue(tmp.length > 0);
@@ -349,8 +350,7 @@ public class CSVParserTest extends TestC
     String code;
     for (int codeIndex = 0; codeIndex < codes.length; codeIndex++) {
       code = codes[codeIndex];
-      CSVParser parser = new CSVParser(new StringReader(code));
-      parser.setStrategy(CSVStrategy.EXCEL_STRATEGY);
+      CSVParser parser = new CSVParser(new StringReader(code), CSVStrategy.EXCEL_STRATEGY);
       String[][] tmp = parser.getAllValues();
       assertEquals(res.length, tmp.length);
       assertTrue(tmp.length > 0);
@@ -374,7 +374,6 @@ public class CSVParserTest extends TestC
     for (int codeIndex = 0; codeIndex < codes.length; codeIndex++) {
       code = codes[codeIndex];
       CSVParser parser = new CSVParser(new StringReader(code));
-      parser.setStrategy(CSVStrategy.DEFAULT_STRATEGY);
       String[][] tmp = parser.getAllValues();
       assertEquals(res.length, tmp.length);
       assertTrue(tmp.length > 0);
@@ -457,6 +456,61 @@ public class CSVParserTest extends TestC
     }
   }
 
+  public void testBackslashEscaping2() throws IOException {
+
+    // To avoid confusion over the need for escaping chars in java code,
+    // We will test with a forward slash as the escape char, and a single
+    // quote as the encapsulator.
+
+    String code = ""
+      + " , , \n"           // 1)
+      + " \t ,  , \n"       // 2)
+      + " // , /, , /,\n"   // 3)
+      + "";
+    String[][] res = {
+        { " ", " ", " " },         // 1
+        { " \t ", "  ", " " },         // 2
+        { " / ", " , ", " ," },         //3
+      };
+
+
+    CSVStrategy strategy = new CSVStrategy(',',CSVStrategy.ENCAPSULATOR_DISABLED,CSVStrategy.COMMENTS_DISABLED,'/',false,false,true,true);
+
+    CSVParser parser = new CSVParser(new StringReader(code), strategy);
+    String[][] tmp = parser.getAllValues();
+    assertTrue(tmp.length > 0);
+
+    if (!CSVPrinterTest.equals(res, tmp)) {
+      assertTrue(false);
+    }
+
+  }
+
+
+  public void testDefaultStrategy() throws IOException {
+
+    String code = ""
+        + "a,b\n"            // 1)
+        + "\"\n\",\" \"\n"   // 2)
+        + "\"\",#\n"   // 2)
+        ;
+    String[][] res = {
+        { "a", "b" },
+        { "\n", " " },
+        { "", "#" },    // WARNING: TODO: this causes a hang if comments are enabled
+    };
+
+    CSVStrategy strategy = CSVStrategy.DEFAULT_STRATEGY;
+    assertEquals(CSVStrategy.COMMENTS_DISABLED, strategy.getCommentStart());
+
+    CSVParser parser = new CSVParser(new StringReader(code), strategy);
+    String[][] tmp = parser.getAllValues();
+    assertTrue(tmp.length > 0);
+
+    if (!CSVPrinterTest.equals(res, tmp)) {
+      assertTrue(false);
+    }
+  }
 
 
     public void testUnicodeEscape() throws IOException {
@@ -502,8 +556,7 @@ public class CSVParserTest extends TestC
     // From SANDBOX-153
      public void testDelimiterIsWhitespace() throws IOException {
          String code = "one\ttwo\t\tfour \t five\t six";
-         TestCSVParser parser = new TestCSVParser(new StringReader(code));
-         parser.setStrategy(CSVStrategy.TDF_STRATEGY);
+         TestCSVParser parser = new TestCSVParser(new StringReader(code), CSVStrategy.TDF_STRATEGY);
          assertEquals(CSVParser.TT_TOKEN + ";one;", parser.testNextToken());
          assertEquals(CSVParser.TT_TOKEN + ";two;", parser.testNextToken());
          assertEquals(CSVParser.TT_TOKEN + ";;", parser.testNextToken());

Modified: commons/sandbox/csv/trunk/src/test/org/apache/commons/csv/CSVPrinterTest.java
URL: http://svn.apache.org/viewvc/commons/sandbox/csv/trunk/src/test/org/apache/commons/csv/CSVPrinterTest.java?rev=955284&r1=955283&r2=955284&view=diff
==============================================================================
--- commons/sandbox/csv/trunk/src/test/org/apache/commons/csv/CSVPrinterTest.java (original)
+++ commons/sandbox/csv/trunk/src/test/org/apache/commons/csv/CSVPrinterTest.java Wed Jun 16 16:12:34 2010
@@ -16,7 +16,12 @@
  */
 package org.apache.commons.csv;
 
+import java.io.IOException;
+import java.io.StringReader;
 import java.io.StringWriter;
+import java.util.Arrays;
+import java.util.Random;
+
 import junit.framework.Test;
 import junit.framework.TestCase;
 import junit.framework.TestSuite;
@@ -26,48 +31,161 @@ import junit.framework.TestSuite;
  */
 public class CSVPrinterTest extends TestCase {
   
-  String lineSeparator = System.getProperty("line.separator");
+  String lineSeparator = "\n";  // line terminator expected by the assertions below; fixed to "\n" instead of the platform line.separator
 
-  public void testPrinter1() {
+  public void testPrinter1() throws IOException {  // DEFAULT_STRATEGY: plain values are expected unquoted, joined by a comma
     StringWriter sw = new StringWriter();
-    CSVPrinter printer = new CSVPrinter(sw);
+    CSVPrinter printer = new CSVPrinter(sw, CSVStrategy.DEFAULT_STRATEGY);
     String[] line1 = {"a", "b"};
     printer.println(line1);
     assertEquals("a,b" + lineSeparator, sw.toString());
   }
 
-  public void testPrinter2() {
+  public void testPrinter2() throws IOException {  // DEFAULT_STRATEGY: a value containing the delimiter is expected in double quotes
     StringWriter sw = new StringWriter();
-    CSVPrinter printer = new CSVPrinter(sw);
+    CSVPrinter printer = new CSVPrinter(sw, CSVStrategy.DEFAULT_STRATEGY);
     String[] line1 = {"a,b", "b"};
     printer.println(line1);
     assertEquals("\"a,b\",b" + lineSeparator, sw.toString());
   }
 
-  public void testPrinter3() {
+  public void testPrinter3() throws IOException {  // DEFAULT_STRATEGY: "a, b" and "b " (trailing space) are both expected in double quotes
     StringWriter sw = new StringWriter();
-    CSVPrinter printer = new CSVPrinter(sw);
+    CSVPrinter printer = new CSVPrinter(sw, CSVStrategy.DEFAULT_STRATEGY);
     String[] line1 = {"a, b", "b "};
     printer.println(line1);
     assertEquals("\"a, b\",\"b \"" + lineSeparator, sw.toString());
   }
 
-  public void testExcelPrinter1() {
+  public void testExcelPrinter1() throws IOException {  // EXCEL_STRATEGY: plain values are expected unquoted, joined by a comma
     StringWriter sw = new StringWriter();
-    CSVPrinter printer = new CSVPrinter(sw);
-    printer.setStrategy(CSVStrategy.EXCEL_STRATEGY);
+    CSVPrinter printer = new CSVPrinter(sw, CSVStrategy.EXCEL_STRATEGY);
     String[] line1 = {"a", "b"};
     printer.println(line1);
     assertEquals("a,b" + lineSeparator, sw.toString());
   }
 
-  public void testExcelPrinter2() {
+  public void testExcelPrinter2() throws IOException {  // EXCEL_STRATEGY: a value containing the delimiter is expected in double quotes
     StringWriter sw = new StringWriter();
-    CSVPrinter printer = new CSVPrinter(sw);
-    printer.setStrategy(CSVStrategy.EXCEL_STRATEGY);
+    CSVPrinter printer = new CSVPrinter(sw, CSVStrategy.EXCEL_STRATEGY);
     String[] line1 = {"a,b", "b"};
     printer.println(line1);
     assertEquals("\"a,b\",b" + lineSeparator, sw.toString());
   }
 
+
+  
+  public void testRandom() throws Exception {  // runs the random print-then-parse check once per strategy: default, Excel, and a MySQL-style one
+    int iter=10000;  // number of doOneRandom() rounds per strategy
+    strategy = CSVStrategy.DEFAULT_STRATEGY;
+    doRandom(iter);
+    strategy = CSVStrategy.EXCEL_STRATEGY;
+    doRandom(iter);
+
+    // Strategy for MySQL: tab as delimiter, encapsulator and comments disabled, backslash presumably as the escape character
+    strategy = new CSVStrategy('\t', CSVStrategy.ENCAPSULATOR_DISABLED, CSVStrategy.COMMENTS_DISABLED,'\\',false, false, false, false);
+    doRandom(iter);
+  }
+
+  Random r = new Random();  // unseeded, so every run generates different data; used by doOneRandom() and randStr()
+  CSVStrategy strategy;  // strategy under test; assigned by testRandom() before each doRandom() call
+
+  public void doRandom(int iter) throws Exception {  // runs doOneRandom() iter times against the current strategy field
+    for (int i=0; i<iter; i++) {
+      doOneRandom();
+    }
+  }
+
+  public void doOneRandom() throws Exception {  // one round trip: print a random table, parse the output back, fail if equals() reports a difference
+    int nLines = r.nextInt(4)+1;  // 1..4 rows
+    int nCol = r.nextInt(3)+1;  // 1..3 columns, the same count for every row
+    // nLines=1;nCol=2;
+    String[][] lines = new String[nLines][];
+    for (int i=0; i<nLines; i++) {
+      String[] line = new String[nCol];
+      lines[i] = line;
+      for (int j=0; j<nCol; j++) {
+        line[j] = randStr();  // may be the empty string
+      }
+    }
+
+    StringWriter sw = new StringWriter();
+    CSVPrinter printer = new CSVPrinter(sw, strategy);
+
+    for (int i=0; i<nLines; i++) {
+      // for (int j=0; j<lines[i].length; j++) System.out.println("### VALUE=:" + printable(lines[i][j]));      
+      printer.println(lines[i]);
+    }
+
+    printer.flush();  // flush before reading the writer's contents
+    String result = sw.toString();
+    // System.out.println("### :" + printable(result));
+
+    StringReader reader = new StringReader(result);
+
+    CSVParser parser = new CSVParser(reader, strategy);  // parse with the same strategy that produced the text
+    String[][] parseResult = parser.getAllValues();
+
+    if (!equals(lines, parseResult)) {  // lines is the expected side, parseResult the actual side
+      System.out.println("Printer output :" + printable(result));  // dump the intermediate CSV text to help diagnose the failure
+      assertTrue(false);  // unconditional failure
+    }
+  }
+
+  public static boolean equals(String[][] a, String[][] b) {  // true iff a (expected) and b (actual) have the same shape and pairwise-equal values
+    if (a.length != b.length) return false;  // a row-count mismatch is a difference; previously extra rows in b passed and missing rows threw
+    for (int i=0; i<a.length; i++) {
+      String[] linea = a[i];
+      String[] lineb = b[i];
+      if (linea.length != lineb.length) return false;  // extra or missing columns in b must not pass silently or throw
+      for (int j=0; j<linea.length; j++) {
+        if (!linea[j].equals(lineb[j])) {
+          System.out.println("expected  :" + printable(linea[j]));  // only the first mismatching pair is reported
+          System.out.println("got       :" + printable(lineb[j]));
+          return false;
+        }
+      }
+    }
+    return true;
+  }
+
+  public static String printable(String s) {  // returns s with every char <= ' ' or >= 128 replaced by its decimal code in parentheses
+    StringBuffer sb = new StringBuffer();
+    for (int i=0; i<s.length(); i++) {
+      char ch = s.charAt(i);
+      if (ch<=' ' || ch>=128) {  // space, control characters and anything outside 7-bit ASCII
+        sb.append("(" + (int)ch + ")");  // e.g. a space becomes (32)
+      } else {
+        sb.append(ch);
+      }
+    }
+    return sb.toString();
+  }
+
+  public String randStr() {  // builds a random string of 0..19 chars drawn from r, biased toward whitespace, separators, quotes and backslash
+    int sz = r.nextInt(20);  // length 0..19; 0 yields the empty string
+    // sz = r.nextInt(3);
+    char[] buf = new char[sz];
+    for (int i=0; i<sz; i++) {
+      // stick in special chars with greater frequency
+      char ch;
+      int what = r.nextInt(20);  // 0..8 select one of the nine special chars below; 9..19 fall through to default
+      switch (what) {
+        case 0: ch = '\r'; break;
+        case 1: ch = '\n'; break;
+        case 2: ch = '\t'; break;
+        case 3: ch = '\f'; break;
+        case 4: ch = ' ';  break;
+        case 5: ch = ',';  break;
+        case 6: ch = '"';  break;
+        case 7: ch = '\''; break;
+        case 8: ch = '\\'; break;
+        default: ch = (char)r.nextInt(300); break;  // any char code 0..299, so control and non-ASCII chars can occur too
+        // default: ch = 'a'; break;
+      }
+      buf[i] = ch;
+    }
+    return new String(buf);
+  }
+
 }

Modified: commons/sandbox/csv/trunk/src/test/org/apache/commons/csv/CSVStrategyTest.java
URL: http://svn.apache.org/viewvc/commons/sandbox/csv/trunk/src/test/org/apache/commons/csv/CSVStrategyTest.java?rev=955284&r1=955283&r2=955284&view=diff
==============================================================================
--- commons/sandbox/csv/trunk/src/test/org/apache/commons/csv/CSVStrategyTest.java (original)
+++ commons/sandbox/csv/trunk/src/test/org/apache/commons/csv/CSVStrategyTest.java Wed Jun 16 16:12:34 2010
@@ -35,8 +35,7 @@ public class CSVStrategyTest extends Tes
   //   getters / setters
   // ======================================================
   public void testGetSetCommentStart() {
-    CSVParser parser = new CSVParser(new StringReader("hello world"));
-    CSVStrategy strategy = parser.getStrategy();
+    CSVStrategy strategy = (CSVStrategy)CSVStrategy.DEFAULT_STRATEGY.clone();
     strategy.setCommentStart('#');
     assertEquals(strategy.getCommentStart(), '#');
     strategy.setCommentStart('!');
@@ -44,8 +43,7 @@ public class CSVStrategyTest extends Tes
   }
 
   public void testGetSetEncapsulator() {
-    CSVParser parser = new CSVParser(new StringReader("hello world"));
-    CSVStrategy strategy = parser.getStrategy();
+    CSVStrategy strategy = (CSVStrategy)CSVStrategy.DEFAULT_STRATEGY.clone();
     strategy.setEncapsulator('"');
     assertEquals(strategy.getEncapsulator(), '"');
     strategy.setEncapsulator('\'');
@@ -53,8 +51,7 @@ public class CSVStrategyTest extends Tes
   }
 
   public void testGetSetDelimiter() {
-    CSVParser parser = new CSVParser(new StringReader("hello world"));
-    CSVStrategy strategy = parser.getStrategy();
+    CSVStrategy strategy = (CSVStrategy)CSVStrategy.DEFAULT_STRATEGY.clone();
     strategy.setDelimiter(';');
     assertEquals(strategy.getDelimiter(), ';');
     strategy.setDelimiter(',');
@@ -64,8 +61,7 @@ public class CSVStrategyTest extends Tes
   }
 
   public void testSetCSVStrategy() {
-    CSVParser parser = new CSVParser(new StringReader("hello world"));
-    CSVStrategy strategy = parser.getStrategy();
+    CSVStrategy strategy = CSVStrategy.DEFAULT_STRATEGY;
     // default settings
     assertEquals(strategy.getDelimiter(), ',');
     assertEquals(strategy.getEncapsulator(), '"');
@@ -74,7 +70,6 @@ public class CSVStrategyTest extends Tes
     assertEquals(false, strategy.getUnicodeEscapeInterpretation());
     assertEquals(true,  strategy.getIgnoreEmptyLines());
     // explicit csv settings
-    parser.setStrategy(CSVStrategy.DEFAULT_STRATEGY);
     assertEquals(strategy.getDelimiter(), ',');
     assertEquals(strategy.getEncapsulator(), '"');
     assertEquals(strategy.getCommentStart(), CSVStrategy.COMMENTS_DISABLED);