You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@jena.apache.org by an...@apache.org on 2014/09/17 16:20:41 UTC

svn commit: r1625607 - in /jena/trunk/jena-arq: ./ src/main/java/org/apache/jena/atlas/csv/ src/main/java/org/apache/jena/riot/lang/ src/test/java/org/apache/jena/atlas/csv/

Author: andy
Date: Wed Sep 17 14:20:41 2014
New Revision: 1625607

URL: http://svn.apache.org/r1625607
Log:
JENA-699 : Use Apache Commons CSV parser.

Removed:
    jena/trunk/jena-arq/src/main/java/org/apache/jena/atlas/csv/CSVToken.java
    jena/trunk/jena-arq/src/main/java/org/apache/jena/atlas/csv/CSVTokenIterator.java
    jena/trunk/jena-arq/src/main/java/org/apache/jena/atlas/csv/CSVTokenType.java
Modified:
    jena/trunk/jena-arq/pom.xml
    jena/trunk/jena-arq/src/main/java/org/apache/jena/atlas/csv/CSVParseException.java
    jena/trunk/jena-arq/src/main/java/org/apache/jena/atlas/csv/CSVParser.java
    jena/trunk/jena-arq/src/main/java/org/apache/jena/riot/lang/LangCSV.java
    jena/trunk/jena-arq/src/test/java/org/apache/jena/atlas/csv/TestCSVParser.java

Modified: jena/trunk/jena-arq/pom.xml
URL: http://svn.apache.org/viewvc/jena/trunk/jena-arq/pom.xml?rev=1625607&r1=1625606&r2=1625607&view=diff
==============================================================================
--- jena/trunk/jena-arq/pom.xml (original)
+++ jena/trunk/jena-arq/pom.xml Wed Sep 17 14:20:41 2014
@@ -109,6 +109,12 @@
       <artifactId>jcl-over-slf4j</artifactId>
     </dependency>
 
+    <dependency>
+      <groupId>org.apache.commons</groupId>
+      <artifactId>commons-csv</artifactId>
+      <version>1.0</version>
+    </dependency>
+
   </dependencies>
 
   <build>

Modified: jena/trunk/jena-arq/src/main/java/org/apache/jena/atlas/csv/CSVParseException.java
URL: http://svn.apache.org/viewvc/jena/trunk/jena-arq/src/main/java/org/apache/jena/atlas/csv/CSVParseException.java?rev=1625607&r1=1625606&r2=1625607&view=diff
==============================================================================
--- jena/trunk/jena-arq/src/main/java/org/apache/jena/atlas/csv/CSVParseException.java (original)
+++ jena/trunk/jena-arq/src/main/java/org/apache/jena/atlas/csv/CSVParseException.java Wed Sep 17 14:20:41 2014
@@ -20,6 +20,7 @@ package org.apache.jena.atlas.csv;
 
 class CSVParseException extends RuntimeException
 {
+    private static final long serialVersionUID = -7804460281144630746L;
     public CSVParseException(String msg, Throwable cause)    { super(msg, cause) ; }
     public CSVParseException(String msg)                     { super(msg) ; }
     public CSVParseException(Throwable cause)                { super(cause) ; }

Modified: jena/trunk/jena-arq/src/main/java/org/apache/jena/atlas/csv/CSVParser.java
URL: http://svn.apache.org/viewvc/jena/trunk/jena-arq/src/main/java/org/apache/jena/atlas/csv/CSVParser.java?rev=1625607&r1=1625606&r2=1625607&view=diff
==============================================================================
--- jena/trunk/jena-arq/src/main/java/org/apache/jena/atlas/csv/CSVParser.java (original)
+++ jena/trunk/jena-arq/src/main/java/org/apache/jena/atlas/csv/CSVParser.java Wed Sep 17 14:20:41 2014
@@ -18,109 +18,70 @@
 
 package org.apache.jena.atlas.csv ;
 
-import java.io.InputStream ;
-import java.io.Reader ;
-import java.util.ArrayList ;
-import java.util.Iterator ;
-import java.util.List ;
-
-import org.apache.jena.atlas.io.IO ;
-import org.apache.jena.atlas.iterator.IteratorSlotted ;
-import org.apache.jena.atlas.iterator.PeekIterator ;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.io.Reader;
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.List;
+
+import org.apache.commons.csv.CSVFormat;
+import org.apache.commons.csv.CSVRecord;
+import org.apache.jena.atlas.io.IO;
 
-/** Written specifically to handle SPARQL results CSv files.
- *  Replace with a real parser (e.g. Apache Commons CSV when released)
+/** Written specifically to handle SPARQL results CSV files.
+ *  Acts as a wrapper for Commons CSV parser.
  */
 public class CSVParser implements Iterable<List<String>>
 {
+    
+    private final org.apache.commons.csv.CSVParser parser;
+    
     public static CSVParser create(String filename) {
         InputStream input = IO.openFile(filename) ;
         return create(input) ;
     }
 
     public static CSVParser create(InputStream input) {
-        CSVTokenIterator iter = new CSVTokenIterator(input) ;
-        CSVParser parser = new CSVParser(iter) ;
+        CSVParser parser = new CSVParser(new InputStreamReader(input)) ;
         return parser ; 
     }
     
     public static CSVParser create(Reader input) {
-        CSVTokenIterator iter = new CSVTokenIterator(input) ;
-        CSVParser parser = new CSVParser(iter) ;
+        CSVParser parser = new CSVParser(input) ;
         return parser ; 
     }
 
-    private final CSVTokenIterator iter ;
-    private final PeekIterator<CSVToken> pIter ;
-
-    public CSVParser(CSVTokenIterator iter) {
-        this.iter = iter ;
-        this.pIter = new PeekIterator<>(iter) ;
+    public CSVParser(Reader input) {
+        try {
+            this.parser = CSVFormat.EXCEL.withQuote('\'').parse(input);
+        } catch (IOException e) {
+            throw new CSVParseException("Failed to create the CSV parser: " + e.getMessage(), e);
+        }
     }
     
     @Override
     public Iterator<List<String>> iterator() {
-        return new IteratorSlotted<List<String>>() {
-            @Override
-            protected List<String> moveToNext() {
-                return CSVParser.this.parse1() ;
+        List<List<String>> list = new ArrayList<>();
+        for (CSVRecord record : parser) {
+            List<String> row = new ArrayList<>();
+            for (String columnValue : record) {
+                row.add(columnValue);
             }
-
-            @Override
-            protected boolean hasMore() {
-                return true ;
-            }};
+            list.add(row);
+        }
+        return list.iterator();
     }
 
     public List<String> parse1() {
-        // Get rid of switches. break problems.
-        List<String> line = new ArrayList<>(100) ;
-        
-        loop: while (pIter.hasNext()) {
-            CSVToken t = pIter.next() ;
-            switch (t.type) {
-                case EOF :
-                    return null ;
-                case NL :
-                    // Blank line = one or none?
-                    line.add("") ;
-                    return line ;
-                case STRING :
-                case QSTRING :
-                    line.add(t.image) ;
-                    break ;
-                case COMMA :
-                    // Immediate COMMA is an empty term.
-                    line.add("") ;
-                    continue loop ;
-                default :
-                    exception("Syntax error: expected a string or comma.", t) ;
-            }
-            // Expect COMMA or NL
-            if ( !pIter.hasNext() ) {
-                // File ends, no NL.
-                return line ;
-            }
-            // Look at separateor or end
-            CSVToken t2 = pIter.peek() ;
-            switch (t2.type) {
-                case COMMA :
-                    pIter.next() ;
-                    continue loop ;
-                case NL :
-                case EOF :
-                    pIter.next() ;
-                    return line ;
-                default :
-                    exception("Syntax error: expect comma or end of line.", t) ;
-            }
+        Iterator<List<String>> iterator = iterator();
+        if (iterator.hasNext()) 
+        {
+            final List<String> firstRow = iterator.next();
+            return firstRow;
         }
-        return null ;
-    }
-    static void exception(String msg, CSVToken t) {
-        if ( t != null && t.line >= 0 && t.col > 0 )
-            msg = String.format("[%s, %s] %s", t.line, t.col, msg) ;
-        throw new CSVParseException(msg) ;
+        return null;
     }
 
     static void exception(String msg, long line, long col) {

Modified: jena/trunk/jena-arq/src/main/java/org/apache/jena/riot/lang/LangCSV.java
URL: http://svn.apache.org/viewvc/jena/trunk/jena-arq/src/main/java/org/apache/jena/riot/lang/LangCSV.java?rev=1625607&r1=1625606&r2=1625607&view=diff
==============================================================================
--- jena/trunk/jena-arq/src/main/java/org/apache/jena/riot/lang/LangCSV.java (original)
+++ jena/trunk/jena-arq/src/main/java/org/apache/jena/riot/lang/LangCSV.java Wed Sep 17 14:20:41 2014
@@ -18,19 +18,24 @@
 
 package org.apache.jena.riot.lang;
 
-import java.io.InputStream ;
-import java.io.Reader ;
-import java.util.ArrayList ;
-import java.util.List ;
-
-import org.apache.jena.atlas.csv.CSVParser ;
-import org.apache.jena.riot.Lang ;
-import org.apache.jena.riot.RDFLanguages ;
-import org.apache.jena.riot.system.* ;
-
-import com.hp.hpl.jena.datatypes.xsd.XSDDatatype ;
-import com.hp.hpl.jena.graph.Node ;
-import com.hp.hpl.jena.graph.NodeFactory ;
+import java.io.InputStream;
+import java.io.Reader;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.jena.atlas.csv.CSVParser;
+import org.apache.jena.riot.Lang;
+import org.apache.jena.riot.RDFLanguages;
+import org.apache.jena.riot.system.ErrorHandler;
+import org.apache.jena.riot.system.IRILib;
+import org.apache.jena.riot.system.IRIResolver;
+import org.apache.jena.riot.system.ParserProfile;
+import org.apache.jena.riot.system.RiotLib;
+import org.apache.jena.riot.system.StreamRDF;
+
+import com.hp.hpl.jena.datatypes.xsd.XSDDatatype;
+import com.hp.hpl.jena.graph.Node;
+import com.hp.hpl.jena.graph.NodeFactory;
 
 public class LangCSV implements LangRIOT {
 
@@ -83,10 +88,9 @@ public class LangCSV implements LangRIOT
 		sink.start();
 		CSVParser parser = (input != null) ? CSVParser.create(input)
 				: CSVParser.create(reader);
-		List<String> row = null;
 		ArrayList<Node> predicates = new ArrayList<Node>();
 		int rowNum = 0;
-		while ((row = parser.parse1()) != null) {
+		for (List<String> row : parser) {
 			
 			if (rowNum == 0) {
 				for (String column : row) {
@@ -113,7 +117,7 @@ public class LangCSV implements LangRIOT
 					Node o;
 					try {
 						// Try for a double.
-						double d = Double.parseDouble(columnValue);
+						Double.parseDouble(columnValue);
 						o = NodeFactory.createLiteral(columnValue,
 								XSDDatatype.XSDdouble);
 					} catch (Exception e) {

Modified: jena/trunk/jena-arq/src/test/java/org/apache/jena/atlas/csv/TestCSVParser.java
URL: http://svn.apache.org/viewvc/jena/trunk/jena-arq/src/test/java/org/apache/jena/atlas/csv/TestCSVParser.java?rev=1625607&r1=1625606&r2=1625607&view=diff
==============================================================================
--- jena/trunk/jena-arq/src/test/java/org/apache/jena/atlas/csv/TestCSVParser.java (original)
+++ jena/trunk/jena-arq/src/test/java/org/apache/jena/atlas/csv/TestCSVParser.java Wed Sep 17 14:20:41 2014
@@ -18,18 +18,12 @@
 
 package org.apache.jena.atlas.csv;
 
-import static org.apache.jena.atlas.csv.CSVTokenType.NL ;
-import static org.apache.jena.atlas.csv.CSVTokenType.QSTRING ;
-import static org.apache.jena.atlas.csv.CSVTokenType.STRING ;
+import java.io.StringReader;
+import java.util.ArrayList;
+import java.util.List;
 
-import java.io.ByteArrayInputStream ;
-import java.io.InputStream ;
-import java.io.UnsupportedEncodingException ;
-import java.util.ArrayList ;
-import java.util.List ;
-
-import org.apache.jena.atlas.junit.BaseTest ;
-import org.junit.Test ;
+import org.apache.jena.atlas.junit.BaseTest;
+import org.junit.Test;
 
 public class TestCSVParser extends BaseTest
 {
@@ -38,38 +32,6 @@ public class TestCSVParser extends BaseT
     String[] row3 = { "a", "b" } ;
     String[] row4 = { "123", "\"aa\"", "'bb'", "\"''\"Z", "A'\"\"'" } ;
     
-    CSVToken t1 = new CSVToken(-1, -1, CSVTokenType.COMMA, ",") ;
-    
-    
-    @Test public void csv_parse_term_01() {  csvTerm("123", STRING, "123") ; }
-    @Test public void csv_parse_term_02()  { csvTerm("aa", STRING, "aa") ; }
-    @Test public void csv_parse_term_03()  { csvTerm("\" \"", QSTRING, " ") ; }
-    @Test public void csv_parse_term_04()  { csvTerm("' '", QSTRING, " ") ; }
-    
-    @Test public void csv_parse_term_05()  { csvTerm("\"a\"\"b\"", QSTRING, "a\"b") ; }
-    @Test public void csv_parse_term_06()  { csvTerm("'a\"b'", QSTRING, "a\"b") ; }
-    
-    @Test public void csv_parse_term_07()  { csvTerm("\n", NL, "\n") ; }
-    @Test public void csv_parse_term_08()  { csvTerm("\r", NL, "\n") ; }
-    @Test public void csv_parse_term_09()  { csvTerm("\r\n", NL, "\n") ; }
-    
-    private static void csvTerm(String input, CSVTokenType type, String output)
-    {
-        try
-        {
-            CSVToken expected = new CSVToken(-1, -1, type, output) ;
-
-            InputStream in = new ByteArrayInputStream(input.getBytes("UTF-8")) ;
-            CSVTokenIterator iter = new CSVTokenIterator(in) ;
-            assertTrue(iter.hasNext()) ;
-            CSVToken t = iter.next() ;
-            assertTrue(expected.same(t)) ;
-        } catch (UnsupportedEncodingException e)
-        {
-            throw new RuntimeException(e) ;
-        }
-    }
-    
     @Test public void csv_parse_01() { csv("\n", new String[][] {{""}}) ; }
     @Test public void csv_parse_02() { csv("a\n", new String[][] {{"a"}}) ; }
     @Test public void csv_parse_03() { csv("a,b\n", new String[][] {{"a", "b"}}) ; }
@@ -101,21 +63,11 @@ public class TestCSVParser extends BaseT
     private static void csv(String input, List<List<String>> answers)
     {
         List<List<String>> x = new ArrayList<>() ;
-        try {
-            InputStream in = new ByteArrayInputStream(input.getBytes("UTF-8")) ;
-            CSVTokenIterator iter = new CSVTokenIterator(in) ;
-            CSVParser parser = new CSVParser(iter) ;
-            List<String> row = null ;
-            while ( (row=parser.parse1())!=null) {
-                x.add(row) ;
-            }
-            assertEquals(answers, x) ;
-        } catch (UnsupportedEncodingException e)
-        {
-            throw new RuntimeException(e) ;
+        CSVParser parser = new CSVParser(new StringReader(input)) ;
+        for (List<String> row : parser) {
+            x.add(row) ;
         }
-        
-        
+        assertEquals(answers, x) ;
     }
 }