You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@jena.apache.org by an...@apache.org on 2014/09/17 16:20:41 UTC
svn commit: r1625607 - in /jena/trunk/jena-arq: ./
src/main/java/org/apache/jena/atlas/csv/
src/main/java/org/apache/jena/riot/lang/
src/test/java/org/apache/jena/atlas/csv/
Author: andy
Date: Wed Sep 17 14:20:41 2014
New Revision: 1625607
URL: http://svn.apache.org/r1625607
Log:
JENA-699 : Use Apache Commons CSV parser.
Removed:
jena/trunk/jena-arq/src/main/java/org/apache/jena/atlas/csv/CSVToken.java
jena/trunk/jena-arq/src/main/java/org/apache/jena/atlas/csv/CSVTokenIterator.java
jena/trunk/jena-arq/src/main/java/org/apache/jena/atlas/csv/CSVTokenType.java
Modified:
jena/trunk/jena-arq/pom.xml
jena/trunk/jena-arq/src/main/java/org/apache/jena/atlas/csv/CSVParseException.java
jena/trunk/jena-arq/src/main/java/org/apache/jena/atlas/csv/CSVParser.java
jena/trunk/jena-arq/src/main/java/org/apache/jena/riot/lang/LangCSV.java
jena/trunk/jena-arq/src/test/java/org/apache/jena/atlas/csv/TestCSVParser.java
Modified: jena/trunk/jena-arq/pom.xml
URL: http://svn.apache.org/viewvc/jena/trunk/jena-arq/pom.xml?rev=1625607&r1=1625606&r2=1625607&view=diff
==============================================================================
--- jena/trunk/jena-arq/pom.xml (original)
+++ jena/trunk/jena-arq/pom.xml Wed Sep 17 14:20:41 2014
@@ -109,6 +109,12 @@
<artifactId>jcl-over-slf4j</artifactId>
</dependency>
+ <dependency>
+ <groupId>org.apache.commons</groupId>
+ <artifactId>commons-csv</artifactId>
+ <version>1.0</version>
+ </dependency>
+
</dependencies>
<build>
Modified: jena/trunk/jena-arq/src/main/java/org/apache/jena/atlas/csv/CSVParseException.java
URL: http://svn.apache.org/viewvc/jena/trunk/jena-arq/src/main/java/org/apache/jena/atlas/csv/CSVParseException.java?rev=1625607&r1=1625606&r2=1625607&view=diff
==============================================================================
--- jena/trunk/jena-arq/src/main/java/org/apache/jena/atlas/csv/CSVParseException.java (original)
+++ jena/trunk/jena-arq/src/main/java/org/apache/jena/atlas/csv/CSVParseException.java Wed Sep 17 14:20:41 2014
@@ -20,6 +20,7 @@ package org.apache.jena.atlas.csv;
class CSVParseException extends RuntimeException
{
+ private static final long serialVersionUID = -7804460281144630746L;
public CSVParseException(String msg, Throwable cause) { super(msg, cause) ; }
public CSVParseException(String msg) { super(msg) ; }
public CSVParseException(Throwable cause) { super(cause) ; }
Modified: jena/trunk/jena-arq/src/main/java/org/apache/jena/atlas/csv/CSVParser.java
URL: http://svn.apache.org/viewvc/jena/trunk/jena-arq/src/main/java/org/apache/jena/atlas/csv/CSVParser.java?rev=1625607&r1=1625606&r2=1625607&view=diff
==============================================================================
--- jena/trunk/jena-arq/src/main/java/org/apache/jena/atlas/csv/CSVParser.java (original)
+++ jena/trunk/jena-arq/src/main/java/org/apache/jena/atlas/csv/CSVParser.java Wed Sep 17 14:20:41 2014
@@ -18,109 +18,70 @@
package org.apache.jena.atlas.csv ;
-import java.io.InputStream ;
-import java.io.Reader ;
-import java.util.ArrayList ;
-import java.util.Iterator ;
-import java.util.List ;
-
-import org.apache.jena.atlas.io.IO ;
-import org.apache.jena.atlas.iterator.IteratorSlotted ;
-import org.apache.jena.atlas.iterator.PeekIterator ;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.io.Reader;
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.List;
+
+import org.apache.commons.csv.CSVFormat;
+import org.apache.commons.csv.CSVRecord;
+import org.apache.jena.atlas.io.IO;
-/** Written specifically to handle SPARQL results CSv files.
- * Replace with a real parser (e.g. Apache Commons CSV when released)
+/** Written specifically to handle SPARQL results CSV files.
+ * Acts as a wrapper for Commons CSV parser.
*/
public class CSVParser implements Iterable<List<String>>
{
+
+ private final org.apache.commons.csv.CSVParser parser;
+
public static CSVParser create(String filename) {
InputStream input = IO.openFile(filename) ;
return create(input) ;
}
public static CSVParser create(InputStream input) {
- CSVTokenIterator iter = new CSVTokenIterator(input) ;
- CSVParser parser = new CSVParser(iter) ;
+ CSVParser parser = new CSVParser(new InputStreamReader(input)) ;
return parser ;
}
public static CSVParser create(Reader input) {
- CSVTokenIterator iter = new CSVTokenIterator(input) ;
- CSVParser parser = new CSVParser(iter) ;
+ CSVParser parser = new CSVParser(input) ;
return parser ;
}
- private final CSVTokenIterator iter ;
- private final PeekIterator<CSVToken> pIter ;
-
- public CSVParser(CSVTokenIterator iter) {
- this.iter = iter ;
- this.pIter = new PeekIterator<>(iter) ;
+ public CSVParser(Reader input) {
+ try {
+ this.parser = CSVFormat.EXCEL.withQuote('\'').parse(input);
+ } catch (IOException e) {
+ throw new CSVParseException("Failed to create the CSV parser: " + e.getMessage(), e);
+ }
}
@Override
public Iterator<List<String>> iterator() {
- return new IteratorSlotted<List<String>>() {
- @Override
- protected List<String> moveToNext() {
- return CSVParser.this.parse1() ;
+ List<List<String>> list = new ArrayList<>();
+ for (CSVRecord record : parser) {
+ List<String> row = new ArrayList<>();
+ for (String columnValue : record) {
+ row.add(columnValue);
}
-
- @Override
- protected boolean hasMore() {
- return true ;
- }};
+ list.add(row);
+ }
+ return list.iterator();
}
public List<String> parse1() {
- // Get rid of switches. break problems.
- List<String> line = new ArrayList<>(100) ;
-
- loop: while (pIter.hasNext()) {
- CSVToken t = pIter.next() ;
- switch (t.type) {
- case EOF :
- return null ;
- case NL :
- // Blank line = one or none?
- line.add("") ;
- return line ;
- case STRING :
- case QSTRING :
- line.add(t.image) ;
- break ;
- case COMMA :
- // Immediate COMMA is an empty term.
- line.add("") ;
- continue loop ;
- default :
- exception("Syntax error: expected a string or comma.", t) ;
- }
- // Expect COMMA or NL
- if ( !pIter.hasNext() ) {
- // File ends, no NL.
- return line ;
- }
- // Look at separateor or end
- CSVToken t2 = pIter.peek() ;
- switch (t2.type) {
- case COMMA :
- pIter.next() ;
- continue loop ;
- case NL :
- case EOF :
- pIter.next() ;
- return line ;
- default :
- exception("Syntax error: expect comma or end of line.", t) ;
- }
+ Iterator<List<String>> iterator = iterator();
+ if (iterator.hasNext())
+ {
+ final List<String> firstRow = iterator.next();
+ return firstRow;
}
- return null ;
- }
- static void exception(String msg, CSVToken t) {
- if ( t != null && t.line >= 0 && t.col > 0 )
- msg = String.format("[%s, %s] %s", t.line, t.col, msg) ;
- throw new CSVParseException(msg) ;
+ return null;
}
static void exception(String msg, long line, long col) {
Modified: jena/trunk/jena-arq/src/main/java/org/apache/jena/riot/lang/LangCSV.java
URL: http://svn.apache.org/viewvc/jena/trunk/jena-arq/src/main/java/org/apache/jena/riot/lang/LangCSV.java?rev=1625607&r1=1625606&r2=1625607&view=diff
==============================================================================
--- jena/trunk/jena-arq/src/main/java/org/apache/jena/riot/lang/LangCSV.java (original)
+++ jena/trunk/jena-arq/src/main/java/org/apache/jena/riot/lang/LangCSV.java Wed Sep 17 14:20:41 2014
@@ -18,19 +18,24 @@
package org.apache.jena.riot.lang;
-import java.io.InputStream ;
-import java.io.Reader ;
-import java.util.ArrayList ;
-import java.util.List ;
-
-import org.apache.jena.atlas.csv.CSVParser ;
-import org.apache.jena.riot.Lang ;
-import org.apache.jena.riot.RDFLanguages ;
-import org.apache.jena.riot.system.* ;
-
-import com.hp.hpl.jena.datatypes.xsd.XSDDatatype ;
-import com.hp.hpl.jena.graph.Node ;
-import com.hp.hpl.jena.graph.NodeFactory ;
+import java.io.InputStream;
+import java.io.Reader;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.jena.atlas.csv.CSVParser;
+import org.apache.jena.riot.Lang;
+import org.apache.jena.riot.RDFLanguages;
+import org.apache.jena.riot.system.ErrorHandler;
+import org.apache.jena.riot.system.IRILib;
+import org.apache.jena.riot.system.IRIResolver;
+import org.apache.jena.riot.system.ParserProfile;
+import org.apache.jena.riot.system.RiotLib;
+import org.apache.jena.riot.system.StreamRDF;
+
+import com.hp.hpl.jena.datatypes.xsd.XSDDatatype;
+import com.hp.hpl.jena.graph.Node;
+import com.hp.hpl.jena.graph.NodeFactory;
public class LangCSV implements LangRIOT {
@@ -83,10 +88,9 @@ public class LangCSV implements LangRIOT
sink.start();
CSVParser parser = (input != null) ? CSVParser.create(input)
: CSVParser.create(reader);
- List<String> row = null;
ArrayList<Node> predicates = new ArrayList<Node>();
int rowNum = 0;
- while ((row = parser.parse1()) != null) {
+ for (List<String> row : parser) {
if (rowNum == 0) {
for (String column : row) {
@@ -113,7 +117,7 @@ public class LangCSV implements LangRIOT
Node o;
try {
// Try for a double.
- double d = Double.parseDouble(columnValue);
+ Double.parseDouble(columnValue);
o = NodeFactory.createLiteral(columnValue,
XSDDatatype.XSDdouble);
} catch (Exception e) {
Modified: jena/trunk/jena-arq/src/test/java/org/apache/jena/atlas/csv/TestCSVParser.java
URL: http://svn.apache.org/viewvc/jena/trunk/jena-arq/src/test/java/org/apache/jena/atlas/csv/TestCSVParser.java?rev=1625607&r1=1625606&r2=1625607&view=diff
==============================================================================
--- jena/trunk/jena-arq/src/test/java/org/apache/jena/atlas/csv/TestCSVParser.java (original)
+++ jena/trunk/jena-arq/src/test/java/org/apache/jena/atlas/csv/TestCSVParser.java Wed Sep 17 14:20:41 2014
@@ -18,18 +18,12 @@
package org.apache.jena.atlas.csv;
-import static org.apache.jena.atlas.csv.CSVTokenType.NL ;
-import static org.apache.jena.atlas.csv.CSVTokenType.QSTRING ;
-import static org.apache.jena.atlas.csv.CSVTokenType.STRING ;
+import java.io.StringReader;
+import java.util.ArrayList;
+import java.util.List;
-import java.io.ByteArrayInputStream ;
-import java.io.InputStream ;
-import java.io.UnsupportedEncodingException ;
-import java.util.ArrayList ;
-import java.util.List ;
-
-import org.apache.jena.atlas.junit.BaseTest ;
-import org.junit.Test ;
+import org.apache.jena.atlas.junit.BaseTest;
+import org.junit.Test;
public class TestCSVParser extends BaseTest
{
@@ -38,38 +32,6 @@ public class TestCSVParser extends BaseT
String[] row3 = { "a", "b" } ;
String[] row4 = { "123", "\"aa\"", "'bb'", "\"''\"Z", "A'\"\"'" } ;
- CSVToken t1 = new CSVToken(-1, -1, CSVTokenType.COMMA, ",") ;
-
-
- @Test public void csv_parse_term_01() { csvTerm("123", STRING, "123") ; }
- @Test public void csv_parse_term_02() { csvTerm("aa", STRING, "aa") ; }
- @Test public void csv_parse_term_03() { csvTerm("\" \"", QSTRING, " ") ; }
- @Test public void csv_parse_term_04() { csvTerm("' '", QSTRING, " ") ; }
-
- @Test public void csv_parse_term_05() { csvTerm("\"a\"\"b\"", QSTRING, "a\"b") ; }
- @Test public void csv_parse_term_06() { csvTerm("'a\"b'", QSTRING, "a\"b") ; }
-
- @Test public void csv_parse_term_07() { csvTerm("\n", NL, "\n") ; }
- @Test public void csv_parse_term_08() { csvTerm("\r", NL, "\n") ; }
- @Test public void csv_parse_term_09() { csvTerm("\r\n", NL, "\n") ; }
-
- private static void csvTerm(String input, CSVTokenType type, String output)
- {
- try
- {
- CSVToken expected = new CSVToken(-1, -1, type, output) ;
-
- InputStream in = new ByteArrayInputStream(input.getBytes("UTF-8")) ;
- CSVTokenIterator iter = new CSVTokenIterator(in) ;
- assertTrue(iter.hasNext()) ;
- CSVToken t = iter.next() ;
- assertTrue(expected.same(t)) ;
- } catch (UnsupportedEncodingException e)
- {
- throw new RuntimeException(e) ;
- }
- }
-
@Test public void csv_parse_01() { csv("\n", new String[][] {{""}}) ; }
@Test public void csv_parse_02() { csv("a\n", new String[][] {{"a"}}) ; }
@Test public void csv_parse_03() { csv("a,b\n", new String[][] {{"a", "b"}}) ; }
@@ -101,21 +63,11 @@ public class TestCSVParser extends BaseT
private static void csv(String input, List<List<String>> answers)
{
List<List<String>> x = new ArrayList<>() ;
- try {
- InputStream in = new ByteArrayInputStream(input.getBytes("UTF-8")) ;
- CSVTokenIterator iter = new CSVTokenIterator(in) ;
- CSVParser parser = new CSVParser(iter) ;
- List<String> row = null ;
- while ( (row=parser.parse1())!=null) {
- x.add(row) ;
- }
- assertEquals(answers, x) ;
- } catch (UnsupportedEncodingException e)
- {
- throw new RuntimeException(e) ;
+ CSVParser parser = new CSVParser(new StringReader(input)) ;
+ for (List<String> row : parser) {
+ x.add(row) ;
}
-
-
+ assertEquals(answers, x) ;
}
}