You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@jena.apache.org by jp...@apache.org on 2014/06/15 17:48:19 UTC
svn commit: r1602712 - in /jena/trunk/jena-arq/src:
main/java/org/apache/jena/riot/lang/LangCSV.java
test/java/org/apache/jena/riot/lang/TestLangCSV.java test/resources/test.csv
Author: jpz6311whu
Date: Sun Jun 15 15:48:18 2014
New Revision: 1602712
URL: http://svn.apache.org/r1602712
Log:
URI encoding for CSV column (JENA-625), improve corresponding testing
Modified:
jena/trunk/jena-arq/src/main/java/org/apache/jena/riot/lang/LangCSV.java
jena/trunk/jena-arq/src/test/java/org/apache/jena/riot/lang/TestLangCSV.java
jena/trunk/jena-arq/src/test/resources/test.csv
Modified: jena/trunk/jena-arq/src/main/java/org/apache/jena/riot/lang/LangCSV.java
URL: http://svn.apache.org/viewvc/jena/trunk/jena-arq/src/main/java/org/apache/jena/riot/lang/LangCSV.java?rev=1602712&r1=1602711&r2=1602712&view=diff
==============================================================================
--- jena/trunk/jena-arq/src/main/java/org/apache/jena/riot/lang/LangCSV.java (original)
+++ jena/trunk/jena-arq/src/main/java/org/apache/jena/riot/lang/LangCSV.java Sun Jun 15 15:48:18 2014
@@ -18,105 +18,149 @@
package org.apache.jena.riot.lang;
-import java.io.InputStream ;
-import java.io.Reader ;
-import java.util.ArrayList ;
-import java.util.List ;
-
-import org.apache.jena.atlas.csv.CSVParser ;
-import org.apache.jena.riot.Lang ;
-import org.apache.jena.riot.RDFLanguages ;
-import org.apache.jena.riot.system.ErrorHandler ;
-import org.apache.jena.riot.system.ParserProfile ;
-import org.apache.jena.riot.system.RiotLib ;
-import org.apache.jena.riot.system.StreamRDF ;
-
-import com.hp.hpl.jena.datatypes.xsd.XSDDatatype ;
-import com.hp.hpl.jena.graph.Node ;
-import com.hp.hpl.jena.graph.NodeFactory ;
+import java.io.InputStream;
+import java.io.Reader;
+import java.io.UnsupportedEncodingException;
+import java.net.URLEncoder;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.jena.atlas.csv.CSVParser;
+import org.apache.jena.riot.Lang;
+import org.apache.jena.riot.RDFLanguages;
+import org.apache.jena.riot.system.ErrorHandler;
+import org.apache.jena.riot.system.IRIResolver;
+import org.apache.jena.riot.system.ParserProfile;
+import org.apache.jena.riot.system.RiotLib;
+import org.apache.jena.riot.system.StreamRDF;
+
+import com.hp.hpl.jena.datatypes.xsd.XSDDatatype;
+import com.hp.hpl.jena.graph.Node;
+import com.hp.hpl.jena.graph.NodeFactory;
public class LangCSV implements LangRIOT {
-
+
public static final String CSV_PREFIX = "http://w3c/future-csv-vocab/";
public static final String CSV_ROW = CSV_PREFIX + "row";
-
- private InputStream input = null ;
- private Reader reader = null ;
- private String xmlBase ;
- private String filename ;
- private StreamRDF sink ;
- private ParserProfile profile ; // Warning - we don't use all of this.
+
+ private InputStream input = null;
+ private Reader reader = null;
+ private String base;
+ private String filename;
+ private StreamRDF sink;
+ private ParserProfile profile; // Warning - we don't use all of this.
@Override
public Lang getLang() {
return RDFLanguages.CSV; // this reader always identifies itself as the CSV language
}
-
- @Override
- public ParserProfile getProfile()
- {
- return profile ;
- }
-
- @Override
- public void setProfile(ParserProfile profile)
- { this.profile = profile ; }
-
- public LangCSV(Reader reader, String xmlBase, String filename, ErrorHandler errorHandler, StreamRDF sink)
- {
- this.reader = reader ;
- this.xmlBase = xmlBase ;
- this.filename = filename ;
- this.sink = sink ;
- this.profile = RiotLib.profile(getLang(), xmlBase, errorHandler) ;
- }
-
- public LangCSV(InputStream in, String xmlBase, String filename, ErrorHandler errorHandler, StreamRDF sink)
- {
- this.input = in ;
- this.xmlBase = xmlBase ;
- this.filename = filename ;
- this.sink = sink ;
- this.profile = RiotLib.profile(getLang(), xmlBase, errorHandler) ;
- }
+
+ @Override
+ public ParserProfile getProfile() {
+ return profile; // the profile created in the constructor, or the one later installed via setProfile
+ }
+
+ @Override
+ public void setProfile(ParserProfile profile) {
+ this.profile = profile; // replaces the profile that parse() uses to create URIs, literals and triples
+ }
+
+ public LangCSV(Reader reader, String base, String filename,
+ ErrorHandler errorHandler, StreamRDF sink) {
+ this.reader = reader; // character source; 'input' stays null, so parse() builds its CSVParser from this reader
+ this.base = base;
+ this.filename = filename; // resolved to an IRI in parse() to build the column predicate URIs
+ this.sink = sink; // receives start(), every generated triple, and finish()
+ this.profile = RiotLib.profile(getLang(), base, errorHandler); // profile for the CSV language with the given base and error handler
+ }
+
+ public LangCSV(InputStream in, String base, String filename,
+ ErrorHandler errorHandler, StreamRDF sink) {
+ this.input = in; // byte source; when non-null, parse() prefers it over 'reader'
+ this.base = base;
+ this.filename = filename; // resolved to an IRI in parse() to build the column predicate URIs
+ this.sink = sink; // receives start(), every generated triple, and finish()
+ this.profile = RiotLib.profile(getLang(), base, errorHandler); // profile for the CSV language with the given base and error handler
+ }
@Override
public void parse() {
- sink.start() ;
- CSVParser parser = ( input != null ) ? CSVParser.create(input) : CSVParser.create(reader) ;
- List<String> row = null ;
- ArrayList<Node> predicates = new ArrayList<Node>();
- int rowNum = 0;
- while ( (row=parser.parse1())!=null) {
- rowNum++;
- if (rowNum==1){
- for (String column: row){
- Node predicate = this.profile.createURI(filename + "#" + column.trim(), rowNum, 0);
- predicates.add(predicate);
- }
- }else {
- Node subject = this.profile.createBlankNode(null, -1, -1);
- Node predicateRow = this.profile.createURI(CSV_ROW, -1, -1);
- Node objectRow = this.profile.createTypedLiteral( (rowNum+"").trim(), XSDDatatype.XSDinteger, rowNum, 0);
- sink.triple(this.profile.createTriple(subject, predicateRow, objectRow, rowNum, 0) );
- for (int col=0;col<row.size();col++){
- Node predicate = predicates.get(col);
- String columnValue = row.get(col).trim();
- Node o ;
- try {
- // Try for a double.
- double d = Double.parseDouble(columnValue);
- o = NodeFactory.createLiteral(columnValue, XSDDatatype.XSDdouble) ;
- } catch(Exception e) {
- o = NodeFactory.createLiteral(columnValue) ;
- }
- sink.triple(this.profile.createTriple(subject, predicate, o, rowNum, col) );
- }
-
- }
- }
- sink.finish() ;
+ sink.start();
+ CSVParser parser = (input != null) ? CSVParser.create(input) // use the InputStream when one was supplied...
+ : CSVParser.create(reader); // ...otherwise fall back to the Reader
+ List<String> row = null;
+ ArrayList<Node> predicates = new ArrayList<Node>(); // one predicate per header column, in column order
+ int rowNum = 0; // 0 is the header row; data rows are numbered from 1
+ while ((row = parser.parse1()) != null) { // loop ends when parse1() returns null
+
+ if (rowNum == 0) { // header row: turn each column name into a predicate URI
+ for (String column : row) {
+ String uri = IRIResolver.resolveString(filename) + "#" // resolved file IRI + '#' + encoded column name
+ + toSafeLocalname(column);
+ Node predicate = this.profile.createURI(uri, rowNum, 0);
+ predicates.add(predicate);
+ }
+ } else { // data row: one subject, a row-number triple, then one triple per cell
+ //Node subject = this.profile.createBlankNode(null, -1, -1);
+ Node subject = caculateSubject(rowNum, filename); // same subject node is reused for every triple of this row
+ Node predicateRow = this.profile.createURI(CSV_ROW, -1, -1);
+ Node objectRow = this.profile
+ .createTypedLiteral((rowNum + ""), // row number as an xsd:integer literal
+ XSDDatatype.XSDinteger, rowNum, 0);
+ sink.triple(this.profile.createTriple(subject, predicateRow,
+ objectRow, rowNum, 0));
+ for (int col = 0; col < row.size(); col++) {
+ Node predicate = predicates.get(col); // NOTE(review): a data row wider than the header would index past 'predicates' - confirm inputs are rectangular
+ String columnValue = row.get(col).trim();
+ Node o;
+ try {
+ // Try for a double.
+ double d = Double.parseDouble(columnValue); // 'd' is unused; the call only serves as a "does it parse" check
+ o = NodeFactory.createLiteral(columnValue, // keeps the original text as the lexical form
+ XSDDatatype.XSDdouble);
+ } catch (Exception e) { // NOTE(review): broad catch; any failure above yields a literal without datatype
+ o = NodeFactory.createLiteral(columnValue);
+ }
+ sink.triple(this.profile.createTriple(subject, predicate,
+ o, rowNum, col));
+ }
+
+ }
+ rowNum++; // incremented after processing, so the header does not consume a row number
+ }
+ sink.finish();
+
+ }
+
+ public static String toSafeLocalname(String raw) {
+ String ret = raw.trim(); // strip leading/trailing whitespace before encoding
+ return encodeURIComponent(ret); // percent-encode the trimmed name; parse() appends the result after '#'
}
+
+ public static String encodeURIComponent(String s) {
+ String result;
+
+ try {
+ result = URLEncoder.encode(s, "UTF-8") // form-style encoding of the UTF-8 bytes; a space comes out as '+'
+ .replaceAll("\\+", "%20") // rewrite those '+' as %20 (a literal '+' was already encoded as %2B)
+ .replaceAll("\\%21", "!") // the remaining replacements restore ! ' ( ) ~ to their literal form
+ .replaceAll("\\%27", "'")
+ .replaceAll("\\%28", "(")
+ .replaceAll("\\%29", ")")
+ .replaceAll("\\%7E", "~");
+ } catch (UnsupportedEncodingException e) {
+ result = s; // fall back to the unencoded input if the charset name is rejected
+ }
+
+ return result;
+ }
+
+ public static Node caculateSubject(int rowNum, String filename){ // NOTE(review): both parameters are unused by the active code below
+ Node subject = NodeFactory.createAnon(); // subject comes from createAnon(); the URI-based alternative is left commented out
+// String uri = IRIResolver.resolveString(filename) + "#Row_" + rowNum;
+// Node subject = NodeFactory.createURI(uri);
+ return subject;
+ }
}
Modified: jena/trunk/jena-arq/src/test/java/org/apache/jena/riot/lang/TestLangCSV.java
URL: http://svn.apache.org/viewvc/jena/trunk/jena-arq/src/test/java/org/apache/jena/riot/lang/TestLangCSV.java?rev=1602712&r1=1602711&r2=1602712&view=diff
==============================================================================
--- jena/trunk/jena-arq/src/test/java/org/apache/jena/riot/lang/TestLangCSV.java (original)
+++ jena/trunk/jena-arq/src/test/java/org/apache/jena/riot/lang/TestLangCSV.java Sun Jun 15 15:48:18 2014
@@ -18,25 +18,75 @@
package org.apache.jena.riot.lang;
+import java.io.StringReader;
+
import org.apache.jena.atlas.junit.BaseTest;
+import org.apache.jena.atlas.lib.StrUtils;
+import org.apache.jena.riot.Lang;
import org.apache.jena.riot.RDFDataMgr;
import org.apache.jena.riot.RDFLanguages;
+import org.apache.jena.riot.system.IRIResolver;
import org.junit.Test;
import com.hp.hpl.jena.rdf.model.Model;
import com.hp.hpl.jena.rdf.model.ModelFactory;
+import com.hp.hpl.jena.util.PrintUtil;
+
+public class TestLangCSV extends BaseTest {
+ private static final String FILE_NAME = "src/test/resources/test.csv";
+ private static final String FILE_URI = IRIResolver.resolveString(FILE_NAME);
+
+ @Test
+ public void testPredicateWithSpace() {
+ String[] s1 = { "Predicate With Space", "PredicateWithSpace" }; // CSV input: a header line, then one data line
+ String[] s2 = {
+ //"<"+ LangCSV.caculateSubject(1, FILE_NAME) + "> <" + FILE_URI + "#Predicate+With+Space> 'PredicateWithSpace' ; ",
+ " [] <" + FILE_URI + "#Predicate%20With%20Space> 'PredicateWithSpace' ; ", // expected Turtle: spaces in the column name appear as %20
+ " <http://w3c/future-csv-vocab/row> 1 ." };
+ assertIsomorphicWith(s1, s2); // the CSV parse of s1 must be isomorphic to the Turtle parse of s2
+ }
+
+ @Test
+ public void testNonURICharacters() {
+ String[] s1 = { "`~!@#$%^&*()-_=+[{]}|\\;:'\"<.>/?", "NonURICharacters" };
+ String[] s2 = {
+ //"<"+ LangCSV.caculateSubject(1, FILE_NAME) + "> <" + FILE_URI + "#%60%7E%21%40%23%24%25%5E%26*%28%29-_%3D%2B%5B%7B%5D%7D%7C%5C%3B%3A%27%22%3C.%3E%2F%3F> 'NonURICharacters' ; ",
+ " [] <" + FILE_URI + "#%60~!%40%23%24%25%5E%26*()-_%3D%2B%5B%7B%5D%7D%7C%5C%3B%3A'%22%3C.%3E%2F%3F> 'NonURICharacters' ; ", // expected: ~ ! * ( ) - _ ' . stay literal, the rest is percent-encoded
+ " <http://w3c/future-csv-vocab/row> 1 ." };
+ assertIsomorphicWith(s1, s2); // the CSV parse of s1 must be isomorphic to the Turtle parse of s2
+ }
+
+ @Test
+ public void testDigitalLocalName() {
+ String[] s1 = { "1234", "DigitalLocalName" }; // CSV input whose column name consists only of digits
+ String[] s2 = {
+ //"<"+ LangCSV.caculateSubject(1, FILE_NAME) + "> <" + FILE_URI + "#1234> 'DigitalLocalName' ; ",
+ " [] <" + FILE_URI + "#1234> 'DigitalLocalName' ; ", // expected: the digits are used unchanged as the fragment
+ " <http://w3c/future-csv-vocab/row> 1 ." };
+ assertIsomorphicWith(s1, s2); // the CSV parse of s1 must be isomorphic to the Turtle parse of s2
+ }
+
+ @Test
+ public void RDFDataMgrReadTest() {
+ Model m1 = RDFDataMgr.loadModel(FILE_NAME, RDFLanguages.CSV); // load the fixture through RDFDataMgr
+ Model m2 = ModelFactory.createDefaultModel();
+ m2.read(FILE_NAME, "CSV"); // load the same fixture through Model.read
+ assertEquals(12, m1.size()); // test.csv: 2 data rows x (5 column triples + 1 row-number triple)
+ assertTrue(m1.isIsomorphicWith(m2)); // both loading paths must produce the same graph
+ }
+
+ private Model parseToModel(String[] strings, Lang lang) {
+ String string = StrUtils.strjoin("\n", strings); // each array element becomes one line of the document
+ StringReader r = new StringReader(string);
+ Model model = ModelFactory.createDefaultModel();
+ RDFDataMgr.read(model, r, FILE_NAME, lang); // FILE_NAME is passed as the base argument for the read
+ return model;
+ }
+
+ private void assertIsomorphicWith(String[] s1, String[] s2){
+ Model m1 = parseToModel(s1, RDFLanguages.CSV); // s1 is parsed as CSV
+ Model m2 = parseToModel(s2, RDFLanguages.TURTLE); // s2 is the expected graph, written in Turtle
+ assertTrue(m1.isIsomorphicWith(m2)); // isomorphism check, so blank node labels need not match
+ }
-public class TestLangCSV extends BaseTest
-{
- @Test public void RDFDataMgrReadTest() {
- String file = "src/test/resources/test.csv";
- Model m = RDFDataMgr.loadModel(file, RDFLanguages.CSV) ;
- assertEquals(6, m.size()) ;
- }
-
- @Test public void ModelReadTest(){
- Model m = ModelFactory.createDefaultModel() ;
- m.read("test.csv", "CSV") ;
- assertEquals(6, m.size()) ;
- }
}
Modified: jena/trunk/jena-arq/src/test/resources/test.csv
URL: http://svn.apache.org/viewvc/jena/trunk/jena-arq/src/test/resources/test.csv?rev=1602712&r1=1602711&r2=1602712&view=diff
==============================================================================
--- jena/trunk/jena-arq/src/test/resources/test.csv (original)
+++ jena/trunk/jena-arq/src/test/resources/test.csv Sun Jun 15 15:48:18 2014
@@ -1,3 +1,3 @@
-Town,Population
-Southton,123000.0
-Northville,654000
+Town,Population,Predicate With Space,`~!@#$%^&*()-_=+[{]}|\;:'"<.>/?,1234
+Southton,123000.0,PredicateWithSpace1,NonURICharacters1,DigitalLocalName1
+Northville,654000,PredicateWithSpace2,NonURICharacters2,DigitalLocalName2