You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@jena.apache.org by jp...@apache.org on 2014/06/15 17:48:19 UTC

svn commit: r1602712 - in /jena/trunk/jena-arq/src: main/java/org/apache/jena/riot/lang/LangCSV.java test/java/org/apache/jena/riot/lang/TestLangCSV.java test/resources/test.csv

Author: jpz6311whu
Date: Sun Jun 15 15:48:18 2014
New Revision: 1602712

URL: http://svn.apache.org/r1602712
Log:
URI-encode CSV column names (JENA-625); improve the corresponding tests

Modified:
    jena/trunk/jena-arq/src/main/java/org/apache/jena/riot/lang/LangCSV.java
    jena/trunk/jena-arq/src/test/java/org/apache/jena/riot/lang/TestLangCSV.java
    jena/trunk/jena-arq/src/test/resources/test.csv

Modified: jena/trunk/jena-arq/src/main/java/org/apache/jena/riot/lang/LangCSV.java
URL: http://svn.apache.org/viewvc/jena/trunk/jena-arq/src/main/java/org/apache/jena/riot/lang/LangCSV.java?rev=1602712&r1=1602711&r2=1602712&view=diff
==============================================================================
--- jena/trunk/jena-arq/src/main/java/org/apache/jena/riot/lang/LangCSV.java (original)
+++ jena/trunk/jena-arq/src/main/java/org/apache/jena/riot/lang/LangCSV.java Sun Jun 15 15:48:18 2014
@@ -18,105 +18,149 @@
 
 package org.apache.jena.riot.lang;
 
-import java.io.InputStream ;
-import java.io.Reader ;
-import java.util.ArrayList ;
-import java.util.List ;
-
-import org.apache.jena.atlas.csv.CSVParser ;
-import org.apache.jena.riot.Lang ;
-import org.apache.jena.riot.RDFLanguages ;
-import org.apache.jena.riot.system.ErrorHandler ;
-import org.apache.jena.riot.system.ParserProfile ;
-import org.apache.jena.riot.system.RiotLib ;
-import org.apache.jena.riot.system.StreamRDF ;
-
-import com.hp.hpl.jena.datatypes.xsd.XSDDatatype ;
-import com.hp.hpl.jena.graph.Node ;
-import com.hp.hpl.jena.graph.NodeFactory ;
+import java.io.InputStream;
+import java.io.Reader;
+import java.io.UnsupportedEncodingException;
+import java.net.URLEncoder;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.jena.atlas.csv.CSVParser;
+import org.apache.jena.riot.Lang;
+import org.apache.jena.riot.RDFLanguages;
+import org.apache.jena.riot.system.ErrorHandler;
+import org.apache.jena.riot.system.IRIResolver;
+import org.apache.jena.riot.system.ParserProfile;
+import org.apache.jena.riot.system.RiotLib;
+import org.apache.jena.riot.system.StreamRDF;
+
+import com.hp.hpl.jena.datatypes.xsd.XSDDatatype;
+import com.hp.hpl.jena.graph.Node;
+import com.hp.hpl.jena.graph.NodeFactory;
 
 public class LangCSV implements LangRIOT {
-	
+
 	public static final String CSV_PREFIX = "http://w3c/future-csv-vocab/";
 	public static final String CSV_ROW = CSV_PREFIX + "row";
-	
-    private InputStream input = null ;
-    private Reader reader = null ;
-    private String xmlBase ;
-    private String filename ;
-    private StreamRDF sink ;
-    private ParserProfile profile ;             // Warning - we don't use all of this.
+
+	private InputStream input = null;
+	private Reader reader = null;
+	private String base;
+	private String filename;
+	private StreamRDF sink;
+	private ParserProfile profile; // Warning - we don't use all of this.
 
 	@Override
 	public Lang getLang() {
 		return RDFLanguages.CSV;
 
 	}
-	
-    @Override
-    public ParserProfile getProfile()
-    {
-        return profile ;
-    }
-
-    @Override
-    public void setProfile(ParserProfile profile)
-    { this.profile = profile ; }
-        
-    public LangCSV(Reader reader, String xmlBase, String filename, ErrorHandler errorHandler, StreamRDF sink)
-    {
-        this.reader = reader ;
-        this.xmlBase = xmlBase ;
-        this.filename = filename ;
-        this.sink = sink ;
-        this.profile = RiotLib.profile(getLang(), xmlBase, errorHandler) ;
-    }
-    
-    public LangCSV(InputStream in, String xmlBase, String filename, ErrorHandler errorHandler, StreamRDF sink)
-    {
-        this.input = in ;
-        this.xmlBase = xmlBase ;
-        this.filename = filename ;
-        this.sink = sink ;
-        this.profile = RiotLib.profile(getLang(), xmlBase, errorHandler) ;
-    }
+
+	@Override
+	public ParserProfile getProfile() {
+		return profile;
+	}
+
+	@Override
+	public void setProfile(ParserProfile profile) {
+		this.profile = profile;
+	}
+
+	public LangCSV(Reader reader, String base, String filename,
+			ErrorHandler errorHandler, StreamRDF sink) {
+		this.reader = reader;
+		this.base = base;
+		this.filename = filename;
+		this.sink = sink;
+		this.profile = RiotLib.profile(getLang(), base, errorHandler);
+	}
+
+	public LangCSV(InputStream in, String base, String filename,
+			ErrorHandler errorHandler, StreamRDF sink) {
+		this.input = in;
+		this.base = base;
+		this.filename = filename;
+		this.sink = sink;
+		this.profile = RiotLib.profile(getLang(), base, errorHandler);
+	}
 
 	@Override
 	public void parse() {
-		 sink.start() ;
-		 CSVParser parser = ( input != null ) ? CSVParser.create(input) : CSVParser.create(reader) ;
-		 List<String> row = null ;
-		 ArrayList<Node> predicates = new ArrayList<Node>();
-		 int rowNum = 0;
-		 while ( (row=parser.parse1())!=null) {
-			 rowNum++;
-			 if (rowNum==1){
-				 for (String column: row){
-					 Node predicate = this.profile.createURI(filename + "#" + column.trim(), rowNum, 0);
-					 predicates.add(predicate);
-				 }
-			 }else {
-				 Node subject = this.profile.createBlankNode(null, -1, -1);
-				 Node predicateRow = this.profile.createURI(CSV_ROW, -1, -1);
-				 Node objectRow = this.profile.createTypedLiteral( (rowNum+"").trim(), XSDDatatype.XSDinteger, rowNum, 0);
-				 sink.triple(this.profile.createTriple(subject, predicateRow, objectRow, rowNum, 0)   );
-				 for (int col=0;col<row.size();col++){
-					 Node predicate = predicates.get(col);
-				     String columnValue = row.get(col).trim();
-				     Node o ;
-					 try { 
-					     // Try for a double.
-					     double d = Double.parseDouble(columnValue);
-					     o = NodeFactory.createLiteral(columnValue, XSDDatatype.XSDdouble) ;
-					 } catch(Exception e) {
-						 o = NodeFactory.createLiteral(columnValue) ;
-					 }
-					 sink.triple(this.profile.createTriple(subject, predicate, o, rowNum, col)   );
-				 }
-				 
-			 }
-         }
-		 sink.finish() ;
+		sink.start();
+		CSVParser parser = (input != null) ? CSVParser.create(input)
+				: CSVParser.create(reader);
+		List<String> row = null;
+		ArrayList<Node> predicates = new ArrayList<Node>();
+		int rowNum = 0;
+		while ((row = parser.parse1()) != null) {
+			
+			if (rowNum == 0) {
+				for (String column : row) {
+					String uri = IRIResolver.resolveString(filename) + "#"
+							+ toSafeLocalname(column);
+					Node predicate = this.profile.createURI(uri, rowNum, 0);
+					predicates.add(predicate);
+				}
+			} else {
+				//Node subject = this.profile.createBlankNode(null, -1, -1);
+				Node subject = caculateSubject(rowNum, filename);
+				Node predicateRow = this.profile.createURI(CSV_ROW, -1, -1);
+				Node objectRow = this.profile
+						.createTypedLiteral((rowNum + ""),
+								XSDDatatype.XSDinteger, rowNum, 0);
+				sink.triple(this.profile.createTriple(subject, predicateRow,
+						objectRow, rowNum, 0));
+				for (int col = 0; col < row.size(); col++) {
+					Node predicate = predicates.get(col);
+					String columnValue = row.get(col).trim();
+					Node o;
+					try {
+						// Try for a double.
+						double d = Double.parseDouble(columnValue);
+						o = NodeFactory.createLiteral(columnValue,
+								XSDDatatype.XSDdouble);
+					} catch (Exception e) {
+						o = NodeFactory.createLiteral(columnValue);
+					}
+					sink.triple(this.profile.createTriple(subject, predicate,
+							o, rowNum, col));
+				}
+
+			}
+			rowNum++;
+		}
+		sink.finish();
+
+	}
+
+	public static String toSafeLocalname(String raw) {
+		String ret = raw.trim();
+		return encodeURIComponent(ret);
 		
 	}
+	
+	public static String encodeURIComponent(String s) {
+	    String result;
+
+	    try {
+	        result = URLEncoder.encode(s, "UTF-8")
+	                .replaceAll("\\+", "%20")
+	                .replaceAll("\\%21", "!")
+	                .replaceAll("\\%27", "'")
+	                .replaceAll("\\%28", "(")
+	                .replaceAll("\\%29", ")")
+	                .replaceAll("\\%7E", "~");
+	    } catch (UnsupportedEncodingException e) {
+	        result = s;
+	    }
+
+	    return result;
+	}
+	
+	public static Node caculateSubject(int rowNum, String filename){
+		Node subject = NodeFactory.createAnon();
+//		String uri = IRIResolver.resolveString(filename) + "#Row_" + rowNum; 
+//		Node subject =  NodeFactory.createURI(uri);
+		return subject;
+	}
 }

Modified: jena/trunk/jena-arq/src/test/java/org/apache/jena/riot/lang/TestLangCSV.java
URL: http://svn.apache.org/viewvc/jena/trunk/jena-arq/src/test/java/org/apache/jena/riot/lang/TestLangCSV.java?rev=1602712&r1=1602711&r2=1602712&view=diff
==============================================================================
--- jena/trunk/jena-arq/src/test/java/org/apache/jena/riot/lang/TestLangCSV.java (original)
+++ jena/trunk/jena-arq/src/test/java/org/apache/jena/riot/lang/TestLangCSV.java Sun Jun 15 15:48:18 2014
@@ -18,25 +18,75 @@
 
 package org.apache.jena.riot.lang;
 
+import java.io.StringReader;
+
 import org.apache.jena.atlas.junit.BaseTest;
+import org.apache.jena.atlas.lib.StrUtils;
+import org.apache.jena.riot.Lang;
 import org.apache.jena.riot.RDFDataMgr;
 import org.apache.jena.riot.RDFLanguages;
+import org.apache.jena.riot.system.IRIResolver;
 import org.junit.Test;
 
 import com.hp.hpl.jena.rdf.model.Model;
 import com.hp.hpl.jena.rdf.model.ModelFactory;
+import com.hp.hpl.jena.util.PrintUtil;
+
+public class TestLangCSV extends BaseTest {
+	private static final String FILE_NAME = "src/test/resources/test.csv";
+	private static final String FILE_URI = IRIResolver.resolveString(FILE_NAME);
+
+	@Test
+	public void testPredicateWithSpace() {
+		String[] s1 = { "Predicate With Space", "PredicateWithSpace" };
+		String[] s2 = {
+				//"<"+ LangCSV.caculateSubject(1, FILE_NAME) + "> <" + FILE_URI + "#Predicate+With+Space> 'PredicateWithSpace' ; ",
+				" [] <" + FILE_URI + "#Predicate%20With%20Space> 'PredicateWithSpace' ; ",
+				" <http://w3c/future-csv-vocab/row> 1 ." };
+		assertIsomorphicWith(s1, s2);
+	}
+	
+	@Test
+	public void testNonURICharacters() {
+		String[] s1 = { "`~!@#$%^&*()-_=+[{]}|\\;:'\"<.>/?", "NonURICharacters" };
+		String[] s2 = {
+				//"<"+ LangCSV.caculateSubject(1, FILE_NAME) + "> <" + FILE_URI + "#%60%7E%21%40%23%24%25%5E%26*%28%29-_%3D%2B%5B%7B%5D%7D%7C%5C%3B%3A%27%22%3C.%3E%2F%3F> 'NonURICharacters' ; ",
+				" [] <" + FILE_URI + "#%60~!%40%23%24%25%5E%26*()-_%3D%2B%5B%7B%5D%7D%7C%5C%3B%3A'%22%3C.%3E%2F%3F> 'NonURICharacters' ; ",
+				" <http://w3c/future-csv-vocab/row> 1 ." };
+		assertIsomorphicWith(s1, s2);
+	}
+	
+	@Test
+	public void testDigitalLocalName() {
+		String[] s1 = { "1234", "DigitalLocalName" };
+		String[] s2 = {
+				//"<"+ LangCSV.caculateSubject(1, FILE_NAME) + "> <" + FILE_URI + "#1234> 'DigitalLocalName' ; ",
+				" [] <" + FILE_URI + "#1234> 'DigitalLocalName' ; ",
+				" <http://w3c/future-csv-vocab/row> 1 ." };
+		assertIsomorphicWith(s1, s2);
+	}
+
+	@Test
+	public void RDFDataMgrReadTest() {
+		Model m1 = RDFDataMgr.loadModel(FILE_NAME, RDFLanguages.CSV);
+		Model m2 = ModelFactory.createDefaultModel();
+		m2.read(FILE_NAME, "CSV");
+		assertEquals(12, m1.size());
+		assertTrue(m1.isIsomorphicWith(m2));
+	}
+
+	private Model parseToModel(String[] strings, Lang lang) {
+		String string = StrUtils.strjoin("\n", strings);
+		StringReader r = new StringReader(string);
+		Model model = ModelFactory.createDefaultModel();
+		RDFDataMgr.read(model, r, FILE_NAME, lang);
+		return model;
+	}
+	
+	private void assertIsomorphicWith(String[] s1, String[] s2){
+		Model m1 = parseToModel(s1, RDFLanguages.CSV);
+		Model m2 = parseToModel(s2, RDFLanguages.TURTLE);
+		assertTrue(m1.isIsomorphicWith(m2));
+	}
 
-public class TestLangCSV extends BaseTest
-{
-	  @Test public void RDFDataMgrReadTest() {
-		  String file = "src/test/resources/test.csv";
-		  Model m = RDFDataMgr.loadModel(file, RDFLanguages.CSV) ;
-		  assertEquals(6, m.size()) ;
-	  }
-	  
-	  @Test public void ModelReadTest(){
-	      Model m = ModelFactory.createDefaultModel() ;
-	      m.read("test.csv", "CSV") ;
-	      assertEquals(6, m.size()) ;
-      }
 }

Modified: jena/trunk/jena-arq/src/test/resources/test.csv
URL: http://svn.apache.org/viewvc/jena/trunk/jena-arq/src/test/resources/test.csv?rev=1602712&r1=1602711&r2=1602712&view=diff
==============================================================================
--- jena/trunk/jena-arq/src/test/resources/test.csv (original)
+++ jena/trunk/jena-arq/src/test/resources/test.csv Sun Jun 15 15:48:18 2014
@@ -1,3 +1,3 @@
-Town,Population
-Southton,123000.0 
-Northville,654000 
+Town,Population,Predicate With Space,`~!@#$%^&*()-_=+[{]}|\;:'"<.>/?,1234
+Southton,123000.0,PredicateWithSpace1,NonURICharacters1,DigitalLocalName1
+Northville,654000,PredicateWithSpace2,NonURICharacters2,DigitalLocalName2