You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@jena.apache.org by jp...@apache.org on 2014/05/24 15:15:11 UTC

svn commit: r1597282 - in /jena/trunk/jena-arq/src/main/java/org/apache/jena/riot: RDFLanguages.java RDFParserRegistry.java RiotReader.java lang/LangCSV.java

Author: jpz6311whu
Date: Sat May 24 13:15:11 2014
New Revision: 1597282

URL: http://svn.apache.org/r1597282
Log:
add LangCSV for reading and parsing CSV data

Added:
    jena/trunk/jena-arq/src/main/java/org/apache/jena/riot/lang/LangCSV.java   (with props)
Modified:
    jena/trunk/jena-arq/src/main/java/org/apache/jena/riot/RDFLanguages.java
    jena/trunk/jena-arq/src/main/java/org/apache/jena/riot/RDFParserRegistry.java
    jena/trunk/jena-arq/src/main/java/org/apache/jena/riot/RiotReader.java

Modified: jena/trunk/jena-arq/src/main/java/org/apache/jena/riot/RDFLanguages.java
URL: http://svn.apache.org/viewvc/jena/trunk/jena-arq/src/main/java/org/apache/jena/riot/RDFLanguages.java?rev=1597282&r1=1597281&r2=1597282&view=diff
==============================================================================
--- jena/trunk/jena-arq/src/main/java/org/apache/jena/riot/RDFLanguages.java (original)
+++ jena/trunk/jena-arq/src/main/java/org/apache/jena/riot/RDFLanguages.java Sat May 24 13:15:11 2014
@@ -47,6 +47,7 @@ public class RDFLanguages
     public static final String strLangJSONLD     = "JSON-LD" ;
     public static final String strLangNQuads     = "N-Quads" ;
     public static final String strLangTriG       = "TriG" ;
+    public static final String strLangCSV        = "CSV";
     
     /*
      * ".owl" is not a formally registered file extension for OWL 
@@ -116,6 +117,12 @@ public class RDFLanguages
     
     /** Alternative constant {@linkplain #NQUADS} */
     public static final Lang NQ     = NQUADS ;
+    
+    /** CSV: tabular data, registered with RIOT as a triples language; also known by the alternative name and file extension "csv". */
+    public static final Lang CSV   = LangBuilder.create(strLangCSV, contentTypeTextCSV)
+                                                .addAltNames("csv")   
+                                                .addFileExtensions("csv")
+                                                .build() ;
 
     /** The "null" language */
     public static final Lang RDFNULL  = LangBuilder.create("rdf/null", "null/rdf")

Modified: jena/trunk/jena-arq/src/main/java/org/apache/jena/riot/RDFParserRegistry.java
URL: http://svn.apache.org/viewvc/jena/trunk/jena-arq/src/main/java/org/apache/jena/riot/RDFParserRegistry.java?rev=1597282&r1=1597281&r2=1597282&view=diff
==============================================================================
--- jena/trunk/jena-arq/src/main/java/org/apache/jena/riot/RDFParserRegistry.java (original)
+++ jena/trunk/jena-arq/src/main/java/org/apache/jena/riot/RDFParserRegistry.java Sat May 24 13:15:11 2014
@@ -84,6 +84,7 @@ public class RDFParserRegistry
         registerLangTriples(TURTLE,     parserFactory) ;
         registerLangTriples(JSONLD,     parserFactoryJsonLD) ;
         registerLangTriples(RDFJSON,    parserFactory) ;
+        registerLangTriples(CSV,        parserFactory) ;
         
         registerLangQuads(JSONLD,       parserFactoryJsonLD) ;
         registerLangQuads(NQUADS,       parserFactory) ;

Modified: jena/trunk/jena-arq/src/main/java/org/apache/jena/riot/RiotReader.java
URL: http://svn.apache.org/viewvc/jena/trunk/jena-arq/src/main/java/org/apache/jena/riot/RiotReader.java?rev=1597282&r1=1597281&r2=1597282&view=diff
==============================================================================
--- jena/trunk/jena-arq/src/main/java/org/apache/jena/riot/RiotReader.java (original)
+++ jena/trunk/jena-arq/src/main/java/org/apache/jena/riot/RiotReader.java Sat May 24 13:15:11 2014
@@ -18,6 +18,7 @@
 
 package org.apache.jena.riot;
 
+import static org.apache.jena.riot.RDFLanguages.CSV;
 import static org.apache.jena.riot.RDFLanguages.N3 ;
 import static org.apache.jena.riot.RDFLanguages.NQUADS ;
 import static org.apache.jena.riot.RDFLanguages.NTRIPLES ;
@@ -131,6 +132,8 @@ public class RiotReader
             if ( baseIRI != null )
                 baseIRI = IRIResolver.resolveString(baseIRI) ;
             return LangRDFXML.create(input, baseIRI, baseIRI, ErrorHandlerFactory.getDefaultErrorHandler(), dest) ;
+        } else if ( lang == CSV){
+        	return new LangCSV (input, baseIRI, baseIRI, ErrorHandlerFactory.getDefaultErrorHandler(),  dest);
         }
         Tokenizer tokenizer = ( lang == RDFJSON ) ?
             new TokenizerJSON(PeekReader.makeUTF8(input)) :
@@ -149,6 +152,8 @@ public class RiotReader
             if ( baseIRI != null )
                 baseIRI = IRIResolver.resolveString(baseIRI) ;
             return LangRDFXML.create(input, baseIRI, baseIRI, ErrorHandlerFactory.getDefaultErrorHandler(), dest) ;
+        } else if ( lang == CSV){
+        	return new LangCSV (input, baseIRI, baseIRI, ErrorHandlerFactory.getDefaultErrorHandler(),  dest);
         }
         Tokenizer tokenizer = ( lang == RDFJSON ) ?
             new TokenizerJSON(PeekReader.make(input)) :

Added: jena/trunk/jena-arq/src/main/java/org/apache/jena/riot/lang/LangCSV.java
URL: http://svn.apache.org/viewvc/jena/trunk/jena-arq/src/main/java/org/apache/jena/riot/lang/LangCSV.java?rev=1597282&view=auto
==============================================================================
--- jena/trunk/jena-arq/src/main/java/org/apache/jena/riot/lang/LangCSV.java (added)
+++ jena/trunk/jena-arq/src/main/java/org/apache/jena/riot/lang/LangCSV.java Sat May 24 13:15:11 2014
@@ -0,0 +1,144 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.jena.riot.lang;
+
+import java.io.InputStream;
+import java.io.Reader;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.regex.Pattern;
+
+import org.apache.jena.atlas.csv.CSVParser;
+import org.apache.jena.atlas.csv.CSVTokenIterator;
+import org.apache.jena.atlas.logging.Log;
+import org.apache.jena.riot.Lang;
+import org.apache.jena.riot.RDFLanguages;
+import org.apache.jena.riot.system.ErrorHandler;
+import org.apache.jena.riot.system.ParserProfile;
+import org.apache.jena.riot.system.RiotLib;
+import org.apache.jena.riot.system.StreamRDF;
+import org.apache.jena.riot.tokens.Token;
+import org.apache.jena.riot.tokens.Tokenizer;
+import org.apache.jena.riot.tokens.TokenizerFactory;
+
+import com.hp.hpl.jena.datatypes.xsd.XSDDatatype;
+import com.hp.hpl.jena.graph.Node;
+
+/**
+ * RIOT parser that reads CSV input and sends it to a {@link StreamRDF} as triples.
+ * <p>
+ * The first row is the header: each header cell becomes a predicate URI built as
+ * {@code filename + "#" + cell.trim()}. Every following row becomes a fresh blank node
+ * that carries one {@link #CSV_ROW} triple holding the row number (the header counts as
+ * row 1, so the first data row is numbered 2) plus one triple per cell.
+ * <p>
+ * A cell whose trimmed text is written like a Turtle numeric literal is turned into a
+ * node by the RIOT tokenizer; every other cell becomes a plain string literal that
+ * holds the trimmed cell text verbatim.
+ */
+public class LangCSV implements LangRIOT {
+
+    public static final String CSV_PREFIX = "http://w3c/future-csv-vocab/";
+    public static final String CSV_ROW = CSV_PREFIX + "row";
+
+    /**
+     * Lexical forms of the Turtle numeric literals (INTEGER, DECIMAL, DOUBLE).
+     * Only text matching this is handed to the tokenizer unquoted. A looser test such as
+     * Double.parseDouble also accepts "NaN", "Infinity", "1d" or hex floats, none of
+     * which fit this grammar.
+     */
+    private static final Pattern NUMERIC = Pattern.compile(
+        "[+-]?(?:[0-9]+(?:\\.[0-9]*)?[eE][+-]?[0-9]+|[0-9]*\\.[0-9]+(?:[eE][+-]?[0-9]+)?|[0-9]+)") ;
+
+    // Exactly one of input/reader is set, depending on the constructor used.
+    private InputStream input = null ;
+    private Reader reader = null ;
+    private String xmlBase ;
+    private String filename ;
+    private StreamRDF sink ;
+    private ParserProfile profile ;             // Warning - we don't use all of this.
+
+    @Override
+    public Lang getLang() {
+        return RDFLanguages.CSV ;
+    }
+
+    @Override
+    public ParserProfile getProfile() {
+        return profile ;
+    }
+
+    @Override
+    public void setProfile(ParserProfile profile) {
+        this.profile = profile ;
+    }
+
+    /**
+     * Create a parser over character input.
+     *
+     * @param reader       source of the CSV text
+     * @param xmlBase      base passed to {@link RiotLib#profile} when building the parser profile
+     * @param filename     prefix of the predicate URIs generated from the header row
+     * @param errorHandler error handler passed to {@link RiotLib#profile}
+     * @param sink         destination of the generated triples
+     */
+    public LangCSV(Reader reader, String xmlBase, String filename, ErrorHandler errorHandler, StreamRDF sink) {
+        this.reader = reader ;
+        this.xmlBase = xmlBase ;
+        this.filename = filename ;
+        this.sink = sink ;
+        this.profile = RiotLib.profile(getLang(), xmlBase, errorHandler) ;
+    }
+
+    /**
+     * Create a parser over byte input.
+     *
+     * @param in           source of the CSV bytes
+     * @param xmlBase      base passed to {@link RiotLib#profile} when building the parser profile
+     * @param filename     prefix of the predicate URIs generated from the header row
+     * @param errorHandler error handler passed to {@link RiotLib#profile}
+     * @param sink         destination of the generated triples
+     */
+    public LangCSV(InputStream in, String xmlBase, String filename, ErrorHandler errorHandler, StreamRDF sink) {
+        this.input = in ;
+        this.xmlBase = xmlBase ;
+        this.filename = filename ;
+        this.sink = sink ;
+        this.profile = RiotLib.profile(getLang(), xmlBase, errorHandler) ;
+    }
+
+    /**
+     * Read the whole CSV input and send the resulting triples to the sink.
+     * The sink is started before reading and finished afterwards, including when
+     * reading fails part-way through.
+     */
+    @Override
+    public void parse() {
+        sink.start() ;
+        try {
+            CSVTokenIterator iter = ( input != null )
+                ? new CSVTokenIterator(input)
+                : new CSVTokenIterator(reader) ;
+            CSVParser parser = new CSVParser(iter) ;
+            List<Node> predicates = new ArrayList<Node>() ;
+            int rowNum = 0 ;
+            List<String> row ;
+            while ( (row = parser.parse1()) != null ) {
+                rowNum++ ;
+                if ( rowNum == 1 ) {
+                    readHeader(row, predicates) ;
+                } else {
+                    emitRow(row, rowNum, predicates) ;
+                }
+            }
+        } finally {
+            // Always pair sink.start() with sink.finish(), even when a row fails.
+            sink.finish() ;
+        }
+    }
+
+    /** Turn each header cell into a predicate URI and append it to {@code predicates}. */
+    private void readHeader(List<String> header, List<Node> predicates) {
+        for ( String column : header ) {
+            predicates.add(profile.createURI(filename + "#" + column.trim(), -1, -1)) ;
+        }
+    }
+
+    /** Emit the row-number triple and one triple per cell for a single data row. */
+    private void emitRow(List<String> row, int rowNum, List<Node> predicates) {
+        Node subject = profile.createBlankNode(null, -1, -1) ;
+        Node predicateRow = profile.createURI(CSV_ROW, -1, -1) ;
+        Node objectRow = profile.createTypedLiteral(Integer.toString(rowNum), XSDDatatype.XSDinteger, -1, -1) ;
+        sink.triple(profile.createTriple(subject, predicateRow, objectRow, -1, -1)) ;
+
+        int cells = row.size() ;
+        if ( cells > predicates.size() ) {
+            // A row wider than the header has no predicate for the extra cells;
+            // indexing the predicate list with them would throw IndexOutOfBoundsException.
+            Log.warn(LangCSV.class, "Row " + rowNum + " has " + cells + " cells but the header has only "
+                                    + predicates.size() + " columns: extra cells ignored") ;
+            cells = predicates.size() ;
+        }
+        for ( int i = 0 ; i < cells ; i++ ) {
+            Node object = cellToNode(row.get(i).trim()) ;
+            sink.triple(profile.createTriple(subject, predicates.get(i), object, -1, -1)) ;
+        }
+    }
+
+    /**
+     * Convert trimmed cell text to an RDF node.
+     * Text is never wrapped in quotes and re-tokenized, so quotes, backslashes and
+     * line breaks inside a cell are kept exactly as read.
+     */
+    private Node cellToNode(String value) {
+        if ( NUMERIC.matcher(value).matches() ) {
+            Node number = parse(value) ;
+            if ( number != null ) {
+                return number ;
+            }
+        }
+        return profile.createStringLiteral(value, -1, -1) ;
+    }
+
+    /**
+     * Tokenize {@code string} and build a node from its first token.
+     *
+     * @return the node, or {@code null} when the string holds no token at all
+     */
+    private Node parse(String string) {
+        Tokenizer tokenizer = TokenizerFactory.makeTokenizerString(string) ;
+        if ( ! tokenizer.hasNext() ) {
+            return null ;
+        }
+        Token t = tokenizer.next() ;
+        Node n = profile.create(null, t) ;
+        if ( tokenizer.hasNext() ) {
+            Log.warn(LangCSV.class, "String has more than one token in it: " + string) ;
+        }
+        return n ;
+    }
+
+}

Propchange: jena/trunk/jena-arq/src/main/java/org/apache/jena/riot/lang/LangCSV.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain