You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@jena.apache.org by jp...@apache.org on 2014/05/24 15:15:11 UTC
svn commit: r1597282 - in
/jena/trunk/jena-arq/src/main/java/org/apache/jena/riot: RDFLanguages.java
RDFParserRegistry.java RiotReader.java lang/LangCSV.java
Author: jpz6311whu
Date: Sat May 24 13:15:11 2014
New Revision: 1597282
URL: http://svn.apache.org/r1597282
Log:
add LangCSV for reading and parsing CSV data
Added:
jena/trunk/jena-arq/src/main/java/org/apache/jena/riot/lang/LangCSV.java (with props)
Modified:
jena/trunk/jena-arq/src/main/java/org/apache/jena/riot/RDFLanguages.java
jena/trunk/jena-arq/src/main/java/org/apache/jena/riot/RDFParserRegistry.java
jena/trunk/jena-arq/src/main/java/org/apache/jena/riot/RiotReader.java
Modified: jena/trunk/jena-arq/src/main/java/org/apache/jena/riot/RDFLanguages.java
URL: http://svn.apache.org/viewvc/jena/trunk/jena-arq/src/main/java/org/apache/jena/riot/RDFLanguages.java?rev=1597282&r1=1597281&r2=1597282&view=diff
==============================================================================
--- jena/trunk/jena-arq/src/main/java/org/apache/jena/riot/RDFLanguages.java (original)
+++ jena/trunk/jena-arq/src/main/java/org/apache/jena/riot/RDFLanguages.java Sat May 24 13:15:11 2014
@@ -47,6 +47,7 @@ public class RDFLanguages
public static final String strLangJSONLD = "JSON-LD" ;
public static final String strLangNQuads = "N-Quads" ;
public static final String strLangTriG = "TriG" ;
+ public static final String strLangCSV = "CSV";
/*
* ".owl" is not a formally registered file extension for OWL
@@ -116,6 +117,12 @@ public class RDFLanguages
/** Alternative constant {@linkplain #NQUADS} */
public static final Lang NQ = NQUADS ;
+
+ /** CSV: created under the name {@link #strLangCSV}, with alternative name "csv" and file extension "csv". */
+ public static final Lang CSV = LangBuilder.create(strLangCSV, contentTypeTextCSV)
+ .addAltNames("csv")
+ .addFileExtensions("csv")
+ .build() ;
/** The "null" language */
public static final Lang RDFNULL = LangBuilder.create("rdf/null", "null/rdf")
Modified: jena/trunk/jena-arq/src/main/java/org/apache/jena/riot/RDFParserRegistry.java
URL: http://svn.apache.org/viewvc/jena/trunk/jena-arq/src/main/java/org/apache/jena/riot/RDFParserRegistry.java?rev=1597282&r1=1597281&r2=1597282&view=diff
==============================================================================
--- jena/trunk/jena-arq/src/main/java/org/apache/jena/riot/RDFParserRegistry.java (original)
+++ jena/trunk/jena-arq/src/main/java/org/apache/jena/riot/RDFParserRegistry.java Sat May 24 13:15:11 2014
@@ -84,6 +84,7 @@ public class RDFParserRegistry
registerLangTriples(TURTLE, parserFactory) ;
registerLangTriples(JSONLD, parserFactoryJsonLD) ;
registerLangTriples(RDFJSON, parserFactory) ;
+ registerLangTriples(CSV, parserFactory) ;
registerLangQuads(JSONLD, parserFactoryJsonLD) ;
registerLangQuads(NQUADS, parserFactory) ;
Modified: jena/trunk/jena-arq/src/main/java/org/apache/jena/riot/RiotReader.java
URL: http://svn.apache.org/viewvc/jena/trunk/jena-arq/src/main/java/org/apache/jena/riot/RiotReader.java?rev=1597282&r1=1597281&r2=1597282&view=diff
==============================================================================
--- jena/trunk/jena-arq/src/main/java/org/apache/jena/riot/RiotReader.java (original)
+++ jena/trunk/jena-arq/src/main/java/org/apache/jena/riot/RiotReader.java Sat May 24 13:15:11 2014
@@ -18,6 +18,7 @@
package org.apache.jena.riot;
+import static org.apache.jena.riot.RDFLanguages.CSV;
import static org.apache.jena.riot.RDFLanguages.N3 ;
import static org.apache.jena.riot.RDFLanguages.NQUADS ;
import static org.apache.jena.riot.RDFLanguages.NTRIPLES ;
@@ -131,6 +132,8 @@ public class RiotReader
if ( baseIRI != null )
baseIRI = IRIResolver.resolveString(baseIRI) ;
return LangRDFXML.create(input, baseIRI, baseIRI, ErrorHandlerFactory.getDefaultErrorHandler(), dest) ;
+ } else if ( lang == CSV){
+ return new LangCSV (input, baseIRI, baseIRI, ErrorHandlerFactory.getDefaultErrorHandler(), dest);
}
Tokenizer tokenizer = ( lang == RDFJSON ) ?
new TokenizerJSON(PeekReader.makeUTF8(input)) :
@@ -149,6 +152,8 @@ public class RiotReader
if ( baseIRI != null )
baseIRI = IRIResolver.resolveString(baseIRI) ;
return LangRDFXML.create(input, baseIRI, baseIRI, ErrorHandlerFactory.getDefaultErrorHandler(), dest) ;
+ } else if ( lang == CSV){
+ return new LangCSV (input, baseIRI, baseIRI, ErrorHandlerFactory.getDefaultErrorHandler(), dest);
}
Tokenizer tokenizer = ( lang == RDFJSON ) ?
new TokenizerJSON(PeekReader.make(input)) :
Added: jena/trunk/jena-arq/src/main/java/org/apache/jena/riot/lang/LangCSV.java
URL: http://svn.apache.org/viewvc/jena/trunk/jena-arq/src/main/java/org/apache/jena/riot/lang/LangCSV.java?rev=1597282&view=auto
==============================================================================
--- jena/trunk/jena-arq/src/main/java/org/apache/jena/riot/lang/LangCSV.java (added)
+++ jena/trunk/jena-arq/src/main/java/org/apache/jena/riot/lang/LangCSV.java Sat May 24 13:15:11 2014
@@ -0,0 +1,144 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.jena.riot.lang;
+
+import java.io.InputStream;
+import java.io.Reader;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.jena.atlas.csv.CSVParser;
+import org.apache.jena.atlas.csv.CSVTokenIterator;
+import org.apache.jena.atlas.logging.Log;
+import org.apache.jena.riot.Lang;
+import org.apache.jena.riot.RDFLanguages;
+import org.apache.jena.riot.system.ErrorHandler;
+import org.apache.jena.riot.system.ParserProfile;
+import org.apache.jena.riot.system.RiotLib;
+import org.apache.jena.riot.system.StreamRDF;
+import org.apache.jena.riot.tokens.Token;
+import org.apache.jena.riot.tokens.Tokenizer;
+import org.apache.jena.riot.tokens.TokenizerFactory;
+
+import com.hp.hpl.jena.datatypes.xsd.XSDDatatype;
+import com.hp.hpl.jena.graph.Node;
+
+/**
+ * RIOT parser that reads CSV input and emits it as RDF triples.
+ * <p>
+ * The first row is treated as the header: each trimmed column name becomes a
+ * predicate IRI built as {@code filename + "#" + columnName}. Every later row
+ * gets a blank node from the parser profile as its subject, one
+ * {@link #CSV_ROW} triple carrying the row number (the header counts as row 1,
+ * so the first data row is numbered 2) and one triple per cell.
+ * <p>
+ * Cells written as plain numbers are handed to the RIOT tokenizer, so the
+ * resulting literal is whatever the tokenizer and profile make of that lexical
+ * form; every other cell is emitted as a plain string literal.
+ */
+public class LangCSV implements LangRIOT {
+
+    /** Namespace of the (provisional) CSV vocabulary used by this parser. */
+    public static final String CSV_PREFIX = "http://w3c/future-csv-vocab/";
+    /** Predicate IRI linking a row's blank node to its row number. */
+    public static final String CSV_ROW = CSV_PREFIX + "row";
+
+    /** Characters allowed in a cell that is passed to the tokenizer as a number. */
+    private static final String NUMBER_CHARS = "0123456789+-.eE";
+
+    // Only one of input/reader is set, depending on which constructor was used.
+    private InputStream input = null ;
+    private Reader reader = null ;
+    private String xmlBase ;
+    private String filename ;
+    private StreamRDF sink ;
+    private ParserProfile profile ; // Warning - we don't use all of this.
+
+    /** @return the CSV language constant, {@link RDFLanguages#CSV} */
+    @Override
+    public Lang getLang() {
+        return RDFLanguages.CSV;
+    }
+
+    /** @return the parser profile used to create nodes and triples */
+    @Override
+    public ParserProfile getProfile()
+    {
+        return profile ;
+    }
+
+    /** @param profile the parser profile to use for creating nodes and triples */
+    @Override
+    public void setProfile(ParserProfile profile)
+    { this.profile = profile ; }
+
+    /**
+     * Creates a CSV parser reading characters from {@code reader}.
+     *
+     * @param reader       source of the CSV text
+     * @param xmlBase      base IRI passed to the parser profile
+     * @param filename     prefix for the column predicate IRIs ({@code filename#column})
+     * @param errorHandler handler installed in the parser profile
+     * @param sink         destination for the generated triples
+     */
+    public LangCSV(Reader reader, String xmlBase, String filename, ErrorHandler errorHandler, StreamRDF sink)
+    {
+        this.reader = reader ;
+        this.xmlBase = xmlBase ;
+        this.filename = filename ;
+        this.sink = sink ;
+        this.profile = RiotLib.profile(getLang(), xmlBase, errorHandler) ;
+    }
+
+    /**
+     * Creates a CSV parser reading bytes from {@code in}.
+     *
+     * @param in           source of the CSV data
+     * @param xmlBase      base IRI passed to the parser profile
+     * @param filename     prefix for the column predicate IRIs ({@code filename#column})
+     * @param errorHandler handler installed in the parser profile
+     * @param sink         destination for the generated triples
+     */
+    public LangCSV(InputStream in, String xmlBase, String filename, ErrorHandler errorHandler, StreamRDF sink)
+    {
+        this.input = in ;
+        this.xmlBase = xmlBase ;
+        this.filename = filename ;
+        this.sink = sink ;
+        this.profile = RiotLib.profile(getLang(), xmlBase, errorHandler) ;
+    }
+
+    /**
+     * Reads the whole CSV source and sends the resulting triples to the sink.
+     * {@code sink.finish()} is called even if reading or conversion fails, so
+     * the sink always sees a matching start/finish pair.
+     */
+    @Override
+    public void parse() {
+        sink.start() ;
+        try {
+            CSVTokenIterator iter = ( input != null )
+                ? new CSVTokenIterator(input)
+                : new CSVTokenIterator(reader) ;
+            CSVParser parser = new CSVParser(iter) ;
+
+            List<Node> predicates = new ArrayList<Node>() ;
+            // Same predicate for every row, so build it once.
+            Node predicateRow = profile.createURI(CSV_ROW, -1, -1) ;
+
+            int rowNum = 0 ;
+            List<String> row ;
+            while ( (row = parser.parse1()) != null ) {
+                rowNum++ ;
+                if ( rowNum == 1 ) {
+                    // Header row: one predicate per column name.
+                    for ( String column : row )
+                        predicates.add(profile.createURI(filename + "#" + column.trim(), -1, -1)) ;
+                } else {
+                    emitRow(rowNum, row, predicates, predicateRow) ;
+                }
+            }
+        } finally {
+            sink.finish() ;
+        }
+    }
+
+    /** Emits the row-number triple and one triple per cell for a single data row. */
+    private void emitRow(int rowNum, List<String> row, List<Node> predicates, Node predicateRow)
+    {
+        Node subject = profile.createBlankNode(null, -1, -1) ;
+        Node objectRow = profile.createTypedLiteral(String.valueOf(rowNum), XSDDatatype.XSDinteger, -1, -1) ;
+        sink.triple(profile.createTriple(subject, predicateRow, objectRow, -1, -1)) ;
+
+        int cells = row.size() ;
+        if ( cells > predicates.size() ) {
+            // A ragged row has no predicate for the surplus cells; report and drop them
+            // rather than failing with an IndexOutOfBoundsException.
+            Log.warn(LangCSV.class, "Row "+rowNum+" has "+cells+" cells but the header has only "
+                                    +predicates.size()+" columns; ignoring the extra cells") ;
+            cells = predicates.size() ;
+        }
+        for ( int i = 0 ; i < cells ; i++ ) {
+            String columnValue = row.get(i).trim() ;
+            sink.triple(profile.createTriple(subject, predicates.get(i), toObject(columnValue), -1, -1)) ;
+        }
+    }
+
+    /**
+     * Converts a trimmed cell to an RDF node: plain numbers go through the
+     * tokenizer, anything else becomes a string literal built directly from the
+     * cell text (no quoting or re-tokenizing, so embedded quotes and
+     * backslashes are preserved as-is).
+     */
+    private Node toObject(String columnValue)
+    {
+        if ( isPlainNumber(columnValue) ) {
+            Node n = parse(columnValue) ;
+            if ( n != null )
+                return n ;
+        }
+        return profile.createStringLiteral(columnValue, -1, -1) ;
+    }
+
+    /**
+     * Tests whether a cell is a number written only with digits, sign, decimal
+     * point and exponent, ending in a digit. {@code Double.parseDouble} alone is
+     * too permissive: it also accepts "NaN", "Infinity", type suffixes such as
+     * "1f"/"1d" and hexadecimal floats, none of which is a single number token.
+     */
+    private static boolean isPlainNumber(String value)
+    {
+        int len = value.length() ;
+        if ( len == 0 )
+            return false ;
+        for ( int i = 0 ; i < len ; i++ ) {
+            if ( NUMBER_CHARS.indexOf(value.charAt(i)) < 0 )
+                return false ;
+        }
+        char last = value.charAt(len-1) ;
+        if ( last < '0' || last > '9' )
+            return false ;
+        try {
+            Double.parseDouble(value) ;
+            return true ;
+        } catch (NumberFormatException ex) {
+            return false ;
+        }
+    }
+
+    /**
+     * Tokenizes {@code string} and turns its first token into a node.
+     *
+     * @return the node for the first token, or {@code null} if there is no token
+     */
+    private Node parse(String string)
+    {
+        Tokenizer tokenizer = TokenizerFactory.makeTokenizerString(string) ;
+        if ( ! tokenizer.hasNext() )
+            return null ;
+        Token t = tokenizer.next();
+        Node n = profile.create(null, t) ;
+        if ( tokenizer.hasNext() )
+            Log.warn(LangCSV.class, "String has more than one token in it: "+string) ;
+        return n ;
+    }
+
+}
Propchange: jena/trunk/jena-arq/src/main/java/org/apache/jena/riot/lang/LangCSV.java
------------------------------------------------------------------------------
svn:mime-type = text/plain