You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@commonsrdf.apache.org by st...@apache.org on 2016/10/07 15:01:16 UTC
[13/50] incubator-commonsrdf git commit: COMMONSRDF-39 RDF4JParser in
experimental package
COMMONSRDF-39 RDF4JParser in experimental package
Project: http://git-wip-us.apache.org/repos/asf/incubator-commonsrdf/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-commonsrdf/commit/d9051898
Tree: http://git-wip-us.apache.org/repos/asf/incubator-commonsrdf/tree/d9051898
Diff: http://git-wip-us.apache.org/repos/asf/incubator-commonsrdf/diff/d9051898
Branch: refs/heads/master
Commit: d90518985a6bc886dec31b9b2854367f1fe9ea3c
Parents: cb2a81f
Author: Stian Soiland-Reyes <st...@apache.org>
Authored: Wed Sep 28 14:30:13 2016 +0100
Committer: Stian Soiland-Reyes <st...@apache.org>
Committed: Wed Sep 28 14:30:13 2016 +0100
----------------------------------------------------------------------
.../commons/rdf/rdf4j/RDF4JParserBuilder.java | 194 ------------------
.../rdf/rdf4j/experimental/RDF4JParser.java | 197 +++++++++++++++++++
.../rdf/rdf4j/experimental/package-info.java | 34 ++++
.../apache/commons/rdf/rdf4j/package-info.java | 6 +-
4 files changed, 234 insertions(+), 197 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-commonsrdf/blob/d9051898/rdf4j/src/main/java/org/apache/commons/rdf/rdf4j/RDF4JParserBuilder.java
----------------------------------------------------------------------
diff --git a/rdf4j/src/main/java/org/apache/commons/rdf/rdf4j/RDF4JParserBuilder.java b/rdf4j/src/main/java/org/apache/commons/rdf/rdf4j/RDF4JParserBuilder.java
deleted file mode 100644
index 732112b..0000000
--- a/rdf4j/src/main/java/org/apache/commons/rdf/rdf4j/RDF4JParserBuilder.java
+++ /dev/null
@@ -1,194 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.commons.rdf.rdf4j;
-
-import java.io.IOException;
-import java.io.InputStream;
-import java.net.MalformedURLException;
-import java.net.URL;
-import java.nio.file.Files;
-import java.nio.file.Path;
-import java.util.Optional;
-import java.util.function.Consumer;
-
-import org.apache.commons.rdf.api.IRI;
-import org.apache.commons.rdf.api.Quad;
-import org.apache.commons.rdf.api.RDFParserBuilder;
-import org.apache.commons.rdf.api.RDFSyntax;
-import org.apache.commons.rdf.simple.AbstractRDFParserBuilder;
-import org.eclipse.rdf4j.model.Model;
-import org.eclipse.rdf4j.repository.util.RDFInserter;
-import org.eclipse.rdf4j.repository.util.RDFLoader;
-import org.eclipse.rdf4j.rio.ParserConfig;
-import org.eclipse.rdf4j.rio.RDFFormat;
-import org.eclipse.rdf4j.rio.RDFHandler;
-import org.eclipse.rdf4j.rio.RDFHandlerException;
-import org.eclipse.rdf4j.rio.Rio;
-import org.eclipse.rdf4j.rio.helpers.AbstractRDFHandler;
-
-/**
- * RDF4J-based parser.
- * <p>
- * This can handle the RDF syntaxes {@link RDFSyntax#JSONLD},
- * {@link RDFSyntax#NQUADS}, {@link RDFSyntax#NTRIPLES},
- * {@link RDFSyntax#RDFXML}, {@link RDFSyntax#TRIG} and {@link RDFSyntax#TURTLE}
- * - additional syntaxes can be supported by including the corresponding
- * <em>rdf4j-rio-*</em> module on the classpath.
- *
- */
-public class RDF4JParserBuilder extends AbstractRDFParserBuilder<RDF4JParserBuilder> implements RDFParserBuilder {
-
- private final class AddToQuadConsumer extends AbstractRDFHandler {
- private final Consumer<Quad> quadTarget;
-
- private AddToQuadConsumer(Consumer<Quad> quadTarget) {
- this.quadTarget = quadTarget;
- }
-
- public void handleStatement(org.eclipse.rdf4j.model.Statement st)
- throws org.eclipse.rdf4j.rio.RDFHandlerException {
- // TODO: if getRdfTermFactory() is a non-rdf4j factory, should
- // we use factory.createQuad() instead?
- // Unsure what is the promise of setting getRdfTermFactory() --
- // does it go all the way down to creating BlankNode, IRI and
- // Literal?
- quadTarget.accept(rdf4jTermFactory.asQuad(st));
- // Performance note:
- // Graph/Quad.add should pick up again our
- // RDF4JGraphLike.asStatement()
- // and avoid double conversion.
- // Additionally the RDF4JQuad and RDF4JTriple implementations
- // are lazily converting subj/obj/pred/graph.s
- }
- }
-
- private final static class AddToModel extends AbstractRDFHandler {
- private final Model model;
-
- public AddToModel(Model model) {
- this.model = model;
- }
-
- public void handleStatement(org.eclipse.rdf4j.model.Statement st)
- throws org.eclipse.rdf4j.rio.RDFHandlerException {
- model.add(st);
- }
-
- @Override
- public void handleNamespace(String prefix, String uri) throws RDFHandlerException {
- model.setNamespace(prefix, uri);
- }
- }
-
- private RDF4JTermFactory rdf4jTermFactory;
-
- @Override
- protected RDF4JTermFactory createRDFTermFactory() {
- return new RDF4JTermFactory();
- }
-
- @Override
- protected RDF4JParserBuilder prepareForParsing() throws IOException, IllegalStateException {
- RDF4JParserBuilder c = prepareForParsing();
- // Ensure we have an RDF4JTermFactory for conversion.
- // We'll make a new one if user has provided a non-RDF4J factory
- c.rdf4jTermFactory = (RDF4JTermFactory) getRdfTermFactory().filter(RDF4JTermFactory.class::isInstance)
- .orElseGet(c::createRDFTermFactory);
- return c;
- }
-
- @Override
- protected void parseSynchronusly() throws IOException {
- Optional<RDFFormat> formatByMimeType = getContentType().flatMap(Rio::getParserFormatForMIMEType);
- String base = getBase().map(IRI::getIRIString).orElse(null);
-
- ParserConfig parserConfig = new ParserConfig();
- // TODO: Should we need to set anything?
- RDFLoader loader = new RDFLoader(parserConfig, rdf4jTermFactory.getValueFactory());
- RDFHandler rdfHandler = makeRDFHandler();
- if (getSourceFile().isPresent()) {
- // NOTE: While we could have used
- // loader.load(sourcePath.toFile()
- // if the path fs provider == FileSystems.getDefault(),
- // that RDFLoader method does not use absolute path
- // as the base URI, so to be consistent
- // we'll always do it with our own input stream
- //
- // That means we may have to guess format by extensions:
- Optional<RDFFormat> formatByFilename = getSourceFile().map(Path::getFileName).map(Path::toString)
- .flatMap(Rio::getParserFormatForFileName);
- // TODO: for the excited.. what about the extension after following symlinks?
-
- RDFFormat format = formatByMimeType.orElse(formatByFilename.orElse(null));
- try (InputStream in = Files.newInputStream(getSourceFile().get())) {
- loader.load(in, base, format, rdfHandler);
- }
- } else if (getSourceIri().isPresent()) {
- try {
- // TODO: Handle international IRIs properly
- // (Unicode support for for hostname, path and query)
- URL url = new URL(getSourceIri().get().getIRIString());
- // TODO: This probably does not support https:// -> http:// redirections
- loader.load(url, base, formatByMimeType.orElse(null), makeRDFHandler());
- } catch (MalformedURLException ex) {
- throw new IOException("Can't handle source URL: " + getSourceIri().get(), ex);
- }
- }
- // must be getSourceInputStream then, this is guaranteed by super.checkSource();
- loader.load(getSourceInputStream().get(), base, formatByMimeType.orElse(null), rdfHandler);
- }
-
- protected RDFHandler makeRDFHandler() {
-
- // TODO: Can we join the below DF4JDataset and RDF4JGraph cases
- // using RDF4JGraphLike<TripleLike<BlankNodeOrIRI,IRI,RDFTerm>>
- // or will that need tricky generics types?
-
- if (getTargetDataset().filter(RDF4JDataset.class::isInstance).isPresent()) {
- // One of us, we can add them as Statements directly
- RDF4JDataset dataset = (RDF4JDataset) getTargetDataset().get();
- if (dataset.asRepository().isPresent()) {
- return new RDFInserter(dataset.asRepository().get().getConnection());
- }
- if (dataset.asModel().isPresent()) {
- Model model = dataset.asModel().get();
- return new AddToModel(model);
- }
- // Not backed by Repository or Model?
- // Third-party RDF4JDataset subclass, so we'll fall through to the
- // getTarget() handling further down
- } else if (getTargetGraph().filter(RDF4JGraph.class::isInstance).isPresent()) {
- RDF4JGraph graph = (RDF4JGraph) getTargetGraph().get();
-
- if (graph.asRepository().isPresent()) {
- RDFInserter inserter = new RDFInserter(graph.asRepository().get().getConnection());
- graph.getContextFilter().ifPresent(inserter::enforceContext);
- return inserter;
- }
- if (graph.asModel().isPresent() && graph.getContextFilter().isPresent()) {
- Model model = graph.asModel().get();
- return new AddToModel(model);
- }
- // else - fall through
- }
-
- // Fall thorough: let target() consume our converted quads.
- return new AddToQuadConsumer(getTarget());
- }
-
-}
http://git-wip-us.apache.org/repos/asf/incubator-commonsrdf/blob/d9051898/rdf4j/src/main/java/org/apache/commons/rdf/rdf4j/experimental/RDF4JParser.java
----------------------------------------------------------------------
diff --git a/rdf4j/src/main/java/org/apache/commons/rdf/rdf4j/experimental/RDF4JParser.java b/rdf4j/src/main/java/org/apache/commons/rdf/rdf4j/experimental/RDF4JParser.java
new file mode 100644
index 0000000..c185419
--- /dev/null
+++ b/rdf4j/src/main/java/org/apache/commons/rdf/rdf4j/experimental/RDF4JParser.java
@@ -0,0 +1,197 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.rdf.rdf4j.experimental;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.net.MalformedURLException;
+import java.net.URL;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.util.Optional;
+import java.util.function.Consumer;
+
+import org.apache.commons.rdf.api.IRI;
+import org.apache.commons.rdf.api.Quad;
+import org.apache.commons.rdf.api.RDFSyntax;
+import org.apache.commons.rdf.experimental.RDFParser;
+import org.apache.commons.rdf.rdf4j.RDF4JDataset;
+import org.apache.commons.rdf.rdf4j.RDF4JGraph;
+import org.apache.commons.rdf.rdf4j.RDF4JTermFactory;
+import org.apache.commons.rdf.simple.experimental.AbstractRDFParser;
+import org.eclipse.rdf4j.model.Model;
+import org.eclipse.rdf4j.repository.util.RDFInserter;
+import org.eclipse.rdf4j.repository.util.RDFLoader;
+import org.eclipse.rdf4j.rio.ParserConfig;
+import org.eclipse.rdf4j.rio.RDFFormat;
+import org.eclipse.rdf4j.rio.RDFHandler;
+import org.eclipse.rdf4j.rio.RDFHandlerException;
+import org.eclipse.rdf4j.rio.Rio;
+import org.eclipse.rdf4j.rio.helpers.AbstractRDFHandler;
+
+/**
+ * RDF4J-based parser.
+ * <p>
+ * This can handle the RDF syntaxes {@link RDFSyntax#JSONLD},
+ * {@link RDFSyntax#NQUADS}, {@link RDFSyntax#NTRIPLES},
+ * {@link RDFSyntax#RDFXML}, {@link RDFSyntax#TRIG} and {@link RDFSyntax#TURTLE}
+ * - additional syntaxes can be supported by including the corresponding
+ * <em>rdf4j-rio-*</em> module on the classpath.
+ *
+ */
+public class RDF4JParser extends AbstractRDFParser<RDF4JParser> implements RDFParser {
+
+ private final class AddToQuadConsumer extends AbstractRDFHandler {
+ private final Consumer<Quad> quadTarget;
+
+ private AddToQuadConsumer(Consumer<Quad> quadTarget) {
+ this.quadTarget = quadTarget;
+ }
+
+ public void handleStatement(org.eclipse.rdf4j.model.Statement st)
+ throws org.eclipse.rdf4j.rio.RDFHandlerException {
+ // TODO: if getRdfTermFactory() is a non-rdf4j factory, should
+ // we use factory.createQuad() instead?
+ // Unsure what is the promise of setting getRdfTermFactory() --
+ // does it go all the way down to creating BlankNode, IRI and
+ // Literal?
+ quadTarget.accept(rdf4jTermFactory.asQuad(st));
+ // Performance note:
+ // Graph/Quad.add should pick up again our
+ // RDF4JGraphLike.asStatement()
+ // and avoid double conversion.
+ // Additionally the RDF4JQuad and RDF4JTriple implementations
+ // are lazily converting subj/obj/pred/graph.s
+ }
+ }
+
+ private final static class AddToModel extends AbstractRDFHandler {
+ private final Model model;
+
+ public AddToModel(Model model) {
+ this.model = model;
+ }
+
+ public void handleStatement(org.eclipse.rdf4j.model.Statement st)
+ throws org.eclipse.rdf4j.rio.RDFHandlerException {
+ model.add(st);
+ }
+
+ @Override
+ public void handleNamespace(String prefix, String uri) throws RDFHandlerException {
+ model.setNamespace(prefix, uri);
+ }
+ }
+
+ private RDF4JTermFactory rdf4jTermFactory;
+
+ @Override
+ protected RDF4JTermFactory createRDFTermFactory() {
+ return new RDF4JTermFactory();
+ }
+
+ @Override
+ protected RDF4JParser prepareForParsing() throws IOException, IllegalStateException {
+ RDF4JParser c = prepareForParsing();
+ // Ensure we have an RDF4JTermFactory for conversion.
+ // We'll make a new one if user has provided a non-RDF4J factory
+ c.rdf4jTermFactory = (RDF4JTermFactory) getRdfTermFactory().filter(RDF4JTermFactory.class::isInstance)
+ .orElseGet(c::createRDFTermFactory);
+ return c;
+ }
+
+ @Override
+ protected void parseSynchronusly() throws IOException {
+ Optional<RDFFormat> formatByMimeType = getContentType().flatMap(Rio::getParserFormatForMIMEType);
+ String base = getBase().map(IRI::getIRIString).orElse(null);
+
+ ParserConfig parserConfig = new ParserConfig();
+ // TODO: Should we need to set anything?
+ RDFLoader loader = new RDFLoader(parserConfig, rdf4jTermFactory.getValueFactory());
+ RDFHandler rdfHandler = makeRDFHandler();
+ if (getSourceFile().isPresent()) {
+ // NOTE: While we could have used
+ // loader.load(sourcePath.toFile()
+ // if the path fs provider == FileSystems.getDefault(),
+ // that RDFLoader method does not use absolute path
+ // as the base URI, so to be consistent
+ // we'll always do it with our own input stream
+ //
+ // That means we may have to guess format by extensions:
+ Optional<RDFFormat> formatByFilename = getSourceFile().map(Path::getFileName).map(Path::toString)
+ .flatMap(Rio::getParserFormatForFileName);
+ // TODO: for the excited.. what about the extension after following symlinks?
+
+ RDFFormat format = formatByMimeType.orElse(formatByFilename.orElse(null));
+ try (InputStream in = Files.newInputStream(getSourceFile().get())) {
+ loader.load(in, base, format, rdfHandler);
+ }
+ } else if (getSourceIri().isPresent()) {
+ try {
+ // TODO: Handle international IRIs properly
+ // (Unicode support for for hostname, path and query)
+ URL url = new URL(getSourceIri().get().getIRIString());
+ // TODO: This probably does not support https:// -> http:// redirections
+ loader.load(url, base, formatByMimeType.orElse(null), makeRDFHandler());
+ } catch (MalformedURLException ex) {
+ throw new IOException("Can't handle source URL: " + getSourceIri().get(), ex);
+ }
+ }
+ // must be getSourceInputStream then, this is guaranteed by super.checkSource();
+ loader.load(getSourceInputStream().get(), base, formatByMimeType.orElse(null), rdfHandler);
+ }
+
+ protected RDFHandler makeRDFHandler() {
+
+ // TODO: Can we join the below DF4JDataset and RDF4JGraph cases
+ // using RDF4JGraphLike<TripleLike<BlankNodeOrIRI,IRI,RDFTerm>>
+ // or will that need tricky generics types?
+
+ if (getTargetDataset().filter(RDF4JDataset.class::isInstance).isPresent()) {
+ // One of us, we can add them as Statements directly
+ RDF4JDataset dataset = (RDF4JDataset) getTargetDataset().get();
+ if (dataset.asRepository().isPresent()) {
+ return new RDFInserter(dataset.asRepository().get().getConnection());
+ }
+ if (dataset.asModel().isPresent()) {
+ Model model = dataset.asModel().get();
+ return new AddToModel(model);
+ }
+ // Not backed by Repository or Model?
+ // Third-party RDF4JDataset subclass, so we'll fall through to the
+ // getTarget() handling further down
+ } else if (getTargetGraph().filter(RDF4JGraph.class::isInstance).isPresent()) {
+ RDF4JGraph graph = (RDF4JGraph) getTargetGraph().get();
+
+ if (graph.asRepository().isPresent()) {
+ RDFInserter inserter = new RDFInserter(graph.asRepository().get().getConnection());
+ graph.getContextFilter().ifPresent(inserter::enforceContext);
+ return inserter;
+ }
+ if (graph.asModel().isPresent() && graph.getContextFilter().isPresent()) {
+ Model model = graph.asModel().get();
+ return new AddToModel(model);
+ }
+ // else - fall through
+ }
+
+ // Fall thorough: let target() consume our converted quads.
+ return new AddToQuadConsumer(getTarget());
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/incubator-commonsrdf/blob/d9051898/rdf4j/src/main/java/org/apache/commons/rdf/rdf4j/experimental/package-info.java
----------------------------------------------------------------------
diff --git a/rdf4j/src/main/java/org/apache/commons/rdf/rdf4j/experimental/package-info.java b/rdf4j/src/main/java/org/apache/commons/rdf/rdf4j/experimental/package-info.java
new file mode 100644
index 0000000..192a148
--- /dev/null
+++ b/rdf4j/src/main/java/org/apache/commons/rdf/rdf4j/experimental/package-info.java
@@ -0,0 +1,34 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ * Experimental Commons RDF RDF4J implementations.
+ * <p>
+ * Classes in this package should be considered <strong>at
+ * risk</strong>; they might change or be removed in the next minor update of
+ * Commons RDF.
+ * <p>
+ * When a class has stabilized, it will move to the
+ * {@link org.apache.commons.rdf.rdf4j} package.
+ * <p>
+ * <ul>
+ * <li>{@link RDF4JParser} - an RDF4J-backed
+ * implementations of
+ * {@link org.apache.commons.rdf.api.experimental.RDFParser}.</li>
+ * </ul>
+ */
+package org.apache.commons.rdf.rdf4j.experimental;
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/incubator-commonsrdf/blob/d9051898/rdf4j/src/main/java/org/apache/commons/rdf/rdf4j/package-info.java
----------------------------------------------------------------------
diff --git a/rdf4j/src/main/java/org/apache/commons/rdf/rdf4j/package-info.java b/rdf4j/src/main/java/org/apache/commons/rdf/rdf4j/package-info.java
index 844da5a..e384f3d 100644
--- a/rdf4j/src/main/java/org/apache/commons/rdf/rdf4j/package-info.java
+++ b/rdf4j/src/main/java/org/apache/commons/rdf/rdf4j/package-info.java
@@ -41,11 +41,11 @@
* {@link org.apache.commons.rdf.rdf4j.RDF4JDataset} provide access to the
* underlying RDF4J implementations.
* <p>
- * The {@link org.apache.commons.rdf.rdf4j.RDF4JParserBuilder} can be used to
+ * The {@link org.apache.commons.rdf.rdf4j.experimental.RDF4JParser} can be used to
* parse RDF files using RDF4j. It should be most efficient if used with
- * {@link org.apache.commons.rdf.rdf4j.RDF4JParserBuilder#target(org.apache.commons.rdf.api.Dataset)}
+ * {@link org.apache.commons.rdf.rdf4j.experimental.RDF4JParser#target(org.apache.commons.rdf.api.Dataset)}
* and an adapted {@link org.apache.commons.rdf.rdf4j.RDF4JDataset}, or
- * {@link org.apache.commons.rdf.rdf4j.RDF4JParserBuilder#target(org.apache.commons.rdf.api.Graph)}
+ * {@link org.apache.commons.rdf.rdf4j.experimental.RDF4JParser#target(org.apache.commons.rdf.api.Graph)}
* and a an adapted {@link org.apache.commons.rdf.rdf4j.RDF4JGraph}
*
*