You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@commonsrdf.apache.org by st...@apache.org on 2016/09/28 13:52:00 UTC

[08/36] incubator-commonsrdf git commit: An abstract implementation of RDFParserBuilder

An abstract implementation of RDFParserBuilder


Project: http://git-wip-us.apache.org/repos/asf/incubator-commonsrdf/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-commonsrdf/commit/8d548d50
Tree: http://git-wip-us.apache.org/repos/asf/incubator-commonsrdf/tree/8d548d50
Diff: http://git-wip-us.apache.org/repos/asf/incubator-commonsrdf/diff/8d548d50

Branch: refs/heads/master
Commit: 8d548d5005eef1ebf373bf666f31af039ad58c04
Parents: 6f1f669
Author: Stian Soiland-Reyes <st...@apache.org>
Authored: Fri Apr 1 17:05:34 2016 +0100
Committer: Stian Soiland-Reyes <st...@apache.org>
Committed: Fri Apr 1 17:05:34 2016 +0100

----------------------------------------------------------------------
 .../rdf/simple/AbstractRDFParserBuilder.java    | 393 +++++++++++++++++++
 1 file changed, 393 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-commonsrdf/blob/8d548d50/simple/src/main/java/org/apache/commons/rdf/simple/AbstractRDFParserBuilder.java
----------------------------------------------------------------------
diff --git a/simple/src/main/java/org/apache/commons/rdf/simple/AbstractRDFParserBuilder.java b/simple/src/main/java/org/apache/commons/rdf/simple/AbstractRDFParserBuilder.java
new file mode 100644
index 0000000..925a365
--- /dev/null
+++ b/simple/src/main/java/org/apache/commons/rdf/simple/AbstractRDFParserBuilder.java
@@ -0,0 +1,393 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.rdf.simple;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.net.URI;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.text.ParseException;
+import java.util.Optional;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+import java.util.concurrent.Future;
+
+import org.apache.commons.rdf.api.Graph;
+import org.apache.commons.rdf.api.IRI;
+import org.apache.commons.rdf.api.RDFParserBuilder;
+import org.apache.commons.rdf.api.RDFSyntax;
+import org.apache.commons.rdf.api.RDFTermFactory;
+
+/**
+ * Abstract RDFParserBuilder
+ * <p>
+ * This abstract class keeps the builder properties in protected fields like
+ * {@link #sourceFile} using {@link Optional}. Some basic checking like
+ * {@link #checkIsAbsolute(IRI)} is performed.
+ * <p>
+ * This class and its subclasses are {@link Cloneable}, immutable and
+ * (therefore) thread-safe - each call to option methods like
+ * {@link #contentType(String)} or {@link #source(IRI)} will return a cloned,
+ * mutated copy.
+ * <p>
+ * By default, parsing is done by the abstract method
+ * {@link #parseSynchronusly()} - which is executed in a cloned snapshot - hence
+ * multiple {@link #parse()} calls are thread-safe. The default {@link #parse()}
+ * uses a thread pool in {@link #threadGroup} - but implementations can override
+ * {@link #parse()} (e.g. because it has its own threading model or use
+ * asynchronou  remote execution).
+ */
+public abstract class AbstractRDFParserBuilder implements RDFParserBuilder, Cloneable {
+
+	public static final ThreadGroup threadGroup = new ThreadGroup("Commons RDF parsers");
+	private static final ExecutorService threadpool = Executors.newCachedThreadPool(r -> new Thread(threadGroup, r));
+
+	// Basically only used for creating IRIs
+	private static RDFTermFactory internalRdfTermFactory = new SimpleRDFTermFactory();
+
+	/**
+	 * Get the set {@link RDFTermFactory}, if any.
+	 */
+	public Optional<RDFTermFactory> getRdfTermFactory() {
+		return rdfTermFactory;
+	}
+
+	/**
+	 * Get the set content-type {@link RDFSyntax}, if any.
+	 * <p>
+	 * If this is {@link Optional#isPresent()}, then 
+	 * {@link #getContentType()} contains the 
+	 * value of {@link RDFSyntax#mediaType}. 
+	 */
+	public Optional<RDFSyntax> getContentTypeSyntax() {
+		return contentTypeSyntax;
+	}
+	
+	/**
+	 * Get the set content-type String, if any.
+	 * <p>
+	 * If this is {@link Optional#isPresent()} and 
+	 * is recognized by {@link RDFSyntax#byMediaType(String)}, then
+	 * the corresponding {@link RDFSyntax} is set on 
+	 * {@link #getContentType()}, otherwise that is
+	 * {@link Optional#empty()}. 
+	 */
+	public final Optional<String> getContentType() {
+		return contentType;
+	}
+
+	/**
+	 * Get the set {@link Graph} to insert into, if any.
+	 * <p>
+	 * From the call to {@link #parseSynchronusly()}, this
+	 * method is always {@link Optional#isPresent()}
+	 * with a new {@link Graph} instance, and 
+	 * will be the value returned from {@link #parse()}.
+	 */	
+	public Optional<Graph> getIntoGraph() {
+		return intoGraph;
+	}
+
+	/**
+	 * Get the set base {@link IRI}, if present.
+	 * <p>
+	 * 
+	 */	
+	public Optional<IRI> getBase() {
+		return base;
+	}
+
+	/**
+	 * Get the set source {@link InputStream}.
+	 * <p>
+	 * If this is {@link Optional#isPresent()}, then 
+	 * {@link #getSourceFile()} and {@link #getSourceIri()}
+	 * are {@link Optional#empty()}.
+	 */
+	public Optional<InputStream> getSourceInputStream() {
+		return sourceInputStream;
+	}
+
+	/**
+	 * Get the set source {@link Path}.
+	 * <p>
+	 * If this is {@link Optional#isPresent()}, then 
+	 * {@link #getSourceInputStream()} and {@link #getSourceIri()}
+	 * are {@link Optional#empty()}.
+	 */	
+	public Optional<Path> getSourceFile() {
+		return sourceFile;
+	}
+
+	/**
+	 * Get the set source {@link Path}.
+	 * <p>
+	 * If this is {@link Optional#isPresent()}, then 
+	 * {@link #getSourceInputStream()} and {@link #getSourceInputStream()()}
+	 * are {@link Optional#empty()}.
+	 */		
+	public Optional<IRI> getSourceIri() {
+		return sourceIri;
+	}
+
+	private Optional<RDFTermFactory> rdfTermFactory = Optional.empty();
+	private Optional<RDFSyntax> contentTypeSyntax = Optional.empty();
+	private Optional<String> contentType = Optional.empty();
+	private Optional<Graph> intoGraph = Optional.empty();
+	private Optional<IRI> base = Optional.empty();
+	private Optional<InputStream> sourceInputStream = Optional.empty();
+	private Optional<Path> sourceFile = Optional.empty();
+	private Optional<IRI> sourceIri = Optional.empty();
+
+	@Override
+	public AbstractRDFParserBuilder clone() {
+		try {
+			return (AbstractRDFParserBuilder) super.clone();
+		} catch (CloneNotSupportedException e) {
+			throw new RuntimeException(e);
+		}
+	}
+
+	@Override
+	public RDFParserBuilder rdfTermFactory(RDFTermFactory rdfTermFactory) {
+		AbstractRDFParserBuilder c = clone();
+		c.rdfTermFactory = Optional.ofNullable(rdfTermFactory);
+		return c;
+	}
+
+	@Override
+	public RDFParserBuilder contentType(RDFSyntax rdfSyntax) throws IllegalArgumentException {
+		AbstractRDFParserBuilder c = clone();
+		c.contentTypeSyntax = Optional.ofNullable(rdfSyntax);
+		c.contentType = c.contentTypeSyntax.map(syntax -> syntax.mediaType);
+		return c;
+	}
+
+	@Override
+	public RDFParserBuilder contentType(String contentType) {
+		AbstractRDFParserBuilder c = clone();
+		c.contentType = Optional.ofNullable(contentType);
+		c.contentTypeSyntax = c.contentType.flatMap(RDFSyntax::byMediaType);
+		return c;
+	}
+
+	@Override
+	public RDFParserBuilder intoGraph(Graph graph) {
+		AbstractRDFParserBuilder c = clone();
+		c.intoGraph = Optional.ofNullable(graph);
+		return c;
+	}
+
+	@Override
+	public RDFParserBuilder base(IRI base) {
+		AbstractRDFParserBuilder c = clone();
+		c.base = Optional.ofNullable(base);
+		c.base.ifPresent(i -> checkIsAbsolute(i));
+		return c;
+	}
+
+	@Override
+	public RDFParserBuilder base(String base) throws IllegalArgumentException {
+		return base(internalRdfTermFactory.createIRI(base));
+	}
+
+	@Override
+	public RDFParserBuilder source(InputStream inputStream) {
+		AbstractRDFParserBuilder c = clone();
+		c.resetSource();
+		c.sourceInputStream = Optional.ofNullable(inputStream);
+		return c;
+	}
+
+	@Override
+	public RDFParserBuilder source(Path file) {
+		AbstractRDFParserBuilder c = clone();
+		c.resetSource();
+		c.sourceFile = Optional.ofNullable(file);
+		return c;
+	}
+
+	@Override
+	public RDFParserBuilder source(IRI iri) {
+		AbstractRDFParserBuilder c = clone();
+		c.resetSource();
+		c.sourceIri = Optional.ofNullable(iri);
+		c.sourceIri.ifPresent(i -> checkIsAbsolute(i));
+		return c;
+	}
+
+	@Override
+	public RDFParserBuilder source(String iri) throws IllegalArgumentException {
+		AbstractRDFParserBuilder c = clone();
+		c.resetSource();
+		c.sourceIri = Optional.ofNullable(iri).map(internalRdfTermFactory::createIRI);
+		c.sourceIri.ifPresent(i -> checkIsAbsolute(i));
+		return source(internalRdfTermFactory.createIRI(iri));
+	}
+
+	/**
+	 * Check if an iri is absolute.
+	 * <p>
+	 * Used by {@link #source(String)} and {@link #base(String)}
+	 * 
+	 * @param iri
+	 */
+	protected void checkIsAbsolute(IRI iri) {
+		if (!URI.create(iri.getIRIString()).isAbsolute()) {
+			throw new IllegalArgumentException("IRI is not absolute: " + iri);
+		}
+	}
+
+	/**
+	 * Check that one and only one source is present and valid.
+	 * <p>
+	 * Used by {@link #parse()}.
+	 * 
+	 * @param iri
+	 * @throws IOException
+	 */
+	protected void checkSource() throws IOException {
+		if (!sourceFile.isPresent() && !sourceInputStream.isPresent() && !sourceIri.isPresent()) {
+			throw new IllegalStateException("No source has been set");
+		}
+		if (sourceIri.isPresent() && sourceInputStream.isPresent()) {
+			throw new IllegalStateException("Both sourceIri and sourceInputStream have been set");
+		}
+		if (sourceIri.isPresent() && sourceFile.isPresent()) {
+			throw new IllegalStateException("Both sourceIri and sourceFile have been set");
+		}
+		if (sourceInputStream.isPresent() && sourceFile.isPresent()) {
+			throw new IllegalStateException("Both sourceInputStream and sourceFile have been set");
+		}
+		if (sourceFile.isPresent() && !sourceFile.filter(Files::isReadable).isPresent()) {
+			throw new IOException("Can't read file: " + sourceFile);
+		}
+	}
+
+	/**
+	 * Check if base is required.
+	 * 
+	 * @throws IllegalStateException if base is required, but not set.
+	 */
+	protected void checkBaseRequired() {
+		if (!base.isPresent() && sourceInputStream.isPresent()
+				&& !contentTypeSyntax.filter(t -> t == RDFSyntax.NQUADS || t == RDFSyntax.NTRIPLES).isPresent()) {
+			throw new IllegalStateException("base iri required for inputstream source");
+		}
+	}
+
+	/**
+	 * Reset all source* fields to Optional.empty()
+	 * <p>
+	 * Subclasses should override this and call <code>super.resetSource()</code>
+	 * if they need to reset any additional source* fields.
+	 * 
+	 */
+	protected void resetSource() {
+		sourceInputStream = Optional.empty();
+		sourceIri = Optional.empty();
+		sourceFile = Optional.empty();
+	}
+
+	/**
+	 * Parse {@link #sourceInputStream}, {@link #sourceFile} or
+	 * {@link #sourceIri}.
+	 * <p>
+	 * One of the source fields MUST be present, as checked by {@link #checkSource()}.
+	 * <p>
+	 * {@link #checkBaseRequired()} is called to verify if {@link #getBase()} is required.
+	 * <p>
+	 * When this method is called, {@link #intoGraph} MUST always be present, as
+	 * that is where the parsed triples MUST be inserted into.
+	 * <p>
+	 * 
+	 * @return
+	 * @throws IOException
+	 * @throws IllegalStateException
+	 * @throws ParseException
+	 */
+	protected abstract void parseSynchronusly() throws IOException, IllegalStateException, ParseException;
+
+	/**
+	 * Prepare a clone of this RDFParserBuilder which have been checked and
+	 * completed.
+	 * <p>
+	 * The returned clone will always have
+	 * {@link #getIntoGraph()} and {@link #getRdfTermFactory()} present.
+	 * <p>
+	 * If the {@link #getSourceFile()} is present, but the 
+	 * {@link #getBase()} is not present, the base will be set to the
+	 * <code>file:///</code> IRI for the Path's real path (e.g. resolving any 
+	 * symbolic links).  
+	 *  
+	 * @return
+	 * @throws IOException
+	 * @throws IllegalStateException
+	 */
+	protected AbstractRDFParserBuilder prepareForParsing() throws IOException, IllegalStateException {
+		checkSource();
+		checkBaseRequired();
+		// We'll make a clone of our current state which will be passed to
+		// parseSynchronously()
+		AbstractRDFParserBuilder c = clone();
+
+		// Use a fresh SimpleRDFTermFactory for each parse
+		if (!c.rdfTermFactory.isPresent()) {
+			c.rdfTermFactory = Optional.of(createRDFTermFactory());
+		}
+		// No graph? We'll create one.
+		if (!c.intoGraph.isPresent()) {
+			c.intoGraph = c.rdfTermFactory.map(RDFTermFactory::createGraph);
+		}
+		// sourceFile, but no base? Let's follow any symlinks and use
+		// the file:/// URI
+		if (c.sourceFile.isPresent() && !c.base.isPresent()) {
+			URI baseUri = c.sourceFile.get().toRealPath().toUri();
+			c.base = Optional.of(internalRdfTermFactory.createIRI(baseUri.toString()));
+		}
+		return c;
+	}
+
+	/**
+	 * Create a new {@link RDFTermFactory} for a parse session.
+	 * <p>
+	 * This is called by {@link #parse()} to set 
+	 * {@link #rdfTermFactory(RDFTermFactory)} if it is
+	 * {@link Optional#empty()}, and therefore used also for 
+	 * creating a new {@link Graph} if 
+	 * {@link #getIntoGraph()} is {@link Optional#empty()}.
+	 * <p>
+	 * 
+	 * 
+	 * @return
+	 */
+	protected RDFTermFactory createRDFTermFactory() {
+		return new SimpleRDFTermFactory();
+	}
+
+	@Override
+	public Future<Graph> parse() throws IOException, IllegalStateException {
+		final AbstractRDFParserBuilder c = prepareForParsing();
+		return threadpool.submit(() -> {
+			c.parseSynchronusly();
+			return c.intoGraph.get();
+		});
+	}
+
+}