You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@taverna.apache.org by st...@apache.org on 2018/05/10 12:33:27 UTC

[7/7] incubator-taverna-language git commit: TAVERNA-1044: Allow partial parsing of metadata.rdf

TAVERNA-1044: Allow partial parsing of metadata.rdf

.. by setting errorHandler() for Jena

.. also move all W3CDTF code to RDFUtils


Project: http://git-wip-us.apache.org/repos/asf/incubator-taverna-language/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-taverna-language/commit/e1e95d19
Tree: http://git-wip-us.apache.org/repos/asf/incubator-taverna-language/tree/e1e95d19
Diff: http://git-wip-us.apache.org/repos/asf/incubator-taverna-language/diff/e1e95d19

Branch: refs/heads/master
Commit: e1e95d197fc91ae5d10307557a0ca9f1ef1f9e68
Parents: 0c83946
Author: Stian Soiland-Reyes <st...@apache.org>
Authored: Thu May 10 13:24:48 2018 +0100
Committer: Stian Soiland-Reyes <st...@apache.org>
Committed: Thu May 10 13:24:48 2018 +0100

----------------------------------------------------------------------
 .../manifest/combine/CombineManifest.java       | 20 ++---
 .../apache/taverna/robundle/utils/RDFUtils.java | 84 +++++++++++++-------
 2 files changed, 63 insertions(+), 41 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-taverna-language/blob/e1e95d19/taverna-robundle/src/main/java/org/apache/taverna/robundle/manifest/combine/CombineManifest.java
----------------------------------------------------------------------
diff --git a/taverna-robundle/src/main/java/org/apache/taverna/robundle/manifest/combine/CombineManifest.java b/taverna-robundle/src/main/java/org/apache/taverna/robundle/manifest/combine/CombineManifest.java
index b171835..0efdd81 100644
--- a/taverna-robundle/src/main/java/org/apache/taverna/robundle/manifest/combine/CombineManifest.java
+++ b/taverna-robundle/src/main/java/org/apache/taverna/robundle/manifest/combine/CombineManifest.java
@@ -67,6 +67,7 @@ import org.apache.jena.rdf.model.Statement;
 import org.apache.jena.riot.Lang;
 import org.apache.jena.riot.RDFParser;
 import org.apache.jena.riot.RiotException;
+import org.apache.jena.riot.system.ErrorHandlerFactory;
 import org.apache.taverna.robundle.Bundle;
 import org.apache.taverna.robundle.manifest.Agent;
 import org.apache.taverna.robundle.manifest.PathAnnotation;
@@ -242,8 +243,12 @@ public class CombineManifest {
 					.base(fakeFileURI(metadata))
 					.lang(Lang.RDFXML)
 					.source(in)
+					// TAVERNA-1044 avoid bailing out on broken XML
+					.errorHandler(ErrorHandlerFactory.errorHandlerWarn)
 					.parse(model.getGraph());
 		}
+		//System.out.println("Parsed:");
+		//model.write(System.out, "turtle");
 		return model;
 	}
 
@@ -351,7 +356,6 @@ public class CombineManifest {
 		Model metadata;
 		try {
 			metadata = parseRDF(metadataRdf);
-			metadata.write(System.out, "turtle");
 		} catch (IOException e) {
 			logger.log(WARNING, "Can't read " + metadataRdf, e);
 			return;
@@ -364,7 +368,9 @@ public class CombineManifest {
 		for (URI subject : bundleSubjects()) {
 			Resource resource = metadata.getResource(fakeFileURI(subject));
 			if (!metadata.containsResource(resource)) {
-				System.out.println("Nothing known about " + resource);
+				// No metadata about that resource, probably OK, but
+				// could be an absolute/relative path issue
+				logger.info("No metadata.rdf triples found about " + resource);
 				continue;
 			}
 
@@ -391,16 +397,6 @@ public class CombineManifest {
 				createdSt = resource.getProperty(dcCreated);
 			if (createdSt != null) {
 				FileTime fileTime = literalAsFileTime(createdSt.getObject());
-				if (fileTime == null && createdSt.getResource().isResource()) {
-					// perhaps one of those strange mixups of XML and RDF...
-					Property dcW3CDTF = metadata
-							.getProperty("http://purl.org/dc/terms/W3CDTF");
-					Statement w3cSt = createdSt.getResource().getProperty(
-							dcW3CDTF);
-					if (w3cSt != null) {
-						fileTime = literalAsFileTime(w3cSt.getObject());
-					}
-				}
 				if (fileTime != null) {
 					pathMetadata.setCreatedOn(fileTime);
 					if (pathMetadata.getFile() != null)

http://git-wip-us.apache.org/repos/asf/incubator-taverna-language/blob/e1e95d19/taverna-robundle/src/main/java/org/apache/taverna/robundle/utils/RDFUtils.java
----------------------------------------------------------------------
diff --git a/taverna-robundle/src/main/java/org/apache/taverna/robundle/utils/RDFUtils.java b/taverna-robundle/src/main/java/org/apache/taverna/robundle/utils/RDFUtils.java
index 2b13922..f92cc71 100644
--- a/taverna-robundle/src/main/java/org/apache/taverna/robundle/utils/RDFUtils.java
+++ b/taverna-robundle/src/main/java/org/apache/taverna/robundle/utils/RDFUtils.java
@@ -10,9 +10,9 @@ import static java.nio.file.attribute.FileTime.fromMillis;
  * to you under the Apache License, Version 2.0 (the
  * "License"); you may not use this file except in compliance
  * with the License.  You may obtain a copy of the License at
- * 
+ *
  *   http://www.apache.org/licenses/LICENSE-2.0
- * 
+ *
  * Unless required by applicable law or agreed to in writing,
  * software distributed under the License is distributed on an
  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
@@ -31,7 +31,9 @@ import java.util.logging.Logger;
 import org.apache.jena.datatypes.DatatypeFormatException;
 import org.apache.jena.datatypes.xsd.XSDDateTime;
 import org.apache.jena.rdf.model.Literal;
+import org.apache.jena.rdf.model.Property;
 import org.apache.jena.rdf.model.RDFNode;
+import org.apache.jena.rdf.model.Resource;
 import org.apache.jena.rdf.model.Statement;
 import org.apache.jena.vocabulary.RDF;
 
@@ -43,45 +45,69 @@ public class RDFUtils {
 		if (rdfNode == null) {
 			return null;
 		}
-		final Literal literal;
+		Literal literal = null;
 		if (rdfNode.isLiteral()) {
+			/* Example:
+			   <dcterms:created
+			     rdf:datatype="http://www.w3.org/2001/XMLSchema#dateTime">2014-06-26T10:29:00Z</dcterms:created>
+			 */
 			literal = rdfNode.asLiteral();
-		} else { 
-			// TAVERNA-1044: not a literal, so assume a resource.
-			// Let's climb into rdf:value if it exists, in case we're in a
-			// <dct:W3CDTF> typed bnode.
-			Statement valueStmt = rdfNode.asResource().getProperty(RDF.value);
-			if (valueStmt == null) {
-				// Make our own exception so logger gets a stacktrace
-				Exception ex = new Exception("Can't find timestamp as literal");
-				logger.log(Level.WARNING, 
-				           "Expected literal or resource with rdf:value. not " + rdfNode, 
-				           ex);
-				return null;
-			}
-			if (valueStmt.getObject().isLiteral()) {
-				literal = valueStmt.getObject().asLiteral();
-			} else {	
-				Exception ex = new Exception("Invalid timestamp literal");
-				logger.log(Level.WARNING, 
-				           "Expected rdf:value statement with literal object, not" + valueStmt,
-				           ex);
-				return null;				
+		} else {
+			// TAVERNA-1044: not a literal, so assume a resource
+			// with the literal nested somehow
+			Resource resource = rdfNode.asResource();
+
+			// Potential type of bnode
+			Resource dctW3CDTF = rdfNode.getModel().getResource("http://purl.org/dc/terms/W3CDTF");
+			// TAVERNA-1044 - COMBINE sometimes misuses DCT:W3CDTF as if it was a property
+			Property dctW3CDTFProp = rdfNode.getModel().getProperty("http://purl.org/dc/terms/W3CDTF");
+
+			if (resource.hasProperty(RDF.type, dctW3CDTF)) {
+				// Semantically correct pattern, pick up rdf:value directly.
+				/* Example:
+			     <dcterms:created>
+			      <dcterms:W3CDTF>
+			        <rdf:value>2018-05-10T02:38:51Z</rdf:value>
+			      </dcterms:W3CDTF>
+			    </dcterms:created>
+				 */
+				Statement valueStmt = rdfNode.asResource().getProperty(RDF.value);
+				if (valueStmt != null && valueStmt.getObject().isLiteral()) {
+					literal = valueStmt.getLiteral();
+				}
+			} else if (resource.hasProperty(dctW3CDTFProp)) {
+				// TAVERNA-1044: Weird, dct:W3CDTF is a type, not a property,
+				// but we'll pretend it is to be compatible with example in
+				// http://identifiers.org/combine.specifications/omex.version-1
+
+				/* Example:
+				<dcterms:created rdf:parseType="Resource">
+				  <dcterms:W3CDTF>2014-06-26T10:29:00Z</dcterms:W3CDTF>
+				</dcterms:created>
+				*/
+				Statement w3cDtfStmt = resource.getProperty(dctW3CDTFProp);
+				if (w3cDtfStmt != null && w3cDtfStmt.getObject().isLiteral()) {
+					literal = w3cDtfStmt.getLiteral();
+				}
 			}
 		}
+		if (literal == null) {
+			Exception ex = new Exception("Invalid timestamp literal");
+			logger.log(Level.WARNING,
+			           "Expected literal value or dcterms:W3CDTF instance, not: " + rdfNode,
+			           ex);
+			return null;
+		}
 
 		Object value = literal.getValue();
 		XSDDateTime dateTime;
 		if (value instanceof XSDDateTime) {
 			dateTime = (XSDDateTime) value;
 		} else {
-			logger.info("Literal not an XSDDateTime, but: " + value.getClass()
-					+ " " + value);
-
+			logger.info("Literal not an XSDDateTime, but: " + value.getClass() + " " + value);
 			// Try to parse it anyway
 			try {
-				dateTime = (XSDDateTime) XSDdateTime.parse(literal
-						.getLexicalForm());
+				dateTime = (XSDDateTime) XSDdateTime.parse(literal.getLexicalForm());
 			} catch (DatatypeFormatException e) {
 				logger.warning("Invalid datetime: " + literal);
 				return null;