You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@taverna.apache.org by st...@apache.org on 2018/05/10 12:33:27 UTC
[7/7] incubator-taverna-language git commit: TAVERNA-1044: Allow
partial parsing of metadata.rdf
TAVERNA-1044: Allow partial parsing of metadata.rdf
.. by setting errorHandler() for Jena
.. also move all W3CDTF code to RDFUtil
Project: http://git-wip-us.apache.org/repos/asf/incubator-taverna-language/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-taverna-language/commit/e1e95d19
Tree: http://git-wip-us.apache.org/repos/asf/incubator-taverna-language/tree/e1e95d19
Diff: http://git-wip-us.apache.org/repos/asf/incubator-taverna-language/diff/e1e95d19
Branch: refs/heads/master
Commit: e1e95d197fc91ae5d10307557a0ca9f1ef1f9e68
Parents: 0c83946
Author: Stian Soiland-Reyes <st...@apache.org>
Authored: Thu May 10 13:24:48 2018 +0100
Committer: Stian Soiland-Reyes <st...@apache.org>
Committed: Thu May 10 13:24:48 2018 +0100
----------------------------------------------------------------------
.../manifest/combine/CombineManifest.java | 20 ++---
.../apache/taverna/robundle/utils/RDFUtils.java | 84 +++++++++++++-------
2 files changed, 63 insertions(+), 41 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-taverna-language/blob/e1e95d19/taverna-robundle/src/main/java/org/apache/taverna/robundle/manifest/combine/CombineManifest.java
----------------------------------------------------------------------
diff --git a/taverna-robundle/src/main/java/org/apache/taverna/robundle/manifest/combine/CombineManifest.java b/taverna-robundle/src/main/java/org/apache/taverna/robundle/manifest/combine/CombineManifest.java
index b171835..0efdd81 100644
--- a/taverna-robundle/src/main/java/org/apache/taverna/robundle/manifest/combine/CombineManifest.java
+++ b/taverna-robundle/src/main/java/org/apache/taverna/robundle/manifest/combine/CombineManifest.java
@@ -67,6 +67,7 @@ import org.apache.jena.rdf.model.Statement;
import org.apache.jena.riot.Lang;
import org.apache.jena.riot.RDFParser;
import org.apache.jena.riot.RiotException;
+import org.apache.jena.riot.system.ErrorHandlerFactory;
import org.apache.taverna.robundle.Bundle;
import org.apache.taverna.robundle.manifest.Agent;
import org.apache.taverna.robundle.manifest.PathAnnotation;
@@ -242,8 +243,12 @@ public class CombineManifest {
.base(fakeFileURI(metadata))
.lang(Lang.RDFXML)
.source(in)
+ // TAVERNA-1044 avoid bailing out on broken XML
+ .errorHandler(ErrorHandlerFactory.errorHandlerWarn)
.parse(model.getGraph());
}
+ //System.out.println("Parsed:");
+ //model.write(System.out, "turtle");
return model;
}
@@ -351,7 +356,6 @@ public class CombineManifest {
Model metadata;
try {
metadata = parseRDF(metadataRdf);
- metadata.write(System.out, "turtle");
} catch (IOException e) {
logger.log(WARNING, "Can't read " + metadataRdf, e);
return;
@@ -364,7 +368,9 @@ public class CombineManifest {
for (URI subject : bundleSubjects()) {
Resource resource = metadata.getResource(fakeFileURI(subject));
if (!metadata.containsResource(resource)) {
- System.out.println("Nothing known about " + resource);
+ // No metadata about that resource, probably OK, but
+ // could be an absolute/relative path issue
+ logger.info("No metadata.rdf triples found about " + resource);
continue;
}
@@ -391,16 +397,6 @@ public class CombineManifest {
createdSt = resource.getProperty(dcCreated);
if (createdSt != null) {
FileTime fileTime = literalAsFileTime(createdSt.getObject());
- if (fileTime == null && createdSt.getResource().isResource()) {
- // perhaps one of those strange mixups of XML and RDF...
- Property dcW3CDTF = metadata
- .getProperty("http://purl.org/dc/terms/W3CDTF");
- Statement w3cSt = createdSt.getResource().getProperty(
- dcW3CDTF);
- if (w3cSt != null) {
- fileTime = literalAsFileTime(w3cSt.getObject());
- }
- }
if (fileTime != null) {
pathMetadata.setCreatedOn(fileTime);
if (pathMetadata.getFile() != null)
http://git-wip-us.apache.org/repos/asf/incubator-taverna-language/blob/e1e95d19/taverna-robundle/src/main/java/org/apache/taverna/robundle/utils/RDFUtils.java
----------------------------------------------------------------------
diff --git a/taverna-robundle/src/main/java/org/apache/taverna/robundle/utils/RDFUtils.java b/taverna-robundle/src/main/java/org/apache/taverna/robundle/utils/RDFUtils.java
index 2b13922..f92cc71 100644
--- a/taverna-robundle/src/main/java/org/apache/taverna/robundle/utils/RDFUtils.java
+++ b/taverna-robundle/src/main/java/org/apache/taverna/robundle/utils/RDFUtils.java
@@ -10,9 +10,9 @@ import static java.nio.file.attribute.FileTime.fromMillis;
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
- *
+ *
* http://www.apache.org/licenses/LICENSE-2.0
- *
+ *
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
@@ -31,7 +31,9 @@ import java.util.logging.Logger;
import org.apache.jena.datatypes.DatatypeFormatException;
import org.apache.jena.datatypes.xsd.XSDDateTime;
import org.apache.jena.rdf.model.Literal;
+import org.apache.jena.rdf.model.Property;
import org.apache.jena.rdf.model.RDFNode;
+import org.apache.jena.rdf.model.Resource;
import org.apache.jena.rdf.model.Statement;
import org.apache.jena.vocabulary.RDF;
@@ -43,45 +45,69 @@ public class RDFUtils {
if (rdfNode == null) {
return null;
}
- final Literal literal;
+ Literal literal = null;
if (rdfNode.isLiteral()) {
+ /* Example:
+ <dcterms:created
+ rdf:datatype="http://www.w3.org/2001/XMLSchema#dateTime">2014-06-26T10:29:00Z</dcterms:created>
+ */
literal = rdfNode.asLiteral();
- } else {
- // TAVERNA-1044: not a literal, so assume a resource.
- // Let's climb into rdf:value if it exists, in case we're in a
- // <dct:W3CDTF> typed bnode.
- Statement valueStmt = rdfNode.asResource().getProperty(RDF.value);
- if (valueStmt == null) {
- // Make our own exception so logger gets a stacktrace
- Exception ex = new Exception("Can't find timestamp as literal");
- logger.log(Level.WARNING,
- "Expected literal or resource with rdf:value. not " + rdfNode,
- ex);
- return null;
- }
- if (valueStmt.getObject().isLiteral()) {
- literal = valueStmt.getObject().asLiteral();
- } else {
- Exception ex = new Exception("Invalid timestamp literal");
- logger.log(Level.WARNING,
- "Expected rdf:value statement with literal object, not" + valueStmt,
- ex);
- return null;
+ } else {
+ // TAVERNA-1044: not a literal, so assume a resource
+ // with the literal nested somehow
+ Resource resource = rdfNode.asResource();
+
+ // Potential type of bnode
+ Resource dctW3CDTF = rdfNode.getModel().getResource("http://purl.org/dc/terms/W3CDTF");
+ // TAVERNA-1044 - COMBINE sometimes misuses DCT:W3CDTF as if it were a property
+ Property dctW3CDTFProp = rdfNode.getModel().getProperty("http://purl.org/dc/terms/W3CDTF");
+
+ if (resource.hasProperty(RDF.type, dctW3CDTF)) {
+ // Semantically correct pattern, pick up rdf:value directly.
+ /* Example:
+ <dcterms:created>
+ <dcterms:W3CDTF>
+ <rdf:value>2018-05-10T02:38:51Z</rdf:value>
+ </dcterms:W3CDTF>
+ </dcterms:created>
+ */
+ Statement valueStmt = rdfNode.asResource().getProperty(RDF.value);
+ if (valueStmt != null && valueStmt.getObject().isLiteral()) {
+ literal = valueStmt.getLiteral();
+ }
+ } else if (resource.hasProperty(dctW3CDTFProp)) {
+ // TAVERNA-1044: Weird, dct:W3CDTF is a type, not a property,
+ // but we'll pretend it is to be compatible with example in
+ // http://identifiers.org/combine.specifications/omex.version-1
+
+ /* Example:
+ <dcterms:created rdf:parseType="Resource">
+ <dcterms:W3CDTF>2014-06-26T10:29:00Z</dcterms:W3CDTF>
+ </dcterms:created>
+ */
+ Statement w3cDtfStmt = resource.getProperty(dctW3CDTFProp);
+ if (w3cDtfStmt != null && w3cDtfStmt.getObject().isLiteral()) {
+ literal = w3cDtfStmt.getLiteral();
+ }
}
}
+ if (literal == null) {
+ Exception ex = new Exception("Invalid timestamp literal");
+ logger.log(Level.WARNING,
+ "Expected literal value or dcterms:W3CDTF instance, not: " + rdfNode,
+ ex);
+ return null;
+ }
Object value = literal.getValue();
XSDDateTime dateTime;
if (value instanceof XSDDateTime) {
dateTime = (XSDDateTime) value;
} else {
- logger.info("Literal not an XSDDateTime, but: " + value.getClass()
- + " " + value);
-
+ logger.info("Literal not an XSDDateTime, but: " + value.getClass() + " " + value);
// Try to parse it anyway
try {
- dateTime = (XSDDateTime) XSDdateTime.parse(literal
- .getLexicalForm());
+ dateTime = (XSDDateTime) XSDdateTime.parse(literal.getLexicalForm());
} catch (DatatypeFormatException e) {
logger.warning("Invalid datetime: " + literal);
return null;