You are viewing a plain text version of this content. The canonical link for it is here.
Posted to pr@jena.apache.org by GitBox <gi...@apache.org> on 2021/03/01 19:02:40 UTC

[GitHub] [jena] kinow commented on a change in pull request #940: JENA-2031: Refine IRI settings

kinow commented on a change in pull request #940:
URL: https://github.com/apache/jena/pull/940#discussion_r584954705



##########
File path: jena-arq/src/main/java/org/apache/jena/riot/RDFParserBuilder.java
##########
@@ -291,6 +293,14 @@ public RDFParserBuilder httpClient(HttpClient httpClient) {
      */
     public RDFParserBuilder resolveURIs(boolean flag) { this.resolveURIs = flag ; return this; }
 
+    /**
+     * Provide a specific {@link IRIxResolver} to check and resolve URIs. It's

Review comment:
       Typo: s/It's/Its/

##########
File path: jena-arq/src/main/java/org/apache/jena/riot/system/Checker.java
##########
@@ -18,161 +18,363 @@
 
 package org.apache.jena.riot.system;
 
+import java.util.Iterator;
+import java.util.Objects;
+import java.util.regex.Pattern;
 
-import org.apache.jena.graph.Node ;
-import org.apache.jena.graph.Triple ;
-import org.apache.jena.riot.checker.* ;
+import org.apache.jena.JenaRuntime;
+import org.apache.jena.datatypes.RDFDatatype;
+import org.apache.jena.graph.Node;
+import org.apache.jena.graph.Node_Triple;
+import org.apache.jena.graph.Triple;
+import org.apache.jena.iri.IRI;
+import org.apache.jena.iri.IRIComponents;
+import org.apache.jena.iri.Violation;
+import org.apache.jena.irix.IRIs;
+import org.apache.jena.irix.SetupJenaIRI;
 import org.apache.jena.sparql.core.Quad;
+import org.apache.jena.sparql.graph.NodeConst;
+import org.apache.jena.util.SplitIRI;
 
-/** A checker that drives the process of validating RDF terms, triples and quads. */
-public final class Checker
-{
-    private boolean allowRelativeIRIs = false ;
-    private boolean warningsAreErrors = false ;
-    private ErrorHandler handler ;
+/**
+ * Functions for checking nodes, triples and quads.
+ * <p>
+ * If the errorHandler is null, use the system wide handler.
+ * <p>
+ * If the errorHandler line/columns numbers are -1, -1, messages do not include them.
+ * <p>
+ * Operations "<tt>checkXXX(<i>item</i>)</tt>" are for boolean testing
+ * and do not generate output.
+ */
 
-    private NodeChecker checkLiterals ;
-    private NodeChecker checkURIs ;
-    private NodeChecker checkBlankNodes ;
-    private NodeChecker checkVars ;
+public class Checker {
 
-    public Checker() {
-        this(null);
+    /** A node -- must be concrete node or a variable. */
+    public static boolean check(Node node) {
+        return check(node, nullErrorHandler, -1, -1);
     }
 
-    public Checker(ErrorHandler handler) {
-        if ( handler == null )
-            handler = ErrorHandlerFactory.getDefaultErrorHandler();
-        this.handler = handler;
+    /** A node -- must be a concrete node or a variable. */
+    public static boolean check(Node node, ErrorHandler errorHandler, long line, long col) {
+        if ( node.isURI() )
+            return checkIRI(node, errorHandler, line, col);
+        else if ( node.isBlank() )
+            return checkBlankNode(node, errorHandler, line, col);
+        else if ( node.isLiteral() )
+            return checkLiteral(node, errorHandler, line, col);
+        else if ( node.isVariable() )
+            return checkVar(node, errorHandler, line, col);
+        else if ( node.isNodeTriple() ) {
+            Triple t = Node_Triple.triple(node);
+            return check(t.getSubject()) && check(t.getPredicate()) && check(t.getObject())
+                    && checkTriple(t);
+        }
+        errorHandler(errorHandler).warning("Not a recognized node: ", line, col);
+        return false;
+    }
 
-        checkLiterals = new CheckerLiterals(handler);
+    // ==== IRIs
 
-        checkURIs = new CheckerIRI(handler);
-        checkBlankNodes = new CheckerBlankNodes(handler);
-        checkVars = new CheckerVar(handler);
+    public static boolean checkIRI(Node node) {
+        return checkIRI(node, nullErrorHandler, -1, -1);
     }
 
-    public boolean check(Node node, long line, long col) {
-        // NodeVisitor?
-        if      ( node.isURI() )        return checkIRI(node, line, col) ;
-        else if ( node.isBlank() )      return checkBlank(node, line, col) ;
-        else if ( node.isLiteral() )    return checkLiteral(node, line, col) ;
-        else if ( node.isVariable() )   return checkVar(node, line, col) ;
-        handler.warning("Not a recognized node: ", line, col) ;
-        return false ;
+    public static boolean checkIRI(Node node, ErrorHandler errorHandler, long line, long col) {
+        if ( !node.isURI() ) {
+            errorHandler(errorHandler).error("Not a URI: " + node, line, col);
+            return false;
+        }
+        return checkIRI(node.getURI(), errorHandler, -1, -1);
     }
 
-    /** Check a triple - assumes individual nodes are legal */
-    public boolean check(Triple triple, long line, long col) {
-        return checkTriple(triple.getSubject(), triple.getPredicate(), triple.getObject(), line, col);
+    public static boolean checkIRI(String iriStr) {
+        return checkIRI(iriStr, nullErrorHandler, -1, -1);
     }
 
-    /** Check a triple against the RDF rules for a triple : subject is a IRI or bnode, predicate is a IRI and object is an bnode, literal or IRI */
-    public boolean checkTriple(Node subject, Node predicate, Node object, long line, long col) {
-        boolean rc = true;
+    /** See also {@link IRIs#reference} */
+    public static boolean checkIRI(String iriStr, ErrorHandler errorHandler, long line, long col) {
+        IRI iri = SetupJenaIRI.iriCheckerFactory().create(iriStr);
+        boolean b = iriViolations(iri, errorHandler, line, col);
+        return b;
+    }
 
-        if ( subject == null || (!subject.isURI() && !subject.isBlank()) ) {
-            handler.error("Subject is not a URI or blank node", line, col);
-            rc = false;
-        }
-        if ( predicate == null || (!predicate.isURI()) ) {
-            handler.error("Predicate not a URI", line, col);
-            rc = false;
+    /**
+     * Process violations on an IRI Calls the {@link ErrorHandler} on all errors and
+     * warnings (as warnings).
+     */
+    public static void iriViolations(IRI iri) {
+        iriViolations(iri, null, false, true, -1L, -1L);
+    }
+
+    /**
+     * Process violations on an IRI Calls the {@link ErrorHandler} on all errors and
+     * warnings (as warnings).
+     */
+    public static boolean iriViolations(IRI iri, ErrorHandler errorHandler, long line, long col) {
+        return iriViolations(iri, errorHandler, false, true, line, col);
+    }
+
+    /**
+     * Process violations on an IRI Calls the errorHandler on all errors and warnings
+     * (as warning). (If checking for relative IRIs, these are sent out as errors.)
+     * Assumes error handler throws exceptions on errors if need be
+     */
+    public static boolean iriViolations(IRI iri, ErrorHandler errorHandler,
+                                        boolean allowRelativeIRIs, boolean includeIRIwarnings,
+                                        long line, long col) {
+        if ( !allowRelativeIRIs && iri.isRelative() )
+            errorHandler(errorHandler).error("Relative IRI: " + iri, line, col);
+
+        boolean isOK = true;
+
+        if ( iri.hasViolation(includeIRIwarnings) ) {
+            Iterator<Violation> iter = iri.violations(includeIRIwarnings);
+
+            for ( ; iter.hasNext() ; ) {
+                Violation v = iter.next();
+                int code = v.getViolationCode();
+                boolean isError = v.isError();
+
+                // Anything we want to reprioritise?
+                if ( code == Violation.LOWERCASE_PREFERRED && v.getComponent() != IRIComponents.SCHEME ) {
+                    // Issue warning about the scheme part. Not e.g. DNS names.
+                    continue;
+                }
+                String msg = v.getShortMessage();
+                String iriStr = iri.toString();
+
+                errorHandler(errorHandler).warning("Bad IRI: " + msg, line, col);
+
+//                if ( isError )
+//                    errorHandler(errorHandler).warning("Bad IRI: " + msg, line, col);
+//                else
+//                    errorHandler(errorHandler).warning("Not advised IRI: " + msg, line, col);
+                isOK = true;
+            }
         }
-        if ( object == null || (!object.isURI() && !object.isBlank() && !object.isLiteral()) ) {
-            handler.error("Object is not a URI, blank node or literal", line, col);
-            rc = false;
+        return isOK;
+    }
+
+    // ==== Literals
+
+    final static private Pattern langPattern = Pattern.compile("[a-zA-Z]{1,8}(-[a-zA-Z0-9]{1,8})*");
+
+    public static boolean checkLiteral(Node node) {
+        return checkLiteral(node, nullErrorHandler, -1, -1);
+    }
+
+    public static boolean checkLiteral(Node node, ErrorHandler errorHandler, long line, long col) {
+        if ( !node.isLiteral() ) {
+            errorHandler(errorHandler).error("Not a literal: " + node, line, col);
+            return false;
         }
-        return rc;
+
+        return checkLiteral(node.getLiteralLexicalForm(), node.getLiteralLanguage(), node.getLiteralDatatype(), errorHandler, line, col);
     }
 
-    /** Check a quad - assumes individual nodes are legal */
-    public boolean checkQuad(Quad quad, long line, long col) {
-        return checkQuad(quad.getGraph(), quad.getSubject(), quad.getPredicate(), quad.getObject(), line, col);
+    public static boolean checkLiteral(String lexicalForm, RDFDatatype datatype, ErrorHandler errorHandler, long line, long col) {
+        return checkLiteral(lexicalForm, null, datatype, errorHandler, line, col);
     }
 
-    /** Check a quad against the RDF rules for a quad : subject is a IRI or bnode, predicate is a IRI and object is an bnode, literal or IRI */
-    public boolean checkQuad(Node graph, Node subject, Node predicate, Node object, long line, long col) {
-        boolean rc = true;
+    public static boolean checkLiteral(String lexicalForm, String lang, ErrorHandler errorHandler, long line, long col) {
+        return checkLiteral(lexicalForm, lang, null, errorHandler, line, col);
+    }
 
-        if ( graph == null || (!graph.isURI() && !graph.isBlank()) ) {
-            handler.error("Graph name is not a URI or blank node", line, col);
-            rc = false;
+    public static boolean checkLiteral(String lexicalForm, String lang, RDFDatatype datatype, ErrorHandler errorHandler, long line,
+                                       long col) {
+        boolean hasLang = lang != null && !lang.equals("");
+        if ( !hasLang ) {
+            // Datatype check (and RDF 1.0 simple literals are always well formed)
+            if ( datatype != null )
+                return validateByDatatype(lexicalForm, datatype, errorHandler, line, col);
+            return true;
         }
 
-        if ( subject == null || (!subject.isURI() && !subject.isBlank() && !subject.isNodeTriple() ) ) {
-            handler.error("Subject is not a URI, blank node or RDF-star triple term", line, col);
-            rc = false;
+        // Has a language.
+        if ( JenaRuntime.isRDF11 ) {
+            if ( datatype != null && !Objects.equals(datatype.getURI(), NodeConst.rdfLangString.getURI()) ) {
+                errorHandler(errorHandler).error("Literal has language but wrong datatype", line, col);
+                return false;
+            }
+        } else {
+            if ( datatype != null ) {
+                errorHandler(errorHandler).error("Literal has datatype and language", line, col);
+                return false;
+            }
         }
-        if ( predicate == null || (!predicate.isURI()) ) {
-            handler.error("Predicate not a URI", line, col);
-            rc = false;
+
+        // Test language tag format -- not a perfect test.
+        if ( !lang.isEmpty() && !langPattern.matcher(lang).matches() ) {
+            errorHandler(errorHandler).warning("Language not valid: " + lang, line, col);
+            return false;
         }
-        if ( object == null || (!object.isURI() && !object.isBlank() && !object.isLiteral() && !subject.isNodeTriple() ) ) {
-            handler.error("Object is not a URI, blank node, literal or RDF-star triple term", line, col);
-            rc = false;
+        return true;
+    }
+
+    // Whitespace.
+    // XSD allows whitespace before and after the lexical forms of a literal but not
+    // insiode.

Review comment:
       Typo: s/insiode/inside/

##########
File path: jena-core/src/main/java/org/apache/jena/irix/SystemIRIx.java
##########
@@ -68,15 +89,24 @@ private static IRIx establishBaseURI() {
             String baseStr = IRILib.filenameToIRI("./");
             if ( ! baseStr.endsWith("/") )
                 baseStr = baseStr+"/";
-            return setSystemBase(baseStr);
+            return setupBase(baseStr);
         } catch (Throwable ex) {
             ex.printStackTrace();
             // e.g. No filesystem.
-            return IRIx.create("urn:base:");
+            return IRIx.create(fallbackBaseURI);
         }
     }
 
-    private static IRIx setSystemBase(String baseStr) {
+    /**
+     * Create an {@link IRIx} suitable for a system base.
+     * This oepration always returns an {@link IRIx}

Review comment:
       Typo: s/oepration/operation/




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



---------------------------------------------------------------------
To unsubscribe, e-mail: pr-unsubscribe@jena.apache.org
For additional commands, e-mail: pr-help@jena.apache.org