You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@jena.apache.org by rv...@apache.org on 2014/11/25 17:27:01 UTC
[39/48] jena git commit: JENA-816 : Checking literals (RDF 1.0 and 1.1 forms)
JENA-816 : Checking literals (RDF 1.0 and 1.1 forms)
Project: http://git-wip-us.apache.org/repos/asf/jena/repo
Commit: http://git-wip-us.apache.org/repos/asf/jena/commit/707ecb9f
Tree: http://git-wip-us.apache.org/repos/asf/jena/tree/707ecb9f
Diff: http://git-wip-us.apache.org/repos/asf/jena/diff/707ecb9f
Branch: refs/heads/hadoop-rdf
Commit: 707ecb9f81668ca24a88edb17c51b6e77105c734
Parents: c23422a
Author: Andy Seaborne <an...@apache.org>
Authored: Sat Nov 22 16:54:57 2014 +0000
Committer: Andy Seaborne <an...@apache.org>
Committed: Sat Nov 22 16:54:57 2014 +0000
----------------------------------------------------------------------
.../jena/riot/checker/CheckerLiterals.java | 164 ++++++++++---------
1 file changed, 89 insertions(+), 75 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/jena/blob/707ecb9f/jena-arq/src/main/java/org/apache/jena/riot/checker/CheckerLiterals.java
----------------------------------------------------------------------
diff --git a/jena-arq/src/main/java/org/apache/jena/riot/checker/CheckerLiterals.java b/jena-arq/src/main/java/org/apache/jena/riot/checker/CheckerLiterals.java
index ca5cf6b..4fa8f53 100644
--- a/jena-arq/src/main/java/org/apache/jena/riot/checker/CheckerLiterals.java
+++ b/jena-arq/src/main/java/org/apache/jena/riot/checker/CheckerLiterals.java
@@ -16,8 +16,9 @@
* limitations under the License.
*/
-package org.apache.jena.riot.checker;
+package org.apache.jena.riot.checker ;
+import java.util.Objects ;
import java.util.regex.Pattern ;
import org.apache.jena.riot.SysRIOT ;
@@ -28,91 +29,88 @@ import org.apache.xerces.impl.dv.ValidationContext ;
import org.apache.xerces.impl.dv.XSSimpleType ;
import org.apache.xerces.impl.validation.ValidationState ;
+import com.hp.hpl.jena.JenaRuntime ;
import com.hp.hpl.jena.datatypes.RDFDatatype ;
import com.hp.hpl.jena.datatypes.xsd.impl.XSDAbstractDateTimeType ;
import com.hp.hpl.jena.datatypes.xsd.impl.XSDBaseNumericType ;
import com.hp.hpl.jena.datatypes.xsd.impl.XSDDouble ;
import com.hp.hpl.jena.datatypes.xsd.impl.XSDFloat ;
import com.hp.hpl.jena.graph.Node ;
+import com.hp.hpl.jena.sparql.graph.NodeConst ;
-public class CheckerLiterals implements NodeChecker
-{
+public class CheckerLiterals implements NodeChecker {
// A flag to enable the test suite to read bad data.
public static boolean WarnOnBadLiterals = true ;
-
- private ErrorHandler handler ;
- public CheckerLiterals(ErrorHandler handler)
- {
+
+ private ErrorHandler handler ;
+
+ public CheckerLiterals(ErrorHandler handler) {
this.handler = handler ;
}
-
+
@Override
- public boolean check(Node node, long line, long col)
- { return node.isLiteral() && checkLiteral(node, handler, line, col) ; }
-
+ public boolean check(Node node, long line, long col) {
+ return node.isLiteral() && checkLiteral(node, handler, line, col) ;
+ }
+
final static private Pattern langPattern = Pattern.compile("[a-zA-Z]{1,8}(-[a-zA-Z0-9]{1,8})*") ;
- public static boolean checkLiteral(Node node, ErrorHandler handler, long line, long col)
- {
- if ( ! node.isLiteral() )
- {
- handler.error("Not a literal: "+node, line, col) ;
+ public static boolean checkLiteral(Node node, ErrorHandler handler, long line, long col) {
+ if ( !node.isLiteral() ) {
+ handler.error("Not a literal: " + node, line, col) ;
return false ;
}
-
- return checkLiteral(node.getLiteralLexicalForm(), node.getLiteralLanguage(), node.getLiteralDatatype(),
- handler, line, col) ;
+
+ return checkLiteral(node.getLiteralLexicalForm(), node.getLiteralLanguage(), node.getLiteralDatatype(), handler, line, col) ;
}
-
-
- public static boolean checkLiteral(String lexicalForm, RDFDatatype datatype,
- ErrorHandler handler, long line, long col)
- {
+
+ public static boolean checkLiteral(String lexicalForm, RDFDatatype datatype, ErrorHandler handler, long line, long col) {
return checkLiteral(lexicalForm, null, datatype, handler, line, col) ;
}
- public static boolean checkLiteral(String lexicalForm, String lang,
- ErrorHandler handler, long line, long col)
- {
+ public static boolean checkLiteral(String lexicalForm, String lang, ErrorHandler handler, long line, long col) {
return checkLiteral(lexicalForm, lang, null, handler, line, col) ;
}
-
- public static boolean checkLiteral(String lexicalForm, String lang, RDFDatatype datatype,
- ErrorHandler handler, long line, long col)
- {
- if ( ! WarnOnBadLiterals )
+
+ public static boolean checkLiteral(String lexicalForm, String lang, RDFDatatype datatype, ErrorHandler handler,
+ long line, long col) {
+ if ( !WarnOnBadLiterals )
return true ;
-
- boolean hasLang = lang != null && ! lang.equals("") ;
-
- if ( datatype != null && hasLang )
- handler.error("Literal has datatype and language", line, col) ;
-
- // Datatype check (and plain literals are always well formed)
- if ( datatype != null )
- return validateByDatatype(lexicalForm, datatype, handler, line, col) ;
-
- // No datatype. Language?
-
- if ( hasLang )
- {
- // Not a perfect test.
- if ( lang.length() > 0 && ! langPattern.matcher(lang).matches() )
- {
- handler.warning("Language not valid: "+lang, line, col) ;
- return false;
+
+ boolean hasLang = lang != null && !lang.equals("") ;
+ if ( !hasLang ) {
+ // Datatype check (and RDF 1.0 simpl literals are always well
+ // formed)
+ if ( datatype != null )
+ return validateByDatatype(lexicalForm, datatype, handler, line, col) ;
+ return true ;
+ }
+
+ // Has a language.
+ if ( JenaRuntime.isRDF11 ) {
+ if ( datatype != null && !Objects.equals(datatype.getURI(), NodeConst.rdfLangString.getURI()) ) {
+ handler.error("Literal has language but wrong datatype", line, col) ;
+ return false ;
+ }
+ } else {
+ if ( datatype != null ) {
+ handler.error("Literal has datatype and language", line, col) ;
+ return false ;
}
}
-
+
+ // Test language tag format -- not a perfect test.
+ if ( !lang.isEmpty() && !langPattern.matcher(lang).matches() ) {
+ handler.warning("Language not valid: " + lang, line, col) ;
+ return false ;
+ }
return true ;
}
- protected static boolean validateByDatatype(String lexicalForm, RDFDatatype datatype, ErrorHandler handler, long line, long col)
- {
- // XXX Reconsider.
- if ( SysRIOT.StrictXSDLexicialForms )
- {
- if ( datatype instanceof XSDBaseNumericType || datatype instanceof XSDFloat || datatype instanceof XSDDouble )
+ protected static boolean validateByDatatype(String lexicalForm, RDFDatatype datatype, ErrorHandler handler, long line, long col) {
+ if ( SysRIOT.StrictXSDLexicialForms ) {
+ if ( datatype instanceof XSDBaseNumericType || datatype instanceof XSDFloat
+ || datatype instanceof XSDDouble )
return validateByDatatypeNumeric(lexicalForm, datatype, handler, line, col) ;
if ( datatype instanceof XSDAbstractDateTimeType )
return validateByDatatypeDateTime(lexicalForm, datatype, handler, line, col) ;
@@ -120,30 +118,46 @@ public class CheckerLiterals implements NodeChecker
return validateByDatatypeJena(lexicalForm, datatype, handler, line, col) ;
}
- protected static boolean validateByDatatypeJena(String lexicalForm, RDFDatatype datatype, ErrorHandler handler, long line, long col)
- {
+ protected static boolean validateByDatatypeJena(String lexicalForm, RDFDatatype datatype, ErrorHandler handler,
+ long line, long col) {
if ( datatype.isValid(lexicalForm) )
- return true ;
- handler.warning("Lexical form '"+lexicalForm+"' not valid for datatype "+datatype.getURI(), line, col) ;
+ return true ;
+ handler.warning("Lexical form '" + lexicalForm + "' not valid for datatype " + datatype.getURI(), line, col) ;
return false ;
}
-
- protected static boolean validateByDatatypeDateTime(String lexicalForm, RDFDatatype datatype, ErrorHandler handler, long line, long col)
- {
- if ( lexicalForm.contains(" ") ) { handler.warning("Whitespace in XSD date or time literal: '"+lexicalForm+"'", line, col) ; return false ; }
- if ( lexicalForm.contains("\n") ) { handler.warning("Newline in XSD date or time literal: '"+lexicalForm+"'", line, col) ; return false ; }
- if ( lexicalForm.contains("\r") ) { handler.warning("Newline in XSD date or time literal: '"+lexicalForm+"'", line, col) ; return false ; }
- //if ( ! StrictXSDLexicialForms )
+
+ protected static boolean validateByDatatypeDateTime(String lexicalForm, RDFDatatype datatype, ErrorHandler handler, long line, long col) {
+ if ( lexicalForm.contains(" ") ) {
+ handler.warning("Whitespace in XSD date or time literal: '" + lexicalForm + "'", line, col) ;
+ return false ;
+ }
+ if ( lexicalForm.contains("\n") ) {
+ handler.warning("Newline in XSD date or time literal: '" + lexicalForm + "'", line, col) ;
+ return false ;
+ }
+ if ( lexicalForm.contains("\r") ) {
+ handler.warning("Newline in XSD date or time literal: '" + lexicalForm + "'", line, col) ;
+ return false ;
+ }
+ // if ( ! StrictXSDLexicialForms )
// Jena is already strict.
return validateByDatatypeJena(lexicalForm, datatype, handler, line, col) ;
}
-
- protected static boolean validateByDatatypeNumeric(String lexicalForm, RDFDatatype datatype, ErrorHandler handler, long line, long col)
- {
+
+ protected static boolean validateByDatatypeNumeric(String lexicalForm, RDFDatatype datatype, ErrorHandler handler, long line, long col) {
// Do a white space check as well for numerics.
- if ( lexicalForm.contains(" ") ) { handler.warning("Whitespace in numeric XSD literal: '"+lexicalForm+"'", line, col) ; return false ; }
- if ( lexicalForm.contains("\n") ) { handler.warning("Newline in numeric XSD literal: '"+lexicalForm+"'", line, col) ; return false ; }
- if ( lexicalForm.contains("\r") ) { handler.warning("Carriage return in numeric XSD literal: '"+lexicalForm+"'", line, col) ; return false ; }
+ if ( lexicalForm.contains(" ") ) {
+ handler.warning("Whitespace in numeric XSD literal: '" + lexicalForm + "'", line, col) ;
+ return false ;
+ }
+ if ( lexicalForm.contains("\n") ) {
+ handler.warning("Newline in numeric XSD literal: '" + lexicalForm + "'", line, col) ;
+ return false ;
+ }
+ if ( lexicalForm.contains("\r") ) {
+ handler.warning("Carriage return in numeric XSD literal: '" + lexicalForm + "'", line, col) ;
+ return false ;
+ }
// if ( lit.getDatatype() instanceof XSDAbstractDateTimeType )
// {