You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@jena.apache.org by an...@apache.org on 2011/10/04 12:44:20 UTC
svn commit: r1178753 - in
/incubator/jena/Jena2/ARQ/trunk/src/main/java/org/openjena/riot:
system/RiotChars.java tokens/TokenizerText.java
Author: andy
Date: Tue Oct 4 10:44:19 2011
New Revision: 1178753
URL: http://svn.apache.org/viewvc?rev=1178753&view=rev
Log:
Pre-work for JENA_129
Modified:
incubator/jena/Jena2/ARQ/trunk/src/main/java/org/openjena/riot/system/RiotChars.java
incubator/jena/Jena2/ARQ/trunk/src/main/java/org/openjena/riot/tokens/TokenizerText.java
Modified: incubator/jena/Jena2/ARQ/trunk/src/main/java/org/openjena/riot/system/RiotChars.java
URL: http://svn.apache.org/viewvc/incubator/jena/Jena2/ARQ/trunk/src/main/java/org/openjena/riot/system/RiotChars.java?rev=1178753&r1=1178752&r2=1178753&view=diff
==============================================================================
--- incubator/jena/Jena2/ARQ/trunk/src/main/java/org/openjena/riot/system/RiotChars.java (original)
+++ incubator/jena/Jena2/ARQ/trunk/src/main/java/org/openjena/riot/system/RiotChars.java Tue Oct 4 10:44:19 2011
@@ -59,8 +59,40 @@ public class RiotChars
public static boolean isNewlineChar(int ch)
{
return ch == '\r' || ch == '\n' ;
- }
-
+ }
+
+ /*
+The token rules from SPARQL and Turtle.
+PNAME_NS ::= PN_PREFIX? ':'
+PNAME_LN ::= PNAME_NS PN_LOCAL[131] BLANK_NODE_LABEL ::= '_:' PN_LOCAL
+PN_CHARS_BASE ::= [A-Z] | [a-z] | [#x00C0-#x00D6] | [#x00D8-#x00F6] | [#x00F8-#x02FF] | [#x0370-#x037D] | [#x037F-#x1FFF] | [#x200C-#x200D] | [#x2070-#x218F] | [#x2C00-#x2FEF] | [#x3001-#xD7FF] | [#xF900-#xFDCF] | [#xFDF0-#xFFFD] | [#x10000-#xEFFFF]
+PN_CHARS_U ::= PN_CHARS_BASE | '_'
+VARNAME ::= ( PN_CHARS_U | [0-9] ) ( PN_CHARS_U | [0-9] | #x00B7 | [#x0300-#x036F] | [#x203F-#x2040] )*
+PN_CHARS ::= PN_CHARS_U | '-' | [0-9] | #x00B7 | [#x0300-#x036F] | [#x203F-#x2040]
+PN_PREFIX ::= PN_CHARS_BASE ((PN_CHARS|'.')* PN_CHARS)?
+PN_LOCAL ::= ( PN_CHARS_U | [0-9] ) ((PN_CHARS|'.')* PN_CHARS)?
+ */
+
+ public static boolean isPNCharsBase(int ch) // Tests ch against the PN_CHARS_BASE ranges of the SPARQL/Turtle token rules quoted above.
+ {
+ // Open question: could a Unicode general-category test replace these explicit ranges?
+ //int type = Character.getType(ch) ;
+ //Character.COMBINING_SPACING_MARK -> What category are we looking at?
+ return
+ r(ch, 'a', 'z') || r(ch, 'A', 'Z') || r(ch, 0x00C0, 0x00D6) || r(ch, 0x00D8, 0x00F6) ||
+ r(ch, 0x00F8, 0x02FF) || r(ch, 0x0370, 0x037D) || r(ch, 0x037F, 0x1FFF) ||
+ r(ch, 0x200C, 0x200D) || r(ch, 0x2070, 0x218F) ||
+ r(ch, 0x2C00 , 0x2FEF) || r(ch, 0x3001, 0xD7FF) || r(ch, 0xF900, 0xFDCF) || r(ch, 0xFDF0, 0xFFFD) ||
+ r(ch, 0x10000, 0xEFFFF) ; // Supplementary code points, i.e. outside the Basic Multilingual Plane.
+ }
+
+ public static boolean isPNChars_U(int ch) // PN_CHARS_U ::= PN_CHARS_BASE | '_'
+ {
+ return isPNCharsBase(ch) || ( ch == '_' ) ;
+ }
+
+
+
public static int valHexChar(int ch)
{
if ( range(ch, '0', '9') )
@@ -72,6 +104,8 @@ public class RiotChars
return -1 ;
}
+ private static boolean r(int ch, int a, int b) { return ( ch >= a && ch <= b ) ; } // True when a <= ch <= b (both bounds inclusive); int-typed bounds, so values above 0xFFFF can be passed.
+
public static boolean range(int ch, char a, char b)
{
Modified: incubator/jena/Jena2/ARQ/trunk/src/main/java/org/openjena/riot/tokens/TokenizerText.java
URL: http://svn.apache.org/viewvc/incubator/jena/Jena2/ARQ/trunk/src/main/java/org/openjena/riot/tokens/TokenizerText.java?rev=1178753&r1=1178752&r2=1178753&view=diff
==============================================================================
--- incubator/jena/Jena2/ARQ/trunk/src/main/java/org/openjena/riot/tokens/TokenizerText.java (original)
+++ incubator/jena/Jena2/ARQ/trunk/src/main/java/org/openjena/riot/tokens/TokenizerText.java Tue Oct 4 10:44:19 2011
@@ -482,6 +482,25 @@ public final class TokenizerText impleme
}
+ /*
+ The token rules from SPARQL and Turtle.
+ PNAME_NS ::= PN_PREFIX? ':'
+ PNAME_LN ::= PNAME_NS PN_LOCAL[131] BLANK_NODE_LABEL ::= '_:' PN_LOCAL
+ PN_CHARS_BASE ::= [A-Z] | [a-z] | [#x00C0-#x00D6] | [#x00D8-#x00F6] | [#x00F8-#x02FF] | [#x0370-#x037D] | [#x037F-#x1FFF] | [#x200C-#x200D] | [#x2070-#x218F] | [#x2C00-#x2FEF] | [#x3001-#xD7FF] | [#xF900-#xFDCF] | [#xFDF0-#xFFFD] | [#x10000-#xEFFFF]
+ PN_CHARS_U ::= PN_CHARS_BASE | '_'
+ VARNAME ::= ( PN_CHARS_U | [0-9] ) ( PN_CHARS_U | [0-9] | #x00B7 | [#x0300-#x036F] | [#x203F-#x2040] )*
+ PN_CHARS ::= PN_CHARS_U | '-' | [0-9] | #x00B7 | [#x0300-#x036F] | [#x203F-#x2040]
+ PN_PREFIX ::= PN_CHARS_BASE ((PN_CHARS|'.')* PN_CHARS)?
+ PN_LOCAL ::= ( PN_CHARS_U | [0-9] ) ((PN_CHARS|'.')* PN_CHARS)?
+ */
+
+
+ private String readLocalPart() // Delegates to readWordSub; the first argument (true) is presumably "leading digit allowed", as in readWord - confirm against readWordSub.
+ { return readWordSub(true, false) ; }
+
+ private String readPrefixPart() // Delegates to readWordSub; the first argument (false) is presumably "leading digit allowed" = no, as in readWord - confirm against readWordSub.
+ { return readWordSub(false, false) ; }
+
// Get characters between two markers.
// strEscapes may be processed
// endNL end of line as an ending is OK
@@ -547,13 +566,7 @@ public final class TokenizerText impleme
insertCodepoint(stringBuilder, ch) ;
}
}
-
- private String readLocalPart()
- { return readWordSub(true, false) ; }
-
- private String readPrefixPart()
- { return readWordSub(false, false) ; }
-
+
private String readWord(boolean leadingDigitAllowed)
{ return readWordSub(leadingDigitAllowed, false) ; }