You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@jena.apache.org by an...@apache.org on 2011/10/04 12:44:20 UTC
svn commit: r1178753 - in /incubator/jena/Jena2/ARQ/trunk/src/main/java/org/openjena/riot: system/RiotChars.java tokens/TokenizerText.java

Author: andy
Date: Tue Oct  4 10:44:19 2011
New Revision: 1178753

URL: http://svn.apache.org/viewvc?rev=1178753&view=rev
Log:
Pre-work for JENA_129

Modified:
    incubator/jena/Jena2/ARQ/trunk/src/main/java/org/openjena/riot/system/RiotChars.java
    incubator/jena/Jena2/ARQ/trunk/src/main/java/org/openjena/riot/tokens/TokenizerText.java

Modified: incubator/jena/Jena2/ARQ/trunk/src/main/java/org/openjena/riot/system/RiotChars.java
URL: http://svn.apache.org/viewvc/incubator/jena/Jena2/ARQ/trunk/src/main/java/org/openjena/riot/system/RiotChars.java?rev=1178753&r1=1178752&r2=1178753&view=diff
==============================================================================
--- incubator/jena/Jena2/ARQ/trunk/src/main/java/org/openjena/riot/system/RiotChars.java (original)
+++ incubator/jena/Jena2/ARQ/trunk/src/main/java/org/openjena/riot/system/RiotChars.java Tue Oct  4 10:44:19 2011
@@ -59,8 +59,40 @@ public class RiotChars
     public static boolean isNewlineChar(int ch)
     {
         return ch == '\r' || ch == '\n' ;
-    }
-
+    }
+    
+    /*
+The token rules from SPARQL and Turtle.
+PNAME_NS       ::=  PN_PREFIX? ':'
+PNAME_LN       ::=  PNAME_NS PN_LOCAL[131]  BLANK_NODE_LABEL  ::=  '_:' PN_LOCAL
+PN_CHARS_BASE  ::=  [A-Z] | [a-z] | [#x00C0-#x00D6] | [#x00D8-#x00F6] | [#x00F8-#x02FF] | [#x0370-#x037D] | [#x037F-#x1FFF] | [#x200C-#x200D] | [#x2070-#x218F] | [#x2C00-#x2FEF] | [#x3001-#xD7FF] | [#xF900-#xFDCF] | [#xFDF0-#xFFFD] | [#x10000-#xEFFFF]
+PN_CHARS_U     ::=  PN_CHARS_BASE | '_'
+VARNAME        ::=  ( PN_CHARS_U  | [0-9] ) ( PN_CHARS_U | [0-9] | #x00B7 | [#x0300-#x036F] | [#x203F-#x2040] )*
+PN_CHARS       ::=  PN_CHARS_U | '-' | [0-9] | #x00B7 | [#x0300-#x036F] | [#x203F-#x2040]
+PN_PREFIX      ::=  PN_CHARS_BASE ((PN_CHARS|'.')* PN_CHARS)?
+PN_LOCAL       ::=  ( PN_CHARS_U | [0-9] ) ((PN_CHARS|'.')* PN_CHARS)?
+     */
+    
+    public static boolean isPNCharsBase(int ch)
+    {
+        // True when ch lies in one of the PN_CHARS_BASE ranges of the grammar quoted above (bounds inclusive).
+        // Open question: could Unicode general categories be used instead of explicit ranges, e.g.
+        //   int type = Character.getType(ch) ;  Character.COMBINING_SPACING_MARK -> which categories apply?
+        return 
+            r(ch, 'a', 'z') || r(ch, 'A', 'Z') || r(ch, 0x00C0, 0x00D6) || r(ch, 0x00D8, 0x00F6) ||
+            r(ch, 0x00F8, 0x02FF) || r(ch, 0x0370, 0x037D) || r(ch, 0x037F, 0x1FFF) || 
+            r(ch, 0x200C, 0x200D) || r(ch, 0x2070, 0x218F) ||
+            r(ch, 0x2C00 , 0x2FEF) || r(ch, 0x3001, 0xD7FF) || r(ch, 0xF900, 0xFDCF) || r(ch, 0xFDF0, 0xFFFD) ||
+            r(ch, 0x10000, 0xEFFFF) ; // Outside the Basic Multilingual Plane: values above 0xFFFF, hence the int parameter.
+    }
+    
+    public static boolean isPNChars_U(int ch)
+    {
+        return isPNCharsBase(ch) || ( ch == '_' ) ; // Grammar rule: PN_CHARS_U ::= PN_CHARS_BASE | '_'
+    }
+    
+    
+    
     public static int valHexChar(int ch)
     {
         if ( range(ch, '0', '9') )
@@ -72,6 +104,8 @@ public class RiotChars
         return -1 ;
     }
 
+    private static boolean r(int ch, int a, int b) { return ( ch >= a && ch <= b ) ; } // Inclusive test a <= ch <= b; int bounds (unlike range(int, char, char)) allow limits above 0xFFFF.
+    
     
     public static boolean range(int ch, char a, char b)
     {

Modified: incubator/jena/Jena2/ARQ/trunk/src/main/java/org/openjena/riot/tokens/TokenizerText.java
URL: http://svn.apache.org/viewvc/incubator/jena/Jena2/ARQ/trunk/src/main/java/org/openjena/riot/tokens/TokenizerText.java?rev=1178753&r1=1178752&r2=1178753&view=diff
==============================================================================
--- incubator/jena/Jena2/ARQ/trunk/src/main/java/org/openjena/riot/tokens/TokenizerText.java (original)
+++ incubator/jena/Jena2/ARQ/trunk/src/main/java/org/openjena/riot/tokens/TokenizerText.java Tue Oct  4 10:44:19 2011
@@ -482,6 +482,25 @@ public final class TokenizerText impleme
         
     }
     
+    /*
+    The token rules from SPARQL and Turtle.
+    PNAME_NS       ::=  PN_PREFIX? ':'
+    PNAME_LN       ::=  PNAME_NS PN_LOCAL[131]  BLANK_NODE_LABEL  ::=  '_:' PN_LOCAL
+    PN_CHARS_BASE  ::=  [A-Z] | [a-z] | [#x00C0-#x00D6] | [#x00D8-#x00F6] | [#x00F8-#x02FF] | [#x0370-#x037D] | [#x037F-#x1FFF] | [#x200C-#x200D] | [#x2070-#x218F] | [#x2C00-#x2FEF] | [#x3001-#xD7FF] | [#xF900-#xFDCF] | [#xFDF0-#xFFFD] | [#x10000-#xEFFFF]
+    PN_CHARS_U     ::=  PN_CHARS_BASE | '_'
+    VARNAME        ::=  ( PN_CHARS_U  | [0-9] ) ( PN_CHARS_U | [0-9] | #x00B7 | [#x0300-#x036F] | [#x203F-#x2040] )*
+    PN_CHARS       ::=  PN_CHARS_U | '-' | [0-9] | #x00B7 | [#x0300-#x036F] | [#x203F-#x2040]
+    PN_PREFIX      ::=  PN_CHARS_BASE ((PN_CHARS|'.')* PN_CHARS)?
+    PN_LOCAL       ::=  ( PN_CHARS_U | [0-9] ) ((PN_CHARS|'.')* PN_CHARS)?
+    */
+        
+    
+    private String readLocalPart()
+    { return readWordSub(true, false) ; } // NOTE(review): first flag presumably "leading digit allowed" (as passed by readWord); PN_LOCAL may start with [0-9] -- confirm against readWordSub.
+
+    private String readPrefixPart()
+    { return readWordSub(false, false) ; } // NOTE(review): first flag presumably "leading digit allowed" (as passed by readWord); PN_PREFIX must start with PN_CHARS_BASE -- confirm against readWordSub.
+
     // Get characters between two markers.
     // strEscapes may be processed
     // endNL end of line as an ending is OK
@@ -547,13 +566,7 @@ public final class TokenizerText impleme
             insertCodepoint(stringBuilder, ch) ;
         }
     }
-
-    private String readLocalPart()
-    { return readWordSub(true, false) ; }
-    
-    private String readPrefixPart()
-    { return readWordSub(false, false) ; }
-    
+
     private String readWord(boolean leadingDigitAllowed)
     { return readWordSub(leadingDigitAllowed, false) ; }