You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@jena.apache.org by an...@apache.org on 2011/10/05 20:16:44 UTC
svn commit: r1179358 - in /incubator/jena/Jena2/ARQ/trunk/src:
main/java/org/openjena/riot/system/RiotChars.java
main/java/org/openjena/riot/tokens/TokenizerText.java
test/java/org/openjena/riot/tokens/TestTokenizer.java
Author: andy
Date: Wed Oct 5 18:16:44 2011
New Revision: 1179358
URL: http://svn.apache.org/viewvc?rev=1179358&view=rev
Log:
JENA-129
Modified:
incubator/jena/Jena2/ARQ/trunk/src/main/java/org/openjena/riot/system/RiotChars.java
incubator/jena/Jena2/ARQ/trunk/src/main/java/org/openjena/riot/tokens/TokenizerText.java
incubator/jena/Jena2/ARQ/trunk/src/test/java/org/openjena/riot/tokens/TestTokenizer.java
Modified: incubator/jena/Jena2/ARQ/trunk/src/main/java/org/openjena/riot/system/RiotChars.java
URL: http://svn.apache.org/viewvc/incubator/jena/Jena2/ARQ/trunk/src/main/java/org/openjena/riot/system/RiotChars.java?rev=1179358&r1=1179357&r2=1179358&view=diff
==============================================================================
--- incubator/jena/Jena2/ARQ/trunk/src/main/java/org/openjena/riot/system/RiotChars.java (original)
+++ incubator/jena/Jena2/ARQ/trunk/src/main/java/org/openjena/riot/system/RiotChars.java Wed Oct 5 18:16:44 2011
@@ -75,34 +75,34 @@ PN_LOCAL ::= ( PN_CHARS_U | [0-9]
public static boolean isPNCharsBase(int ch)
{
- //??
- //int type = Character.getType(ch) ;
- //Character.COMBINING_SPACING_MARK -> What category are we looking at?
+ // PN_CHARS_BASE ::= [A-Z] | [a-z] | [#x00C0-#x00D6] | [#x00D8-#x00F6] | [#x00F8-#x02FF] |
+ // [#x0370-#x037D] | [#x037F-#x1FFF] | [#x200C-#x200D] | [#x2070-#x218F] |
+ // [#x2C00-#x2FEF] | [#x3001-#xD7FF] | [#xF900-#xFDCF] | [#xFDF0-#xFFFD] |
+ // [#x10000-#xEFFFF]
return
- r(ch, 'a', 'z') || r(ch, 'A', 'Z') || r(ch, 0x00C0, 0x00D6) || r(ch, 0x00D8, 0x00F6) ||
- r(ch, 0x00F8, 0x02FF) || r(ch, 0x0370, 0x037D) || r(ch, 0x037F, 0x1FFF) ||
- r(ch, 0x200C, 0x200D) || r(ch, 0x2070, 0x218F) ||
- r(ch, 0x2C00 , 0x2FEF) || r(ch, 0x3001, 0xD7FF) || r(ch, 0xF900, 0xFDCF) || r(ch, 0xFDF0, 0xFFFD) ||
+ r(ch, 'a', 'z') || r(ch, 'A', 'Z') || r(ch, 0x00C0, 0x00D6) || r(ch, 0x00D8, 0x00F6) || r(ch, 0x00F8, 0x02FF) ||
+ r(ch, 0x0370, 0x037D) || r(ch, 0x037F, 0x1FFF) || r(ch, 0x200C, 0x200D) || r(ch, 0x2070, 0x218F) ||
+ r(ch, 0x2C00, 0x2FEF) || r(ch, 0x3001, 0xD7FF) || r(ch, 0xF900, 0xFDCF) || r(ch, 0xFDF0, 0xFFFD) ||
r(ch, 0x10000, 0xEFFFF) ; // Outside the basic plane.
}
public static boolean isPNChars_U(int ch)
{
+ //PN_CHARS_BASE | '_'
return isPNCharsBase(ch) || ( ch == '_' ) ;
}
public static boolean isPNChars_U_N(int ch)
{
+ // PN_CHARS_U | [0-9]
return isPNCharsBase(ch) || ( ch == '_' ) || isDigit(ch) ;
}
public static boolean isPNChars(int ch)
{
- // #x00B7 | [#x0300-#x036F] | [#x203F-#x2040]
- return isPNChars_U(ch) || ( ch == '-' ) || ch == 0x00B7 || r(ch, 0x306, 0x036F) || r(ch, 0x203F, 0x2040) ;
+ // PN_CHARS ::= PN_CHARS_U | '-' | [0-9] | #x00B7 | [#x0300-#x036F] | [#x203F-#x2040]
+ return isPNChars_U(ch) || isDigit(ch) || ( ch == '-' ) || ch == 0x00B7 || r(ch, 0x300, 0x036F) || r(ch, 0x203F, 0x2040) ;
}
-
-
public static int valHexChar(int ch)
{
Modified: incubator/jena/Jena2/ARQ/trunk/src/main/java/org/openjena/riot/tokens/TokenizerText.java
URL: http://svn.apache.org/viewvc/incubator/jena/Jena2/ARQ/trunk/src/main/java/org/openjena/riot/tokens/TokenizerText.java?rev=1179358&r1=1179357&r2=1179358&view=diff
==============================================================================
--- incubator/jena/Jena2/ARQ/trunk/src/main/java/org/openjena/riot/tokens/TokenizerText.java (original)
+++ incubator/jena/Jena2/ARQ/trunk/src/main/java/org/openjena/riot/tokens/TokenizerText.java Wed Oct 5 18:16:44 2011
@@ -18,17 +18,47 @@
package org.openjena.riot.tokens;
-import static org.openjena.atlas.lib.Chars.* ;
-import static org.openjena.riot.system.RiotChars.* ;
-
-import java.util.NoSuchElementException ;
-
-import org.openjena.atlas.AtlasException ;
-import org.openjena.atlas.io.IO ;
-import org.openjena.atlas.io.PeekReader ;
-import org.openjena.atlas.lib.Chars ;
-import org.openjena.riot.RiotParseException ;
-import org.openjena.riot.system.RiotChars ;
+import static org.openjena.atlas.lib.Chars.CH_AT ;
+import static org.openjena.atlas.lib.Chars.CH_COLON ;
+import static org.openjena.atlas.lib.Chars.CH_COMMA ;
+import static org.openjena.atlas.lib.Chars.CH_DOT ;
+import static org.openjena.atlas.lib.Chars.CH_EQUALS ;
+import static org.openjena.atlas.lib.Chars.CH_GT ;
+import static org.openjena.atlas.lib.Chars.CH_HASH ;
+import static org.openjena.atlas.lib.Chars.CH_LBRACE ;
+import static org.openjena.atlas.lib.Chars.CH_LBRACKET ;
+import static org.openjena.atlas.lib.Chars.CH_LPAREN ;
+import static org.openjena.atlas.lib.Chars.CH_LT ;
+import static org.openjena.atlas.lib.Chars.CH_MINUS ;
+import static org.openjena.atlas.lib.Chars.CH_PLUS ;
+import static org.openjena.atlas.lib.Chars.CH_QMARK ;
+import static org.openjena.atlas.lib.Chars.CH_QUOTE1 ;
+import static org.openjena.atlas.lib.Chars.CH_QUOTE2 ;
+import static org.openjena.atlas.lib.Chars.CH_RBRACE ;
+import static org.openjena.atlas.lib.Chars.CH_RBRACKET ;
+import static org.openjena.atlas.lib.Chars.CH_RPAREN ;
+import static org.openjena.atlas.lib.Chars.CH_SEMICOLON ;
+import static org.openjena.atlas.lib.Chars.CH_STAR ;
+import static org.openjena.atlas.lib.Chars.CH_UNDERSCORE ;
+import static org.openjena.atlas.lib.Chars.CR ;
+import static org.openjena.atlas.lib.Chars.EOF ;
+import static org.openjena.atlas.lib.Chars.NL ;
+import static org.openjena.riot.system.RiotChars.charInArray ;
+import static org.openjena.riot.system.RiotChars.isA2Z ;
+import static org.openjena.riot.system.RiotChars.isA2ZN ;
+import static org.openjena.riot.system.RiotChars.isAlphaNumeric ;
+import static org.openjena.riot.system.RiotChars.isNewlineChar ;
+import static org.openjena.riot.system.RiotChars.isWhitespace ;
+import static org.openjena.riot.system.RiotChars.range ;
+import static org.openjena.riot.system.RiotChars.valHexChar ;
+
+import java.util.NoSuchElementException ;
+
+import org.openjena.atlas.AtlasException ;
+import org.openjena.atlas.io.IO ;
+import org.openjena.atlas.io.PeekReader ;
+import org.openjena.riot.RiotParseException ;
+import org.openjena.riot.system.RiotChars ;
/** Tokenizer for all sorts of things RDF-ish */
@@ -497,8 +527,13 @@ public final class TokenizerText impleme
*/
+ private String readPrefixPart()
+ //{ return readWordSub(false, false) ; }
+ { return readSegment(false) ; }
+
private String readLocalPart()
- { return readWordSub(true, false) ; }
+ //{ return readWordSub(true, false) ; }
+ { return readSegment(true) ; }
private String readSegment(boolean isLocalPart)
{
@@ -507,7 +542,7 @@ public final class TokenizerText impleme
// RiotChars has isPNChars_U_N for ( PN_CHARS_U | [0-9] )
stringBuilder.setLength(0) ;
- // First character
+ // -- Test first character
int ch = reader.peekChar() ;
if ( ch == EOF )
return "" ;
@@ -519,35 +554,47 @@ public final class TokenizerText impleme
{
if ( ! RiotChars.isPNCharsBase(ch) ) return "" ;
}
-
+ // ch is not added to the buffer until ...
+ // -- Do remainder
stringBuilder.append((char)ch) ;
reader.readChar() ;
+ int chDot = 0 ;
- boolean canBeLast = true ;
for (;;)
{
- // Put previous chacarer in buffer
- stringBuilder.append((char)ch) ;
- reader.readChar() ;
-
ch = reader.peekChar() ;
- if ( ! RiotChars.isPNChars(ch) && ch == Chars.CH_DOT )
+ if ( RiotChars.isPNChars(ch) )
+ {
+ reader.readChar() ;
+ // Was there also a DOT?
+ if ( chDot != 0 )
+ {
+ stringBuilder.append((char)chDot) ;
+ chDot = 0 ;
+ }
+ stringBuilder.append((char)ch) ;
+ continue ;
+ }
+ // Not isPNChars
+ if ( ch != CH_DOT )
break ;
- }
- // End condition.
- if ( ch != Chars.CH_DOT )
- {
- stringBuilder.append((char)ch) ;
+ // DOT
reader.readChar() ;
+ chDot = ch ;
}
+ // On exit, chDot may hold a character.
+
+ if ( chDot == CH_DOT )
+ // Unread it.
+ reader.pushbackChar(chDot) ;
+
+ //stringBuilder.deleteCharAt(chDot)
+
return stringBuilder.toString() ;
}
- private String readPrefixPart()
- { return readWordSub(false, false) ; }
-
// Get characters between two markers.
// strEscapes may be processed
// endNL end of line as an ending is OK
Modified: incubator/jena/Jena2/ARQ/trunk/src/test/java/org/openjena/riot/tokens/TestTokenizer.java
URL: http://svn.apache.org/viewvc/incubator/jena/Jena2/ARQ/trunk/src/test/java/org/openjena/riot/tokens/TestTokenizer.java?rev=1179358&r1=1179357&r2=1179358&view=diff
==============================================================================
--- incubator/jena/Jena2/ARQ/trunk/src/test/java/org/openjena/riot/tokens/TestTokenizer.java (original)
+++ incubator/jena/Jena2/ARQ/trunk/src/test/java/org/openjena/riot/tokens/TestTokenizer.java Wed Oct 5 18:16:44 2011
@@ -435,6 +435,47 @@ public class TestTokenizer extends BaseT
assertEquals("-456", token.getImage()) ;
}
+
+ @Test
+ public void tokenUnit_pname10()
+ {
+ tokenizeAndTestExact("a:a.b", TokenType.PREFIXED_NAME, "a", "a.b") ;
+ }
+
+ @Test
+ public void tokenUnit_pname11()
+ {
+ tokenizeAndTestExact("a:0.b", TokenType.PREFIXED_NAME, "a", "0.b") ;
+ }
+
+ @Test
+ public void tokenUnit_pname12()
+ {
+ tokenizeAndTestFirst("a:0. b", TokenType.PREFIXED_NAME, "a", "0") ;
+ }
+
+ @Test
+ public void tokenUnit_pname13()
+ {
+ // x00e9 é
+ // x0065 e and x0301 (combining acute accent)
+ tokenizeAndTestExact("a:xyzé", TokenType.PREFIXED_NAME, "a", "xyz\u00e9") ;
+ }
+
+ @Test
+ public void tokenUnit_pname14()
+ {
+ // x0065 e and x0301 (combining acute accent)
+ tokenizeAndTestExact("a:xyze\u0301", TokenType.PREFIXED_NAME, "a", "xyze\u0301") ;
+ }
+
+ @Test
+ public void tokenUnit_pname15()
+ {
+ // x0065 e and x0301 (combining acute accent)
+ tokenizeAndTestExact("a:xe\u0301y", TokenType.PREFIXED_NAME, "a", "xe\u0301y") ;
+ }
+
// @Test
// public void tokenUnit_pname10()
// {
@@ -693,6 +734,12 @@ public class TestTokenizer extends BaseT
}
@Test
+ public void directive_1() { tokenizeAndTestExact("@prefix", TokenType.DIRECTIVE, "prefix") ; }
+
+ @Test
+ public void directive_2() { tokenizeAndTestExact("@base", TokenType.DIRECTIVE, "base") ; }
+
+ @Test
public void tokenComment_01()
{
tokenizeAndTestExact("_:123 # Comment", TokenType.BNODE, "123") ;