You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@jena.apache.org by an...@apache.org on 2012/11/05 14:53:05 UTC
svn commit: r1405790 - in /jena/trunk/jena-arq: ./
src/main/java/org/openjena/riot/lang/ src/main/java/org/openjena/riot/system/
src/main/java/org/openjena/riot/tokens/
src/test/java/org/openjena/riot/tokens/
Author: andy
Date: Mon Nov 5 13:53:04 2012
New Revision: 1405790
URL: http://svn.apache.org/viewvc?rev=1405790&view=rev
Log:
RIOT tokenizing now records the subtokens for language and datatype literals.
Add strict testing of token type for N-Triples (when in strict mode).
Removed:
jena/trunk/jena-arq/src/main/java/org/openjena/riot/tokens/TokenizerBytes.java
Modified:
jena/trunk/jena-arq/ReleaseNotes.txt
jena/trunk/jena-arq/src/main/java/org/openjena/riot/lang/LangNTuple.java
jena/trunk/jena-arq/src/main/java/org/openjena/riot/system/ParserProfileBase.java
jena/trunk/jena-arq/src/main/java/org/openjena/riot/tokens/Token.java
jena/trunk/jena-arq/src/main/java/org/openjena/riot/tokens/TokenizerFactory.java
jena/trunk/jena-arq/src/main/java/org/openjena/riot/tokens/TokenizerText.java
jena/trunk/jena-arq/src/test/java/org/openjena/riot/tokens/TestTokenForNode.java
jena/trunk/jena-arq/src/test/java/org/openjena/riot/tokens/TestTokenizer.java
Modified: jena/trunk/jena-arq/ReleaseNotes.txt
URL: http://svn.apache.org/viewvc/jena/trunk/jena-arq/ReleaseNotes.txt?rev=1405790&r1=1405789&r2=1405790&view=diff
==============================================================================
--- jena/trunk/jena-arq/ReleaseNotes.txt (original)
+++ jena/trunk/jena-arq/ReleaseNotes.txt Mon Nov 5 13:53:04 2012
@@ -11,6 +11,7 @@ ChangeLog for ARQ
+ Update Turtle/Trig parsers for prefixed name details in RDF 1.1
Add %-sequences, \char sequences and ':' to local part of prefixed names.
NB. The characters 123. are now an integer 123 and a DOT; they were a decimal, and no DOT.
++ RIOT tokenizing now records the subtokens for language and datatype literals.
==== ARQ 2.9.4
Modified: jena/trunk/jena-arq/src/main/java/org/openjena/riot/lang/LangNTuple.java
URL: http://svn.apache.org/viewvc/jena/trunk/jena-arq/src/main/java/org/openjena/riot/lang/LangNTuple.java?rev=1405790&r1=1405789&r2=1405790&view=diff
==============================================================================
--- jena/trunk/jena-arq/src/main/java/org/openjena/riot/lang/LangNTuple.java (original)
+++ jena/trunk/jena-arq/src/main/java/org/openjena/riot/lang/LangNTuple.java Mon Nov 5 13:53:04 2012
@@ -24,6 +24,7 @@ import org.openjena.atlas.lib.Sink ;
import org.openjena.riot.system.ParserProfile ;
import org.openjena.riot.tokens.Token ;
import org.openjena.riot.tokens.TokenType ;
+import static org.openjena.riot.tokens.TokenType.* ;
import org.openjena.riot.tokens.Tokenizer ;
import org.slf4j.Logger ;
import org.slf4j.LoggerFactory ;
@@ -119,8 +120,14 @@ public abstract class LangNTuple<X> exte
case IRI:
case BNODE:
case STRING2:
+ return ;
case LITERAL_DT:
+ if ( profile.isStrictMode() && ! token.getSubToken1().hasType(STRING2) )
+ exception(token, "Illegal single quoted string: %s", token) ;
+ return ;
case LITERAL_LANG:
+ if ( profile.isStrictMode() && ! token.getSubToken1().hasType(STRING2) )
+ exception(token, "Illegal single quoted string: %s", token) ;
return ;
case STRING1:
if ( profile.isStrictMode() )
Modified: jena/trunk/jena-arq/src/main/java/org/openjena/riot/system/ParserProfileBase.java
URL: http://svn.apache.org/viewvc/jena/trunk/jena-arq/src/main/java/org/openjena/riot/system/ParserProfileBase.java?rev=1405790&r1=1405789&r2=1405790&view=diff
==============================================================================
--- jena/trunk/jena-arq/src/main/java/org/openjena/riot/system/ParserProfileBase.java (original)
+++ jena/trunk/jena-arq/src/main/java/org/openjena/riot/system/ParserProfileBase.java Mon Nov 5 13:53:04 2012
@@ -173,7 +173,7 @@ public class ParserProfileBase implement
return createTypedLiteral(str, XSDDatatype.XSDinteger, line, col) ;
case LITERAL_DT :
{
- Token tokenDT = token.getSubToken() ;
+ Token tokenDT = token.getSubToken2() ;
String uriStr ;
switch(tokenDT.getType())
Modified: jena/trunk/jena-arq/src/main/java/org/openjena/riot/tokens/Token.java
URL: http://svn.apache.org/viewvc/jena/trunk/jena-arq/src/main/java/org/openjena/riot/tokens/Token.java?rev=1405790&r1=1405789&r2=1405790&view=diff
==============================================================================
--- jena/trunk/jena-arq/src/main/java/org/openjena/riot/tokens/Token.java (original)
+++ jena/trunk/jena-arq/src/main/java/org/openjena/riot/tokens/Token.java Mon Nov 5 13:53:04 2012
@@ -61,10 +61,22 @@ import com.hp.hpl.jena.vocabulary.XSD ;
public final class Token
{
+ // Some tokens are "multipart"
+ // A language tag is a sub-token string and token part.
+ // It uses subToken1, and image2.
+ // A datatype literal is two tokens
+ // It uses subToken1, subToken2 and sets image to the lexical part.
+ // A prefixed name is two strings.
+ // It uses tokenImage and tokenImage2
+
private TokenType tokenType = null ;
+
private String tokenImage = null ;
- private String tokenImage2 = null ; // Used for language tag and second part of prefix name
- private Token subToken = null ; // A related token (used for datatype literals)
+ private String tokenImage2 = null ; // Used for language tag and second part of prefix name
+
+ private Token subToken1 = null ; // A related token (used for datatype literals and language tags)
+ private Token subToken2 = null ; // A related token (used for datatype literals and language tags)
+
public int cntrlCode = 0 ;
private long column ;
private long line ;
@@ -74,20 +86,25 @@ public final class Token
public static final String ImageTrue = "true" ;
public static final String ImageFalse = "false" ;
- public final TokenType getType() { return tokenType ; }
- public final String getImage() { return tokenImage ; }
+ public final TokenType getType() { return tokenType ; }
+ public final String getImage() { return tokenImage ; }
//public final String getImage1() { return tokenImage1 ; }
- public final String getImage2() { return tokenImage2 ; }
- public final int getCntrlCode() { return cntrlCode ; }
- public final Token getSubToken() { return subToken ; }
+
+ public final String getImage2() { return tokenImage2 ; }
+ public final int getCntrlCode() { return cntrlCode ; }
+ public final Token getSubToken1() { return subToken1 ; }
+ public final Token getSubToken2() { return subToken2 ; }
public final Token setType(TokenType tokenType) { this.tokenType = tokenType ; return this ; }
public final Token setImage(String tokenImage) { this.tokenImage = tokenImage ; return this ; }
public final Token setImage(char tokenImage) { this.tokenImage = String.valueOf(tokenImage) ; return this ; }
- //public final Token setImage1(String tokenImage1) { this.tokenImage1 = tokenImage1 ; return this ; }
+
public final Token setImage2(String tokenImage2) { this.tokenImage2 = tokenImage2 ; return this ; }
+
public final Token setCntrlCode(int cntrlCode) { this.cntrlCode = cntrlCode ; return this ; }
- public final Token setSubToken(Token subToken) { this.subToken = subToken ; return this ; }
+
+ public final Token setSubToken1(Token subToken) { this.subToken1 = subToken ; return this ; }
+ public final Token setSubToken2(Token subToken) { this.subToken2 = subToken ; return this ; }
static Token create(String s)
{
@@ -120,27 +137,42 @@ public final class Token
{
return line ;
}
+
+ Token(String string) { this(STRING, string) ; }
- private Token(TokenType type) { this(type, null, null, null) ; }
+ Token(TokenType type) { this(type, null, null) ; }
+
+ Token(TokenType type, String image1) { this(type, image1, null) ; }
+
+ Token(TokenType type, String image1, String image2)
+ {
+ this() ;
+ setType(type) ;
+ setImage(image1) ;
+ setImage2(image2) ;
+ }
- private Token(TokenType type, String image1) { this(type, image1, null, null) ; }
- private Token(TokenType type, String image1, String image2)
- { this(type, image1, image2, null) ; }
-
- private Token(TokenType type, String image1, Token subToken)
- { this(type, image1, null, subToken) ; }
-
-
- private Token(TokenType type, String image1, String image2, Token subToken)
+// private Token(TokenType type) { this(type, null, null, null) ; }
+//
+// private Token(TokenType type, String image1) { this(type, image1, null, null) ; }
+//
+// private Token(TokenType type, String image1, String image2)
+// { this(type, image1, image2, null) ; }
+//
+// private Token(TokenType type, String image1, Token subToken)
+// { this(type, image1, null, subToken) ; }
+//
+//
+ private Token(TokenType type, String image1, String image2, Token subToken1, Token subToken2)
{
this() ;
setType(type) ;
setImage(image1) ;
setImage2(image2) ;
- setSubToken(subToken) ;
+ setSubToken1(subToken1) ;
+ setSubToken2(subToken2) ;
}
-
private Token() { this(-1, -1) ; }
@@ -148,12 +180,12 @@ public final class Token
public Token(Token token)
{
- this.tokenType = token.tokenType ;
- this.tokenImage = token.tokenImage ;
- this.tokenImage2 = token.tokenImage2 ;
- this.cntrlCode = token.cntrlCode ;
- this.line = token.line ;
- this.column = token.column ;
+ this(token.tokenType,
+ token.tokenImage, token.tokenImage2,
+ token.subToken1, token.subToken2) ;
+ this.cntrlCode = token.cntrlCode ;
+ this.line = token.line ;
+ this.column = token.column ;
}
public int asInt() {
@@ -214,23 +246,32 @@ public final class Token
sb.append(delim1) ;
sb.append(getImage()) ;
sb.append(delim1) ;
+ }
- if ( getImage2() != null )
- {
- sb.append(":") ;
- sb.append(delim2) ;
- sb.append(getImage2()) ;
- sb.append(delim2) ;
- }
- if ( getSubToken() != null )
- {
- sb.append(";") ;
- sb.append(delim2) ;
- sb.append(getSubToken().toString()) ;
- sb.append(delim2) ;
- }
-
+ if ( getImage2() != null )
+ {
+ sb.append(":") ;
+ sb.append(delim2) ;
+ sb.append(getImage2()) ;
+ sb.append(delim2) ;
}
+
+ if ( getSubToken1() != null )
+ {
+ sb.append(";") ;
+ sb.append(delim2) ;
+ sb.append(getSubToken1().toString()) ;
+ sb.append(delim2) ;
+ }
+
+ if ( getSubToken2() != null )
+ {
+ sb.append(";") ;
+ sb.append(delim2) ;
+ sb.append(getSubToken2().toString()) ;
+ sb.append(delim2) ;
+ }
+
if ( getCntrlCode() != 0 )
{
sb.append(":") ;
@@ -388,14 +429,17 @@ public final class Token
case INTEGER: return Node.createLiteral(tokenImage, null, XSDDatatype.XSDinteger) ;
case LITERAL_DT :
{
- if ( pmap == null && getSubToken().hasType(TokenType.PREFIXED_NAME) )
+ Token lexToken = getSubToken1() ;
+ Token dtToken = getSubToken2() ;
+
+ if ( pmap == null && dtToken.hasType(TokenType.PREFIXED_NAME) )
// Must be able to resolve the datatype else we can't find its datatype.
throw new RiotException("Invalid token: "+this) ;
- Node n = getSubToken().asNode(pmap);
+ Node n = dtToken.asNode(pmap);
if ( ! n.isURI() )
throw new RiotException("Invalid token: "+this) ;
RDFDatatype dt = TypeMapper.getInstance().getSafeTypeByName(n.getURI()) ;
- return Node.createLiteral(tokenImage, null, dt) ;
+ return Node.createLiteral(lexToken.getImage(), null, dt) ;
}
case LITERAL_LANG : return Node.createLiteral(tokenImage, tokenImage2, null) ;
case STRING:
@@ -569,12 +613,19 @@ public final class Token
// Has datatype.
Node dt = Node.createURI(datatype) ;
- Token subToken = tokenForNode(dt) ;
- return new Token(LITERAL_DT, s, subToken) ;
+ Token subToken1 = new Token(STRING, s) ;
+ Token subToken2 = tokenForNode(dt) ;
+ Token t = new Token(LITERAL_DT, s) ;
+ t.setSubToken1(subToken1) ;
+ t.setSubToken2(subToken2) ;
+ return t ;
}
if ( lang != null && lang.length()>0)
- return new Token(LITERAL_LANG, s, lang) ;
+ {
+ Token lex = new Token(s) ;
+ return new Token(LITERAL_LANG, s, lang, lex, null) ;
+ }
// Plain.
return new Token(STRING, s) ;
Modified: jena/trunk/jena-arq/src/main/java/org/openjena/riot/tokens/TokenizerFactory.java
URL: http://svn.apache.org/viewvc/jena/trunk/jena-arq/src/main/java/org/openjena/riot/tokens/TokenizerFactory.java?rev=1405790&r1=1405789&r2=1405790&view=diff
==============================================================================
--- jena/trunk/jena-arq/src/main/java/org/openjena/riot/tokens/TokenizerFactory.java (original)
+++ jena/trunk/jena-arq/src/main/java/org/openjena/riot/tokens/TokenizerFactory.java Mon Nov 5 13:53:04 2012
@@ -22,11 +22,9 @@ import java.io.ByteArrayInputStream ;
import java.io.InputStream ;
import java.io.Reader ;
-import org.openjena.atlas.io.PeekInputStream ;
import org.openjena.atlas.io.PeekReader ;
import org.openjena.atlas.lib.StrUtils ;
-
public class TokenizerFactory
{
/** Discouraged - be careful about character sets */
@@ -39,16 +37,6 @@ public class TokenizerFactory
public static Tokenizer makeTokenizerUTF8(InputStream in)
{
- if ( false )
- {
- // Byte parser - historical.
- // Might be faster. Slightly imperfect - better to convert to chars, then parse.
- // No BOM handling.
- PeekInputStream pin = PeekInputStream.make(in) ;
- Tokenizer tokenizer = new TokenizerBytes(pin) ;
- return tokenizer ;
- }
-
// BOM will have been removed
PeekReader peekReader = PeekReader.makeUTF8(in) ;
Tokenizer tokenizer = new TokenizerText(peekReader) ;
Modified: jena/trunk/jena-arq/src/main/java/org/openjena/riot/tokens/TokenizerText.java
URL: http://svn.apache.org/viewvc/jena/trunk/jena-arq/src/main/java/org/openjena/riot/tokens/TokenizerText.java?rev=1405790&r1=1405789&r2=1405790&view=diff
==============================================================================
--- jena/trunk/jena-arq/src/main/java/org/openjena/riot/tokens/TokenizerText.java (original)
+++ jena/trunk/jena-arq/src/main/java/org/openjena/riot/tokens/TokenizerText.java Mon Nov 5 13:53:04 2012
@@ -258,8 +258,15 @@ public final class TokenizerText impleme
if ( reader.peekChar() == CH_AT )
{
reader.readChar() ;
- token.setImage2(langTag()) ;
- token.setType(TokenType.LITERAL_LANG) ;
+
+ Token mainToken = new Token(token) ;
+ mainToken.setType(TokenType.LITERAL_LANG) ;
+ mainToken.setSubToken1(token) ;
+ mainToken.setImage2(langTag()) ;
+ token = mainToken ;
+
+// token.setImage2(langTag()) ;
+// token.setType(TokenType.LITERAL_LANG) ;
if ( Checking ) checkLiteralLang(token.getImage(), token.getImage2()) ;
}
else if ( reader.peekChar() == '^' )
@@ -276,14 +283,18 @@ public final class TokenizerText impleme
// exception("Datatype URI required after ^^ - URI or prefixed name expected") ;
// Stash current token.
- Token mainToken = token ;
+ Token mainToken = new Token(token) ;
+ mainToken.setSubToken1(token) ;
+ mainToken.setImage(token.getImage()) ;
+
Token subToken = parseToken() ;
if ( ! subToken.isIRI() )
exception("Datatype URI required after ^^ - URI or prefixed name expected") ;
+ mainToken.setSubToken2(subToken) ;
+ mainToken.setType(TokenType.LITERAL_DT) ;
+
token = mainToken ;
- token.setSubToken(subToken) ;
- token.setType(TokenType.LITERAL_DT) ;
if ( Checking ) checkLiteralDT(token.getImage(), subToken) ;
}
else
Modified: jena/trunk/jena-arq/src/test/java/org/openjena/riot/tokens/TestTokenForNode.java
URL: http://svn.apache.org/viewvc/jena/trunk/jena-arq/src/test/java/org/openjena/riot/tokens/TestTokenForNode.java?rev=1405790&r1=1405789&r2=1405790&view=diff
==============================================================================
--- jena/trunk/jena-arq/src/test/java/org/openjena/riot/tokens/TestTokenForNode.java (original)
+++ jena/trunk/jena-arq/src/test/java/org/openjena/riot/tokens/TestTokenForNode.java Mon Nov 5 13:53:04 2012
@@ -25,7 +25,6 @@ import org.openjena.riot.system.PrefixMa
import com.hp.hpl.jena.graph.Node ;
import com.hp.hpl.jena.rdf.model.AnonId ;
-import com.hp.hpl.jena.sparql.sse.SSE ;
import com.hp.hpl.jena.sparql.util.NodeFactory ;
public class TestTokenForNode extends BaseTest
@@ -39,71 +38,61 @@ public class TestTokenForNode extends Ba
}
@Test public void tokenForNode01()
- { test( "'x'", TokenType.STRING, "x", null, null) ; }
+ { test( "'x'", TokenType.STRING, "x", null, null, null) ; }
@Test public void tokenForNode02()
- { test( "<x>", TokenType.IRI, "x", null, null) ; }
+ { test( "<x>", TokenType.IRI, "x", null, null, null) ; }
@Test public void tokenForNode03()
- { test( "'en'@lang", TokenType.LITERAL_LANG, "en", "lang", null) ; }
+ { test( "'en'@lang", TokenType.LITERAL_LANG, "en", "lang", new Token(TokenType.STRING, "en"), null) ; }
@Test public void tokenForNode04()
{
Token sub = new Token(-1,-1) ;
sub.setType(TokenType.IRI) ;
sub.setImage("dtype") ;
- test( "'lex'^^<dtype>", TokenType.LITERAL_DT, "lex", null, sub) ;
+ test( "'lex'^^<dtype>", TokenType.LITERAL_DT, "lex", null, new Token(TokenType.STRING, "lex"), sub) ;
}
@Test public void tokenForNode05()
- { test( "<http://localhost/foo>", TokenType.IRI, "foo", null, null) ; }
+ { test( "<http://localhost/foo>", TokenType.IRI, "foo", null, null, null) ; }
@Test public void tokenForNode06()
- { test( "<http://example/bar>", TokenType.PREFIXED_NAME, "ex", "bar", null) ; }
+ { test( "<http://example/bar>", TokenType.PREFIXED_NAME, "ex", "bar", null, null) ; }
@Test public void tokenForNode07()
- { test( "123", TokenType.INTEGER, "123", null, null) ; }
+ { test( Node.createAnon(new AnonId("abc")), TokenType.BNODE, "abc", null, null, null ) ; }
@Test public void tokenForNode08()
- { test( "123.0", TokenType.DECIMAL, "123.0", null, null) ; }
-
- @Test public void tokenForNode09()
- { test( "12e0", TokenType.DOUBLE, "12e0", null, null) ; }
+ { test( Node.ANY, TokenType.KEYWORD, "ANY", null, null, null) ; }
+
+ // Short forms.
+
+ @Test public void tokenForNode20()
+ { test( "123", TokenType.INTEGER, "123", null, null, null) ; }
- @Test public void tokenForNode10()
- { test( Node.createAnon(new AnonId("abc")), TokenType.BNODE, "abc", null, null) ; }
+ @Test public void tokenForNode21()
+ { test( "123.0", TokenType.DECIMAL, "123.0", null, null, null) ; }
- @Test public void tokenForNode11()
- { test( Node.ANY, TokenType.KEYWORD, "ANY", null, null) ; }
+ @Test public void tokenForNode22()
+ { test( "12e0", TokenType.DOUBLE, "12e0", null, null, null) ; }
private static void test(String nodeStr,
- TokenType type, String image, String image2, Token subToken)
+ TokenType type, String image, String image2, Token subToken1, Token subToken2)
{
Node n = NodeFactory.parseNode(nodeStr) ;
- test(n, type, image, image2, subToken) ;
+ test(n, type, image, image2, subToken1, subToken2) ;
}
private static void test(Node node,
- TokenType type, String image, String image2, Token subToken)
+ TokenType type, String image, String image2, Token subToken1, Token subToken2)
{
Token t = Token.tokenForNode(node, base, prefixMap) ;
assertEquals(type, t.getType()) ;
assertEquals(image, t.getImage()) ;
assertEquals(image2, t.getImage2()) ;
- assertEquals(subToken, t.getSubToken()) ;
- }
-
- private static void test(String nodeStr, PrefixMap pmap,
- TokenType type, String image, String image2, Token subToken)
- {
- // Use SSE to be clear we expect it to be a different node parser to calling Tokenizer.
- Node n = SSE.parseNode(nodeStr) ;
- Token t = Token.tokenForNode(n) ;
- assertEquals(type, t.getType()) ;
- assertEquals(image, t.getImage()) ;
- assertEquals(image2, t.getImage2()) ;
- assertEquals(subToken, t.getSubToken()) ;
+ assertEquals(subToken1, t.getSubToken1()) ;
+ assertEquals(subToken2, t.getSubToken2()) ;
}
-
}
Modified: jena/trunk/jena-arq/src/test/java/org/openjena/riot/tokens/TestTokenizer.java
URL: http://svn.apache.org/viewvc/jena/trunk/jena-arq/src/test/java/org/openjena/riot/tokens/TestTokenizer.java?rev=1405790&r1=1405789&r2=1405790&view=diff
==============================================================================
--- jena/trunk/jena-arq/src/test/java/org/openjena/riot/tokens/TestTokenizer.java (original)
+++ jena/trunk/jena-arq/src/test/java/org/openjena/riot/tokens/TestTokenizer.java Mon Nov 5 13:53:04 2012
@@ -40,7 +40,7 @@ public class TestTokenizer extends BaseT
return tokenizer ;
}
- private static void token(String string)
+ private static void tokenFirst(String string)
{
Tokenizer tokenizer = tokenizer(string) ;
assertTrue(tokenizer.hasNext()) ;
@@ -51,7 +51,7 @@ public class TestTokenizer extends BaseT
- private static Token token_XX(String string)
+ private static Token tokenFor(String string)
{
Tokenizer tokenizer = tokenizer(string) ;
assertTrue(tokenizer.hasNext()) ;
@@ -73,7 +73,20 @@ public class TestTokenizer extends BaseT
return token ;
}
-
+ private static Token tokenizeAndTestExact(String input, TokenType tokenType,
+ String tokenImage1, String tokenImage2,
+ Token subToken1, Token subToken2)
+ {
+ Token token = tokenFor(input) ;
+ assertEquals(tokenType, token.getType()) ;
+ assertEquals(tokenImage1, token.getImage()) ;
+ assertEquals(tokenImage2, token.getImage2()) ;
+ assertEquals(subToken1, token.getSubToken1()) ;
+ assertEquals(subToken2, token.getSubToken2()) ;
+ return token ;
+ }
+
+
private static Tokenizer tokenizeAndTestFirst(String input, TokenType tokenType, String tokenImage)
{
return tokenizeAndTestFirst(input, tokenType, tokenImage, null) ;
@@ -107,6 +120,21 @@ public class TestTokenizer extends BaseT
return token ;
}
+ private static Token tokenizeAndTest(String input, TokenType tokenType,
+ String tokenImage1, String tokenImage2,
+ Token subToken1, Token subToken2)
+ {
+ Token token = tokenFor(input) ;
+ assertNotNull(token) ;
+ assertEquals(tokenType, token.getType()) ;
+ assertEquals(tokenImage1, token.getImage()) ;
+ assertEquals(tokenImage2, token.getImage2()) ;
+ assertEquals(subToken1, token.getSubToken1()) ;
+ assertEquals(subToken2, token.getSubToken2()) ;
+ return token ;
+ }
+
+
@Test public void tokenUnit_iri1() { tokenizeAndTestExact("<x>", TokenType.IRI, "x") ; }
@Test public void tokenUnit_iri2() { tokenizeAndTestExact(" <> ", TokenType.IRI, "") ; }
@@ -116,7 +144,7 @@ public class TestTokenizer extends BaseT
{
try {
// That's one \
- token("<abc\\>def>") ;
+ tokenFirst("<abc\\>def>") ;
} catch (RiotParseException ex)
{
String x = ex.getMessage() ;
@@ -190,7 +218,7 @@ public class TestTokenizer extends BaseT
@Test(expected = RiotParseException.class)
public void tokenUnit_str9()
{
- token("'abc") ;
+ tokenFirst("'abc") ;
}
@Test
@@ -263,13 +291,13 @@ public class TestTokenizer extends BaseT
@Test(expected = RiotParseException.class)
public void tokenUnit_str_long10()
{
- token("\"\"\"abcdef") ;
+ tokenFirst("\"\"\"abcdef") ;
}
@Test(expected = RiotParseException.class)
public void tokenUnit_str_long11()
{
- token("'''") ;
+ tokenFirst("'''") ;
}
@Test
@@ -601,7 +629,7 @@ public class TestTokenizer extends BaseT
@Test(expected = RiotParseException.class)
public void tokenUnit_hex3()
{
- token("0xXYZ") ;
+ tokenFirst("0xXYZ") ;
}
@Test public void tokenUnit_hex4()
@@ -609,17 +637,30 @@ public class TestTokenizer extends BaseT
tokenizeAndTestExact("0Xabc", TokenType.HEX, "0Xabc") ;
}
- private static void tokenizeAndTestLiteralDT(String input, String image, TokenType dt, String image1, String image2)
+ private static void tokenizeAndTestLiteralDT(String input, TokenType lexType, String image, TokenType dt, String dtImage1, String dtImage2)
{
- Token token2 = tokenizeAndTestExact(input, TokenType.LITERAL_DT, image).getSubToken() ;
+ Token lexToken = new Token(lexType, image) ;
+ Token dtToken = new Token(dt, dtImage1, dtImage2) ;
+ tokenizeAndTest(input, TokenType.LITERAL_DT, image, null, lexToken, dtToken) ;
+
+ Token expectedToken = new Token(TokenType.LITERAL_DT) ;
+ expectedToken.setImage(image) ;
+ expectedToken.setImage2(null) ;
+ expectedToken.setSubToken1(lexToken) ;
+ expectedToken.setSubToken2(dtToken) ;
+
+ Token token = tokenFor(input) ;
+ assertEquals(expectedToken, token) ;
+
+ Token token2 = tokenizeAndTestExact(input, TokenType.LITERAL_DT, image).getSubToken2() ;
assertEquals(dt, token2.getType()) ;
- assertEquals(image1, token2.getImage()) ;
- assertEquals(image2, token2.getImage2()) ;
+ assertEquals(dtImage1, token2.getImage()) ;
+ assertEquals(dtImage2, token2.getImage2()) ;
}
@Test public void tokenLiteralDT_0()
{
- tokenizeAndTestLiteralDT("'123'^^<x> ", "123", TokenType.IRI, "x", null) ;
+ tokenizeAndTestLiteralDT("\"123\"^^<x> ", TokenType.STRING2, "123", TokenType.IRI, "x", null) ;
}
// literal test function.
@@ -627,23 +668,21 @@ public class TestTokenizer extends BaseT
@Test
public void tokenLiteralDT_1()
{
- tokenizeAndTestLiteralDT("'123'^^x:y ", "123", TokenType.PREFIXED_NAME, "x", "y") ;
+ tokenizeAndTestLiteralDT("'123'^^x:y ", TokenType.STRING1, "123", TokenType.PREFIXED_NAME, "x", "y") ;
}
@Test
public void tokenLiteralDT_2()
{
- tokenizeAndTestLiteralDT("'123'^^:y", "123", TokenType.PREFIXED_NAME, "", "y") ;
+ tokenizeAndTestLiteralDT("'123'^^:y", TokenType.STRING1, "123", TokenType.PREFIXED_NAME, "", "y") ;
}
@Test
public void tokenLiteralDT_3()
{
- tokenizeAndTestLiteralDT("'''123'''^^<xyz>", "123", TokenType.IRI, "xyz", null) ;
+ tokenizeAndTestLiteralDT("'''123'''^^<xyz>", TokenType.LONG_STRING1, "123", TokenType.IRI, "xyz", null) ;
}
-
-
@Test(expected = RiotParseException.class)
public void tokenLiteralDT_bad_1()
{
@@ -708,19 +747,19 @@ public class TestTokenizer extends BaseT
@Test(expected = RiotParseException.class)
public void tokenLiteralLang_3()
{
- token("''@ lang ") ;
+ tokenFirst("''@ lang ") ;
}
@Test(expected = RiotParseException.class)
public void tokenLiteralLang_4()
{
- token("''@lang- ") ;
+ tokenFirst("''@lang- ") ;
}
@Test(expected = RiotParseException.class)
public void tokenLiteralLang_5()
{
- token("'abc'@- ") ;
+ tokenFirst("'abc'@- ") ;
}
@Test
@@ -738,7 +777,7 @@ public class TestTokenizer extends BaseT
@Test(expected = RiotParseException.class)
public void tokenLiteralLang_8()
{
- token("''@9-b") ;
+ tokenFirst("''@9-b") ;
}
@Test