You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@jena.apache.org by an...@apache.org on 2020/06/28 09:23:03 UTC

[jena] branch master updated: JENA-1924: Test for ucschar in tokenizer.

This is an automated email from the ASF dual-hosted git repository.

andy pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/jena.git


The following commit(s) were added to refs/heads/master by this push:
     new f24f970  JENA-1924: Test for ucschar in tokenizer.
     new 0f9cd55  Merge pull request #768 from afs/ucschar
f24f970 is described below

commit f24f9709861fac52c58ebb4fc45126db0e50d2c9
Author: Andy Seaborne <an...@apache.org>
AuthorDate: Thu Jun 25 18:56:16 2020 +0100

    JENA-1924: Test for ucschar in tokenizer.
---
 .../jena/atlas/json/io/parser/TokenizerJSON.java   |   2 +-
 .../main/java/org/apache/jena/riot/RDFParser.java  |  10 +-
 .../org/apache/jena/riot/lang/RiotParsers.java     |   9 +-
 .../jena/riot/system/ErrorHandlerFactory.java      |  35 +++-
 .../org/apache/jena/riot/system/RiotChars.java     |   3 +-
 .../jena/riot/system/stream/LocationMapper.java    |   6 +
 .../jena/riot/system/stream/StreamManager.java     |   5 +
 .../jena/riot/tokens/ErrorHandlerTokenizer.java    |  38 ++++
 .../java/org/apache/jena/riot/tokens/Token.java    |   7 +-
 .../jena/riot/tokens/TokenizeTextBuilder.java      | 126 +++++++++++++
 .../apache/jena/riot/tokens/TokenizerFactory.java  |  63 ++++---
 .../org/apache/jena/riot/tokens/TokenizerText.java | 196 ++++++++++++---------
 .../jena/riot/lang/AbstractTestLangNTuples.java    |   2 +-
 .../org/apache/jena/riot/lang/TestLangTrig.java    |   8 +-
 .../org/apache/jena/riot/lang/TestLangTurtle.java  |   7 +-
 .../org/apache/jena/riot/tokens/TestTokenizer.java |  12 +-
 jena-arq/testing/RIOT/Lang/Changes                 |  15 ++
 jena-arq/testing/RIOT/Lang/TrigStd/manifest.ttl    |   5 +-
 jena-arq/testing/RIOT/Lang/TurtleStd/manifest.ttl  |   5 +-
 .../java/org/apache/jena/util/FileManagerImpl.java |   5 +-
 .../main/java/org/apache/jena/dboe/sys/Sys.java    |   6 +-
 .../java/org/apache/jena/tdb2/sys/SystemTDB.java   |  36 +---
 .../test/java/org/apache/jena/tdb2/ConfigTest.java |   4 +-
 .../java/org/apache/jena/fuseki/TestAdminAPI.java  |   3 +-
 .../main/java/org/apache/jena/iri/impl/Parser.java |  24 ++-
 25 files changed, 431 insertions(+), 201 deletions(-)

diff --git a/jena-arq/src/main/java/org/apache/jena/atlas/json/io/parser/TokenizerJSON.java b/jena-arq/src/main/java/org/apache/jena/atlas/json/io/parser/TokenizerJSON.java
index 13eea4c..780822b 100644
--- a/jena-arq/src/main/java/org/apache/jena/atlas/json/io/parser/TokenizerJSON.java
+++ b/jena-arq/src/main/java/org/apache/jena/atlas/json/io/parser/TokenizerJSON.java
@@ -105,7 +105,7 @@ public class TokenizerJSON implements Tokenizer
     public void remove()
     { throw new UnsupportedOperationException() ; }
 
-    // ---- Machinary
+    // ---- Machinery
     
     // ""-string, ''-string, *X, 
     // various single characters . , : ; 
diff --git a/jena-arq/src/main/java/org/apache/jena/riot/RDFParser.java b/jena-arq/src/main/java/org/apache/jena/riot/RDFParser.java
index e9318de..5794c65 100644
--- a/jena-arq/src/main/java/org/apache/jena/riot/RDFParser.java
+++ b/jena-arq/src/main/java/org/apache/jena/riot/RDFParser.java
@@ -375,11 +375,10 @@ public class RDFParser {
         }
         
         TypedInputStream in;
+        // Need more control than LocatorURL provides to get the Accept header in and the HttpClient.
+        // So map now.
         urlStr = streamManager.mapURI(urlStr);
         if ( urlStr.startsWith("http://") || urlStr.startsWith("https://") ) {
-            // Need more control than LocatorURL provides. We could use it for the
-            // httpClient == null case.
-            //  
             // HttpOp.execHttpGet(,acceptHeader,) overrides the HttpClient default setting.
             // 
             // If there is an explicitly set HttpClient use that as given, and do not override
@@ -388,8 +387,9 @@ public class RDFParser {
             String acceptHeader = 
                 ( httpClient == null ) ? WebContent.defaultRDFAcceptHeader : null; 
             in = HttpOp.execHttpGet(urlStr, acceptHeader, httpClient, null);
-        } else { 
-            in = streamManager.open(urlStr);
+        } else {
+            // Already mapped.
+            in = streamManager.openNoMapOrNull(urlStr);
         }
         if ( in == null )
             throw new RiotNotFoundException("Not found: "+urlStr);
diff --git a/jena-arq/src/main/java/org/apache/jena/riot/lang/RiotParsers.java b/jena-arq/src/main/java/org/apache/jena/riot/lang/RiotParsers.java
index 8e8df94..56d4035 100644
--- a/jena-arq/src/main/java/org/apache/jena/riot/lang/RiotParsers.java
+++ b/jena-arq/src/main/java/org/apache/jena/riot/lang/RiotParsers.java
@@ -53,8 +53,7 @@ public class RiotParsers {
             Tokenizer tokenizer = new TokenizerJSON(PeekReader.makeUTF8(input));
             return createParserRdfJson(tokenizer, dest, profile);
         }
-
-        Tokenizer tokenizer = TokenizerFactory.makeTokenizerUTF8(input);
+        Tokenizer tokenizer = TokenizerFactory.makeTokenizerUTF8(input, profile.getErrorHandler());
         if ( RDFLanguages.sameLang(TURTLE, lang) || RDFLanguages.sameLang(N3,  lang) ) 
             return createParserTurtle(tokenizer, dest, profile);
         if ( RDFLanguages.sameLang(NTRIPLES, lang) )
@@ -74,7 +73,7 @@ public class RiotParsers {
         }
 
         @SuppressWarnings("deprecation")
-        Tokenizer tokenizer = TokenizerFactory.makeTokenizer(input);
+        Tokenizer tokenizer = TokenizerFactory.makeTokenizer(input, profile.getErrorHandler());
         if ( RDFLanguages.sameLang(TURTLE, lang) || RDFLanguages.sameLang(N3,  lang) ) 
             return createParserTurtle(tokenizer, dest, profile);
         if ( RDFLanguages.sameLang(NTRIPLES, lang) )
@@ -121,7 +120,7 @@ public class RiotParsers {
     /** Create an iterator for parsing N-Triples. */
     public static Iterator<Triple> createIteratorNTriples(InputStream input, StreamRDF dest, ParserProfile profile) {
         // LangNTriples supports iterator use.
-        Tokenizer tokenizer = TokenizerFactory.makeTokenizerUTF8(input);
+        Tokenizer tokenizer = TokenizerFactory.makeTokenizerUTF8(input, profile.getErrorHandler());
         return createParserNTriples(tokenizer, null, profile);
     }
 
@@ -133,7 +132,7 @@ public class RiotParsers {
     /** Create an iterator for parsing N-Quads. */
     public static Iterator<Quad> createIteratorNQuads(InputStream input, StreamRDF dest, ParserProfile profile) {
         // LangNQuads supports iterator use.
-        Tokenizer tokenizer = TokenizerFactory.makeTokenizerUTF8(input);
+        Tokenizer tokenizer = TokenizerFactory.makeTokenizerUTF8(input, profile.getErrorHandler());
         return createParserNQuads(tokenizer, null,  profile);
     }
 }
diff --git a/jena-arq/src/main/java/org/apache/jena/riot/system/ErrorHandlerFactory.java b/jena-arq/src/main/java/org/apache/jena/riot/system/ErrorHandlerFactory.java
index 8c9bd3e..e5c1633 100644
--- a/jena-arq/src/main/java/org/apache/jena/riot/system/ErrorHandlerFactory.java
+++ b/jena-arq/src/main/java/org/apache/jena/riot/system/ErrorHandlerFactory.java
@@ -72,8 +72,13 @@ public class ErrorHandlerFactory
      * An error handler that throws a {@link RiotParseException}, hence it
      * exposes the details of errors.
      */
-    public static ErrorHandler errorHandlerDetailed()         { return new ErrorHandlerRiotParseException() ; }
+    public static ErrorHandler errorHandlerDetailed()           { return new ErrorHandlerRiotParseErrors() ; }
 
+    /**
+     * An error handler that throws exceptions in all cases.
+     */
+    public static ErrorHandler errorHandlerExceptions()        { return new ErrorHandlerRiotParseException() ; }
+    
     private static ErrorHandler defaultErrorHandler = errorHandlerStd ;
     /** Get the current default error handler */
     public static ErrorHandler getDefaultErrorHandler() { return defaultErrorHandler ; }
@@ -130,8 +135,9 @@ public class ErrorHandlerFactory
 
         /** report a warning */
         @Override
-        public void warning(String message, long line, long col)
-        { logWarning(message, line, col) ; }
+        public void warning(String message, long line, long col) {
+            logWarning(message, line, col);
+        }
 
         /** report an error */
         @Override
@@ -304,8 +310,10 @@ public class ErrorHandlerFactory
     }
 
     /** An error handler that throws a RiotParseException, hence it exposes the details of errors. */
-    private static class ErrorHandlerRiotParseException implements ErrorHandler {
-        public ErrorHandlerRiotParseException() {}
+    private static class ErrorHandlerRiotParseErrors implements ErrorHandler {
+
+        public ErrorHandlerRiotParseErrors() {}
+
         @Override public void warning(String message, long line, long col) { }
 
         @Override public void error(String message, long line, long col) {
@@ -316,5 +324,22 @@ public class ErrorHandlerFactory
             throw new RiotParseException(message, line, col);
         }
     }
+    
+    /** An error handler that throws a RiotParseException in all cases. */
+    private static class ErrorHandlerRiotParseException implements ErrorHandler {
+        
+        public ErrorHandlerRiotParseException() {}
+        
+        @Override public void warning(String message, long line, long col) {
+            throw new RiotParseException(message, line, col);
+        }
 
+        @Override public void error(String message, long line, long col) {
+            throw new RiotParseException(message, line, col);
+        }
+
+        @Override public void fatal(String message, long line, long col) {
+            throw new RiotParseException(message, line, col);
+        }
+    }
 }
diff --git a/jena-arq/src/main/java/org/apache/jena/riot/system/RiotChars.java b/jena-arq/src/main/java/org/apache/jena/riot/system/RiotChars.java
index 4255384..449c52d 100644
--- a/jena-arq/src/main/java/org/apache/jena/riot/system/RiotChars.java
+++ b/jena-arq/src/main/java/org/apache/jena/riot/system/RiotChars.java
@@ -141,7 +141,8 @@ Notes: PN_CHARS_BASE has a hole above #xD800 -- these are the  surrogate pairs
 
     private static boolean r(int ch, int a, int b) { return ( ch >= a && ch <= b ); }
 
-    public static boolean range(int ch, char a, char b) {
+    /** Test whether a codepoint is in a given range (both ends inclusive) */
+    public static boolean range(int ch, int a, int b) {
         return (ch >= a && ch <= b);
     }
 }
diff --git a/jena-arq/src/main/java/org/apache/jena/riot/system/stream/LocationMapper.java b/jena-arq/src/main/java/org/apache/jena/riot/system/stream/LocationMapper.java
index 1412e91..70817e9 100644
--- a/jena-arq/src/main/java/org/apache/jena/riot/system/stream/LocationMapper.java
+++ b/jena-arq/src/main/java/org/apache/jena/riot/system/stream/LocationMapper.java
@@ -68,6 +68,10 @@ public class LocationMapper
         this.altPrefixes.putAll(lmap2.altPrefixes) ;
     }
 
+    public boolean containsMapping(String uri) {
+        return altMapping(uri, null) != null;
+    }
+
     public String altMapping(String uri) {
         return altMapping(uri, uri) ;
     }
@@ -82,6 +86,8 @@ public class LocationMapper
      * @return The alternative location chosen
      */
     public String altMapping(String uri, String otherwise) {
+        if ( altLocations.isEmpty() && altPrefixes.isEmpty() )
+            return otherwise;
         if ( altLocations.containsKey(uri) )
             return altLocations.get(uri) ;
         String newStart = null ;
diff --git a/jena-arq/src/main/java/org/apache/jena/riot/system/stream/StreamManager.java b/jena-arq/src/main/java/org/apache/jena/riot/system/stream/StreamManager.java
index 2553a5c..ff6aa4e 100644
--- a/jena-arq/src/main/java/org/apache/jena/riot/system/stream/StreamManager.java
+++ b/jena-arq/src/main/java/org/apache/jena/riot/system/stream/StreamManager.java
@@ -132,6 +132,11 @@ public class StreamManager {
         return openNoMapOrNull(uri) ;
     }
 
+    /** Test whether a mapping exists */
+    public boolean hasMapping(String filenameOrURI) {
+        return mapper.containsMapping(filenameOrURI);
+    }
+
     /** Apply the mapping of a filename or URI */
     public String mapURI(String filenameOrURI) {
         if ( mapper == null )
diff --git a/jena-arq/src/main/java/org/apache/jena/riot/tokens/ErrorHandlerTokenizer.java b/jena-arq/src/main/java/org/apache/jena/riot/tokens/ErrorHandlerTokenizer.java
new file mode 100644
index 0000000..e48561a
--- /dev/null
+++ b/jena-arq/src/main/java/org/apache/jena/riot/tokens/ErrorHandlerTokenizer.java
@@ -0,0 +1,38 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.jena.riot.tokens;
+
+import org.apache.jena.riot.RiotParseException;
+import org.apache.jena.riot.system.ErrorHandler;
+
+public class ErrorHandlerTokenizer implements ErrorHandler {
+    @Override public void warning(String message, long line, long col) {
+        // Warning/continue.
+        //ErrorHandlerFactory.errorHandlerStd.warning(message, line, col);
+        throw new RiotParseException(message, line, col);
+    }
+
+    @Override public void error(String message, long line, long col) {
+        throw new RiotParseException(message, line, col);
+    }
+
+    @Override public void fatal(String message, long line, long col) {
+        throw new RiotParseException(message, line, col);
+    }
+}
diff --git a/jena-arq/src/main/java/org/apache/jena/riot/tokens/Token.java b/jena-arq/src/main/java/org/apache/jena/riot/tokens/Token.java
index d01cc9d..bd11ce0 100644
--- a/jena-arq/src/main/java/org/apache/jena/riot/tokens/Token.java
+++ b/jena-arq/src/main/java/org/apache/jena/riot/tokens/Token.java
@@ -26,7 +26,6 @@ import java.util.ArrayList ;
 import java.util.List ;
 import java.util.Objects ;
 
-import org.apache.jena.atlas.io.PeekReader ;
 import org.apache.jena.atlas.iterator.Iter ;
 import org.apache.jena.atlas.lib.Pair ;
 import org.apache.jena.datatypes.RDFDatatype ;
@@ -105,8 +104,7 @@ public final class Token
 
     static Token create(String s)
     {
-        PeekReader pr = PeekReader.readString(s) ;
-        TokenizerText tt = new TokenizerText(pr) ;
+        Tokenizer tt = TokenizerText.create().fromString(s).build();
         if ( ! tt.hasNext() )
             throw new RiotException("No token") ;
         Token t = tt.next() ;
@@ -117,8 +115,7 @@ public final class Token
 
     static Iter<Token> createN(String s)
     {
-        PeekReader pr = PeekReader.readString(s) ;
-        TokenizerText tt = new TokenizerText(pr) ;
+        Tokenizer tt = TokenizerText.create().fromString(s).build();
         List<Token> x = new ArrayList<>() ;
         while(tt.hasNext())
             x.add(tt.next()) ;
diff --git a/jena-arq/src/main/java/org/apache/jena/riot/tokens/TokenizeTextBuilder.java b/jena-arq/src/main/java/org/apache/jena/riot/tokens/TokenizeTextBuilder.java
new file mode 100644
index 0000000..985294d
--- /dev/null
+++ b/jena-arq/src/main/java/org/apache/jena/riot/tokens/TokenizeTextBuilder.java
@@ -0,0 +1,126 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.jena.riot.tokens;
+
+import java.io.InputStream;
+import java.io.Reader;
+
+import org.apache.jena.atlas.io.PeekReader;
+import org.apache.jena.atlas.lib.InternalErrorException;
+import org.apache.jena.riot.system.ErrorHandler;
+import org.apache.jena.riot.system.ErrorHandlerFactory;
+
+/** Builder for TokenizerText */
+public class TokenizeTextBuilder {
+    
+    // One of these.
+    private PeekReader   peekReader   = null;
+    private InputStream  input        = null;
+    private Reader       reader       = null;
+    private String       string       = null;
+    
+    private boolean      lineMode     = false;
+    private boolean      utf8         = true;
+    private ErrorHandler errorHandler = null;
+
+    TokenizeTextBuilder() {}
+
+    private void clearInput() {
+        this.peekReader = null;
+        this.input = null;
+        this.reader = null;
+        this.string = null;
+    }
+
+    public TokenizeTextBuilder source(InputStream input) {
+        clearInput();
+        this.input = input;
+        return this;
+    }
+
+    public TokenizeTextBuilder source(Reader reader) {
+        clearInput();
+        this.reader = reader;
+        return this;
+    }
+
+    public TokenizeTextBuilder source(PeekReader peekReader) {
+        clearInput();
+        this.peekReader = peekReader;
+        return this;
+    }
+
+    public TokenizeTextBuilder fromString(String string) {
+        clearInput();
+        this.string = string;
+        return this;
+    }
+
+    public TokenizeTextBuilder lineMode(boolean lineMode) {
+        this.lineMode = lineMode;
+        return this;
+    }
+
+    public TokenizeTextBuilder asciiOnly(boolean asciiOnly) {
+        this.utf8 = !asciiOnly;
+        return this;
+    }
+
+    public TokenizeTextBuilder errorHandler(ErrorHandler errorHandler) {
+        this.errorHandler = errorHandler;
+        return this;
+    }
+
+    private static int countNulls(Object ... objs) {
+        int x = 0;
+        for ( Object obj : objs )
+            if ( obj == null )
+                x++;
+        return x;
+    }
+
+    private static int countNotNulls(Object ... objs) {
+        int x = 0;
+        for ( Object obj : objs )
+            if ( obj != null )
+                x++;
+        return x;
+    }
+
+    public Tokenizer build() {
+        ErrorHandler errHandler = (errorHandler != null) ? errorHandler : ErrorHandlerFactory.errorHandlerExceptions();
+        int x = countNotNulls(peekReader, input, reader, string);
+        if ( x > 1 )
+            throw new InternalErrorException("Too many data sources");
+        PeekReader pr;
+        if ( input != null ) {
+            pr = utf8 ? PeekReader.makeUTF8(input) : PeekReader.makeASCII(input);
+        } else if ( string != null ) {
+            pr = PeekReader.readString(string);
+        } else if ( reader != null ) {
+            pr = PeekReader.make(reader);
+        } else if ( peekReader != null ) {
+            pr = peekReader;
+        } else {
+            throw new IllegalStateException("No data source");
+        }
+
+        return TokenizerText.internal(pr, lineMode, errHandler);
+    }
+}
diff --git a/jena-arq/src/main/java/org/apache/jena/riot/tokens/TokenizerFactory.java b/jena-arq/src/main/java/org/apache/jena/riot/tokens/TokenizerFactory.java
index 222eb5b..ef2566ab 100644
--- a/jena-arq/src/main/java/org/apache/jena/riot/tokens/TokenizerFactory.java
+++ b/jena-arq/src/main/java/org/apache/jena/riot/tokens/TokenizerFactory.java
@@ -16,55 +16,62 @@
  * limitations under the License.
  */
 
-package org.apache.jena.riot.tokens ;
+package org.apache.jena.riot.tokens;
 
-import java.io.ByteArrayInputStream ;
-import java.io.InputStream ;
-import java.io.Reader ;
-import java.io.StringReader ;
+import java.io.InputStream;
+import java.io.Reader;
+import java.io.StringReader;
 
-import org.apache.jena.atlas.io.PeekReader ;
-import org.apache.jena.atlas.lib.StrUtils ;
+import org.apache.jena.riot.system.ErrorHandler;
 
 public class TokenizerFactory {
-    
+
+    private static ErrorHandler dftErrorHandler = null;
+
     /** Discouraged - be careful about character sets */
     @Deprecated
     public static Tokenizer makeTokenizer(Reader reader) {
-        PeekReader peekReader = PeekReader.make(reader) ;
-        Tokenizer tokenizer = new TokenizerText(peekReader) ;
-        return tokenizer ;
+        return TokenizerText.create().source(reader).build();
+    }
+
+    /** Discouraged - be careful about character sets */
+    @Deprecated
+    public static Tokenizer makeTokenizer(Reader reader, ErrorHandler errorHandler) {
+        return TokenizerText.create().source(reader).errorHandler(errorHandler).build();
     }
 
     /** Safe use of a StringReader */
     public static Tokenizer makeTokenizer(StringReader reader) {
-        PeekReader peekReader = PeekReader.make(reader) ;
-        Tokenizer tokenizer = new TokenizerText(peekReader) ;
-        return tokenizer ;
+        return TokenizerText.create().source(reader).build();
+    }
+
+    /** Safe use of a StringReader */
+    public static Tokenizer makeTokenizer(StringReader reader, ErrorHandler errorHandler) {
+        return TokenizerText.create().source(reader).errorHandler(errorHandler).build();
     }
 
     public static Tokenizer makeTokenizerUTF8(InputStream in) {
+        return makeTokenizerUTF8(in, dftErrorHandler);
+    }
+
+    public static Tokenizer makeTokenizerUTF8(InputStream input, ErrorHandler errorHandler) {
         // BOM will be removed
-        PeekReader peekReader = PeekReader.makeUTF8(in) ;
-        Tokenizer tokenizer = new TokenizerText(peekReader) ;
-        return tokenizer ;
+        return TokenizerText.create().source(input).errorHandler(errorHandler).build();
     }
 
-    public static Tokenizer makeTokenizerASCII(InputStream in) {
-        PeekReader peekReader = PeekReader.makeASCII(in) ;
-        Tokenizer tokenizer = new TokenizerText(peekReader) ;
-        return tokenizer ;
+    public static Tokenizer makeTokenizerASCII(InputStream input) {
+        return TokenizerText.create().source(input).asciiOnly(true).build();
     }
 
-    public static Tokenizer makeTokenizerASCII(String string) {
-        byte b[] = StrUtils.asUTF8bytes(string) ;
-        ByteArrayInputStream in = new ByteArrayInputStream(b) ;
-        return makeTokenizerASCII(in) ;
+    public static Tokenizer makeTokenizerASCII(InputStream input, ErrorHandler errorHandler) {
+        return TokenizerText.create().source(input).asciiOnly(true).errorHandler(errorHandler).build();
     }
 
     public static Tokenizer makeTokenizerString(String str) {
-        PeekReader peekReader = PeekReader.readString(str) ;
-        Tokenizer tokenizer = new TokenizerText(peekReader) ;
-        return tokenizer ;
+        return TokenizerText.create().fromString(str).build();
+    }
+
+    public static Tokenizer makeTokenizerString(String str, ErrorHandler errorHandler) {
+        return TokenizerText.create().fromString(str).errorHandler(errorHandler).build();
     }
 }
diff --git a/jena-arq/src/main/java/org/apache/jena/riot/tokens/TokenizerText.java b/jena-arq/src/main/java/org/apache/jena/riot/tokens/TokenizerText.java
index 5215edd..a862978 100644
--- a/jena-arq/src/main/java/org/apache/jena/riot/tokens/TokenizerText.java
+++ b/jena-arq/src/main/java/org/apache/jena/riot/tokens/TokenizerText.java
@@ -22,6 +22,7 @@ import static org.apache.jena.atlas.lib.Chars.*;
 import static org.apache.jena.riot.system.RiotChars.*;
 
 import java.util.NoSuchElementException;
+import java.util.Objects;
 
 import org.apache.jena.atlas.AtlasException;
 import org.apache.jena.atlas.io.IO;
@@ -33,53 +34,40 @@ import org.apache.jena.riot.system.RiotChars;
 import org.apache.jena.sparql.ARQInternalErrorException;
 
 /** Tokenizer for all sorts of things RDF-ish */
-
 public final class TokenizerText implements Tokenizer
 {
     // Drop through to final general symbol/keyword reader, including <=, !=
     // Care with <=
     // Policy driven for CURIES?
 
-    // Various allow/deny options (via checker?)
-
-    // Space for CURIEs, stricter Turtle QNames, sane Turtle (i.e. leading digits in local part).
     public static final int CTRL_CHAR = CH_STAR;
 
-    public static boolean Checking = false;
+    // The code has the call points for checking tokens but it is generally better to
+    // do the check later in the parsing process. In case a need arises, the code
+    // remains, all compiled away by "if ( false )" (javac does not generate any
+    // bytecodes and even if it did, JIT will remove dead branches).
+    private static final boolean Checking = false;
 
     private Token token = null;
     private final StringBuilder stringBuilder = new StringBuilder(200);
     private final PeekReader reader;
-    private final boolean lineMode;        // Whether whitespace includes or excludes NL (in its various forms).
+    // Whether whitespace between tokens includes newlines (in various forms).
+    private final boolean lineMode;        
     private boolean finished = false;
     private TokenChecker checker = null;
 
-    private static class ErrorHandlerTokenizer implements ErrorHandler {
-        @Override public void warning(String message, long line, long col) {
-            // Warning/continue.
-            //ErrorHandlerFactory.errorHandlerStd.warning(message, line, col);
-            throw new RiotParseException(message, line, col);
-        }
-
-        @Override public void error(String message, long line, long col) {
-            throw new RiotParseException(message, line, col);
-        }
-
-        @Override public void fatal(String message, long line, long col) {
-            throw new RiotParseException(message, line, col);
-        }
-    };
     // The code assumes that errors throw exception and so stop parsing.
-    private static final ErrorHandler defaultErrorHandler = new ErrorHandlerTokenizer();
-    private ErrorHandler errorHandler = defaultErrorHandler;
+    private final ErrorHandler errorHandler;
 
-    /*package*/ TokenizerText(PeekReader reader) {
-        this(reader, false);
+    public static TokenizeTextBuilder create() { return new TokenizeTextBuilder() ; } 
+    
+    /*package*/ static TokenizerText internal(PeekReader reader, boolean lineMode, ErrorHandler errorHandler) {
+        return new TokenizerText(reader, lineMode, errorHandler);
     }
-
-    /*package*/ TokenizerText(PeekReader reader, boolean lineMode) {
-        this.reader = reader;
+    private TokenizerText(PeekReader reader, boolean lineMode, ErrorHandler errorHandler) {
+        this.reader = Objects.requireNonNull(reader, "PeekReader");
         this.lineMode = lineMode;
+        this.errorHandler = Objects.requireNonNull(errorHandler, "ErrorHandler");
     }
 
     @Override
@@ -114,7 +102,6 @@ public final class TokenizerText implements Tokenizer
         }
     }
 
-
     @Override
     public final boolean eof() {
         return !hasNext();
@@ -140,28 +127,28 @@ public final class TokenizerText implements Tokenizer
     public void remove()
     { throw new UnsupportedOperationException(); }
 
-    public TokenChecker getChecker() {
-        return checker;
-    }
-
-    public void setChecker(TokenChecker checker) {
-        this.checker = checker;
-    }
-
-    public ErrorHandler getErrorHandler() {
-        return errorHandler;
-    }
-
-    public void setErrorHandler(ErrorHandler handler) {
-        this.errorHandler = handler;
-    }
+//    private TokenChecker getChecker() {
+//        return checker;
+//    }
+//
+//    private void setChecker(TokenChecker checker) {
+//        this.checker = checker;
+//    }
+//
+//    private ErrorHandler getErrorHandler() {
+//        return errorHandler;
+//    }
+//
+//    private void setErrorHandler(ErrorHandler handler) {
+//        this.errorHandler = handler;
+//    }
 
     @Override
     public void close() {
         IO.close(reader);
     }
 
-    // ---- Machinary
+    // ---- Machinery
 
     private void skip() {
         int ch = EOF;
@@ -217,7 +204,7 @@ public final class TokenizerText implements Tokenizer
                 //token.setImage("<<");
                 return token;
             }
-            error("Internal error - parsed '"+chPeek+"' after '<'");
+            fatal("Internal error - parsed '"+chPeek+"' after '<'");
         }
 
         // ---- Literal
@@ -288,7 +275,7 @@ public final class TokenizerText implements Tokenizer
 
                 Token subToken = parseToken();
                 if ( !subToken.isIRI() )
-                    error("Datatype URI required after ^^ - URI or prefixed name expected");
+                    fatal("Datatype URI required after ^^ - URI or prefixed name expected");
 
                 mainToken.setSubToken2(subToken);
                 mainToken.setType(TokenType.LITERAL_DT);
@@ -385,14 +372,14 @@ public final class TokenizerText implements Tokenizer
             case CH_VBAR:       reader.readChar(); token.setType(TokenType.VBAR);      /*token.setImage(CH_VBAR);*/ return token;
             case CH_AMPHERSAND: reader.readChar(); token.setType(TokenType.AMPHERSAND);/*token.setImage(CH_AMPHERSAND);*/ return token;
             // Specials (if blank node processing off)
-            //case CH_COLON:      reader.readChar(); token.setType(TokenType.COLON); return token;
+            //case CH_COLON:      reader.readChar(); token.setType(TokenType.COLON); /*token.setImage(COLON);*/return token;
 
             // Done above with blank nodes.
-            //case CH_UNDERSCORE: reader.readChar(); token.setType(TokenType.UNDERSCORE); /*token.setImage(CH_UNDERSCORE);*/ return token;
-            case CH_LT:         reader.readChar(); token.setType(TokenType.LT); /*token.setImage(CH_LT);*/ return token;
-            case CH_STAR:       reader.readChar(); token.setType(TokenType.STAR); /*token.setImage(CH_STAR);*/ return token;
+            //case CH_UNDERSCORE: reader.readChar(); token.setType(TokenType.UNDERSCORE);/*token.setImage(CH_UNDERSCORE);*/ return token;
+            case CH_LT:         reader.readChar(); token.setType(TokenType.LT);        /*token.setImage(CH_LT);*/ return token;
+            case CH_STAR:       reader.readChar(); token.setType(TokenType.STAR);      /*token.setImage(CH_STAR);*/ return token;
 
-            // XXX Multi character symbols
+            // XXX Multi-character symbols
             // Two character tokens && || GE >= , LE <=
             //TokenType.LE
             //TokenType.GE
@@ -483,11 +470,11 @@ public final class TokenizerText implements Tokenizer
             int ch = reader.readChar();
             switch(ch) {
                 case EOF:
-                    error("Broken IRI (End of file)"); return null;
+                    fatal("Broken IRI (End of file)"); return null;
                 case NL:
-                    error("Broken IRI (newline): %s", stringBuilder.toString()); return null;
+                    fatal("Broken IRI (newline): %s", stringBuilder.toString()); return null;
                 case CR:
-                    error("Broken IRI (CR): %s", stringBuilder.toString()); return null;
+                    fatal("Broken IRI (CR): %s", stringBuilder.toString()); return null;
                 case CH_GT:
                     // Done!
                     return stringBuilder.toString();
@@ -503,8 +490,8 @@ public final class TokenizerText implements Tokenizer
                     // Bad characters will lead to trouble elsewhere.
                     break;
                 case CH_LT:
-                    // Probably a corrupt file so not a warning.
-                    error("Bad character in IRI (bad character: '<'): <%s[<]...>", stringBuilder.toString()); return null;
+                    // Probably a corrupt file so treat as fatal.
+                    fatal("Bad character in IRI (bad character: '<'): <%s[<]...>", stringBuilder.toString()); return null;
                 case TAB:
                     error("Bad character in IRI (Tab character): <%s[tab]...>", stringBuilder.toString()); return null;
                 case '{': case '}': case '"': case '|': case '^': case '`' :
@@ -513,27 +500,55 @@ public final class TokenizerText implements Tokenizer
                     break;
                 case SPC:
                     if ( ! AllowSpacesInIRI )
+                        error("Bad character in IRI (space): <%s[space]...>", stringBuilder.toString());
+                    else
                         warning("Bad character in IRI (space): <%s[space]...>", stringBuilder.toString());
                     break;
                 default:
                     if ( ch <= 0x19 )
                         warning("Illegal character in IRI (control char 0x%02X): <%s[0x%02X]...>", ch, stringBuilder.toString(), ch);
+                    
             }
+            // JENA-1924: jena-iri does not catch this.
+            if ( ! VeryVeryLaxIRI && ch >= 0xA0 && ! isUcsChar(ch) )
+                warning("Illegal character in IRI (Not a ucschar: 0x%04X): <%s[U+%04X]...>", ch, stringBuilder.toString(), ch);
             insertCodepoint(stringBuilder, ch);
         }
     }
 
+    private static boolean isUcsChar(int ch) {
+        // Is codepoint 'ch' in the RFC 3987 "ucschar" production? (range() is presumably inclusive at both ends - confirm.)
+        // ucschar    = %xA0-D7FF / %xF900-FDCF / %xFDF0-FFEF
+        //            / %x10000-1FFFD / %x20000-2FFFD / %x30000-3FFFD
+        //            / %x40000-4FFFD / %x50000-5FFFD / %x60000-6FFFD
+        //            / %x70000-7FFFD / %x80000-8FFFD / %x90000-9FFFD
+        //            / %xA0000-AFFFD / %xB0000-BFFFD / %xC0000-CFFFD
+        //            / %xD0000-DFFFD / %xE1000-EFFFD
+        boolean b = range(ch, 0xA0, 0xD7FF)  || range(ch, 0xF900, 0xFDCF)  || range(ch, 0xFDF0, 0xFFEF);
+        if ( b )
+            return true;
+        if ( ch < 0x10000 )
+            return false;
+        // Supplementary planes only: every range below starts at or above 0x10000.
+        return
+            range(ch, 0x10000, 0x1FFFD) || range(ch, 0x20000, 0x2FFFD) || range(ch, 0x30000, 0x3FFFD) ||
+            range(ch, 0x40000, 0x4FFFD) || range(ch, 0x50000, 0x5FFFD) || range(ch, 0x60000, 0x6FFFD) ||
+            range(ch, 0x70000, 0x7FFFD) || range(ch, 0x80000, 0x8FFFD) || range(ch, 0x90000, 0x9FFFD) ||
+            range(ch, 0xA0000, 0xAFFFD) || range(ch, 0xB0000, 0xBFFFD) || range(ch, 0xC0000, 0xCFFFD) ||
+            range(ch, 0xD0000, 0xDFFFD) || range(ch, 0xE1000, 0xEFFFD);
+    }
+    
     // Read a unicode escape : does not allow \\ bypass
     private final int readUnicodeEscape() {
         int ch = reader.readChar();
         if ( ch == EOF )
-            error("Broken escape sequence");
+            fatal("Broken escape sequence");
 
         switch (ch) {
             case 'u': return readUnicode4Escape();
             case 'U': return readUnicode8Escape();
             default:
-                error("Illegal unicode escape sequence value: \\%c (0x%02X)", ch, ch);
+                fatal("Illegal unicode escape sequence value: \\%c (0x%02X)", ch, ch);
         }
         return 0;
     }
@@ -556,7 +571,7 @@ public final class TokenizerText implements Tokenizer
         // If we made no progress, nothing found, not even a keyword -- it's an
         // error.
         if ( posn == reader.getPosition() )
-            error("Failed to find a prefix name or keyword: %c(%d;0x%04X)", ch, ch, ch);
+            fatal("Failed to find a prefix name or keyword: %c(%d;0x%04X)", ch, ch, ch);
 
         if ( Checking )
             checkKeyword(token.getImage());
@@ -681,13 +696,13 @@ public final class TokenizerText implements Tokenizer
 
             ch = reader.peekChar();
             if ( ! isHexChar(ch) )
-                error("Not a hex character: '%c'",ch);
+                fatal("Not a hex character: '%c'",ch);
             stringBuilder.append((char)ch);
             reader.readChar();
 
             ch = reader.peekChar();
             if ( ! isHexChar(ch) )
-                error("Not a hex character: '%c'",ch);
+                fatal("Not a hex character: '%c'",ch);
             stringBuilder.append((char)ch);
             reader.readChar();
         }
@@ -713,11 +728,11 @@ public final class TokenizerText implements Tokenizer
             int ch = reader.readChar();
             if ( ch == EOF ) {
                 // if ( endNL ) return stringBuilder.toString();
-                error("Broken token: " + stringBuilder.toString(), y, x);
+                fatal("Broken token: " + stringBuilder.toString(), y, x);
             }
 
             if ( ch == NL )
-                error("Broken token (newline): " + stringBuilder.toString(), y, x);
+                fatal("Broken token (newline): " + stringBuilder.toString(), y, x);
 
             if ( ch == endCh ) {
                 return stringBuilder.toString();
@@ -736,7 +751,7 @@ public final class TokenizerText implements Tokenizer
             if ( ch == EOF ) {
                 if ( endNL )
                     return stringBuilder.toString();
-                error("Broken long string");
+                fatal("Broken long string");
             }
 
             if ( ch == quoteChar ) {
@@ -828,14 +843,14 @@ public final class TokenizerText implements Tokenizer
         {
             int ch = reader.peekChar();
             if ( ch == EOF )
-                error("Blank node label missing (EOF found)");
+                fatal("Blank node label missing (EOF found)");
             if ( isWhitespace(ch) )
-                error("Blank node label missing");
+                fatal("Blank node label missing");
             // if ( ! isAlpha(ch) && ch != '_' )
             // Not strict
 
             if ( !RiotChars.isPNChars_U_N(ch) )
-                error("Blank node label does not start with alphabetic or _ :" + (char)ch);
+                fatal("Blank node label does not start with alphabetic or _ :" + (char)ch);
             reader.readChar();
             stringBuilder.append((char)ch);
         }
@@ -936,7 +951,7 @@ public final class TokenizerText implements Tokenizer
         if ( x == 0 && !isDecimal )
             // Possible a tokenizer error - should not have entered readNumber
             // in the first place.
-            error("Unrecognized as number");
+            fatal("Unrecognized as number");
 
         if ( exponent(stringBuilder) ) {
             isDouble = true;
@@ -975,7 +990,7 @@ public final class TokenizerText implements Tokenizer
             x++;
         }
         if ( x == 0 )
-            error("No hex characters after " + sb.toString());
+            fatal("No hex characters after " + sb.toString());
     }
 
     private int readDigits(StringBuilder buffer) {
@@ -1033,7 +1048,7 @@ public final class TokenizerText implements Tokenizer
         readPossibleSign(sb);
         int x = readDigits(sb);
         if ( x == 0 )
-            error("Malformed double: " + sb);
+            fatal("Malformed double: " + sb);
         return true;
     }
 
@@ -1041,7 +1056,7 @@ public final class TokenizerText implements Tokenizer
         stringBuilder.setLength(0);
         a2z(stringBuilder);
         if ( stringBuilder.length() == 0 )
-            error("Bad language tag");
+            fatal("Bad language tag");
         for (;;) {
             int ch = reader.peekChar();
             if ( ch == '-' ) {
@@ -1050,7 +1065,7 @@ public final class TokenizerText implements Tokenizer
                 int x = stringBuilder.length();
                 a2zN(stringBuilder);
                 if ( stringBuilder.length() == x )
-                    error("Bad language tag");
+                    fatal("Bad language tag");
             } else
                 break;
         }
@@ -1087,7 +1102,7 @@ public final class TokenizerText implements Tokenizer
             // Convert to UTF-16. Note that the rest of any system this is used
             // in must also respect codepoints and surrogate pairs.
             if ( !Character.isDefined(ch) && !Character.isSupplementaryCodePoint(ch) )
-                error("Illegal codepoint: 0x%04X", ch);
+                fatal("Illegal codepoint: 0x%04X", ch);
             char[] chars = Character.toChars(ch);
             buffer.append(chars);
         }
@@ -1165,7 +1180,7 @@ public final class TokenizerText implements Tokenizer
     private final int readLiteralEscape() {
         int c = reader.readChar();
         if ( c == EOF )
-            error("Escape sequence not completed");
+            fatal("Escape sequence not completed");
 
         switch (c) {
             case 'n':   return NL;
@@ -1179,7 +1194,7 @@ public final class TokenizerText implements Tokenizer
             case 'u':   return readUnicode4Escape();
             case 'U':   return readUnicode8Escape();
             default:
-                error("Illegal escape sequence value: %c (0x%02X)", c, c);
+                fatal("Illegal escape sequence value: %c (0x%02X)", c, c);
                 return 0;
         }
     }
@@ -1191,7 +1206,7 @@ public final class TokenizerText implements Tokenizer
 
         int c = reader.readChar();
         if ( c == EOF )
-            error("Escape sequence not completed");
+            fatal("Escape sequence not completed");
 
         switch (c) {
             case '_': case '~': case '.':  case '-':  case '!':  case '$':  case '&':
@@ -1200,7 +1215,7 @@ public final class TokenizerText implements Tokenizer
             case '=':  case '/':  case '?':  case '#':  case '@':  case '%':
                 return c;
             default:
-                error("illegal character escape value: \\%c", c);
+                fatal("illegal character escape value: \\%c", c);
                 return 0;
         }
     }
@@ -1211,7 +1226,7 @@ public final class TokenizerText implements Tokenizer
     private final int readUnicode8Escape() {
         int ch8 = readHexSequence(8);
         if ( ch8 > Character.MAX_CODE_POINT )
-            error("Illegal code point in \\U sequence value: 0x%08X", ch8);
+            fatal("Illegal code point in \\U sequence value: 0x%08X", ch8);
         return ch8;
     }
 
@@ -1229,12 +1244,12 @@ public final class TokenizerText implements Tokenizer
     private final int readHexChar() {
         int ch = reader.readChar();
         if ( ch == EOF )
-            error("Not a hexadecimal character (end of file)");
+            fatal("Not a hexadecimal character (end of file)");
 
         int x = valHexChar(ch);
         if ( x != -1 )
             return x;
-        error("Not a hexadecimal character: " + (char)ch);
+        fatal("Not a hexadecimal character: " + (char)ch);
         return -1;
     }
 
@@ -1242,12 +1257,12 @@ public final class TokenizerText implements Tokenizer
         for (int i = 0; i < str.length(); i++) {
             char want = str.charAt(i);
             if ( reader.eof() ) {
-                error("End of input during expected string: " + str);
+                fatal("End of input during expected string: " + str);
                 return false;
             }
             int inChar = reader.peekChar();
             if ( inChar != want ) {
-                error("expected \"" + str + "\"");
+                fatal("expected \"" + str + "\"");
                 return false;
             }
             reader.readChar();
@@ -1255,17 +1270,28 @@ public final class TokenizerText implements Tokenizer
         return true;
     }
 
+    /** Warning - can continue. */ 
     private void warning(String message, Object... args) {
         String msg = String.format(message, args);
         errorHandler.warning(msg, reader.getLineNum(), reader.getColNum());
-        //exception(message, args);
     }
 
+    /** Error - at the tokenizer level, it can continue (with some junk) but it is a serious error and the   
+     * caller probably should treat as an error and stop.
+     * @param message
+     * @param args
+     */
     private void error(String message, Object... args) {
         String msg = String.format(message, args);
+        errorHandler.error(msg, reader.getLineNum(), reader.getColNum());
+    }
+
+    /** Structural error - unrecoverable - but reported as ERROR (FATAL can imply system fault) */
+    private void fatal(String message, Object... args) {
+        String msg = String.format(message, args);
         long line = reader.getLineNum();
         long col = reader.getColNum();
-        errorHandler.error(msg, line, col);
+        errorHandler.fatal(msg, line, col);
         // We require that errors cause the tokenizer to stop so in case the
         // provided error handler does not, we throw an exception.
         throw new RiotParseException(message, line, col);
diff --git a/jena-arq/src/test/java/org/apache/jena/riot/lang/AbstractTestLangNTuples.java b/jena-arq/src/test/java/org/apache/jena/riot/lang/AbstractTestLangNTuples.java
index d9fd15a..3e6584e 100644
--- a/jena-arq/src/test/java/org/apache/jena/riot/lang/AbstractTestLangNTuples.java
+++ b/jena-arq/src/test/java/org/apache/jena/riot/lang/AbstractTestLangNTuples.java
@@ -131,7 +131,7 @@ abstract public class AbstractTestLangNTuples
     }
 
     // Bad terms - but accepted by default.
-    @Test(expected = ExFatal.class)
+    @Test(expected = ExError.class)
     public void tuple_bad_10() {
         parseCount("<x> <p> <bad uri> .");
     }
diff --git a/jena-arq/src/test/java/org/apache/jena/riot/lang/TestLangTrig.java b/jena-arq/src/test/java/org/apache/jena/riot/lang/TestLangTrig.java
index abd4a4d..ccc2ffa 100644
--- a/jena-arq/src/test/java/org/apache/jena/riot/lang/TestLangTrig.java
+++ b/jena-arq/src/test/java/org/apache/jena/riot/lang/TestLangTrig.java
@@ -22,7 +22,7 @@ import static org.junit.Assert.assertEquals;
 
 import org.apache.jena.graph.Triple ;
 import org.apache.jena.riot.ErrorHandlerTestLib ;
-import org.apache.jena.riot.ErrorHandlerTestLib.ExFatal ;
+import org.apache.jena.riot.ErrorHandlerTestLib.ExError;
 import org.apache.jena.riot.ErrorHandlerTestLib.ExWarning ;
 import org.apache.jena.riot.Lang ;
 import org.apache.jena.sparql.core.DatasetGraph ;
@@ -67,13 +67,13 @@ public class TestLangTrig
     // Also need to check that the RiotExpection is called in normal use. 
     
     // Bad terms.
-    @Test (expected=ExFatal.class)
+    @Test (expected=ExError.class)
     public void trig_20()     { parse("@prefix ex:  <bad iri> .", "{ ex:s ex:p 123 }") ; }
     
-    @Test (expected=ExFatal.class)
+    @Test (expected=ExError.class)
     public void trig_21()     { parse("@prefix ex:  <http://example/> .", "{ ex:s <http://example/broken p> 123 }") ; }
     
-    @Test (expected=ExFatal.class)
+    @Test (expected=ExError.class)
     public void trig_22()     { parse("{ <x> <p> 'number'^^<bad uri> }") ; }
 
     @Test (expected=ExWarning.class)
diff --git a/jena-arq/src/test/java/org/apache/jena/riot/lang/TestLangTurtle.java b/jena-arq/src/test/java/org/apache/jena/riot/lang/TestLangTurtle.java
index b4bb87b..f7c66f0 100644
--- a/jena-arq/src/test/java/org/apache/jena/riot/lang/TestLangTurtle.java
+++ b/jena-arq/src/test/java/org/apache/jena/riot/lang/TestLangTurtle.java
@@ -33,6 +33,7 @@ import org.apache.jena.rdf.model.Model ;
 import org.apache.jena.rdf.model.ModelFactory ;
 import org.apache.jena.rdf.model.Property ;
 import org.apache.jena.rdf.model.Resource ;
+import org.apache.jena.riot.ErrorHandlerTestLib.ExError;
 import org.apache.jena.riot.ErrorHandlerTestLib.ExFatal ;
 import org.apache.jena.riot.ErrorHandlerTestLib.ExWarning ;
 import org.apache.jena.riot.Lang ;
@@ -157,7 +158,7 @@ public class TestLangTurtle
     @Test(expected=ExFatal.class)
     public void errorBadDatatype()          { parse("<p> <p> 'q'^^.") ; }
     
-    @Test(expected=ExFatal.class)
+    @Test(expected=ExError.class)
     public void errorBadURI_1()
     { parse("<http://example/a b> <http://example/p> 123 .") ; }
 
@@ -171,10 +172,10 @@ public class TestLangTurtle
     { parse("<http://example/a%Aab> <http://example/p> 123 .") ; }
 
     // Bad URIs
-    @Test (expected=ExFatal.class)
+    @Test (expected=ExError.class)
     public void errorBadURI_4()     { parse("@prefix ex:  <bad iri> .  ex:s ex:p 123 ") ; }
     
-    @Test (expected=ExFatal.class)
+    @Test (expected=ExError.class)
     public void errorBadURI_5()     { parse("<x> <p> 'number'^^<bad uri> ") ; }
     
     @Test (expected=ExFatal.class)
diff --git a/jena-arq/src/test/java/org/apache/jena/riot/tokens/TestTokenizer.java b/jena-arq/src/test/java/org/apache/jena/riot/tokens/TestTokenizer.java
index 344cb97..9621ddd 100644
--- a/jena-arq/src/test/java/org/apache/jena/riot/tokens/TestTokenizer.java
+++ b/jena-arq/src/test/java/org/apache/jena/riot/tokens/TestTokenizer.java
@@ -33,14 +33,14 @@ import org.apache.jena.sparql.ARQConstants ;
 import org.junit.Test ;
 
 public class TestTokenizer {
-    // WORKERS
+
     private static Tokenizer tokenizer(String string) {
         return tokenizer(string, false) ;
     }
 
     private static Tokenizer tokenizer(String string, boolean lineMode) {
         PeekReader r = PeekReader.readString(string) ;
-        Tokenizer tokenizer = new TokenizerText(r, lineMode) ;
+        Tokenizer tokenizer = TokenizerText.create().source(r).lineMode(lineMode).build();
         return tokenizer ;
     }
 
@@ -1109,7 +1109,7 @@ public class TestTokenizer {
 
     @Test
     public void token_rdf_star_1() {
-        Tokenizer tokenizer = tokenizer("<<>>", true) ;
+        Tokenizer tokenizer = tokenizer("<<>>") ;
         testNextToken(tokenizer, TokenType.LT2) ;
         testNextToken(tokenizer, TokenType.GT2) ;
         assertFalse(tokenizer.hasNext()) ;
@@ -1117,7 +1117,7 @@ public class TestTokenizer {
 
     @Test
     public void token_rdf_star_2() {
-        Tokenizer tokenizer = tokenizer("<< >>", true) ;
+        Tokenizer tokenizer = tokenizer("<< >>") ;
         testNextToken(tokenizer, TokenType.LT2) ;
         testNextToken(tokenizer, TokenType.GT2) ;
         assertFalse(tokenizer.hasNext()) ;
@@ -1125,7 +1125,7 @@ public class TestTokenizer {
 
     @Test
     public void token_rdf_star_3() {
-        Tokenizer tokenizer = tokenizer("<<:s x:p 123>> :q ", true) ;
+        Tokenizer tokenizer = tokenizer("<<:s x:p 123>> :q ") ;
         testNextToken(tokenizer, TokenType.LT2) ;
         testNextToken(tokenizer, TokenType.PREFIXED_NAME, "", "s") ;
         testNextToken(tokenizer, TokenType.PREFIXED_NAME, "x", "p") ;
@@ -1137,7 +1137,7 @@ public class TestTokenizer {
 
     @Test
     public void token_rdf_star_4() {
-        Tokenizer tokenizer = tokenizer("<<<>>>", true) ;
+        Tokenizer tokenizer = tokenizer("<<<>>>") ;
         testNextToken(tokenizer, TokenType.LT2) ;
         Token t = testNextToken(tokenizer, TokenType.IRI) ;
         assertEquals("", t.getImage());
diff --git a/jena-arq/testing/RIOT/Lang/Changes b/jena-arq/testing/RIOT/Lang/Changes
new file mode 100644
index 0000000..74211c1
--- /dev/null
+++ b/jena-arq/testing/RIOT/Lang/Changes
@@ -0,0 +1,15 @@
+Tests localName_with_assigned_nfc_PN_CHARS_BASE_character_boundaries
+in Turtle and Trig contain IRIs with the character \U000E01EF
+in the result nt/nq files.
+
+That character is illegal in IRIs, even if allowed by syntax.
+So it causes a failure when reading the test.
+
+It is not in RFC 3987 - the block E0000-E0FFF is excluded.
+
+   ucschar        = %xA0-D7FF / %xF900-FDCF / %xFDF0-FFEF
+                  / %x10000-1FFFD / %x20000-2FFFD / %x30000-3FFFD
+                  / %x40000-4FFFD / %x50000-5FFFD / %x60000-6FFFD
+                  / %x70000-7FFFD / %x80000-8FFFD / %x90000-9FFFD
+                  / %xA0000-AFFFD / %xB0000-BFFFD / %xC0000-CFFFD
+                  / %xD0000-DFFFD / %xE1000-EFFFD
diff --git a/jena-arq/testing/RIOT/Lang/TrigStd/manifest.ttl b/jena-arq/testing/RIOT/Lang/TrigStd/manifest.ttl
index 2b21df6..7289612 100644
--- a/jena-arq/testing/RIOT/Lang/TrigStd/manifest.ttl
+++ b/jena-arq/testing/RIOT/Lang/TrigStd/manifest.ttl
@@ -53,7 +53,10 @@
     <#underscore_in_localName>
     <#localname_with_COLON>
     <#localName_with_assigned_nfc_bmp_PN_CHARS_BASE_character_boundaries>
-    <#localName_with_assigned_nfc_PN_CHARS_BASE_character_boundaries>
+
+    ## Contains \U000E01EF in the result which is not legal in an IRI.
+    ##   <#localName_with_assigned_nfc_PN_CHARS_BASE_character_boundaries>
+
     <#localName_with_nfc_PN_CHARS_BASE_character_boundaries>
     <#localName_with_leading_underscore>
     <#localName_with_leading_digit>
diff --git a/jena-arq/testing/RIOT/Lang/TurtleStd/manifest.ttl b/jena-arq/testing/RIOT/Lang/TurtleStd/manifest.ttl
index cc07d8f..807d9e5 100644
--- a/jena-arq/testing/RIOT/Lang/TurtleStd/manifest.ttl
+++ b/jena-arq/testing/RIOT/Lang/TurtleStd/manifest.ttl
@@ -41,7 +41,10 @@
     <#underscore_in_localName>
     <#localname_with_COLON>
     <#localName_with_assigned_nfc_bmp_PN_CHARS_BASE_character_boundaries>
-    <#localName_with_assigned_nfc_PN_CHARS_BASE_character_boundaries>
+
+    ## Contains \U000E01EF in the result which is not legal in an IRI.
+    ##   <#localName_with_assigned_nfc_PN_CHARS_BASE_character_boundaries>
+
     <#localName_with_nfc_PN_CHARS_BASE_character_boundaries>
     <#localName_with_leading_underscore>
     <#localName_with_leading_digit>
diff --git a/jena-core/src/main/java/org/apache/jena/util/FileManagerImpl.java b/jena-core/src/main/java/org/apache/jena/util/FileManagerImpl.java
index e4fab09..11bc695 100644
--- a/jena-core/src/main/java/org/apache/jena/util/FileManagerImpl.java
+++ b/jena-core/src/main/java/org/apache/jena/util/FileManagerImpl.java
@@ -114,7 +114,10 @@ public class FileManagerImpl implements FileManager
     }
     
     /** Create with the given location mapper */
-    protected FileManagerImpl(LocationMapper _mapper)    { setLocationMapper(_mapper) ; }
+    protected FileManagerImpl(LocationMapper _mapper) {
+        this();
+        setLocationMapper(_mapper);
+    }
 
     @Override
     public FileManager clone() { return clone(this) ; } 
diff --git a/jena-db/jena-dboe-base/src/main/java/org/apache/jena/dboe/sys/Sys.java b/jena-db/jena-dboe-base/src/main/java/org/apache/jena/dboe/sys/Sys.java
index cfd7c47..dca2415 100644
--- a/jena-db/jena-dboe-base/src/main/java/org/apache/jena/dboe/sys/Sys.java
+++ b/jena-db/jena-dboe-base/src/main/java/org/apache/jena/dboe/sys/Sys.java
@@ -30,7 +30,7 @@ import org.slf4j.LoggerFactory;
 /** Low level environment */
 public class Sys
 {
-    static final Logger log = LoggerFactory.getLogger("Sys");
+    static final Logger log = LoggerFactory.getLogger("org.apache.jena.dboe.Sys");
 
     /** System log - use for general messages (a few) and warnings.
      *  Generally, do not log events unless you want every user to see them every time.
@@ -39,9 +39,9 @@ public class Sys
      */
 
     /** General system log */
-    public static final Logger syslog = LoggerFactory.getLogger("System");
+    public static final Logger syslog = LoggerFactory.getLogger("org.apache.jena.dboe.System");
     /** Send warnings and error */
-    public static final Logger errlog = LoggerFactory.getLogger("System");
+    public static final Logger errlog = LoggerFactory.getLogger("org.apache.jena.dboe.System");
 
     /** Size, in bytes, of a Java long */
     public static final int SizeOfLong              = Long.BYTES; // Long.SIZE/Byte.SIZE ;
diff --git a/jena-db/jena-tdb2/src/main/java/org/apache/jena/tdb2/sys/SystemTDB.java b/jena-db/jena-tdb2/src/main/java/org/apache/jena/tdb2/sys/SystemTDB.java
index fe4209e..0107729 100644
--- a/jena-db/jena-tdb2/src/main/java/org/apache/jena/tdb2/sys/SystemTDB.java
+++ b/jena-db/jena-tdb2/src/main/java/org/apache/jena/tdb2/sys/SystemTDB.java
@@ -135,7 +135,7 @@ public class SystemTDB
         propertyFileName = x;
     }
 
-    public static final boolean is64bitSystem = determineIf64Bit();
+    public static final boolean is64bitSystem = Sys.is64bitSystem;
 
     private static Properties properties = readPropertiesFile();
 
@@ -314,40 +314,6 @@ public class SystemTDB
         return p;
     }
 
-    // --------
-
-    public static final boolean isWindows = determineIfWindows();	// Memory mapped files behave differently.
-
-    //Or look in File.listRoots.
-    //Alternative method:
-    //  http://stackoverflow.com/questions/1293533/name-of-the-operating-system-in-java-not-os-name
-
-    private static boolean determineIfWindows() {
-    	String s = System.getProperty("os.name");
-    	if ( s == null )
-    		return false;
-    	return s.startsWith("Windows ");
-	}
-
-    private static boolean determineIf64Bit() {
-        String s = System.getProperty("sun.arch.data.model");
-        if ( s != null ) {
-            boolean b = s.equals("64");
-            TDB2.logInfo.debug("System architecture: " + (b ? "64 bit" : "32 bit"));
-            return b;
-        }
-        // Not a SUN VM
-        s = System.getProperty("java.vm.info");
-        if ( s == null ) {
-            log.warn("Can't determine the data model");
-            return false;
-        }
-        log.debug("Can't determine the data model from 'sun.arch.data.model' - using java.vm.info");
-        boolean b = s.contains("64");
-        TDB2.logInfo.debug("System architecture: (from java.vm.info) " + (b ? "64 bit" : "32 bit"));
-        return b;
-    }
-
     // ---- File mode
 
     private static FileMode fileMode = null;
diff --git a/jena-db/jena-tdb2/src/test/java/org/apache/jena/tdb2/ConfigTest.java b/jena-db/jena-tdb2/src/test/java/org/apache/jena/tdb2/ConfigTest.java
index e1d2f59..937146c 100644
--- a/jena-db/jena-tdb2/src/test/java/org/apache/jena/tdb2/ConfigTest.java
+++ b/jena-db/jena-tdb2/src/test/java/org/apache/jena/tdb2/ConfigTest.java
@@ -19,7 +19,7 @@
 package org.apache.jena.tdb2;
 
 import org.apache.jena.atlas.lib.FileOps;
-import org.apache.jena.tdb2.sys.SystemTDB;
+import org.apache.jena.base.Sys;
 
 public class ConfigTest
 {
@@ -27,7 +27,7 @@ public class ConfigTest
     // Place under target
     private static final String testingDir = "target/tdb-testing";
     private static final String testingDirDB = "target/tdb-testing/DB";
-    static boolean nonDeleteableMMapFiles = SystemTDB.isWindows;
+    static boolean nonDeleteableMMapFiles = Sys.isWindows;
 
     static boolean initialized = false;
 
diff --git a/jena-fuseki2/jena-fuseki-webapp/src/test/java/org/apache/jena/fuseki/TestAdminAPI.java b/jena-fuseki2/jena-fuseki-webapp/src/test/java/org/apache/jena/fuseki/TestAdminAPI.java
index d895a65..b453a51 100644
--- a/jena-fuseki2/jena-fuseki-webapp/src/test/java/org/apache/jena/fuseki/TestAdminAPI.java
+++ b/jena-fuseki2/jena-fuseki-webapp/src/test/java/org/apache/jena/fuseki/TestAdminAPI.java
@@ -34,6 +34,7 @@ import org.apache.http.NameValuePair;
 import org.apache.http.client.entity.UrlEncodedFormEntity;
 import org.apache.jena.atlas.web.HttpException;
 import org.apache.jena.atlas.web.TypedInputStream;
+import org.apache.jena.base.Sys;
 import org.apache.jena.fuseki.webapp.FusekiWebapp;
 import org.apache.jena.query.QueryExecution;
 import org.apache.jena.rdfconnection.RDFConnection;
@@ -61,7 +62,7 @@ public class TestAdminAPI extends AbstractFusekiTest {
 
     @Test public void add_delete_api_3() throws Exception {
         // Deleted mmap files on Windows does not go away until the JVM exits.
-        if ( org.apache.jena.tdb2.sys.SystemTDB.isWindows )
+        if ( Sys.isWindows )
             return;
         testAddDelete("db_tdb2", "tdb2", true);
     }
diff --git a/jena-iri/src/main/java/org/apache/jena/iri/impl/Parser.java b/jena-iri/src/main/java/org/apache/jena/iri/impl/Parser.java
index 61755ad..18c55fc 100644
--- a/jena-iri/src/main/java/org/apache/jena/iri/impl/Parser.java
+++ b/jena-iri/src/main/java/org/apache/jena/iri/impl/Parser.java
@@ -18,14 +18,10 @@
 
 package org.apache.jena.iri.impl;
 
-import java.io.IOException;
-import java.io.InputStreamReader;
-import java.io.LineNumberReader;
-import java.io.Reader;
 import java.util.Iterator;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
-
+import java.io.*;
 import java.net.IDN;
 
 import org.apache.jena.iri.* ;
@@ -227,15 +223,27 @@ public class Parser implements IRIComponents, ViolationCodes {
         }
     }
 
+    static public void devParse(String uriStr) throws IOException {
+        LineNumberReader in = new LineNumberReader(new StringReader(uriStr));
+        devParse(in);
+    }
+    
     static public void main(String args[]) throws IOException {
-        LineNumberReader in = new LineNumberReader(new InputStreamReader(
-                System.in));
+        LineNumberReader in = new LineNumberReader(new InputStreamReader(System.in));
+        devParse(in);
+    }
+    
+    static private void devParse(LineNumberReader in) throws IOException {
+        
         IRIImpl last = null;
         DEBUG = true;
 
         IRIFactory factory = IRIFactory.iriImplementation();
         while (true) {
-            String s = in.readLine().trim();
+            String s = in.readLine();
+            if ( s == null )
+                return;
+            s = s.trim();
             if (s.equals("quit"))
                 return;
             IRIImpl iri = (IRIImpl) factory.create(s);