You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@abdera.apache.org by jm...@apache.org on 2008/01/01 05:59:47 UTC

svn commit: r607801 [1/5] - in /incubator/abdera/java/trunk: client/src/main/java/org/apache/abdera/protocol/client/ core/src/main/java/org/apache/abdera/util/ dependencies/i18n/src/main/java/org/apache/abdera/i18n/io/ dependencies/i18n/src/main/java/o...

Author: jmsnell
Date: Mon Dec 31 20:59:44 2007
New Revision: 607801

URL: http://svn.apache.org/viewvc?rev=607801&view=rev
Log:
Significant refactoring and improvements to the i18n unicode, text handling and io code.
Most of the text-manipulation and unicode related code is now bundled together in the 
org.apache.abdera.i18n.text package.  The unicode and nameprep implementations have been 
improved using inversion sets where appropriate.  This will yield a performance increase
and a decrease in memory consumption for certain operations.  Also, the Nameprep impl previously
loaded and parsed a file to initialize the algorithm; that step has been removed.  Many other
improvements have been made, most of which are internal.  I'll be doing some additional refactoring
here and there.

Added:
    incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/text/
    incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/text/CharUtils.java
    incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/text/Codepoint.java
    incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/text/CodepointIterator.java
    incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/text/DelegatingCodepointIterator.java
    incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/text/Filter.java
    incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/text/InvalidCharacterException.java
    incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/text/Nameprep.java
    incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/text/Normalizer.java
    incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/text/Punycode.java
    incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/text/Sanitizer.java
    incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/text/UrlEncoding.java
    incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/text/data/
    incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/text/data/CompositionExclusions.txt
    incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/text/data/Generator.java
    incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/text/data/UnicodeCharacterDatabase.java
    incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/text/data/UnicodeData.txt
    incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/text/io/
    incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/text/io/CharsetSniffingInputStream.java
    incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/text/io/DynamicPushbackInputStream.java
    incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/text/io/FilteredCharReader.java
    incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/text/io/InputStreamDataSource.java
    incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/text/io/PeekAheadInputStream.java
    incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/text/io/PipeChannel.java
    incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/text/io/RewindableInputStream.java
    incubator/abdera/java/trunk/examples/src/main/java/org/apache/abdera/examples/unicode/
    incubator/abdera/java/trunk/examples/src/main/java/org/apache/abdera/examples/unicode/NormalizationExample.java
Removed:
    incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/io/
    incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/iri/Escaping.java
    incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/iri/Nameprep.java
    incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/iri/NameprepCodepointIterator.java
    incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/iri/Punycode.java
    incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/unicode/
Modified:
    incubator/abdera/java/trunk/client/src/main/java/org/apache/abdera/protocol/client/RequestOptions.java
    incubator/abdera/java/trunk/core/src/main/java/org/apache/abdera/util/XmlRestrictedCharReader.java
    incubator/abdera/java/trunk/core/src/main/java/org/apache/abdera/util/XmlUtil.java
    incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/iri/HttpScheme.java
    incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/iri/IDNA.java
    incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/iri/IRI.java
    incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/lang/Lang.java
    incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/templates/Operation.java
    incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/templates/Template.java
    incubator/abdera/java/trunk/dependencies/i18n/src/test/java/org/apache/abdera/i18n/test/iri/TestIRI.java
    incubator/abdera/java/trunk/dependencies/i18n/src/test/java/org/apache/abdera/i18n/test/iri/TestNFKC.java
    incubator/abdera/java/trunk/dependencies/i18n/src/test/java/org/apache/abdera/i18n/test/iri/TestNameprep.java
    incubator/abdera/java/trunk/dependencies/i18n/src/test/java/org/apache/abdera/i18n/test/iri/TestPunycode.java
    incubator/abdera/java/trunk/extensions/converters/src/main/java/org/apache/abdera/converter/impl/ContentConverter.java
    incubator/abdera/java/trunk/extensions/json/src/main/java/org/apache/abdera/ext/json/JSONUtil.java
    incubator/abdera/java/trunk/extensions/main/src/main/java/org/apache/abdera/ext/bidi/BidiHelper.java
    incubator/abdera/java/trunk/parser/src/main/java/org/apache/abdera/parser/stax/FOMEntry.java
    incubator/abdera/java/trunk/parser/src/main/java/org/apache/abdera/parser/stax/util/FOMSniffingInputStream.java
    incubator/abdera/java/trunk/parser/src/main/java/org/apache/abdera/parser/stax/util/FOMXmlVersionInputStream.java
    incubator/abdera/java/trunk/parser/src/test/java/org/apache/abdera/test/parser/EncodingTest.java
    incubator/abdera/java/trunk/parser/src/test/java/org/apache/abdera/test/parser/stax/FeedParserTest.java
    incubator/abdera/java/trunk/protocol/src/main/java/org/apache/abdera/protocol/util/AbstractMessage.java
    incubator/abdera/java/trunk/protocol/src/main/java/org/apache/abdera/protocol/util/EncodingUtil.java
    incubator/abdera/java/trunk/server/src/main/java/org/apache/abdera/protocol/server/ResponseContext.java
    incubator/abdera/java/trunk/server/src/main/java/org/apache/abdera/protocol/server/impl/AbstractCollectionProvider.java
    incubator/abdera/java/trunk/server/src/main/java/org/apache/abdera/protocol/server/impl/AbstractResponseContext.java
    incubator/abdera/java/trunk/server/src/main/java/org/apache/abdera/protocol/server/impl/AbstractServiceProvider.java
    incubator/abdera/java/trunk/server/src/main/java/org/apache/abdera/protocol/server/servlet/AbstractFilter.java

Modified: incubator/abdera/java/trunk/client/src/main/java/org/apache/abdera/protocol/client/RequestOptions.java
URL: http://svn.apache.org/viewvc/incubator/abdera/java/trunk/client/src/main/java/org/apache/abdera/protocol/client/RequestOptions.java?rev=607801&r1=607800&r2=607801&view=diff
==============================================================================
--- incubator/abdera/java/trunk/client/src/main/java/org/apache/abdera/protocol/client/RequestOptions.java (original)
+++ incubator/abdera/java/trunk/client/src/main/java/org/apache/abdera/protocol/client/RequestOptions.java Mon Dec 31 20:59:44 2007
@@ -26,8 +26,8 @@
 
 import javax.activation.MimeType;
 
-import org.apache.abdera.i18n.io.CharUtils.Profile;
-import org.apache.abdera.i18n.iri.Escaping;
+import org.apache.abdera.i18n.text.UrlEncoding;
+import org.apache.abdera.i18n.text.CharUtils.Profile;
 import org.apache.abdera.protocol.Request;
 import org.apache.abdera.protocol.util.AbstractRequest;
 import org.apache.abdera.protocol.util.CacheControlUtil;
@@ -425,7 +425,7 @@
         slug.indexOf((char)13) > -1)
       throw new IllegalArgumentException(
         Messages.get("SLUG.BAD.CHARACTERS"));
-    return setHeader("Slug", Escaping.encode(slug,Profile.ASCIISANSCRLF));
+    return setHeader("Slug", UrlEncoding.encode(slug,Profile.ASCIISANSCRLF.filter()));
   }
   
   /**

Modified: incubator/abdera/java/trunk/core/src/main/java/org/apache/abdera/util/XmlRestrictedCharReader.java
URL: http://svn.apache.org/viewvc/incubator/abdera/java/trunk/core/src/main/java/org/apache/abdera/util/XmlRestrictedCharReader.java?rev=607801&r1=607800&r2=607801&view=diff
==============================================================================
--- incubator/abdera/java/trunk/core/src/main/java/org/apache/abdera/util/XmlRestrictedCharReader.java (original)
+++ incubator/abdera/java/trunk/core/src/main/java/org/apache/abdera/util/XmlRestrictedCharReader.java Mon Dec 31 20:59:44 2007
@@ -17,13 +17,12 @@
 */
 package org.apache.abdera.util;
 
-import java.io.FilterReader;
-import java.io.IOException;
 import java.io.InputStream;
 import java.io.InputStreamReader;
 import java.io.Reader;
 import java.io.UnsupportedEncodingException;
 
+import org.apache.abdera.i18n.text.io.FilteredCharReader;
 import org.apache.abdera.util.XmlUtil.XMLVersion;
 
 /**
@@ -35,15 +34,12 @@
  * is a valid XML character itself.
  */
 public class XmlRestrictedCharReader 
-  extends FilterReader {
+  extends FilteredCharReader {
 
   /**
    * The XMLVersion determines which set of restrictions to apply depending 
    * on the XML version being parsed
    */
-  private final XMLVersion version;
-  private final char replacement;
-  
   public XmlRestrictedCharReader(InputStream in) {
     this(new InputStreamReader(in));
   }
@@ -120,37 +116,7 @@
     Reader in, 
     XMLVersion version, 
     char replacement) {
-      super(in);
-      this.version = version;
-      this.replacement = replacement;
-      if (replacement != 0 && 
-          ((!Character.isValidCodePoint(replacement)) || 
-          XmlUtil.restricted(version,replacement))) 
-            throw new IllegalArgumentException();
-  }
-
-  @Override
-  public int read() throws IOException {
-    int c = -1;
-    if (replacement == 0) {
-      while(((c = super.read()) != -1 && XmlUtil.restricted(version,c))) {}
-    } else {
-      c = super.read();
-      if (c != -1 && XmlUtil.restricted(version,c)) c = replacement;
-    }
-    return c;
-  }
-
-  @Override
-  public int read(char[] cbuf, int off, int len) throws IOException {
-    int n = off;
-    for (; n < Math.min(len,cbuf.length-off); n++) {
-      int r = read();
-      if (r != -1) cbuf[n] = (char)r;
-      else break;
-    }
-    n -= off;
-    return n <= 0 ? -1 : n;
+      super(in,version.filter(),replacement);
   }
 
 }

Modified: incubator/abdera/java/trunk/core/src/main/java/org/apache/abdera/util/XmlUtil.java
URL: http://svn.apache.org/viewvc/incubator/abdera/java/trunk/core/src/main/java/org/apache/abdera/util/XmlUtil.java?rev=607801&r1=607800&r2=607801&view=diff
==============================================================================
--- incubator/abdera/java/trunk/core/src/main/java/org/apache/abdera/util/XmlUtil.java (original)
+++ incubator/abdera/java/trunk/core/src/main/java/org/apache/abdera/util/XmlUtil.java Mon Dec 31 20:59:44 2007
@@ -18,10 +18,31 @@
 package org.apache.abdera.util;
 
 import org.apache.abdera.i18n.ChainableBitSet;
+import org.apache.abdera.i18n.text.Filter;
 
 public class XmlUtil {
 
-  public enum XMLVersion { XML10, XML11 };
+  /**
+   * The XML versions whose character restrictions can be checked.  Each 
+   * constant owns a single Filter that is built once, when the constant 
+   * itself is created.
+   */
+  public enum XMLVersion { 
+    XML10, XML11;
+    
+    /** Filter bound to this version, created together with the constant */
+    private final Filter versionFilter = new XmlFilter(this);
+    
+    /**
+     * @return the Filter created for this XML version
+     */
+    public Filter filter() {
+      return this.versionFilter;
+    }
+  };
+  
+  /**
+   * Filter that accepts a codepoint only when it is not a restricted 
+   * character for the XML version the filter was created with.
+   */
+  private static class XmlFilter implements Filter {
+    private final XMLVersion xmlVersion;
+    XmlFilter(XMLVersion xmlVersion) {
+      this.xmlVersion = xmlVersion;
+    }
+    /**
+     * @param c the codepoint to check
+     * @return true if c is not restricted for this filter's XML version
+     */
+    public boolean accept(int c) {
+      boolean isRestricted = restricted(xmlVersion, c);
+      return !isRestricted;
+    }
+  }
   
   private static final ChainableBitSet restrictedchar10 =
     new ChainableBitSet().set2(0, 8)

Modified: incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/iri/HttpScheme.java
URL: http://svn.apache.org/viewvc/incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/iri/HttpScheme.java?rev=607801&r1=607800&r2=607801&view=diff
==============================================================================
--- incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/iri/HttpScheme.java (original)
+++ incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/iri/HttpScheme.java Mon Dec 31 20:59:44 2007
@@ -17,8 +17,8 @@
 */
 package org.apache.abdera.i18n.iri;
 
-import org.apache.abdera.i18n.io.CharUtils.Profile;
-
+import org.apache.abdera.i18n.text.UrlEncoding;
+import org.apache.abdera.i18n.text.CharUtils.Profile;
 
 class HttpScheme extends AbstractScheme {
 
@@ -54,8 +54,8 @@
         host,
         port,
         IRI.normalize(iri.getPath()),
-        Escaping.encode(Escaping.decode(iri.getQuery()),Profile.IQUERY),
-        Escaping.encode(Escaping.decode(iri.getFragment()),Profile.IFRAGMENT)
+        UrlEncoding.encode(UrlEncoding.decode(iri.getQuery()),Profile.IQUERY.filter()),
+        UrlEncoding.encode(UrlEncoding.decode(iri.getFragment()),Profile.IFRAGMENT.filter())
       );
   }
   

Modified: incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/iri/IDNA.java
URL: http://svn.apache.org/viewvc/incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/iri/IDNA.java?rev=607801&r1=607800&r2=607801&view=diff
==============================================================================
--- incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/iri/IDNA.java (original)
+++ incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/iri/IDNA.java Mon Dec 31 20:59:44 2007
@@ -21,8 +21,10 @@
 import java.io.Serializable;
 import java.net.UnknownHostException;
 
-import org.apache.abdera.i18n.io.CharUtils;
-import org.apache.abdera.i18n.io.CharUtils.Profile;
+import org.apache.abdera.i18n.text.CharUtils;
+import org.apache.abdera.i18n.text.Nameprep;
+import org.apache.abdera.i18n.text.Punycode;
+import org.apache.abdera.i18n.text.CharUtils.Profile;
 
 
 /**

Modified: incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/iri/IRI.java
URL: http://svn.apache.org/viewvc/incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/iri/IRI.java?rev=607801&r1=607800&r2=607801&view=diff
==============================================================================
--- incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/iri/IRI.java (original)
+++ incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/iri/IRI.java Mon Dec 31 20:59:44 2007
@@ -24,11 +24,13 @@
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 
-import org.apache.abdera.i18n.io.CharUtils;
-import org.apache.abdera.i18n.io.InvalidCharacterException;
-import org.apache.abdera.i18n.io.CharUtils.Profile;
-import org.apache.abdera.i18n.unicode.Normalizer;
-import org.apache.abdera.i18n.unicode.UnicodeCharacterDatabase;
+import org.apache.abdera.i18n.text.CharUtils;
+import org.apache.abdera.i18n.text.InvalidCharacterException;
+import org.apache.abdera.i18n.text.Nameprep;
+import org.apache.abdera.i18n.text.Normalizer;
+import org.apache.abdera.i18n.text.UrlEncoding;
+import org.apache.abdera.i18n.text.CharUtils.Profile;
+import org.apache.abdera.i18n.text.data.UnicodeCharacterDatabase;
 
 public final class IRI 
   implements Serializable, 
@@ -139,10 +141,10 @@
   
   private void init() {    
     a_host = IDNA.toASCII(host);
-    a_fragment = Escaping.encode(fragment,Profile.FRAGMENT);
-    a_path = Escaping.encode(path, Profile.PATH);
-    a_query = Escaping.encode(query,Profile.QUERY, Profile.PATH);
-    a_userinfo = Escaping.encode(userinfo,Profile.USERINFO);
+    a_fragment = UrlEncoding.encode(fragment,Profile.FRAGMENT.filter());
+    a_path = UrlEncoding.encode(path, Profile.PATH.filter());
+    a_query = UrlEncoding.encode(query,Profile.QUERY.filter(), Profile.PATH.filter());
+    a_userinfo = UrlEncoding.encode(userinfo,Profile.USERINFO.filter());
     a_authority = buildASCIIAuthority();
   }
     
@@ -285,9 +287,9 @@
       buildAuthority(buf,aui,ah,port);
       return buf.toString();
     } else {
-      return Escaping.encode(
+      return UrlEncoding.encode(
         authority, 
-        Profile.AUTHORITY);
+        Profile.AUTHORITY.filter());
     }
   }
   
@@ -485,8 +487,8 @@
         iri.getHost(),
         iri.getPort(),
         normalize(iri.getPath()),
-        Escaping.encode(Escaping.decode(iri.getQuery()),Profile.IQUERY),
-        Escaping.encode(Escaping.decode(iri.getFragment()),Profile.IFRAGMENT)
+        UrlEncoding.encode(UrlEncoding.decode(iri.getQuery()),Profile.IQUERY.filter()),
+        UrlEncoding.encode(UrlEncoding.decode(iri.getFragment()),Profile.IFRAGMENT.filter())
       );
   }
 
@@ -512,10 +514,10 @@
       if (segments[n] != null) {
         if (buf.length() > 1) buf.append('/');
         buf.append(
-          Escaping.encode(
-            Escaping.decode(
+            UrlEncoding.encode(
+                UrlEncoding.decode(
               segments[n]),
-              Profile.IPATHNODELIMS));
+              Profile.IPATHNODELIMS.filter()));
       }
     }
     if (path.endsWith("/") || path.endsWith("/.")) 
@@ -552,8 +554,8 @@
       buf.append(':');
     }
     buf.append(getSchemeSpecificPart());
-    return Escaping.encode(buf.toString(),
-      Profile.SCHEMESPECIFICPART);
+    return UrlEncoding.encode(buf.toString(),
+      Profile.SCHEMESPECIFICPART.filter());
   }
   
   public String toASCIIString() {
@@ -568,7 +570,7 @@
   }
   
   public String toBIDIString() {
-    return CharUtils.bidiLRE(toString());
+    return CharUtils.wrapBidi(toString(),CharUtils.LRE);
   }
   
   public java.net.URI toURI() 

Modified: incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/lang/Lang.java
URL: http://svn.apache.org/viewvc/incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/lang/Lang.java?rev=607801&r1=607800&r2=607801&view=diff
==============================================================================
--- incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/lang/Lang.java (original)
+++ incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/lang/Lang.java Mon Dec 31 20:59:44 2007
@@ -21,9 +21,9 @@
 import java.util.Iterator;
 import java.util.Locale;
 
-import org.apache.abdera.i18n.io.CharUtils;
-import org.apache.abdera.i18n.io.InvalidCharacterException;
-import org.apache.abdera.i18n.io.CharUtils.Profile;
+import org.apache.abdera.i18n.text.CharUtils;
+import org.apache.abdera.i18n.text.InvalidCharacterException;
+import org.apache.abdera.i18n.text.CharUtils.Profile;
 
 
 /**

Modified: incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/templates/Operation.java
URL: http://svn.apache.org/viewvc/incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/templates/Operation.java?rev=607801&r1=607800&r2=607801&view=diff
==============================================================================
--- incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/templates/Operation.java (original)
+++ incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/templates/Operation.java Mon Dec 31 20:59:44 2007
@@ -25,9 +25,9 @@
 import java.util.List;
 import java.util.Map;
 
-import org.apache.abdera.i18n.io.CharUtils;
-import org.apache.abdera.i18n.iri.Escaping;
-import org.apache.abdera.i18n.unicode.Normalizer;
+import org.apache.abdera.i18n.text.CharUtils;
+import org.apache.abdera.i18n.text.Normalizer;
+import org.apache.abdera.i18n.text.UrlEncoding;
 
 @SuppressWarnings("unchecked") 
 public abstract class Operation
@@ -144,21 +144,17 @@
     if (val == null) return null;
     if (val.getClass().isArray()) {
       if (val instanceof byte[]) {
-        return Escaping.encode((byte[])val);
+        return UrlEncoding.encode((byte[])val);
       } else if (val instanceof char[]) {
-        try {
-          String chars = new String((char[])val);
-          return Escaping.encode(
-              !context.isNormalizing() ? chars : 
-              Normalizer.normalize(
-                chars, 
-                Normalizer.Form.C).toString(), 
-              context.isIri() ? 
-                CharUtils.Profile.IUNRESERVED : 
-                CharUtils.Profile.UNRESERVED);
-        } catch (IOException e) {
-          throw new RuntimeException(e);
-        }
+        String chars = new String((char[])val);
+        return UrlEncoding.encode(
+            !context.isNormalizing() ? chars : 
+            Normalizer.normalize(
+              chars, 
+              Normalizer.Form.C).toString(), 
+            context.isIri() ? 
+              CharUtils.Profile.IUNRESERVED.filter() : 
+              CharUtils.Profile.UNRESERVED.filter());
       } else if (val instanceof short[]) {
         StringBuilder buf = new StringBuilder();
         short[] array = (short[]) val;
@@ -206,18 +202,18 @@
       return toString(((Template)val).getPattern(),context);
     } else if (val instanceof InputStream) {
       try {
-        return Escaping.encode((InputStream)val);
+        return UrlEncoding.encode((InputStream)val);
       } catch (IOException e) {
         throw new RuntimeException(e);
       }
     } else if (val instanceof Readable) {
       try {
-        return Escaping.encode(
+        return UrlEncoding.encode(
           (Readable)val, 
           "UTF-8",
           context.isIri() ? 
-            CharUtils.Profile.IUNRESERVED : 
-            CharUtils.Profile.UNRESERVED);
+            CharUtils.Profile.IUNRESERVED.filter() : 
+            CharUtils.Profile.UNRESERVED.filter());
       } catch (IOException e) {
         throw new RuntimeException(e);
       }
@@ -227,7 +223,7 @@
         context.isIri(),
         context.isNormalizing());
     } else if (val instanceof Byte) {
-      return Escaping.encode(((Byte)val).byteValue());
+      return UrlEncoding.encode(((Byte)val).byteValue());
     } else if (val instanceof Iterable) {
       StringBuilder buf = new StringBuilder();
       Iterable i = (Iterable) val;
@@ -308,18 +304,14 @@
     CharSequence val, 
     boolean isiri, 
     boolean normalizing) {
-      try {
-        return Escaping.encode(
-            !normalizing ? val : 
-            Normalizer.normalize(
-              val, 
-              Normalizer.Form.C).toString(), 
-            isiri ? 
-              CharUtils.Profile.IUNRESERVED : 
-              CharUtils.Profile.UNRESERVED);
-      } catch (IOException e) {
-        throw new RuntimeException(e);
-      }
+      return UrlEncoding.encode(
+          !normalizing ? val : 
+          Normalizer.normalize(
+            val, 
+            Normalizer.Form.C).toString(), 
+          isiri ? 
+            CharUtils.Profile.IUNRESERVED.filter() : 
+            CharUtils.Profile.UNRESERVED.filter());
   }  
   
   private static final class DefaultOperation extends Operation {

Modified: incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/templates/Template.java
URL: http://svn.apache.org/viewvc/incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/templates/Template.java?rev=607801&r1=607800&r2=607801&view=diff
==============================================================================
--- incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/templates/Template.java (original)
+++ incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/templates/Template.java Mon Dec 31 20:59:44 2007
@@ -27,7 +27,7 @@
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 
-import org.apache.abdera.i18n.io.CharUtils;
+import org.apache.abdera.i18n.text.CharUtils;
 
 /**
  * Used to evaluate a URI Template.  
@@ -82,7 +82,7 @@
         token, 
         forDisplay(token));
     }
-    return CharUtils.bidiLRO(pattern);
+    return CharUtils.wrapBidi(pattern, CharUtils.LRO);
   }
   
   private static String forDisplay(String token) {
@@ -91,7 +91,7 @@
     buf.append('{');
     if (splits.length == 1) {
       String[] pair = splits[0].split("\\s*=\\s*");
-      buf.append(CharUtils.bidiLRE(pair[0]));
+      buf.append(CharUtils.wrapBidi(pair[0], CharUtils.LRE));
       if (pair.length > 1) {
         buf.append('=');
         buf.append(pair[1]);
@@ -106,7 +106,7 @@
       for (String var : vars) {
         if (i++ > 0) buf.append(",");
         String[] pair = var.split("\\s*=\\s*");
-        buf.append(CharUtils.bidiLRE(pair[0]));
+        buf.append(CharUtils.wrapBidi(pair[0], CharUtils.LRE));
         if (pair.length > 1) {
           buf.append('=');
           buf.append(pair[1]);
@@ -243,7 +243,7 @@
     StringBuilder buf = new StringBuilder();
     buf.append("V:" + getPatternForDisplay());
     buf.append('\n');
-    buf.append("L:" + CharUtils.bidiLRO(getPattern()));
+    buf.append("L:" + CharUtils.wrapBidi(getPattern(),CharUtils.LRO));
     buf.append('\n');
     return buf.toString();
   }

Added: incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/text/CharUtils.java
URL: http://svn.apache.org/viewvc/incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/text/CharUtils.java?rev=607801&view=auto
==============================================================================
--- incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/text/CharUtils.java (added)
+++ incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/text/CharUtils.java Mon Dec 31 20:59:44 2007
@@ -0,0 +1,817 @@
+/*
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements.  The ASF licenses this file to You
+* under the Apache License, Version 2.0 (the "License"); you may not
+* use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.  For additional information regarding
+* copyright in this work, please see the NOTICE file in the top level
+* directory of this distribution.
+*/
+package org.apache.abdera.i18n.text;
+
+import java.io.IOException;
+
+
+/**
+ * General utilities for dealing with Unicode characters
+ */
+public final class CharUtils {
+
+  private CharUtils() {}
+ 
+  public static boolean isValid(int c) {
+    return c >= 0x000000 && c <= 0x10ffff;
+  }
+  
+  public static boolean isValid(Codepoint c) {
+    return isValid(c.getValue());
+  }
+  
+  public static boolean inRange(char[] chars, char low, char high) {
+    for (int i = 0; i < chars.length; i++)
+      if (chars[i] < low || chars[i] > high) return false;
+    return true;
+  }
+
+  /**
+   * Checks whether every codepoint encoded in the char array lies within
+   * the inclusive range [low, high]. A high surrogate that is immediately
+   * followed by a low surrogate is decoded as one supplementary codepoint;
+   * any other char (including an unpaired surrogate) is tested by its own
+   * value.
+   *
+   * @param chars the UTF-16 chars to examine
+   * @param low   the lowest acceptable codepoint (inclusive)
+   * @param high  the highest acceptable codepoint (inclusive)
+   * @return true if every codepoint is in range (also true for an empty array)
+   */
+  public static boolean inRange(char[] chars, int low, int high) {
+    for (int i = 0; i < chars.length; i++) {
+      char n = chars[i];
+      int c;
+      if (isHighSurrogate(n) && 
+          i + 1 < chars.length && 
+          isLowSurrogate(chars[i+1])) {
+        // Pre-increment: consume the low surrogate and pass it on. A
+        // post-increment would hand the high surrogate in as the "low"
+        // argument and make toSupplementary reject every valid pair.
+        c = toSupplementary(n,chars[++i]).getValue();
+      } else {
+        c = n;
+      }
+      if (c < low || c > high) return false;
+    }
+    return true;
+  }
+  
+  public static boolean inRange(int codepoint, int low, int high) {
+    return codepoint >= low && codepoint <= high;
+  }
+  
+  public static void append(Appendable buf, Codepoint c) {
+    append(buf,c.getValue());
+  }
+  
+  public static void append(Appendable buf, int c) {
+    try {
+      if (isSupplementary(c)) {
+        buf.append(getHighSurrogate(c));
+        buf.append(getLowSurrogate(c));
+      } else buf.append((char)c);
+    } catch (IOException e) {
+      throw new RuntimeException(e);
+    }
+  }
+
+  public static char getHighSurrogate(int c) {
+    return (c >= 0x10000) ?
+       (char)((0xD800 - (0x10000 >> 10)) + (c >> 10)) : 0;
+  }
+
+  public static char getLowSurrogate(int c) {    
+    return (c >= 0x10000) ?
+        (char)(0xDC00 + (c & 0x3FF)) : (char)c;
+  }
+  
+  public static boolean isHighSurrogate(char c) {
+    return c <= '\uDBFF' && c >= '\uD800';
+  }
+
+  public static boolean isLowSurrogate(char c) {
+    return c <= '\uDFFF' && c >= '\uDC00';
+  }
+  
+  public static boolean isSupplementary(int c) {
+    return c <= 0x10ffff && c >= 0x010000;
+  }
+  
+  public static boolean isSurrogatePair(char high, char low) {
+    return isHighSurrogate(high) && isLowSurrogate(low);
+  }
+  
+  public static Codepoint toSupplementary(char high, char low) {
+    if (!isHighSurrogate(high)) 
+      throw new IllegalArgumentException("Invalid High Surrogate");
+    if (!isLowSurrogate(low))
+      throw new IllegalArgumentException("Invalid Low Surrogate");
+    return new Codepoint(((high - '\uD800') << 10) + (low - '\uDC00') + 0x010000);    
+  }
+
+  /**
+   * Returns the codepoint found at char index i of the string.
+   * If i addresses either half of a surrogate pair, the supplementary
+   * codepoint encoded by the whole pair is returned; an unpaired surrogate
+   * is returned as a codepoint holding its own char value.
+   *
+   * @param s the string to read from
+   * @param i the char index (not the codepoint index)
+   * @return the codepoint at that position
+   * @throws IndexOutOfBoundsException if i is not a valid index of s
+   *         (as raised by charAt)
+   */
+  public static Codepoint codepointAt(String s, int i) {
+    char c = s.charAt(i);
+    if (c < 0xD800 || c > 0xDFFF) return new Codepoint(c);
+    if (isHighSurrogate(c)) {
+      // Only look ahead when a following char exists; a high surrogate in
+      // the last position is unpaired and must not trigger charAt(i+1).
+      if (i + 1 < s.length()) {
+        char low = s.charAt(i+1);
+        if (isLowSurrogate(low)) return toSupplementary(c,low);
+      }
+    } else if (isLowSurrogate(c)) {
+      if (i >= 1) {
+        char high = s.charAt(i-1);
+        if (isHighSurrogate(high)) return toSupplementary(high,c);
+      }
+    }
+    return new Codepoint(c);
+  }
+  
+  /**
+   * Returns the codepoint found at char index i of the sequence.
+   * If i addresses either half of a surrogate pair, the supplementary
+   * codepoint encoded by the whole pair is returned; an unpaired surrogate
+   * is returned as a codepoint holding its own char value.
+   *
+   * @param s the char sequence to read from
+   * @param i the char index (not the codepoint index)
+   * @return the codepoint at that position
+   * @throws IndexOutOfBoundsException if i is not a valid index of s
+   *         (as raised by charAt)
+   */
+  public static Codepoint codepointAt(CharSequence s, int i) {
+    char c = s.charAt(i);
+    if (c < 0xD800 || c > 0xDFFF) return new Codepoint(c);
+    if (isHighSurrogate(c)) {
+      // Only look ahead when a following char exists; a high surrogate in
+      // the last position is unpaired and must not trigger charAt(i+1).
+      if (i + 1 < s.length()) {
+        char low = s.charAt(i+1);
+        if (isLowSurrogate(low)) return toSupplementary(c,low);
+      }
+    } else if (isLowSurrogate(c)) {
+      if (i >= 1) {
+        char high = s.charAt(i-1);
+        if (isHighSurrogate(high)) return toSupplementary(high,c);
+      }
+    }
+    return new Codepoint(c);
+  }
+  
+  public static void insert(CharSequence s, int i, Codepoint c) {
+    insert(s,i,c.getValue());
+  }
+  
+  public static void insert(CharSequence s, int i, int c) {
+    if (!(s instanceof StringBuilder) && 
+        !(s instanceof StringBuffer)) { 
+      insert(new StringBuilder(s),i,c);
+    } else {
+      if (i > 0 && i < s.length()) {
+        char ch = s.charAt(i);
+        boolean low = isLowSurrogate(ch);
+        if (low) {
+          if (low && isHighSurrogate(s.charAt(i-1))) {
+            i--;
+          }
+        }
+      }
+      if (s instanceof StringBuffer) 
+        ((StringBuffer)s).insert(i, toString(c));
+      else if (s instanceof StringBuilder)
+        ((StringBuilder)s).insert(i, toString(c));
+    }
+  }
+  
+  public static void setChar(CharSequence s, int i, Codepoint c) {
+    setChar(s,i,c.getValue());
+  }
+  
+  /**
+   * Replaces the codepoint located at char index i with the codepoint c.
+   * When i addresses either half of a surrogate pair, both chars of the
+   * pair are replaced.
+   *
+   * Only StringBuilder and StringBuffer arguments are modified in place.
+   * For any other CharSequence the replacement is carried out on a
+   * temporary StringBuilder copy that is not returned, so the caller sees
+   * no change.
+   */
+  public static void setChar(CharSequence s, int i, int c) {
+    if (!(s instanceof StringBuilder) && 
+        !(s instanceof StringBuffer)) { 
+      // Works on a throw-away copy; the result is discarded.
+      setChar(new StringBuilder(s),i,c);
+    } else {
+      // l is the number of existing chars that will be replaced (1 or 2)
+      int l = 1;
+      char ch = s.charAt(i);
+      boolean high = isHighSurrogate(ch);
+      boolean low = isLowSurrogate(ch);
+      if (high || low) {
+        // i is on the high half and a low half follows: replace both
+        if (high && (i+1) < s.length() && isLowSurrogate(s.charAt(i+1))) l++;
+        else {
+          // i is on the low half and a high half precedes: back up one
+          // char so the replacement starts at the beginning of the pair
+          if (low && i > 0 && isHighSurrogate(s.charAt(i-1))) {
+            i--; l++;
+          }
+        }
+      }
+      if (s instanceof StringBuffer)
+        ((StringBuffer)s).replace(i, i+l, toString(c));
+      else if (s instanceof StringBuilder)
+        ((StringBuilder)s).replace(i, i+l, toString(c));
+    }
+  }
+  
+  public static int length(Codepoint c) {
+    return c.getCharCount();
+  }
+  
+  public static int length(int c) {
+    return new Codepoint(c).getCharCount();
+  }
+  
+  public static int length(CharSequence c) {
+    return length(CodepointIterator.forCharSequence(c));
+  }
+  
+  public static int length(char[] c) {
+    return length(CodepointIterator.forCharArray(c));
+  }
+  
+  private static int length(CodepointIterator ci) {
+    int n = 0;
+    while(ci.hasNext()) {
+      ci.next(); n++;
+    }
+    return n;    
+  }
+
+  private static String supplementaryToString(int c) {
+    StringBuilder buf = new StringBuilder();
+    buf.append((char)getHighSurrogate(c));
+    buf.append((char)getLowSurrogate(c));
+    return buf.toString();
+  }
+  
+  public static String toString(int c) {
+    return (isSupplementary(c)) ? 
+      supplementaryToString(c) : 
+      String.valueOf((char)c);
+  }
+  
+  public static final char LRE = 0x202A; 
+  public static final char RLE = 0x202B; 
+  public static final char LRO = 0x202D; 
+  public static final char RLO = 0x202E; 
+  public static final char LRM = 0x200E; 
+  public static final char RLM = 0x200F;
+  public static final char PDF = 0x202C;
+  
+  /**
+   * Drops at most one bidi control character from the front and at most
+   * one from the back of the string. A null string, or one of length 0 or
+   * 1, is returned unchanged.
+   */
+  public static String stripBidi(String s) {
+    if (s == null || s.length() <= 1) return s;
+    final int len = s.length();
+    // With at least two chars the first and last positions are distinct,
+    // so both ends can be tested against the original string.
+    final int from = isBidi(s.charAt(0)) ? 1 : 0;
+    final int to = isBidi(s.charAt(len-1)) ? len-1 : len;
+    return s.substring(from,to);
+  }
+    
+  public static String stripBidiInternal(String s) {
+    
+    return s.replaceAll("[\u202A\u202B\u202D\u202E\u200E\u200F\u202C]", "");
+  }
+
+  private static String wrap(String s, char c1, char c2) {
+    StringBuilder buf = new StringBuilder(s);
+    if (buf.length() > 1) {
+      if (buf.charAt(0) != c1) buf.insert(0, c1);
+      if (buf.charAt(buf.length()-1) != c2) buf.append(c2);
+    }
+    return buf.toString();
+  }
+  
+  /**
+   * Wrap the string with the specified bidi control
+   */
+  public static String wrapBidi(String s, char c) {
+    switch(c) {
+      case RLE: return wrap(s,RLE,PDF);
+      case RLO: return wrap(s,RLO,PDF);
+      case LRE: return wrap(s,LRE,PDF);
+      case LRO: return wrap(s,LRO,PDF);
+      case RLM: return wrap(s,RLM,RLM);
+      case LRM: return wrap(s,LRM,LRM);
+      default:  return s;
+    }
+  }
+  
+  public static boolean isDigit(Codepoint codepoint) {
+    return isDigit(codepoint.getValue());
+  }
+  
+  public static boolean isDigit(int codepoint) {
+    return CharUtils.inRange(codepoint, '0', '9');
+  }
+  
+  public static boolean isAlpha(Codepoint codepoint) {
+    return isAlpha(codepoint.getValue());
+  }
+  
+  public static boolean isAlpha(int codepoint) {
+    return CharUtils.inRange(codepoint, 'A', 'Z') ||
+           CharUtils.inRange(codepoint, 'a', 'z');
+  }
+
+  public static boolean isAlphaDigit(Codepoint codepoint) {
+    return isAlphaDigit(codepoint.getValue());
+  }
+  
+  public static boolean isAlphaDigit(int codepoint) {
+    return isDigit(codepoint) || isAlpha(codepoint);
+  }
+
+  public static boolean isBidi(Codepoint codepoint) {
+    return isBidi(codepoint.getValue());
+  }
+  
+  public static boolean isBidi(int codepoint) {
+    return  codepoint == LRM || // Left-to-right mark
+            codepoint == RLM || // Right-to-left mark
+            codepoint == LRE || // Left-to-right embedding
+            codepoint == RLE || // Right-to-left embedding
+            codepoint == LRO || // Left-to-right override
+            codepoint == RLO || // Right-to-left override
+            codepoint == PDF;   // Pop directional formatting
+  }
+  
+  /**
+   * Locates a value within an int array whose elements are in increasing
+   * order.
+   *
+   * The search first halves the window [s, e) until at most 8 elements
+   * remain, then scans the remainder linearly.
+   *
+   * @param set   the increasing array to search
+   * @param value the value to locate
+   * @return the index of the last element that is less than or equal to
+   *         value; -1 if value is smaller than the first element, if value
+   *         is greater than or equal to the last element, or if the array
+   *         is empty
+   */
+  public static int get_index(int[] set, int value) {
+    int s = 0, e = set.length;
+    while (e - s > 8) {
+      int i = (e + s) >>> 1;
+      if (set[i] <= value) s = i;
+      else e = i;
+    }
+    while (s < e && set[s] <= value) s++;
+    // Report "past the end" only when the scan really ran off the array.
+    // Comparing against e instead would also fire when e had been narrowed
+    // by the halving phase, losing the valid index e - 1.
+    return s == set.length ? -1 : s - 1;
+  }
+  
+  /**
+   * Treats the specified int array as an Inversion Set and returns
+   * true if the value is located within the set. This will only work
+   * correctly if the values in the int array are monotonically increasing
+   */
+  public static boolean invset_contains(
+    int[] set, 
+    int value) {
+      return (get_index(set,value) & 1) == 0;    
+  }
+  
+  
+  public static enum Profile {
+    NONE(
+      new Filter() {
+        public boolean accept(int codepoint) {
+          return true;
+        }
+      }
+    ),
+    ALPHA(
+      new Filter() {
+        public boolean accept(int codepoint) {
+          return !isAlpha(codepoint);
+        }
+      }
+    ),
+    ALPHANUM(
+      new Filter() {
+        public boolean accept(int codepoint) {
+          return !isAlphaDigit(codepoint);
+        }
+      }
+    ),
+    FRAGMENT(
+      new Filter() {
+        public boolean accept(int codepoint) {
+          return !isFragment(codepoint);
+        }
+      }
+    ),
+    IFRAGMENT(
+      new Filter() {
+        public boolean accept(int codepoint) {
+          return !is_ifragment(codepoint);
+        }
+      }
+    ),
+    PATH(
+      new Filter() {
+        public boolean accept(int codepoint) {
+          return !isPath(codepoint);
+        }
+      }
+    ),
+    IPATH(
+      new Filter() {
+        public boolean accept(int codepoint) {
+          return !is_ipath(codepoint);
+        }
+      }
+    ),
+    IUSERINFO(
+      new Filter() {
+        public boolean accept(int codepoint) {
+          return !is_iuserinfo(codepoint);
+        }
+      }
+    ),
+    USERINFO(
+      new Filter() {
+        public boolean accept(int codepoint) {
+          return !isUserInfo(codepoint);
+        }
+      }
+    ),
+    QUERY(
+        new Filter() {
+        public boolean accept(int codepoint) {
+          return !isQuery(codepoint);
+        }
+      }
+    ),
+    IQUERY(
+      new Filter() {
+        public boolean accept(int codepoint) {
+          return !is_iquery(codepoint);
+        }
+      }
+    ),
+    SCHEME(
+      new Filter() {
+        public boolean accept(int codepoint) {
+          return !isScheme(codepoint);
+        }
+      }
+    ),
+    PATHNODELIMS(
+      new Filter() {
+        public boolean accept(int codepoint) {
+          return !isPathNoDelims(codepoint);
+        }
+      }
+    ),
+    IPATHNODELIMS(
+      new Filter() {
+        public boolean accept(int codepoint) {
+          return !is_ipathnodelims(codepoint);
+        }
+      }
+    ),
+    IREGNAME(
+      new Filter() {
+        public boolean accept(int codepoint) {
+          return !is_iregname(codepoint);
+        }
+      }
+    ),
+    IPRIVATE(
+      new Filter() {
+        public boolean accept(int codepoint) {
+          return !is_iprivate(codepoint);
+        }
+      }
+    ),
+    RESERVED(
+      new Filter() {
+        public boolean accept(int codepoint) {
+          return !isReserved(codepoint);
+        }
+      }
+    ),
+    IUNRESERVED(
+      new Filter() {
+        public boolean accept(int codepoint) {
+          return !is_iunreserved(codepoint);
+        }
+      }
+    ),
+    UNRESERVED(
+      new Filter() {
+        public boolean accept(int codepoint) {
+          return !isUnreserved(codepoint);
+        }
+      }
+    ),
+    SCHEMESPECIFICPART(
+      new Filter() {
+        public boolean accept(int codepoint) {
+          return !is_iunreserved(codepoint) && 
+                 !isReserved(codepoint) && 
+                 !is_iprivate(codepoint) && 
+                 !isPctEnc(codepoint) && 
+                 codepoint != '#';
+        }
+      }
+    ),
+    AUTHORITY(
+      new Filter() {
+        public boolean accept(int codepoint) {
+          return !is_regname(codepoint) &&
+                 !isUserInfo(codepoint) && 
+                 !isGenDelim(codepoint);
+        }
+      }
+    ),
+    ASCIISANSCRLF(
+      new Filter() {
+        public boolean accept(int codepoint) {  
+          return !CharUtils.inRange(codepoint,1,9) &&   
+                 !CharUtils.inRange(codepoint,14,127);
+        }        
+      }
+    ),
+    PCT(
+      new Filter() {
+        public boolean accept(int codepoint) {
+          return !CharUtils.isPctEnc(codepoint);
+        }
+      }
+    ),
+    STD3ASCIIRULES(
+      new Filter() {
+        public boolean accept(int codepoint) {
+          return !CharUtils.inRange(codepoint,0x0000,0x002C) &&
+                 !CharUtils.inRange(codepoint,0x002E,0x002F) &&
+                 !CharUtils.inRange(codepoint,0x003A,0x0040) &&
+                 !CharUtils.inRange(codepoint,0x005B,0x0060) &&
+                 !CharUtils.inRange(codepoint,0x007B,0x007F);          
+        }
+      }
+    )
+    ;
+    private final Filter filter;
+    Profile(Filter filter) {
+      this.filter = filter;
+    }
+    public Filter filter() {
+      return filter;
+    }
+    public boolean check(int codepoint) {
+      return filter.accept(codepoint);
+    }
+  }
+
+  public static boolean isPctEnc(int codepoint) {
+    return codepoint == '%' ||
+           isDigit(codepoint) ||
+           CharUtils.inRange(codepoint,'A','F') ||
+           CharUtils.inRange(codepoint,'a','f');
+  }
+  
+  public static boolean isMark(int codepoint) {
+    return codepoint == '-'  ||
+           codepoint == '_'  ||
+           codepoint == '.'  ||
+           codepoint == '!'  ||
+           codepoint == '~'  ||
+           codepoint == '*'  ||
+           codepoint == '\\' ||
+           codepoint == '\'' ||
+           codepoint == '('  ||
+           codepoint == ')';
+  }
+  
+  public static boolean isUnreserved(int codepoint) {
+    return isAlphaDigit(codepoint) ||  
+           codepoint == '-' || 
+           codepoint == '.' ||
+           codepoint == '_' ||
+           codepoint == '~';
+  }
+
+  public static boolean isReserved(int codepoint) {
+    return codepoint == '$' ||
+           codepoint == '&' ||
+           codepoint == '+' ||
+           codepoint == ',' ||
+           codepoint == '/' ||
+           codepoint == ':' ||
+           codepoint == ';' ||
+           codepoint == '=' ||
+           codepoint == '?' ||
+           codepoint == '@' ||
+           codepoint == '[' ||
+           codepoint == ']';
+  }
+  
+  public static boolean isGenDelim(int codepoint) {
+    return codepoint == '#' ||
+           codepoint == '/' ||
+           codepoint == ':' ||
+           codepoint == '?' ||
+           codepoint == '@' ||
+           codepoint == '[' ||
+           codepoint == ']';
+  }
+  
+  public static boolean isSubDelim(int codepoint) {
+    return codepoint == '!' || 
+           codepoint == '$' || 
+           codepoint == '&' || 
+           codepoint == '\'' || 
+           codepoint == '(' || 
+           codepoint == ')' || 
+           codepoint == '*' || 
+           codepoint == '+' || 
+           codepoint == ',' || 
+           codepoint == ';' || 
+           codepoint == '=' || 
+           codepoint == '\\';
+  }
+  
+  public static boolean isPchar(int codepoint) {
+    return isUnreserved(codepoint) ||  
+           codepoint == ':' || 
+           codepoint == '@' || 
+           codepoint == '&' || 
+           codepoint == '=' || 
+           codepoint == '+' || 
+           codepoint == '$' || 
+           codepoint == ',';
+  }
+
+  public static boolean isPath(int codepoint) {
+    return isPchar(codepoint) || 
+           codepoint == ';' ||
+           codepoint == '/' ||
+           codepoint == '%' || 
+           codepoint == ',';
+  }
+  
+  public static boolean isPathNoDelims(int codepoint) {
+    return isPath(codepoint) && !isGenDelim(codepoint);
+  }
+
+  public static boolean isScheme(int codepoint) {
+    return isAlphaDigit(codepoint) || 
+           codepoint == '+' || 
+           codepoint == '-' ||
+           codepoint == '.';
+  }
+  
+
+  public static boolean isUserInfo(int codepoint) {
+    return isUnreserved(codepoint) || 
+           isSubDelim(codepoint) ||
+           isPctEnc(codepoint);
+  }
+  
+  public static boolean isQuery(int codepoint) {
+    return isPchar(codepoint) ||
+           codepoint == ';' || 
+           codepoint == '/' ||
+           codepoint == '?' ||
+           codepoint == '%';
+  }
+
+  public static boolean isFragment(int codepoint) {
+    return isPchar(codepoint) ||
+           codepoint == '/' ||
+           codepoint == '?' ||
+           codepoint == '%';
+  }
+  
+  public static boolean is_ucschar(int codepoint) {    
+    return 
+        CharUtils.inRange(codepoint,'\u00A0', '\uD7FF') ||
+        CharUtils.inRange(codepoint,'\uF900','\uFDCF') ||
+        CharUtils.inRange(codepoint,'\uFDF0','\uFFEF') ||
+        CharUtils.inRange(codepoint,0x10000,0x1FFFD) ||
+        CharUtils.inRange(codepoint,0x20000,0x2FFFD) ||
+        CharUtils.inRange(codepoint,0x30000,0x3FFFD) ||
+        CharUtils.inRange(codepoint,0x40000,0x4FFFD) ||
+        CharUtils.inRange(codepoint,0x50000,0x5FFFD) ||
+        CharUtils.inRange(codepoint,0x60000,0x6FFFD) ||
+        CharUtils.inRange(codepoint,0x70000,0x7FFFD) ||
+        CharUtils.inRange(codepoint,0x80000,0x8FFFD) ||
+        CharUtils.inRange(codepoint,0x90000,0x9FFFD) ||
+        CharUtils.inRange(codepoint,0xA0000,0xAFFFD) ||
+        CharUtils.inRange(codepoint,0xB0000,0xBFFFD) ||
+        CharUtils.inRange(codepoint,0xC0000,0xCFFFD) ||
+        CharUtils.inRange(codepoint,0xD0000,0xDFFFD) ||
+        CharUtils.inRange(codepoint,0xE1000,0xEFFFD);
+  }
+
+  public static boolean is_iprivate(int codepoint) {
+    return
+      CharUtils.inRange(codepoint,'\uE000', '\uF8FF') ||
+      CharUtils.inRange(codepoint, 0xF0000,0xFFFFD) ||
+      CharUtils.inRange(codepoint, 0x100000,0x10FFFD);
+  }
+  
+  public static boolean is_iunreserved(int codepoint) {
+    return isAlphaDigit(codepoint) || isMark(codepoint) || is_ucschar(codepoint);
+  }
+
+  public static boolean is_ipchar(int codepoint) {
+    return is_iunreserved(codepoint) || 
+           codepoint == ':' || 
+           codepoint == '@' || 
+           codepoint == '&' || 
+           codepoint == '=' || 
+           codepoint == '+' || 
+           codepoint == '$';
+  }
+
+  public static boolean is_ipath(int codepoint) {
+    return is_ipchar(codepoint) || 
+           codepoint == ';' ||
+           codepoint == '/' ||
+           codepoint == '%' || 
+           codepoint == ',';
+  }
+  
+  public static boolean is_ipathnodelims(int codepoint) {
+    return is_ipath(codepoint) && !isGenDelim(codepoint);
+  }
+
+  public static boolean is_iquery(int codepoint) {
+    return is_ipchar(codepoint) || 
+           is_iprivate(codepoint) || 
+           codepoint == ';' ||
+           codepoint == '/' ||
+           codepoint == '?' ||
+           codepoint == '%';
+  }
+
+  public static boolean is_ifragment(int codepoint) {
+    return is_ipchar(codepoint) || 
+           is_iprivate(codepoint) || 
+           codepoint == '/' ||
+           codepoint == '?' ||
+           codepoint == '%';
+  }  
+  
+  public static boolean is_iregname(int codepoint) {
+    return is_iunreserved(codepoint) || 
+           codepoint == '!'  || 
+           codepoint == '$'  || 
+           codepoint == '&'  || 
+           codepoint == '\'' || 
+           codepoint == '('  || 
+           codepoint == ')'  || 
+           codepoint == '*'  || 
+           codepoint == '+'  || 
+           codepoint == ','  || 
+           codepoint == ';'  || 
+           codepoint == '='  || 
+           codepoint == '"';
+  }
+  
+  public static boolean is_regname(int codepoint) {
+    return isUnreserved(codepoint) || 
+           codepoint == '!'  || 
+           codepoint == '$'  || 
+           codepoint == '&'  || 
+           codepoint == '\'' || 
+           codepoint == '('  || 
+           codepoint == ')'  || 
+           codepoint == '*'  || 
+           codepoint == '+'  || 
+           codepoint == ','  || 
+           codepoint == ';'  || 
+           codepoint == '='  || 
+           codepoint == '"';
+  }
+  
+  public static boolean is_iuserinfo(int codepoint) {
+    return is_iunreserved(codepoint) || 
+           codepoint == ';' || 
+           codepoint == ':' || 
+           codepoint == '&' || 
+           codepoint == '=' || 
+           codepoint == '+' || 
+           codepoint == '$' || 
+           codepoint == ',';
+  }
+  
+  public static boolean is_iserver(int codepoint) {
+    return is_iuserinfo(codepoint) || 
+           is_iregname(codepoint) || 
+           isAlphaDigit(codepoint) || 
+           codepoint == '.' || 
+           codepoint == ':' || 
+           codepoint == '@' || 
+           codepoint == '[' || 
+           codepoint == ']' || 
+           codepoint == '%' || 
+           codepoint == '-';
+  }
+
+  public static void verify(CodepointIterator ci, Profile profile) throws InvalidCharacterException {
+    CodepointIterator rci = CodepointIterator.restrict(ci, profile.filter());
+    while (rci.hasNext()) rci.next();
+  }
+  
+  public static void verify(char[] s, Profile profile) throws InvalidCharacterException {
+    if (s == null) return;
+    verify(CodepointIterator.forCharArray(s),profile);
+  }
+  
+  public static void verify(String s, Profile profile) throws InvalidCharacterException {
+    if (s == null) return;
+    verify(CodepointIterator.forCharSequence(s),profile);
+  }
+  
+  public static void verifyNot(CodepointIterator ci, Profile profile) throws InvalidCharacterException {
+    CodepointIterator rci = ci.restrict(profile.filter(),false,true);
+    while (rci.hasNext()) rci.next();
+  }
+  
+  /**
+   * Runs the negated profile check over the codepoints of a char array by
+   * delegating to the CodepointIterator overload. A null array is accepted
+   * and treated as having nothing to verify, matching verify(char[], Profile).
+   *
+   * @param array   the chars to check; may be null
+   * @param profile the profile whose filter is applied
+   * @throws InvalidCharacterException if the restricted iterator rejects a
+   *         codepoint
+   */
+  public static void verifyNot(char[] array, Profile profile) throws InvalidCharacterException {
+    if (array == null) return;
+    verifyNot(CodepointIterator.forCharArray(array),profile);
+  }
+
+}
+

Added: incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/text/Codepoint.java
URL: http://svn.apache.org/viewvc/incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/text/Codepoint.java?rev=607801&view=auto
==============================================================================
--- incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/text/Codepoint.java (added)
+++ incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/text/Codepoint.java Mon Dec 31 20:59:44 2007
@@ -0,0 +1,169 @@
+/*
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements.  The ASF licenses this file to You
+* under the Apache License, Version 2.0 (the "License"); you may not
+* use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.  For additional information regarding
+* copyright in this work, please see the NOTICE file in the top level
+* directory of this distribution.
+*/
+package org.apache.abdera.i18n.text;
+
+import java.io.Serializable;
+import java.io.UnsupportedEncodingException;
+
+public class Codepoint
+  implements Serializable, 
+             Cloneable,
+             Comparable<Codepoint>{
+
+  private static final long serialVersionUID = 140337939131905483L;
+  
+  private static final String DEFAULT_ENCODING = "UTF-8";
+  private final int value;
+
+  public Codepoint(byte[] bytes) {
+    try {
+      this.value = valueFromCharSequence(new String(bytes,DEFAULT_ENCODING));
+    } catch (UnsupportedEncodingException e) {
+      throw new RuntimeException(e);
+    }
+  }
+  
+  public Codepoint(
+    byte[] bytes, 
+    String encoding) 
+      throws UnsupportedEncodingException {
+    this.value = valueFromCharSequence(new String(bytes,encoding));
+  }
+  
+  public Codepoint(CharSequence value) {
+    this(valueFromCharSequence(value));
+  }
+  
+  private static int valueFromCharSequence(CharSequence s) {
+    if (s.length() == 1) {
+      return (int)s.charAt(0);
+    } else if (s.length() > 2) {
+      throw new IllegalArgumentException("Too many chars");
+    } else {
+      char high = s.charAt(0);
+      char low = s.charAt(1);
+      return CharUtils.toSupplementary(high, low).getValue();
+    }
+  }
+  
+  public Codepoint(char value) {
+    this((int)value);
+  }
+  
+  public Codepoint(char high, char low) {
+    this(CharUtils.toSupplementary(high, low).getValue());
+  }
+  
+  public Codepoint(Codepoint codepoint) {
+    this(codepoint.value);
+  }
+  
+  public Codepoint(int value) {
+    if (value < 0) 
+      throw new IllegalArgumentException(
+        "Invalid Codepoint");
+    this.value = value;
+  }
+  
+  public int getValue() {
+    return value;
+  }
+  
+  public boolean isSupplementary() {
+    return CharUtils.isSupplementary(value);
+  }
+  
+  public boolean isLowSurrogate() {
+    return CharUtils.isLowSurrogate((char)value);
+  }
+  
+  public boolean isHighSurrogate() {
+    return CharUtils.isHighSurrogate((char)value);
+  }
+  
+  public int compareTo(Codepoint o) {
+    return value < o.value ? -1 :
+           value == o.value ? 0 : 1;
+  }
+  
+  public String toString() {
+    return CharUtils.toString(value);
+  }
+  
+  public char[] toChars() {
+    return toString().toCharArray();
+  }
+  
+  public int getCharCount() {
+    return toChars().length;
+  }
+  
+  public byte[] toBytes() {
+    try {
+      return toBytes(DEFAULT_ENCODING);
+    } catch (UnsupportedEncodingException e) {
+      throw new RuntimeException(e);
+    }
+  }
+  
+  public byte[] toBytes(
+    String encoding) 
+      throws UnsupportedEncodingException {
+    return toString().getBytes(encoding);
+  }
+
+  @Override 
+  public int hashCode() {
+    final int prime = 31;
+    int result = 1;
+    result = prime * result + value;
+    return result;
+  }
+
+  @Override 
+  public boolean equals(Object obj) {
+    if (this == obj) return true;
+    if (obj == null) return false;
+    if (getClass() != obj.getClass()) return false;
+    final Codepoint other = (Codepoint) obj;
+    if (value != other.value) return false;
+    return true;
+  }
+   
+  /**
+   * Plane 0 (0000–FFFF): Basic Multilingual Plane (BMP). This is the plane containing most of the character assignments so far. A primary objective for the BMP is to support the unification of prior character sets as well as characters for writing systems in current use.
+   * Plane 1 (10000–1FFFF): Supplementary Multilingual Plane (SMP).
+   * Plane 2 (20000–2FFFF): Supplementary Ideographic Plane (SIP)
+   * Planes 3 to 13 (30000–DFFFF) are unassigned
+   * Plane 14 (E0000–EFFFF): Supplementary Special-purpose Plane (SSP)
+   * Plane 15 (F0000–FFFFF) reserved for the Private Use Area (PUA)
+   * Plane 16 (100000–10FFFF), reserved for the Private Use Area (PUA)
+   **/
+  public int getPlane() {
+    return value / (0xFFFF + 1);
+  }
+  
+  public Codepoint clone() {
+    try {
+      return (Codepoint) super.clone();
+    } catch (CloneNotSupportedException e) {
+      return new Codepoint(value);
+    }
+  }
+  
+}

Added: incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/text/CodepointIterator.java
URL: http://svn.apache.org/viewvc/incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/text/CodepointIterator.java?rev=607801&view=auto
==============================================================================
--- incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/text/CodepointIterator.java (added)
+++ incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/text/CodepointIterator.java Mon Dec 31 20:59:44 2007
@@ -0,0 +1,619 @@
+/*
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements.  The ASF licenses this file to You
+* under the Apache License, Version 2.0 (the "License"); you may not
+* use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.  For additional information regarding
+* copyright in this work, please see the NOTICE file in the top level
+* directory of this distribution.
+*/
+package org.apache.abdera.i18n.text;
+
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.Reader;
+import java.nio.ByteBuffer;
+import java.nio.CharBuffer;
+import java.nio.channels.Channels;
+import java.nio.channels.ReadableByteChannel;
+import java.nio.channels.WritableByteChannel;
+import java.nio.charset.Charset;
+import java.util.Iterator;
+
+
+/**
+ * Provides an iterator over Unicode Codepoints.  The underlying data is a
+ * sequence of UTF-16 chars; a high/low surrogate pair is returned as a single
+ * supplementary codepoint.
+ */
+public abstract class CodepointIterator 
+  implements Iterator<Codepoint> {
+
+  /**
+   * Get a CodepointIterator for the specified char array
+   */
+  public static CodepointIterator forCharArray(char[] array) {
+    return new CharArrayCodepointIterator(array);
+  }
+  
+  /**
+   * Get a CodepointIterator for the specified CharSequence
+   */
+  public static CodepointIterator forCharSequence(CharSequence seq) {
+    return new CharSequenceCodepointIterator(seq);
+  }
+  
+  /**
+   * Get a CodepointIterator for the specified byte array, using the default charset
+   */
+  public static CodepointIterator forByteArray(byte[] array) {
+    return new ByteArrayCodepointIterator(array);
+  }
+
+  /**
+   * Get a CodepointIterator for the specified byte array, using the specified charset
+   */
+  public static CodepointIterator forByteArray(byte[] array, String charset) {
+    return new ByteArrayCodepointIterator(array,charset);
+  }
+  
+  /**
+   * Get a CodepointIterator for the specified CharBuffer
+   */
+  public static CodepointIterator forCharBuffer(CharBuffer buffer) {
+    return new CharBufferCodepointIterator(buffer);
+  }
+  
+  /**
+   * Get a CodepointIterator for the specified ReadableByteChannel, using the default charset
+   */
+  public static CodepointIterator forReadableByteChannel(ReadableByteChannel channel) {
+    return new ReadableByteChannelCodepointIterator(channel);
+  }
+
+  /**
+   * Get a CodepointIterator for the specified ReadableByteChannel, using the specified charset
+   */
+  public static CodepointIterator forReadableByteChannel(ReadableByteChannel channel, String charset) {
+    return new ReadableByteChannelCodepointIterator(channel,charset);
+  }
+
+  /**
+   * Get a CodepointIterator for the specified InputStream, using the default charset
+   */
+  public static CodepointIterator forInputStream(InputStream in) {
+    return new ReadableByteChannelCodepointIterator(Channels.newChannel(in));
+  }
+
+  /**
+   * Get a CodepointIterator for the specified InputStream using the specified charset
+   */
+  public static CodepointIterator forInputStream(InputStream in, String charset) {
+    return new ReadableByteChannelCodepointIterator(Channels.newChannel(in),charset);
+  }
+  
+  /**
+   * Get a CodepointIterator for the specified Reader
+   */
+  public static CodepointIterator forReader(Reader in) {
+    return new ReaderCodepointIterator(in);
+  }
+
+  /**
+   * Wrap the iterator so that codepoints matched by the filter are rejected
+   * with an InvalidCharacterException
+   */
+  public static CodepointIterator restrict(CodepointIterator ci, Filter filter) {
+    return new RestrictedCodepointIterator(ci,filter,false);
+  }
+  
+  /**
+   * Wrap the iterator so that codepoints matched by the filter are rejected.
+   * If scanning is true, iteration stops at the first rejected codepoint 
+   * (hasNext() returns false and next() returns null); otherwise an 
+   * InvalidCharacterException is thrown
+   */
+  public static CodepointIterator restrict(CodepointIterator ci, Filter filter, boolean scanning) {
+    return new RestrictedCodepointIterator(ci,filter,scanning);
+  }
+  
+  /**
+   * Same as {@link #restrict(CodepointIterator, Filter, boolean)} except that,
+   * if invert is true, the codepoints NOT matched by the filter are rejected
+   */
+  public static CodepointIterator restrict(CodepointIterator ci, Filter filter, boolean scanning, boolean invert) {
+    return new RestrictedCodepointIterator(ci,filter,scanning,invert);
+  }
+  
+  /** Index of the next char to be returned; -1 until set by a subclass */
+  protected int position = -1;
+  /** Index one past the last char available; -1 until set by a subclass */
+  protected int limit = -1;
+
+  /** Restrict this iterator; see {@link #restrict(CodepointIterator, Filter)} */
+  public CodepointIterator restrict(Filter filter) {
+    return restrict(this, filter);
+  }
+  
+  /** Restrict this iterator; see {@link #restrict(CodepointIterator, Filter, boolean)} */
+  public CodepointIterator restrict(Filter filter, boolean scanning) {
+    return restrict(this, filter, scanning);
+  }
+  
+  /** Restrict this iterator; see {@link #restrict(CodepointIterator, Filter, boolean, boolean)} */
+  public CodepointIterator restrict(Filter filter, boolean scanning, boolean invert) {
+    return restrict(this, filter, scanning, invert);
+  }
+  
+  /**
+   * Get the next char
+   */
+  protected abstract char get();
+  
+  /**
+   * Get the specified char
+   */
+  protected abstract char get(int index);
+  
+  /**
+   * True if there are codepoints remaining
+   */
+  public boolean hasNext() {
+    return remaining() > 0;
+  }
+  
+  /**
+   * Return position()-1 while the position is below the limit, position() 
+   * once the limit has been reached, or -1 if the position is negative
+   */
+  public int lastPosition() {
+    int p = position();
+    return (p > -1) ? 
+      (p >= limit()) ? p : p - 1 : -1;
+  }
+  
+  /**
+   * Return the next chars.  If the codepoint is not supplemental,
+   * the char array will have a single member.  If the codepoint is 
+   * supplemental, the char array will have two members, representing
+   * the high and low surrogate chars, in that order.  A surrogate that has 
+   * no neighbouring char to pair with is returned as a single member.
+   * Returns null if there are no chars remaining.
+   * 
+   * @throws InvalidCharacterException if a surrogate is adjacent to a char 
+   *   that does not complete the pair
+   */
+  public char[] nextChars() throws InvalidCharacterException {
+    if (!hasNext()) return null;
+    // consume exactly one char up front; it must never be fetched twice
+    char c1 = get();
+    if (CharUtils.isHighSurrogate(c1) && position() < limit()) {
+      char c2 = get();
+      if (CharUtils.isLowSurrogate(c2)) {
+        return new char[] {c1,c2};
+      } else {
+        throw new InvalidCharacterException(c2);
+      }
+    } else if (CharUtils.isLowSurrogate(c1) && position() > 1) {
+      // c1 sat at position()-1, so the char preceding it is at position()-2
+      char c2 = get(position()-2);
+      if (CharUtils.isHighSurrogate(c2)) {
+        return new char[] {c2,c1};
+      } else {
+        throw new InvalidCharacterException(c2);
+      }
+    }
+    return new char[] {c1}; 
+  }
+
+  /**
+   * Peek the next chars in the iterator. If the codepoint is not supplemental,
+   * the char array will have a single member.  If the codepoint is 
+   * supplemental, the char array will have two members, representing
+   * the high and low surrogate chars
+   */
+  public char[] peekChars() throws InvalidCharacterException {
+    return peekChars(position());
+  }
+  
+  /**
+   * Peek the specified chars in the iterator. If the codepoint is not supplemental,
+   * the char array will have a single member.  If the codepoint is 
+   * supplemental, the char array will have two members, representing
+   * the high and low surrogate chars.  Returns null if pos is outside of 
+   * the range 0..limit()-1.
+   */
+  private char[] peekChars(int pos) throws InvalidCharacterException {
+    if (pos < 0 || pos >= limit()) return null;
+    char c1 = get(pos);
+    if (CharUtils.isHighSurrogate(c1) && pos + 1 < limit()) {
+      // a following char exists: it must be the low half of the pair
+      char c2 = get(pos+1);
+      if (CharUtils.isLowSurrogate(c2)) {
+        return new char[] {c1,c2};
+      } else {
+        throw new InvalidCharacterException(c2);
+      }
+    } else if (CharUtils.isLowSurrogate(c1) && pos > 0) {
+      // a preceding char exists: it must be the high half of the pair
+      char c2 = get(pos-1);
+      if (CharUtils.isHighSurrogate(c2)) {
+        return new char[] {c2,c1};
+      } else {
+        throw new InvalidCharacterException(c2);
+      }
+    } else  return new char[] {c1}; 
+  }
+  
+  /**
+   * Return the next codepoint
+   */
+  public Codepoint next() throws InvalidCharacterException {
+    return toCodepoint(nextChars());
+  }
+
+  /**
+   * Peek the next codepoint
+   */
+  public Codepoint peek() throws InvalidCharacterException {
+    return toCodepoint(peekChars());
+  }
+  
+  /**
+   * Peek the specified codepoint
+   */
+  public Codepoint peek(int index) throws InvalidCharacterException {
+    return toCodepoint(peekChars(index));
+  }
+
+  /**
+   * Convert the output of nextChars/peekChars to a Codepoint (null for null)
+   */
+  private Codepoint toCodepoint(char[] chars) {
+    return (chars == null) ? null :
+      (chars.length == 1) ? new Codepoint(chars[0]) :
+      CharUtils.toSupplementary(chars[0], chars[1]);
+  }
+  
+  /**
+   * Set the iterator position
+   */
+  public void position(int n) {
+    if (n < 0 || n > limit()) throw new ArrayIndexOutOfBoundsException(n);
+    position = n;
+  }
+  
+  /**
+   * Get the iterator position
+   */
+  public int position() {
+    return position;
+  }
+
+  /**
+   * Return the iterator limit
+   */
+  public int limit() {
+    return limit;
+  }
+  
+  /**
+   * Return the remaining iterator size
+   */
+  public int remaining() {
+    return limit - position();
+  }
+  
+  /**
+   * Returns true if the char at the specified index is a high surrogate
+   */
+  public boolean isHigh(int index) {
+    if (index < 0 || index >= limit()) throw new ArrayIndexOutOfBoundsException(index);
+    return CharUtils.isHighSurrogate(get(index));
+  }
+
+  /**
+   * Returns true if the char at the specified index is a low surrogate
+   */
+  public boolean isLow(int index) {
+    if (index < 0 || index >= limit()) throw new ArrayIndexOutOfBoundsException(index);
+    return CharUtils.isLowSurrogate(get(index));
+  }
+
+  /**
+   * Not supported
+   */
+  public void remove() {
+    throw new UnsupportedOperationException();
+  }
+  
+  
+  /**
+   * Return a char array holding the buffer content at indexes 0..limit()-1,
+   * so that buffer positions can be used directly as array indexes.  The
+   * backing array is used as-is when it is accessible and starts at offset 
+   * zero; otherwise (read-only, direct or sliced buffers) the content is copied.
+   */
+  private static char[] charsOf(CharBuffer cb) {
+    if (cb.hasArray() && cb.arrayOffset() == 0) return cb.array();
+    CharBuffer view = cb.duplicate();
+    view.rewind();
+    char[] chars = new char[view.remaining()];
+    view.get(chars);
+    return chars;
+  }
+  
+  /**
+   * Iterates over the chars decoded from a byte array
+   */
+  static class ByteArrayCodepointIterator 
+    extends CharArrayCodepointIterator {
+    public ByteArrayCodepointIterator(byte[] bytes) {
+      this(bytes,Charset.defaultCharset());
+    }
+    public ByteArrayCodepointIterator(byte[] bytes, String charset) {
+      this(bytes,Charset.forName(charset));
+    }
+    public ByteArrayCodepointIterator(byte[] bytes, Charset charset) {
+      CharBuffer cb = charset.decode(ByteBuffer.wrap(bytes));
+      buffer = charsOf(cb);
+      position = cb.position();
+      limit = cb.limit();
+    }
+  }
+  
+  /**
+   * Iterates over the chars of a char array
+   */
+  static class CharArrayCodepointIterator 
+    extends CodepointIterator {
+    protected char[] buffer;
+    protected CharArrayCodepointIterator() {}    
+    public CharArrayCodepointIterator(char[] buffer) {
+      this(buffer,0,buffer.length);
+    }  
+    public CharArrayCodepointIterator(char[] buffer, int n, int e) {
+      this.buffer = buffer;
+      this.position = n;
+      // NOTE(review): the limit is used as an absolute end index, yet it is 
+      // capped at buffer.length-n; this looks wrong for n > 0 - confirm intent
+      this.limit = Math.min(buffer.length-n,e);
+    }  
+    protected char get() {
+      // (char)-1, that is U+FFFF, is returned once the limit has been reached
+      return (position < limit) ? buffer[position++] : (char)-1;
+    }    
+    protected char get(int index) {
+      if (index < 0 || index >= limit) 
+        throw new ArrayIndexOutOfBoundsException(index);
+      return buffer[index];
+    }  
+  }
+  
+  /**
+   * Iterates over the chars of a CharBuffer, from its position to its limit
+   */
+  static class CharBufferCodepointIterator 
+    extends CharArrayCodepointIterator {
+    public CharBufferCodepointIterator(CharBuffer cb) {
+      buffer = charsOf(cb);
+      position = cb.position();
+      limit = cb.limit();
+    }
+  }
+  
+  /**
+   * Iterates over the chars of a CharSequence
+   */
+  static class CharSequenceCodepointIterator 
+    extends CodepointIterator {
+    private CharSequence buffer;
+    public CharSequenceCodepointIterator(CharSequence buffer) {
+      this(buffer,0,buffer.length());
+    }
+    public CharSequenceCodepointIterator(CharSequence buffer, int n, int e) {
+      this.buffer = buffer;
+      this.position = n;
+      // NOTE(review): capped at length()-n although used as an end index - confirm
+      this.limit = Math.min(buffer.length()-n,e);
+    }
+    protected char get() {
+      return buffer.charAt(position++);
+    }
+    protected char get(int index) {
+      return buffer.charAt(index);
+    }
+  }
+  
+  /**
+   * Iterates over the chars decoded from the bytes read from a channel.  The 
+   * channel is drained when the iterator is constructed.
+   */
+  static class ReadableByteChannelCodepointIterator 
+    extends CharArrayCodepointIterator {
+    public ReadableByteChannelCodepointIterator(
+      ReadableByteChannel channel) {
+        this(channel,Charset.defaultCharset());
+    }
+    public ReadableByteChannelCodepointIterator(
+      ReadableByteChannel channel, 
+      String charset) {
+        this(channel,Charset.forName(charset));
+    }
+    public ReadableByteChannelCodepointIterator(
+      ReadableByteChannel channel, 
+      Charset charset) {
+        try {
+          ByteBuffer buf = ByteBuffer.allocate(1024);
+          ByteArrayOutputStream out = new ByteArrayOutputStream();
+          WritableByteChannel outc = Channels.newChannel(out);
+          while(channel.read(buf) > 0) {
+            buf.flip();
+            outc.write(buf);
+            // make the buffer writable again, otherwise the next read has
+            // no room left and the loop ends after the first chunk
+            buf.clear();
+          }
+          CharBuffer cb = charset.decode(ByteBuffer.wrap(out.toByteArray()));
+          buffer = charsOf(cb);
+          position = cb.position();
+          limit = cb.limit();
+        } catch (IOException e) {
+          // same policy as ReaderCodepointIterator: never hide an I/O failure
+          throw new RuntimeException(e);
+        }
+    }    
+  }
+  
+  /**
+   * Iterates over the chars read from a Reader.  The reader is drained when 
+   * the iterator is constructed.
+   */
+  static class ReaderCodepointIterator 
+    extends CharArrayCodepointIterator {
+    public ReaderCodepointIterator(Reader reader) {
+      try {
+        StringBuilder sb = new StringBuilder();
+        char[] buf = new char[1024];
+        int n = -1;
+        while((n = reader.read(buf)) > -1) {
+          sb.append(buf,0,n);
+        }
+        buffer = new char[sb.length()];
+        sb.getChars(0, sb.length(), buffer, 0);
+        position = 0;
+        limit = buffer.length;
+      } catch (IOException e) {
+        throw new RuntimeException(e);
+      }
+    }
+  }
+  
+  
+  /**
+   * CodepointIterator that rejects the codepoints matched by a Filter (or, 
+   * when inverted, the codepoints not matched by it).  In scanning mode the
+   * iteration ends in front of the first rejected codepoint; otherwise an 
+   * InvalidCharacterException is thrown for it.
+   */
+  public static class RestrictedCodepointIterator 
+    extends DelegatingCodepointIterator {
+  
+    private final Filter filter;
+    private final boolean scanningOnly;
+    private final boolean notset;
+  
+    protected RestrictedCodepointIterator(
+      CodepointIterator internal, 
+      Filter filter) {
+        this(internal,filter,false);
+    }
+  
+    protected RestrictedCodepointIterator(
+      CodepointIterator internal, 
+      Filter filter,
+      boolean scanningOnly) {
+        this(internal, filter, scanningOnly, false);
+    }
+    
+    protected RestrictedCodepointIterator(
+      CodepointIterator internal, 
+      Filter filter,
+      boolean scanningOnly,
+      boolean notset) {
+        super(internal);
+        this.filter = filter;
+        this.scanningOnly = scanningOnly;
+        this.notset = notset;
+    }
+  
+    /**
+     * True if there are codepoints remaining and, in scanning mode, the next 
+     * one is well formed and not rejected
+     */
+    public boolean hasNext() {
+      boolean b = super.hasNext();
+      // only peek when something is left: peek() returns null at the end
+      if (b && scanningOnly) {
+        try {
+          Codepoint next = peek(position());
+          if (next != null) {
+            int cp = next.getValue();
+            if (cp != -1 && check(cp)) return false;
+          }
+        } catch (InvalidCharacterException e) { return false; }
+      } 
+      return b;
+    }
+    
+    /**
+     * Return the next codepoint, or null if there is none or, in scanning 
+     * mode, if it is rejected (the position is then left in front of it)
+     * 
+     * @throws InvalidCharacterException if the codepoint is rejected and 
+     *   this iterator is not in scanning mode
+     */
+    @Override
+    public Codepoint next() throws InvalidCharacterException {
+      Codepoint next = super.next();
+      if (next == null) return null;
+      int cp = next.getValue();
+      if (cp != -1 && check(cp)) {
+        if (scanningOnly) {
+          // step back over every char consumed: two for a surrogate pair
+          position(position() - (cp > 0xFFFF ? 2 : 1));
+          return null;
+        }
+        else throw new InvalidCharacterException(cp);
+      }
+      return new Codepoint(cp);
+    }
+  
+    /**
+     * True if the codepoint is to be rejected: that is, if the filter accepts 
+     * it or, when inverted, if the filter does not accept it
+     */
+    private boolean check(int cp) {
+      boolean answer = !filter.accept(cp);
+      return (!notset) ? !answer : answer;
+    }
+    
+    /**
+     * Same as {@link #next()} but returns the chars that make up the codepoint
+     */
+    @Override
+    public char[] nextChars() throws InvalidCharacterException {
+      char[] chars = super.nextChars();
+      if (chars != null && chars.length > 0) {
+        if (chars.length == 1 && check(chars[0])) {
+          if (scanningOnly) {
+            position(position()-1);
+            return null;
+          }
+          else throw new InvalidCharacterException(chars[0]);
+        } else if (chars.length == 2) {
+          int cp = CharUtils.toSupplementary(chars[0],chars[1]).getValue();
+          if (check(cp)) {
+            if (scanningOnly) {
+              position(position()-2);
+              return null; 
+            }
+            else throw new InvalidCharacterException(cp);
+          }
+        }
+      }
+      return chars;
+    }
+   
+  }
+
+}

Added: incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/text/DelegatingCodepointIterator.java
URL: http://svn.apache.org/viewvc/incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/text/DelegatingCodepointIterator.java?rev=607801&view=auto
==============================================================================
--- incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/text/DelegatingCodepointIterator.java (added)
+++ incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/text/DelegatingCodepointIterator.java Mon Dec 31 20:59:44 2007
@@ -0,0 +1,110 @@
+/*
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements.  The ASF licenses this file to You
+* under the Apache License, Version 2.0 (the "License"); you may not
+* use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.  For additional information regarding
+* copyright in this work, please see the NOTICE file in the top level
+* directory of this distribution.
+*/
+package org.apache.abdera.i18n.text;
+
+/**
+ * Base implementation of a CodepointIterator that wraps another 
+ * CodepointIterator.  Every method overridden here simply forwards to the 
+ * wrapped iterator; subclasses override the methods whose behaviour they 
+ * want to change (to filter the output, for instance).
+ */
+public abstract class DelegatingCodepointIterator 
+  extends CodepointIterator {
+
+  /** The wrapped iterator that all calls are forwarded to */
+  private final CodepointIterator delegate;
+  
+  /**
+   * @param internal the iterator that calls are forwarded to
+   */
+  protected DelegatingCodepointIterator(
+    CodepointIterator internal) {
+    this.delegate = internal;
+  }
+  
+  @Override
+  protected char get() {
+    return delegate.get();
+  }
+
+  @Override
+  protected char get(int index) {
+    return delegate.get(index);
+  }
+
+  @Override
+  public boolean hasNext() {
+    return delegate.hasNext();
+  }
+
+  @Override
+  public boolean isHigh(int index) {
+    return delegate.isHigh(index);
+  }
+
+  @Override
+  public boolean isLow(int index) {
+    return delegate.isLow(index);
+  }
+
+  @Override
+  public int limit() {
+    return delegate.limit();
+  }
+
+  @Override
+  public Codepoint next() {
+    return delegate.next();
+  }
+
+  @Override
+  public char[] nextChars(){
+    return delegate.nextChars();
+  }
+
+  @Override
+  public Codepoint peek() {
+    return delegate.peek();
+  }
+
+  @Override
+  public Codepoint peek(int index) {
+    return delegate.peek(index);
+  }
+
+  @Override
+  public char[] peekChars() {
+    return delegate.peekChars();
+  }
+
+  @Override
+  public int position() {
+    return delegate.position();
+  }
+
+  @Override
+  public int remaining() {
+    return delegate.remaining();
+  }
+  
+  @Override
+  public void position(int position) {
+    delegate.position(position);
+  }
+
+}

Added: incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/text/Filter.java
URL: http://svn.apache.org/viewvc/incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/text/Filter.java?rev=607801&view=auto
==============================================================================
--- incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/text/Filter.java (added)
+++ incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/text/Filter.java Mon Dec 31 20:59:44 2007
@@ -0,0 +1,41 @@
+/*
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements.  The ASF licenses this file to You
+* under the Apache License, Version 2.0 (the "License"); you may not
+* use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.  For additional information regarding
+* copyright in this work, please see the NOTICE file in the top level
+* directory of this distribution.
+*/
+package org.apache.abdera.i18n.text;
+
+/**
+ * Predicate over Unicode codepoint values
+ */
+public interface Filter {
+
+  /**
+   * Test a single codepoint
+   * 
+   * @param c the codepoint value to test
+   * @return true if this filter accepts the codepoint
+   */
+  boolean accept(int c);
+  
+  /**
+   * Filter whose accept method returns true for every input
+   */
+  Filter NONOPFILTER = new Filter() {
+    public boolean accept(int c) {
+      return true;
+    }
+  };
+}

Added: incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/text/InvalidCharacterException.java
URL: http://svn.apache.org/viewvc/incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/text/InvalidCharacterException.java?rev=607801&view=auto
==============================================================================
--- incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/text/InvalidCharacterException.java (added)
+++ incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/text/InvalidCharacterException.java Mon Dec 31 20:59:44 2007
@@ -0,0 +1,51 @@
+/*
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements.  The ASF licenses this file to You
+* under the Apache License, Version 2.0 (the "License"); you may not
+* use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.  For additional information regarding
+* copyright in this work, please see the NOTICE file in the top level
+* directory of this distribution.
+*/
+package org.apache.abdera.i18n.text;
+
+
+/**
+ * Unchecked exception thrown when an invalid char or codepoint is found
+ */
+public class InvalidCharacterException 
+  extends RuntimeException {
+
+  private static final long serialVersionUID = -7150645484748059676L;
+  /** The offending char or codepoint value, as given to the constructor */
+  private final int input;
+  
+  /**
+   * @param input the offending char or codepoint value
+   */
+  public InvalidCharacterException(int input) {
+    this.input = input;
+  }
+
+  /**
+   * Build the message from the hex value and the character itself.  Valid 
+   * codepoints above U+FFFF are rendered in full (as a surrogate pair) rather
+   * than being truncated by a cast to char.
+   */
+  @Override
+  public String getMessage() {
+    String ch = Character.isValidCodePoint(input) ?
+      new String(Character.toChars(input)) : String.valueOf((char)input);
+    return "Invalid Character 0x" + Integer.toHexString(input) + "(" + ch + ")";
+  }
+
+  
+}