You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@abdera.apache.org by jm...@apache.org on 2006/09/22 07:40:03 UTC

svn commit: r448818 [2/3] - in /incubator/abdera/java/trunk/core/src: main/java/org/apache/abdera/util/ main/java/org/apache/abdera/util/io/ main/java/org/apache/abdera/util/iri/ main/java/org/apache/abdera/util/lang/ main/java/org/apache/abdera/util/u...

Added: incubator/abdera/java/trunk/core/src/main/java/org/apache/abdera/util/iri/IRI.java
URL: http://svn.apache.org/viewvc/incubator/abdera/java/trunk/core/src/main/java/org/apache/abdera/util/iri/IRI.java?view=auto&rev=448818
==============================================================================
--- incubator/abdera/java/trunk/core/src/main/java/org/apache/abdera/util/iri/IRI.java (added)
+++ incubator/abdera/java/trunk/core/src/main/java/org/apache/abdera/util/iri/IRI.java Thu Sep 21 22:40:01 2006
@@ -0,0 +1,797 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  The ASF licenses this file to You
+ * under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.  For additional information regarding
+ * copyright in this work, please see the NOTICE file in the top level
+ * directory of this distribution.
+ */
+package org.apache.abdera.util.iri;
+
+import java.io.IOException;
+import java.io.Serializable;
+import java.net.MalformedURLException;
+import java.net.URISyntaxException;
+import java.util.BitSet;
+
+import org.apache.abdera.util.io.CharUtils;
+import org.apache.abdera.util.io.CodepointIterator;
+import org.apache.abdera.util.unicode.Normalizer;
+
+
+public class IRI 
+  implements Serializable, 
+             Cloneable {
+
+  private static final long serialVersionUID = -4530530782760282284L;
+  private Scheme _scheme;
+  private String scheme;
+  private String authority;
+  private String userinfo;
+  private String host;
+  private int port;
+  private String path;
+  private String query;
+  private String fragment;
+  private boolean doubleslash;
+
+  private String a_host;
+  private String a_fragment;
+  private String a_path;
+  private String a_query;
+  private String a_userinfo;
+  private String a_authority;
+  
+  private String d_authority;
+  private String d_userinfo;
+  private String d_host;
+  private String d_path;
+  private String d_query;
+  private String d_fragment;
+  
+  public IRI(java.net.URL url) throws IRISyntaxException, IOException {
+    this(Escaping.encode(
+        Escaping.decode(url.toString()), 
+        Constants.IUNRESERVED, 
+        Constants.RESERVED, 
+        Constants.PCTENC));
+  }
+  
+  public IRI(java.net.URI uri) throws IRISyntaxException, IOException {
+    this(Escaping.encode(
+      Escaping.decode(uri.toString()), 
+      Constants.IUNRESERVED, 
+      Constants.RESERVED, 
+      Constants.PCTENC,
+      Constants.GENDELIMS));
+  }
+  
+  public IRI(String iri) throws IRISyntaxException, IOException {
+    Builder b = new Builder();
+    parse(iri, b);
+    init(
+      b.schemeobj,
+      b.scheme,
+      b.authority,
+      b.userinfo,
+      b.host,
+      b.port,
+      b.path,
+      b.query,
+      b.fragment,
+      b.doubleslash);
+  }
+  
+  public IRI(String iri, Normalizer.Form nf) throws IRISyntaxException, IOException {
+    this(Normalizer.normalize(iri,nf).toString());
+  }
+  
+  /**
+   * Creates an IRI from separate components. The authority is assembled
+   * from userinfo, host and port; when that yields an empty string the IRI
+   * has no authority and is written without a leading "//".
+   */
+  public IRI(
+    String scheme, 
+    String userinfo, 
+    String host, 
+    int port, 
+    String path, 
+    String query, 
+    String fragment) {
+      Scheme registered = SchemeRegistry.getInstance().getScheme(scheme);
+      StringBuffer assembled = new StringBuffer();
+      buildAuthority(assembled, userinfo, host, port);
+      String authority = null;
+      if (assembled.length() != 0) {
+        authority = assembled.toString();
+      }
+      init(registered, scheme, authority, userinfo,
+        host, port, path, query, fragment, authority != null);
+  }
+  
+  /**
+   * Creates an IRI from separate components, taking the authority as one
+   * string. Userinfo, host and port are derived from it by splitAuthority;
+   * a null authority leaves them at the Builder defaults (null, null, -1)
+   * and produces an IRI without a leading "//".
+   */
+  public IRI(
+    String scheme,
+    String authority,
+    String path,
+    String query,
+    String fragment) {
+      Builder parts = new Builder();
+      // splitAuthority ignores a null authority, so no guard is needed here
+      splitAuthority(authority, parts);
+      Scheme registered = SchemeRegistry.getInstance().getScheme(scheme);
+      init(registered, scheme, authority, parts.userinfo,
+        parts.host, parts.port, path, query,
+        fragment, authority != null);
+  }
+  
+  public IRI(
+    String scheme,
+    String host,
+    String path,
+    String fragment) {
+      this(scheme, null, host, -1, path, null, fragment);
+  }
+  
+  IRI(
+    Scheme _scheme,
+    String scheme,
+    String authority,
+    String userinfo,
+    String host,
+    int port,
+    String path,
+    String query,
+    String fragment,
+    boolean doubleslash) {
+      init(_scheme,scheme,authority,userinfo,
+         host,port,path,query,fragment,doubleslash);
+  }
+  
+  /**
+   * Stores the raw components and derives the two cached views of them:
+   * the d_* fields hold the result of Escaping.decode on each raw value and
+   * the a_* fields hold the ASCII forms (IDNA.toASCII for the host,
+   * Escaping.encode for the rest).
+   *
+   * The a_* fields are computed through the getters, which return the d_*
+   * fields, so the d_* assignments must stay ahead of the a_* assignments.
+   */
+  private void init(
+      Scheme _scheme,
+      String scheme,
+      String authority,
+      String userinfo,
+      String host,
+      int port,
+      String path,
+      String query,
+      String fragment,
+      boolean doubleslash) {
+    this._scheme = _scheme;
+    this.scheme = scheme;
+    this.authority = authority;
+    this.userinfo = userinfo;
+    this.host = host;
+    this.port = port;
+    // a missing path is stored as the empty string, never as null
+    this.path = (path != null) ? path : "";
+    this.query = query;
+    this.fragment = fragment;
+    this.doubleslash = doubleslash;
+    
+    d_authority = Escaping.decode(authority);
+    d_userinfo = Escaping.decode(userinfo);
+    // NOTE(review): this decodes the parameter, not this.path, so for a null
+    // path the decoded view is whatever Escaping.decode(null) returns while
+    // the raw view is "" - confirm that the mismatch is intended
+    d_path = Escaping.decode(path);
+    d_query = Escaping.decode(query);
+    d_fragment = Escaping.decode(fragment);
+    d_host = Escaping.decode(host);
+
+    a_host = IDNA.toASCII(d_host);
+    a_fragment = Escaping.encode(getFragment(),Constants.FRAGMENT);
+    // the ASCII path is additionally run through dot-segment normalization
+    a_path = normalize(Escaping.encode(getPath(), Constants.PATH));
+    a_query = Escaping.encode(getQuery(),Constants.QUERY);
+    a_userinfo = Escaping.encode(getUserInfo(),Constants.USERINFO);
+    // needs a_userinfo and a_host, so it has to come last
+    a_authority = buildASCIIAuthority();
+  }
+    
+  /**
+   * Hashes the raw (undecoded) components that equals compares, folding
+   * them in with the multiplier 31; a null component contributes 0.
+   */
+  @Override
+  public int hashCode() {
+    int h = 1;
+    h = 31 * h + (authority != null ? authority.hashCode() : 0);
+    h = 31 * h + (fragment != null ? fragment.hashCode() : 0);
+    h = 31 * h + (host != null ? host.hashCode() : 0);
+    h = 31 * h + (path != null ? path.hashCode() : 0);
+    h = 31 * h + port;
+    h = 31 * h + (query != null ? query.hashCode() : 0);
+    h = 31 * h + (scheme != null ? scheme.hashCode() : 0);
+    h = 31 * h + (userinfo != null ? userinfo.hashCode() : 0);
+    return h;
+  }
+
+  /**
+   * Two IRIs are equal when they are of the same class and all raw
+   * components (scheme, authority, userinfo, host, port, path, query and
+   * fragment) match exactly. No normalization or case folding is applied;
+   * use equivalent for a looser comparison.
+   */
+  @Override
+  public boolean equals(Object obj) {
+    if (this == obj) {
+      return true;
+    }
+    if (obj == null || getClass() != obj.getClass()) {
+      return false;
+    }
+    final IRI that = (IRI) obj;
+    return port == that.port
+        && sameComponent(authority, that.authority)
+        && sameComponent(fragment, that.fragment)
+        && sameComponent(host, that.host)
+        && sameComponent(path, that.path)
+        && sameComponent(query, that.query)
+        && sameComponent(scheme, that.scheme)
+        && sameComponent(userinfo, that.userinfo);
+  }
+
+  /** Null-safe string equality: two nulls match, a null never matches a value. */
+  private static boolean sameComponent(String mine, String theirs) {
+    return (mine == null) ? (theirs == null) : mine.equals(theirs);
+  }
+
+  public boolean equivalent(IRI uri) {
+    if (_scheme != null) return _scheme.equivalent(this, uri);
+    else {
+      String s2 = uri.normalize().toASCIIString();
+      String s1 = this.normalize().toASCIIString();
+      return s1.compareTo(s2) == 0;
+    }
+  }
+  
+  public String getAuthority() {
+    return d_authority;
+  }
+  
+  public String getFragment() {
+    return d_fragment;
+  }
+  
+  public String getHost() {
+    return d_host;
+  }
+  
+  public IDNA getIDN() {
+    return new IDNA(d_host);
+  }
+  
+  public String getASCIIHost() {
+    return a_host;
+  }
+  
+  public String getPath() {
+    return d_path;
+  }
+  
+  public int getPort() {
+    return port;
+  }
+  
+  public String getQuery() {
+    return d_query;
+  }
+  
+  /**
+   * Returns the scheme in lower case, or null when the IRI has no scheme.
+   *
+   * The conversion uses a fixed locale: with the default-locale overload a
+   * scheme such as "FILE" would become "f\u0131le" (dotless i) on a JVM
+   * running under a Turkish locale.
+   */
+  public String getScheme() {
+    return (scheme != null) ? scheme.toLowerCase(java.util.Locale.ENGLISH) : null;
+  }
+  
+  public String getSchemeSpecificPart() {
+    return buildSchemeSpecificPart(
+      userinfo, 
+      host, 
+      port, 
+      path, 
+      query, 
+      fragment);
+  }
+  
+  public String getUserInfo() {
+    return d_userinfo;
+  }
+  
+  public String getRawAuthority() {
+    return authority;
+  }
+  
+  public String getRawFragment() {
+    return fragment;
+  }
+  
+  public String getRawPath() {
+    return path;
+  }
+  
+  public String getRawQuery() {
+    return query;
+  }
+  
+  public String getRawSchemeSpecificPart() {
+    return buildSchemeSpecificPart(
+      userinfo, 
+      host, 
+      port, 
+      path, 
+      query, 
+      fragment);
+  }
+  
+  public String getRawUserInfo() {
+    return userinfo;
+  }
+  
+  /**
+   * Appends "userinfo@host:port" to buf, leaving out every part that is
+   * absent: userinfo and host when null or empty, the port when it is -1.
+   */
+  private void buildAuthority(
+    StringBuffer buf, 
+    String aui, 
+    String ah, 
+    int port) {
+    boolean hasUserInfo = aui != null && aui.length() != 0;
+    boolean hasHost = ah != null && ah.length() != 0;
+    if (hasUserInfo) {
+      buf.append(aui).append('@');
+    }
+    if (hasHost) {
+      buf.append(ah);
+    }
+    if (port != -1) {
+      buf.append(':').append(port);
+    }
+  }
+  
+  /**
+   * Assembles the authority from the ASCII userinfo, the ASCII host and the
+   * port. Returns the empty string (not null) when all three are absent.
+   */
+  private String buildASCIIAuthority() {
+    StringBuffer out = new StringBuffer();
+    buildAuthority(out, getASCIIUserInfo(), getASCIIHost(), getPort());
+    return out.toString();
+  }
+  
+  public String getASCIIAuthority() {
+    return a_authority;
+  }
+  
+  public String getASCIIFragment() {
+    return a_fragment;
+  }
+  
+  public String getASCIIPath() {
+    return a_path;
+  }
+  
+  public String getASCIIQuery() {
+    return a_query;
+  }
+  
+  public String getASCIIUserInfo() {
+    return a_userinfo;
+  }
+  
+  public String getASCIISchemeSpecificPart() {
+    return buildSchemeSpecificPart(
+      getASCIIUserInfo(), 
+      getASCIIHost(), 
+      getPort(), 
+      getASCIIPath(), 
+      getASCIIQuery(), 
+      getASCIIFragment());
+  }
+  
+  /**
+   * Concatenates everything that follows "scheme:": an optional "//" (when
+   * this IRI's doubleslash flag is set), the authority built from the given
+   * userinfo, host and port, then the path, "?query" and "#fragment". Null
+   * or empty parts are left out together with their delimiter.
+   */
+  private String buildSchemeSpecificPart(
+    String userinfo,
+    String host,
+    int port,
+    String path,
+    String query,
+    String fragment) {
+      StringBuffer out = new StringBuffer();
+      if (doubleslash) {
+        out.append("//");
+      }
+      buildAuthority(out, userinfo, host, port);
+      boolean hasPath = path != null && path.length() != 0;
+      boolean hasQuery = query != null && query.length() != 0;
+      boolean hasFragment = fragment != null && fragment.length() != 0;
+      if (hasPath) {
+        out.append(path);
+      }
+      if (hasQuery) {
+        out.append('?').append(query);
+      }
+      if (hasFragment) {
+        out.append('#').append(fragment);
+      }
+      return out.toString();
+  }
+  
+  public Object clone() throws CloneNotSupportedException {
+    return super.clone();
+  }
+  
+  public boolean isAbsolute() {
+    return scheme != null;
+  }
+  
+  /**
+   * Reports whether the raw path is null.
+   *
+   * NOTE(review): init replaces a null path with "", so for any instance
+   * set up through init this returns false - confirm whether opaque IRIs
+   * are meant to be detectable here.
+   */
+  public boolean isOpaque() {
+    return path == null;
+  }
+  
+  /**
+   * Expresses c relative to the base b.
+   *
+   * c is returned unchanged when no relative form exists: either IRI is
+   * opaque, the schemes differ (compared case-insensitively), the raw
+   * authorities differ, or the normalized path of c does not lie below the
+   * normalized path of b. A base path that does not end in "/" is treated
+   * as a directory, i.e. "/a/b" is handled as "/a/b/".
+   *
+   * Otherwise the result carries only the remaining path plus the query
+   * and fragment of c; scheme and authority are null.
+   *
+   * @param b the base IRI
+   * @param c the IRI to relativize
+   * @return the relative IRI, or c itself when it cannot be relativized
+   */
+  public static IRI relativize(IRI b, IRI c) {
+    if (c.isOpaque() || b.isOpaque()) return c;
+    if ((b.scheme == null) ? (c.scheme != null)
+                           : !b.scheme.equalsIgnoreCase(c.scheme)) return c;
+    // an IRI on another authority can never be reached relative to b
+    if ((b.authority == null) ? (c.authority != null)
+                              : !b.authority.equals(c.authority)) return c;
+    // normalize(String) never returns null, so no defaulting is needed
+    String bpath = normalize(b.getPath());
+    String cpath = normalize(c.getPath());
+    if (!bpath.equals(cpath)) {
+      // endsWith is safe for an empty base path, unlike charAt(length-1)
+      if (!bpath.endsWith("/")) bpath += "/";
+      if (!cpath.startsWith(bpath)) return c;
+    }
+    // cpath is already free of dot segments; normalizing the remainder again
+    // would prepend "/" and turn the relative path into an absolute one
+    return new IRI(
+      null,
+      null,null,null,null,-1,
+      cpath.substring(bpath.length()),
+      c.getQuery(),
+      c.getFragment(),
+      false);
+  }
+  
+  public IRI relativize(IRI iri) {
+    return relativize(this, iri);
+  }
+  
+  /** Returns true when the decoded path is non-empty and starts with '/'. */
+  public boolean isPathAbsolute() {
+    String p = getPath();
+    if (p == null || p.length() == 0) {
+      return false;
+    }
+    return p.charAt(0) == '/';
+  }
+  
+  /**
+   * Returns true when this reference has no scheme, no authority and no
+   * query, and its raw path is null, empty or ".". A fragment may still be
+   * present.
+   */
+  public boolean isSameDocumentReference() {
+    if (scheme != null || authority != null || query != null) {
+      return false;
+    }
+    return path == null || path.length() == 0 || path.equals(".");
+  }
+  
+  public static IRI resolve(IRI b, String c) throws IRISyntaxException, IOException {
+    return resolve(b, IRI.create(c));
+  }
+  
+  /**
+   * Resolves the reference c against the base b.
+   *
+   * <ul>
+   * <li>If either IRI is opaque, or c is absolute, c is returned as is.</li>
+   * <li>If c is a same-document reference, the result is a clone of b when
+   *     the fragments agree, otherwise b with a normalized path and the
+   *     fragment of c.</li>
+   * <li>If c has no authority, the authority comes from b; an absolute
+   *     path in c is normalized, a relative one is merged with b's path.</li>
+   * <li>If c has its own authority, authority and path come from c.</li>
+   * </ul>
+   * Scheme always comes from b; query and fragment always come from c.
+   *
+   * @param b the base IRI
+   * @param c the reference to resolve
+   * @return the resolved IRI
+   */
+  public static IRI resolve(IRI b, IRI c) {
+    if (c.isOpaque() || b.isOpaque()) return c;
+    if (c.isSameDocumentReference()) {
+      String cfragment = c.getFragment();
+      String bfragment = b.getFragment();
+      if ((cfragment == null && bfragment == null) ||
+          (cfragment != null && cfragment.equals(bfragment))) {
+          try {
+            return (IRI) b.clone();
+          } catch (Exception e) {
+            return null; // Not going to happen
+          } 
+      } else {
+        return new IRI(
+          b._scheme,
+          b.getScheme(),
+          b.getAuthority(),
+          b.getUserInfo(),
+          b.getHost(),
+          b.getPort(),
+          normalize(b.getPath()),
+          b.getQuery(),
+          cfragment,
+          b.doubleslash
+        );
+      }
+    }
+    if (c.isAbsolute()) return c;
+    
+    Scheme _scheme = b._scheme;
+    String scheme = b.scheme;
+    boolean ds = b.doubleslash;
+    String query = c.getQuery();
+    String fragment = c.getFragment();
+    String userinfo = null;
+    String authority = null;
+    String host = null;
+    int port = -1;
+    String path = null;
+    if (c.getAuthority() == null) {
+      authority = b.getAuthority();
+      userinfo = b.getUserInfo();
+      host = b.getHost();
+      port = b.getPort();
+      path = c.isPathAbsolute() ? normalize(c.getPath()) : resolve(b.getPath(),c.getPath());
+    } else {
+      authority = c.getAuthority();
+      userinfo = c.getUserInfo();
+      host = c.getHost();
+      port = c.getPort();
+      path = normalize(c.getPath());
+      // the authority now comes from c, so the result needs the "//" prefix
+      // even when the base was written without one
+      ds = true;
+    }
+    return new IRI(_scheme,scheme,authority,userinfo,host,port,path,query,fragment,ds);
+  }
+  
+  public IRI normalize() {
+    return normalize(this);
+  }
+  
+  public static IRI normalize(IRI iri) {
+    if (iri.isOpaque() || iri.getPath() == null) return iri;
+    return new IRI(
+      iri._scheme,
+      iri.getScheme(),
+      iri.getAuthority(),
+      iri.getUserInfo(),
+      iri.getHost(),
+      iri.getPort(),
+      normalize(iri.getPath()),
+      iri.getQuery(),
+      iri.getFragment(),
+      iri.doubleslash
+    );
+  }
+
+  /**
+   * Removes "." and ".." segments from a path.
+   *
+   * A null path yields "/". A path with fewer than two segments (as split
+   * on "/") is returned untouched. Any other result always begins with "/",
+   * whether or not the input did. A ".." removes the nearest preceding
+   * segment that is still present. The result ends in "/" when the input
+   * ended in "/" or in a dot segment ("/." or "/.."), as laid down by the
+   * remove_dot_segments rules of RFC 3986.
+   *
+   * @param path the path to normalize, may be null
+   * @return the normalized path, never null
+   */
+  private static String normalize(String path) {
+    if (path == null) return "/";
+    String[] segments = path.split("/");
+    if (segments.length < 2) return path;
+    StringBuffer buf = new StringBuffer("/");
+    for (int n = 0; n < segments.length; n++) {
+      String segment = segments[n];
+      if (".".equals(segment)) {
+        segments[n] = null;
+      } else if ("..".equals(segment)) {
+        segments[n] = null;
+        // walk back to the closest segment that has not been removed yet
+        int i = n;
+        while(--i > -1) {
+          if (segments[i] != null) break;
+        }
+        if (i > -1) segments[i] = null;
+      }
+    }
+    for (int n = 0; n < segments.length; n++) {
+      if (segments[n] != null) {
+        if (buf.length() > 1) buf.append('/');
+        buf.append(segments[n]);
+      }
+    }
+    boolean directory = path.endsWith("/") 
+      || path.endsWith("/.") 
+      || path.endsWith("/..");
+    // never double the slash: "/./" has to come out as "/", not as "//"
+    if (directory && buf.charAt(buf.length()-1) != '/') buf.append('/');
+    return buf.toString();
+  }
+  
+  /**
+   * Merges a relative path into a base path: everything in bpath up to and
+   * including its last "/" is kept, cpath is appended and the result is
+   * normalized. If one path is null the other is returned as is (which is
+   * null when both are).
+   */
+  private static String resolve(String bpath, String cpath) {
+    if (bpath == null) {
+      return cpath;
+    }
+    if (cpath == null) {
+      return bpath;
+    }
+    int lastSlash = bpath.lastIndexOf('/');
+    String directory = (lastSlash > -1) ? bpath.substring(0, lastSlash + 1) : "";
+    return normalize(directory + cpath);
+  }
+  
+  public IRI resolve(IRI iri) {
+    return resolve(this,iri);
+  }
+  
+  public IRI resolve(String iri) throws IRISyntaxException, IOException {
+    return resolve(this,IRI.create(iri));
+  }
+  
+  /**
+   * Renders the IRI from its raw components: the lower-cased scheme and a
+   * colon (when a scheme is present) followed by the scheme-specific part.
+   */
+  public String toString() {
+    String prefix = getScheme();
+    String rest = getSchemeSpecificPart();
+    if (prefix == null || prefix.length() == 0) {
+      return rest;
+    }
+    return prefix + ':' + rest;
+  }
+  
+  /**
+   * Renders the IRI from its ASCII components: the lower-cased scheme and a
+   * colon (when a scheme is present) followed by the ASCII scheme-specific
+   * part.
+   */
+  public String toASCIIString() {
+    String prefix = getScheme();
+    String rest = getASCIISchemeSpecificPart();
+    if (prefix == null || prefix.length() == 0) {
+      return rest;
+    }
+    return prefix + ':' + rest;
+  }
+  
+  /**
+   * Returns toString() wrapped in the characters U+202A and U+202C. Either
+   * character is only added when the string does not already start or end
+   * with it; an empty string is returned unchanged.
+   */
+  public String toBIDIString() {
+    String plain = toString();
+    if (plain.length() == 0) {
+      return plain;
+    }
+    StringBuffer wrapped = new StringBuffer(plain.length() + 2);
+    if (plain.charAt(0) != '\u202A') {
+      wrapped.append('\u202A');
+    }
+    wrapped.append(plain);
+    if (plain.charAt(plain.length() - 1) != '\u202C') {
+      wrapped.append('\u202C');
+    }
+    return wrapped.toString();
+  }
+  
+  public java.net.URI toURI() throws URISyntaxException {
+    return new java.net.URI(toASCIIString());
+  }
+  
+  public java.net.URL toURL() throws MalformedURLException, URISyntaxException {
+    return toURI().toURL();
+  }
+  
+  ////////// parse implementation
+  
+  private static void parse(String uri, Builder builder) throws IRISyntaxException, IOException {
+    SchemeRegistry reg = SchemeRegistry.getInstance();
+    builder.chars = uri.toCharArray();
+    CodepointIterator ci = CodepointIterator.forCharArray(builder.chars);
+    Parser.parse(ci, builder, reg);
+  }
+  
+  public static IRI create(String iri) throws IRISyntaxException, IOException {
+    return new IRI(iri);
+  }
+  
+  public static IRI create(String iri, Normalizer.Form nf) throws IRISyntaxException, IOException {
+    return new IRI(iri,nf);
+  }
+  
+  /**
+   * Splits an authority of the form "userinfo@host:port" and stores the
+   * pieces in the builder. A null authority is ignored.
+   *
+   * The port separator is searched for after the userinfo and, when the
+   * host is a bracketed literal such as "[::1]", after the closing bracket,
+   * so colons inside the brackets or the userinfo are not mistaken for it.
+   * A port that is empty or not a number leaves builder.port untouched.
+   *
+   * @param authority the raw authority, may be null
+   * @param builder receives userinfo, host and port
+   */
+  static void splitAuthority(String authority, Builder builder) {
+    if (authority != null) {
+      int n = authority.indexOf('@');
+      if (n > -1) builder.userinfo = authority.substring(0,n);
+      int a = authority.indexOf('[',n);
+      if (a > -1) {
+        int m = authority.indexOf(']',a);
+        if (m > -1) a = m;
+        a = authority.indexOf(':',a);
+      } else {
+        a = authority.indexOf(':',n);
+      }
+      if (a > -1) {
+        builder.host = authority.substring(n+1,a);
+        String p = authority.substring(a+1);
+        if (p.length() > 0) {
+          try {
+            builder.port = Integer.parseInt(p);
+          } catch (NumberFormatException ignored) {
+            // deliberately lenient: a malformed port is treated as absent
+          }
+        }
+      } else builder.host = authority.substring(n+1);
+    }
+  }
+  
+  /**
+   * Collects the components found while parsing. The parser reports each
+   * component as an offset and length into chars; a length of zero or less
+   * records the component as null. The port starts out as -1, the value
+   * buildAuthority treats as "no port".
+   */
+  static class Builder implements org.apache.abdera.util.iri.Builder {
+    private Scheme schemeobj;
+    private char[] chars;
+    private String scheme;
+    private String authority;
+    private String userinfo;
+    private String host;
+    private int port = -1;
+    private String path;
+    private String query;
+    private String fragment;
+    private boolean doubleslash;
+    
+    private void setScheme(Scheme scheme) {
+      this.schemeobj = scheme;
+    }
+    
+    /** Records the scheme, folded to lower case. */
+    public void scheme(int s, int l) {
+      // fixed locale: the default-locale overload turns 'I' into a dotless
+      // i on a JVM running under a Turkish locale
+      scheme = (l > 0) 
+        ? new String(chars,s,l).toLowerCase(java.util.Locale.ENGLISH) 
+        : null;
+    }
+    /** Records the authority and splits it into userinfo, host and port. */
+    public void authority(int s, int l) {
+      authority = (l > 0) ? new String(chars,s,l) : null;
+      splitAuthority(authority, this);
+    }
+    public void path(int s, int l) {
+      path = (l > 0) ? new String(chars,s,l) : null;
+    }
+    public void query(int s, int l) {
+      query = (l > 0) ? new String(chars,s,l) : null;
+    }
+    public void fragment(int s, int l) {
+      fragment = (l > 0) ? new String(chars,s,l) : null;
+    }
+    
+    /** Creates an IRI from the components collected so far. */
+    public IRI getAtomURI() {
+      return new IRI(
+        schemeobj,
+        scheme,authority,userinfo,
+        host,port,path,query,fragment, 
+        doubleslash);
+    }
+  }
+  
+  /**
+   * Default parser. Walks the code points once and reports scheme,
+   * authority, path, query and fragment to the builder as offset/length
+   * pairs, in that order.
+   */
+  static class Parser {
+    static void parse(CodepointIterator ci, Builder builder, SchemeRegistry reg) 
+      throws IRISyntaxException, 
+             IOException {
+      // e always marks the start of the component being scanned
+      int e = ci.position();
+      scan(ci,Constants.SCHEME,-1);
+      // the scanned run only counts as a scheme if a ':' follows it
+      if (ci.peek() == ':')
+        builder.scheme(e,ci.position()-e);
+      Scheme _scheme = null;
+      if (builder.scheme != null && builder.scheme.length() != 0)
+        _scheme = reg.getScheme(builder.scheme);      
+      if (_scheme != null) {
+        // Give the registered scheme the chance to parse the remainder
+        // itself. If it reports success we are done; otherwise fall
+        // through to the default parsing below.
+        builder.setScheme(_scheme);
+        if (_scheme.parse(ci, builder)) return;
+      }
+      // Default parsing, used for most common schemes.
+      // NOTE(review): without a registered Scheme object the iterator is
+      // rewound to the start even if a scheme was recorded above, so the
+      // scheme text is scanned again below - confirm this is intended.
+      else ci.position(e);
+      scan(ci, Constants.COLON,1);
+      e = ci.position();
+      if (ci.peek() == '/' && 
+          ci.peek(ci.position() + 1) == '/') {
+        scan(ci,Constants.SLASH,2);
+        builder.doubleslash = true;
+      }
+      e = ci.position();
+      int f = find(ci,Constants.SEPS);
+      if(f != 0) {
+        scan(ci,Constants.ISERVER,-1);
+      // what was scanned is an authority only if it is followed by the end
+      // of input or by a separator; otherwise rewind and treat it as path
+      if (ci.peek() == -1 || CharUtils.isSet(ci.peek(), Constants.SEPS)) {        
+        builder.authority(e,ci.position()-e);
+      }
+      else ci.position(e);
+      e = ci.position();
+      }
+      scan(ci,Constants.IPATH,-1);
+      builder.path(e,ci.position()-e);
+      scan(ci,Constants.QUERYMARK,-1);
+      e = ci.position();
+      scan(ci,Constants.IQUERY,-1);
+      builder.query(e,ci.position()-e);
+      scan(ci,Constants.HASH,-1);
+      e = ci.position();
+      scan(ci,Constants.IFRAGMENT,-1);
+      builder.fragment(e,ci.position()-e);
+    }
+  }
+  
+  /**
+   * Looks ahead from the current position, without moving the iterator,
+   * past every code point contained in set.
+   *
+   * The previous version advanced the index twice per iteration and
+   * therefore tested only every other code point.
+   *
+   * @return the index at which the look-ahead stopped: the first index
+   *         whose code point is not in set, or for which peek returned -1
+   *         (assumes peek(int) takes an absolute index, as its other use
+   *         in this file suggests - TODO confirm)
+   */
+  private static int find(CodepointIterator ci, BitSet set) throws IOException {
+    int n = ci.position();
+    int c;
+    while ((c = ci.peek(n)) != -1 && set.get(c)) {
+      n++;
+    }
+    return n;
+  }
+  
+  /**
+   * Advances the iterator for as long as the next code point is contained
+   * in set.
+   *
+   * The count argument is not used, and every path returns -1, so callers
+   * learn how far the scan got from ci.position() only.
+   */
+  private static int scan(CodepointIterator ci, BitSet set, int count) throws IOException, IRISyntaxException {
+    while (ci.hasNext() && ci.peek() != -1 && set.get(ci.peek())){ 
+      int p = ci.next();
+      // NOTE(review): the loop condition has just checked the peeked code
+      // point against set, so this branch only runs if next() can return
+      // something other than what peek() reported - confirm
+      if (!set.get(p)) {
+        if (!CharUtils.isSet(p, Constants.RESERVED, Constants.IUNRESERVED, Constants.HASH)) 
+          throw new IRISyntaxException("Invalid Character (0x" + Integer.toHexString(p) + ") In URI");
+        return -1;
+      }
+    }
+    return -1;
+  }
+}

Added: incubator/abdera/java/trunk/core/src/main/java/org/apache/abdera/util/iri/IRISyntaxException.java
URL: http://svn.apache.org/viewvc/incubator/abdera/java/trunk/core/src/main/java/org/apache/abdera/util/iri/IRISyntaxException.java?view=auto&rev=448818
==============================================================================
--- incubator/abdera/java/trunk/core/src/main/java/org/apache/abdera/util/iri/IRISyntaxException.java (added)
+++ incubator/abdera/java/trunk/core/src/main/java/org/apache/abdera/util/iri/IRISyntaxException.java Thu Sep 21 22:40:01 2006
@@ -0,0 +1,28 @@
+/*
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements.  The ASF licenses this file to You
+* under the Apache License, Version 2.0 (the "License"); you may not
+* use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.  For additional information regarding
+* copyright in this work, please see the NOTICE file in the top level
+* directory of this distribution.
+*/
+package org.apache.abdera.util.iri;
+
+import java.io.IOException;
+
+/**
+ * Thrown when a string cannot be parsed as an IRI. It extends IOException,
+ * so code that already handles IOException also catches it.
+ */
+class IRISyntaxException extends IOException {
+  private static final long serialVersionUID = 5177739661976965423L;
+
+  /** @param message description of the syntax problem */
+  IRISyntaxException(String message) {
+    super(message);
+  }
+}
\ No newline at end of file

Added: incubator/abdera/java/trunk/core/src/main/java/org/apache/abdera/util/iri/Nameprep.java
URL: http://svn.apache.org/viewvc/incubator/abdera/java/trunk/core/src/main/java/org/apache/abdera/util/iri/Nameprep.java?view=auto&rev=448818
==============================================================================
--- incubator/abdera/java/trunk/core/src/main/java/org/apache/abdera/util/iri/Nameprep.java (added)
+++ incubator/abdera/java/trunk/core/src/main/java/org/apache/abdera/util/iri/Nameprep.java Thu Sep 21 22:40:01 2006
@@ -0,0 +1,922 @@
+/*
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements.  The ASF licenses this file to You
+* under the Apache License, Version 2.0 (the "License"); you may not
+* use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.  For additional information regarding
+* copyright in this work, please see the NOTICE file in the top level
+* directory of this distribution.
+*/
+package org.apache.abdera.util.iri;
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.util.BitSet;
+
+import org.apache.abdera.util.ChainableBitSet;
+import org.apache.abdera.util.io.CodepointIterator;
+import org.apache.abdera.util.unicode.Normalizer;
+
+
+/**
+ * Implements the Nameprep protocol
+ */
+public class Nameprep {
+
+  /**
+   * Runs a string through the Nameprep processing implemented in this 
+   * package: the code points of <code>s</code> are filtered and mapped by a
+   * NameprepCodepointIterator and the result is normalized with
+   * Normalizer.Form.KC.
+   *
+   * @param s the string to prepare
+   * @param allowunassigned forwarded to NameprepCodepointIterator; when 
+   *   false the iterator rejects the code points listed in UNASSIGNED
+   * @return the prepared string
+   * @throws RuntimeException wrapping any failure raised while iterating 
+   *   or normalizing (invalid character, bidi violation, ...)
+   */
+  public static String prep(String s, boolean allowunassigned) {
+    try {
+      StringBuffer buf = new StringBuffer();
+      CodepointIterator ci = CodepointIterator.forCharSequence(s);
+      NameprepCodepointIterator r = 
+        new NameprepCodepointIterator(ci,allowunassigned);
+      while(r.hasNext()) {
+        int i = r.next();
+        if (i != -1) {
+          // The iterator yields code points, which may lie above U+FFFF.
+          // A plain (char) cast would silently drop the upper bits;
+          // appendCodePoint writes a surrogate pair when one is needed.
+          buf.appendCodePoint(i);
+        }
+      }
+      return Normalizer.normalize(buf.toString(),Normalizer.Form.KC).toString();
+    } catch (Throwable e) {
+      // Kept as-is so that callers continue to see a single wrapping
+      // RuntimeException whatever the underlying failure was.
+      throw new RuntimeException(e);
+    }
+  }
+  
+  /**
+   * Convenience overload of {@link #prep(String, boolean)} that passes
+   * <code>false</code> for <code>allowunassigned</code>.
+   *
+   * @param s the string to prepare
+   * @return the prepared string
+   */
+  public static String prep(String s) {
+    final boolean allowunassigned = false;
+    return prep(s, allowunassigned);
+  }
+  
+  /**
+   * Code points that are removed from the input ("mapped to nothing").
+   * NameprepCodepointIterator skips every code point whose bit is set in
+   * this set before doing any further processing.
+   * NOTE(review): the name suggests table B.1 of RFC 3454 (stringprep);
+   * confirm against the RFC.  The trailing hexadecimal values are local 
+   * additions, as the inline comment below states.
+   */
+  public static final BitSet B1 = new ChainableBitSet()
+    .set2('\u00AD','\u034F','\u1806','\u180B','\u180C','\u180D','\u200B',
+          '\u200C','\u200D','\u2060','\uFE00','\uFE01','\uFE02','\uFE03',
+          '\uFE04','\uFE05','\uFE06','\uFE07','\uFE08','\uFE09','\uFE0A',
+          '\uFE0B','\uFE0C','\uFE0D','\uFE0E','\uFE0F','\uFEFF',
+          // Not listed in B1, but treated as map-to-nothing by others
+          0x8F,0xA0,0x86,0x8b,0x80,0x81,0x88
+          );
+  
+  /**
+   * Replacement table loaded once by {@link #loadb2()}.  The index is a 
+   * code point; the element is the replacement sequence for that code 
+   * point, or null when the code point has no entry.
+   */
+  public static final int[][] B2 = loadb2();
+  
+  /**
+   * Looks up the replacement sequence for a code point.
+   *
+   * @param c the code point to look up
+   * @return the replacement code points, or null when <code>c</code> has 
+   *   no entry, including when it lies outside the range of the table
+   */
+  public static final int[] B2(int c) {
+    // The table is shorter than the Unicode code space, yet callers pass
+    // arbitrary code points (e.g. 0x20000 is accepted by the other tables).
+    // Treat anything outside the table as "no mapping" rather than 
+    // failing with ArrayIndexOutOfBoundsException.
+    if (c < 0 || c >= B2.length) {
+      return null;
+    }
+    return B2[c];
+  }
+  
+  private static int[] parse(String rep) {
+    String[] tokens = rep.trim().split(" ");
+    int[] i = new int[tokens.length];
+    for (int n = 0; n < tokens.length; n++) {
+      i[n]=Integer.parseInt(tokens[n],16);
+    }
+    return i;
+  }
+  
+  /** Classpath location of the replacement table read by loadb2(). */
+  private static final String B2DAT = "/org/apache/abdera/util/iri/data/B2.dat";
+  
+  /**
+   * Reads the replacement table from the classpath resource B2DAT.
+   * Each non-blank line has the form 
+   * <code>hex-codepoint ; hex hex ... [; ignored]</code>: the first field
+   * is the table index and the second field is the replacement sequence.
+   *
+   * @return an array indexed by code point; slots without an entry in the 
+   *   data file are null
+   * @throws IllegalStateException if the resource is missing or cannot be
+   *   read completely.  A partially loaded table would make prep() return
+   *   wrong results without any indication, so failure is reported
+   *   instead of being swallowed.
+   */
+  public static int[][] loadb2() {
+    int[][] map = new int[120764][];
+    InputStream in = Nameprep.class.getResourceAsStream(B2DAT);
+    if (in == null) {
+      throw new IllegalStateException(
+        "Nameprep mapping table not found on classpath: " + B2DAT);
+    }
+    try {
+      try {
+        // The data is plain hexadecimal text; name the charset explicitly 
+        // so the result does not depend on the platform default.
+        BufferedReader bufr = 
+          new BufferedReader(new InputStreamReader(in,"UTF-8"));
+        String line = null;
+        while((line = bufr.readLine()) != null) {
+          line = line.trim();
+          if (line.length() == 0) {
+            continue; // tolerate blank lines, e.g. at the end of the file
+          }
+          String[] tokens = line.split("\\s*;\\s*");
+          int n = Integer.parseInt(tokens[0],16);
+          map[n] = parse(tokens[1]);
+        }
+      } finally {
+        in.close(); // the stream was previously never released
+      }
+    } catch (IOException e) {
+      throw new IllegalStateException(
+        "Unable to read Nameprep mapping table: " + B2DAT, e);
+    }
+    return map;
+  }
+  
+  public static final BitSet UNASSIGNED = new ChainableBitSet()  
+  .set2('\u0221')
+  .set2('\u0234','\u024F')
+  .set2('\u02AE','\u02AF')
+  .set2('\u02EF','\u02FF')
+  .set2('\u0350','\u035F')
+  .set2('\u0370','\u0373')
+  .set2('\u0376','\u0379')
+  .set2('\u037B','\u037D')
+  .set2('\u037F','\u0383')
+  .set2('\u038B')
+  .set2('\u038D')
+  .set2('\u03A2')
+  .set2('\u03CF')
+  .set2('\u03F7','\u03FF')
+  .set2('\u0487')
+  .set2('\u04CF')
+  .set2('\u04F6','\u04F7')
+  .set2('\u04FA','\u04FF')
+  .set2('\u0510','\u0530')
+  .set2('\u0557','\u0558')
+  .set2('\u0560')
+  .set2('\u0588')
+  .set2('\u058B','\u0590')
+  .set2('\u05A2')
+  .set2('\u05BA')
+  .set2('\u05C5','\u05CF')
+  .set2('\u05EB','\u05EF')
+  .set2('\u05F5','\u060B')
+  .set2('\u060D','\u061A')
+  .set2('\u061C','\u061E')
+  .set2('\u0620')
+  .set2('\u063B','\u063F')
+  .set2('\u0656','\u065F')
+  .set2('\u06EE','\u06EF')
+  .set2('\u06FF')
+  .set2('\u070E')
+  .set2('\u072D','\u072F')
+  .set2('\u074B','\u077F')
+  .set2('\u07B2','\u0900')
+  .set2('\u0904')
+  .set2('\u093A','\u093B')
+  .set2('\u094E','\u094F')
+  .set2('\u0955','\u0957')
+  .set2('\u0971','\u0980')
+  .set2('\u0984')
+  .set2('\u098D','\u098E')
+  .set2('\u0991','\u0992')
+  .set2('\u09A9')
+  .set2('\u09B1')
+  .set2('\u09B3','\u09B5')
+  .set2('\u09BA','\u09BB')
+  .set2('\u09BD')
+  .set2('\u09C5','\u09C6')
+  .set2('\u09C9','\u09CA')
+  .set2('\u09CE','\u09D6')
+  .set2('\u09D8','\u09DB')
+  .set2('\u09DE')
+  .set2('\u09E4','\u09E5')
+  .set2('\u09FB','\u0A01')
+  .set2('\u0A03','\u0A04')
+  .set2('\u0A0B','\u0A0E')
+  .set2('\u0A11','\u0A12')
+  .set2('\u0A29')
+  .set2('\u0A31')
+  .set2('\u0A34')
+  .set2('\u0A37')
+  .set2('\u0A3A','\u0A3B')
+  .set2('\u0A3D')
+  .set2('\u0A43','\u0A46')
+  .set2('\u0A49','\u0A4A')
+  .set2('\u0A4E','\u0A58')
+  .set2('\u0A5D')
+  .set2('\u0A5F','\u0A65')
+  .set2('\u0A75','\u0A80')
+  .set2('\u0A84')
+  .set2('\u0A8C')
+  .set2('\u0A8E')
+  .set2('\u0A92')
+  .set2('\u0AA9')
+  .set2('\u0AB1')
+  .set2('\u0AB4')
+  .set2('\u0ABA','\u0ABB')
+  .set2('\u0AC6')
+  .set2('\u0ACA')
+  .set2('\u0ACE','\u0ACF')
+  .set2('\u0AD1','\u0ADF')
+  .set2('\u0AE1','\u0AE5')
+  .set2('\u0AF0','\u0B00')
+  .set2('\u0B04')
+  .set2('\u0B0D','\u0B0E')
+  .set2('\u0B11','\u0B12')
+  .set2('\u0B29')
+  .set2('\u0B31')
+  .set2('\u0B34','\u0B35')
+  .set2('\u0B3A','\u0B3B')
+  .set2('\u0B44','\u0B46')
+  .set2('\u0B49','\u0B4A')
+  .set2('\u0B4E','\u0B55')
+  .set2('\u0B58','\u0B5B')
+  .set2('\u0B5E')
+  .set2('\u0B62','\u0B65')
+  .set2('\u0B71','\u0B81')
+  .set2('\u0B84')
+  .set2('\u0B8B','\u0B8D')
+  .set2('\u0B91')
+  .set2('\u0B96','\u0B98')
+  .set2('\u0B9B')
+  .set2('\u0B9D')
+  .set2('\u0BA0','\u0BA2')
+  .set2('\u0BA5','\u0BA7')
+  .set2('\u0BAB','\u0BAD')
+  .set2('\u0BB6')
+  .set2('\u0BBA','\u0BBD')
+  .set2('\u0BC3','\u0BC5')
+  .set2('\u0BC9')
+  .set2('\u0BCE','\u0BD6')
+  .set2('\u0BD8','\u0BE6')
+  .set2('\u0BF3','\u0C00')
+  .set2('\u0C04')
+  .set2('\u0C0D')
+  .set2('\u0C11')
+  .set2('\u0C29')
+  .set2('\u0C34')
+  .set2('\u0C3A','\u0C3D')
+  .set2('\u0C45')
+  .set2('\u0C49')
+  .set2('\u0C4E','\u0C54')
+  .set2('\u0C57','\u0C5F')
+  .set2('\u0C62','\u0C65')
+  .set2('\u0C70','\u0C81')
+  .set2('\u0C84')
+  .set2('\u0C8D')
+  .set2('\u0C91')
+  .set2('\u0CA9')
+  .set2('\u0CB4')
+  .set2('\u0CBA','\u0CBD')
+  .set2('\u0CC5')
+  .set2('\u0CC9')
+  .set2('\u0CCE','\u0CD4')
+  .set2('\u0CD7','\u0CDD')
+  .set2('\u0CDF')
+  .set2('\u0CE2','\u0CE5')
+  .set2('\u0CF0','\u0D01')
+  .set2('\u0D04')
+  .set2('\u0D0D')
+  .set2('\u0D11')
+  .set2('\u0D29')
+  .set2('\u0D3A','\u0D3D')
+  .set2('\u0D44','\u0D45')
+  .set2('\u0D49')
+  .set2('\u0D4E','\u0D56')
+  .set2('\u0D58','\u0D5F')
+  .set2('\u0D62','\u0D65')
+  .set2('\u0D70','\u0D81')
+  .set2('\u0D84')
+  .set2('\u0D97','\u0D99')
+  .set2('\u0DB2')
+  .set2('\u0DBC')
+  .set2('\u0DBE','\u0DBF')
+  .set2('\u0DC7','\u0DC9')
+  .set2('\u0DCB','\u0DCE')
+  .set2('\u0DD5')
+  .set2('\u0DD7')
+  .set2('\u0DE0','\u0DF1')
+  .set2('\u0DF5','\u0E00')
+  .set2('\u0E3B','\u0E3E')
+  .set2('\u0E5C','\u0E80')
+  .set2('\u0E83')
+  .set2('\u0E85','\u0E86')
+  .set2('\u0E89')
+  .set2('\u0E8B','\u0E8C')
+  .set2('\u0E8E','\u0E93')
+  .set2('\u0E98')
+  .set2('\u0EA0')
+  .set2('\u0EA4')
+  .set2('\u0EA6')
+  .set2('\u0EA8','\u0EA9')
+  .set2('\u0EAC')
+  .set2('\u0EBA')
+  .set2('\u0EBE','\u0EBF')
+  .set2('\u0EC5')
+  .set2('\u0EC7')
+  .set2('\u0ECE','\u0ECF')
+  .set2('\u0EDA','\u0EDB')
+  .set2('\u0EDE','\u0EFF')
+  .set2('\u0F48')
+  .set2('\u0F6B','\u0F70')
+  .set2('\u0F8C','\u0F8F')
+  .set2('\u0F98')
+  .set2('\u0FBD')
+  .set2('\u0FCD','\u0FCE')
+  .set2('\u0FD0','\u0FFF')
+  .set2('\u1022')
+  .set2('\u1028')
+  .set2('\u102B')
+  .set2('\u1033','\u1035')
+  .set2('\u103A','\u103F')
+  .set2('\u105A','\u109F')
+  .set2('\u10C6','\u10CF')
+  .set2('\u10F9','\u10FA')
+  .set2('\u10FC','\u10FF')
+  .set2('\u115A','\u115E')
+  .set2('\u11A3','\u11A7')
+  .set2('\u11FA','\u11FF')
+  .set2('\u1207')
+  .set2('\u1247')
+  .set2('\u1249')
+  .set2('\u124E','\u124F')
+  .set2('\u1257')
+  .set2('\u1259')
+  .set2('\u125E','\u125F')
+  .set2('\u1287')
+  .set2('\u1289')
+  .set2('\u128E','\u128F')
+  .set2('\u12AF')
+  .set2('\u12B1')
+  .set2('\u12B6','\u12B7')
+  .set2('\u12BF')
+  .set2('\u12C1')
+  .set2('\u12C6','\u12C7')
+  .set2('\u12CF')
+  .set2('\u12D7')
+  .set2('\u12EF')
+  .set2('\u130F')
+  .set2('\u1311')
+  .set2('\u1316','\u1317')
+  .set2('\u131F')
+  .set2('\u1347')
+  .set2('\u135B','\u1360')
+  .set2('\u137D','\u139F')
+  .set2('\u13F5','\u1400')
+  .set2('\u1677','\u167F')
+  .set2('\u169D','\u169F')
+  .set2('\u16F1','\u16FF')
+  .set2('\u170D')
+  .set2('\u1715','\u171F')
+  .set2('\u1737','\u173F')
+  .set2('\u1754','\u175F')
+  .set2('\u176D')
+  .set2('\u1771')
+  .set2('\u1774','\u177F')
+  .set2('\u17DD','\u17DF')
+  .set2('\u17EA','\u17FF')
+  .set2('\u180F')
+  .set2('\u181A','\u181F')
+  .set2('\u1878','\u187F')
+  .set2('\u18AA','\u1DFF')
+  .set2('\u1E9C','\u1E9F')
+  .set2('\u1EFA','\u1EFF')
+  .set2('\u1F16','\u1F17')
+  .set2('\u1F1E','\u1F1F')
+  .set2('\u1F46','\u1F47')
+  .set2('\u1F4E','\u1F4F')
+  .set2('\u1F58')
+  .set2('\u1F5A')
+  .set2('\u1F5C')
+  .set2('\u1F5E')
+  .set2('\u1F7E','\u1F7F')
+  .set2('\u1FB5')
+  .set2('\u1FC5')
+  .set2('\u1FD4','\u1FD5')
+  .set2('\u1FDC')
+  .set2('\u1FF0','\u1FF1')
+  .set2('\u1FF5')
+  .set2('\u1FFF')
+  .set2('\u2053','\u2056')
+  .set2('\u2058','\u205E')
+  .set2('\u2064','\u2069')
+  .set2('\u2072','\u2073')
+  .set2('\u208F','\u209F')
+  .set2('\u20B2','\u20CF')
+  .set2('\u20EB','\u20FF')
+  .set2('\u213B','\u213C')
+  .set2('\u214C','\u2152')
+  .set2('\u2184','\u218F')
+  .set2('\u23CF','\u23FF')
+  .set2('\u2427','\u243F')
+  .set2('\u244B','\u245F')
+  .set2('\u24FF')
+  .set2('\u2614','\u2615')
+  .set2('\u2618')
+  .set2('\u267E','\u267F')
+  .set2('\u268A','\u2700')
+  .set2('\u2705')
+  .set2('\u270A','\u270B')
+  .set2('\u2728')
+  .set2('\u274C')
+  .set2('\u274E')
+  .set2('\u2753','\u2755')
+  .set2('\u2757')
+  .set2('\u275F','\u2760')
+  .set2('\u2795','\u2797')
+  .set2('\u27B0')
+  .set2('\u27BF','\u27CF')
+  .set2('\u27EC','\u27EF')
+  .set2('\u2B00','\u2E7F')
+  .set2('\u2E9A')
+  .set2('\u2EF4','\u2EFF')
+  .set2('\u2FD6','\u2FEF')
+  .set2('\u2FFC','\u2FFF')
+  .set2('\u3040')
+  .set2('\u3097','\u3098')
+  .set2('\u3100','\u3104')
+  .set2('\u312D','\u3130')
+  .set2('\u318F')
+  .set2('\u31B8','\u31EF')
+  .set2('\u321D','\u321F')
+  .set2('\u3244','\u3250')
+  .set2('\u327C','\u327E')
+  .set2('\u32CC','\u32CF')
+  .set2('\u32FF')
+  .set2('\u3377','\u337A')
+  .set2('\u33DE','\u33DF')
+  .set2('\u33FF')
+  .set2('\u4DB6','\u4DFF')
+  .set2('\u9FA6','\u9FFF')
+  .set2('\uA48D','\uA48F')
+  .set2('\uA4C7','\uABFF')
+  .set2('\uD7A4','\uD7FF')
+  .set2('\uFA2E','\uFA2F')
+  .set2('\uFA6B','\uFAFF')
+  .set2('\uFB07','\uFB12')
+  .set2('\uFB18','\uFB1C')
+  .set2('\uFB37')
+  .set2('\uFB3D')
+  .set2('\uFB3F')
+  .set2('\uFB42')
+  .set2('\uFB45')
+  .set2('\uFBB2','\uFBD2')
+  .set2('\uFD40','\uFD4F')
+  .set2('\uFD90','\uFD91')
+  .set2('\uFDC8','\uFDCF')
+  .set2('\uFDFD','\uFDFF')
+  .set2('\uFE10','\uFE1F')
+  .set2('\uFE24','\uFE2F')
+  .set2('\uFE47','\uFE48')
+  .set2('\uFE53')
+  .set2('\uFE67')
+  .set2('\uFE6C','\uFE6F')
+  .set2('\uFE75')
+  .set2('\uFEFD','\uFEFE')
+  .set2('\uFF00')
+  .set2('\uFFBF','\uFFC1')
+  .set2('\uFFC8','\uFFC9')
+  .set2('\uFFD0','\uFFD1')
+  .set2('\uFFD8','\uFFD9')
+  .set2('\uFFDD','\uFFDF')
+  .set2('\uFFE7')
+  .set2('\uFFEF','\uFFF8')
+  .set2(0x10000,0x102FF)
+  .set2(0x1031F)
+  .set2(0x10324,0x1032F)
+  .set2(0x1034B,0x103FF)
+  .set2(0x10426,0x10427)
+  .set2(0x1044E,0x1CFFF)
+  .set2(0x1D0F6,0x1D0FF)
+  .set2(0x1D127,0x1D129)
+  .set2(0x1D1DE,0x1D3FF)
+  .set2(0x1D455)
+  .set2(0x1D49D)
+  .set2(0x1D4A0,0x1D4A1)
+  .set2(0x1D4A3,0x1D4A4)
+  .set2(0x1D4A7,0x1D4A8)
+  .set2(0x1D4AD)
+  .set2(0x1D4BA)
+  .set2(0x1D4BC)
+  .set2(0x1D4C1)
+  .set2(0x1D4C4)
+  .set2(0x1D506)
+  .set2(0x1D50B,0x1D50C)
+  .set2(0x1D515)
+  .set2(0x1D51D)
+  .set2(0x1D53A)
+  .set2(0x1D53F)
+  .set2(0x1D545)
+  .set2(0x1D547,0x1D549)
+  .set2(0x1D551)
+  .set2(0x1D6A4,0x1D6A7)
+  .set2(0x1D7CA,0x1D7CD)
+  .set2(0x1D800,0x1FFFD)
+  .set2(0x2A6D7,0x2F7FF)
+  .set2(0x2FA1E,0x2FFFD)
+  .set2(0x30000,0x3FFFD)
+  .set2(0x40000,0x4FFFD)
+  .set2(0x50000,0x5FFFD)
+  .set2(0x60000,0x6FFFD)
+  .set2(0x70000,0x7FFFD)
+  .set2(0x80000,0x8FFFD)
+  .set2(0x90000,0x9FFFD)
+  .set2(0xA0000,0xAFFFD)
+  .set2(0xB0000,0xBFFFD)
+  .set2(0xC0000,0xCFFFD)
+  .set2(0xD0000,0xDFFFD)
+  .set2(0xE0000)
+  .set2(0xE0002,0xE001F)
+  .set2(0xE0080,0xEFFFD);
+
+  
+  /**
+   * Code points that must not appear in the input.  
+   * NameprepCodepointIterator throws InvalidCharacterException for any 
+   * code point whose bit is set here.  The set is the union of the groups
+   * below plus everything in UNASSIGNED.
+   * NOTE(review): the group labels appear to refer to the tables of 
+   * RFC 3454 appendix C (C.1.2, C.2.2, C.3 ... C.9) - confirm.
+   * NOTE(review): assumes the two-argument set2 marks an inclusive range
+   * and the one-argument form a single code point - confirm in 
+   * ChainableBitSet.
+   */
+  public static final ChainableBitSet PROHIBITED = new ChainableBitSet()
+
+  // c.1.2 (presumably: non-ASCII space characters)
+   .set2('\u00A0').set2('\u1680').set2('\u2000','\u200B')
+   .set2('\u202F').set2('\u205F').set2('\u3000')
+
+  // c.2.2 (presumably: non-ASCII control characters)
+   .set2('\u0080','\u009F').set2('\u06DD').set2('\u070F')
+   .set2('\u180E').set2('\u200C').set2('\u200D').set2('\u2028')
+   .set2('\u2029').set2('\u2060').set2('\u2061').set2('\u2062')
+   .set2('\u2063').set2('\u206A','\u206F').set2('\uFEFF')
+   .set2('\uFFF9','\uFFFC').set2(0x1D173,0x1D17A)
+                                             
+  // c.3 (presumably: private use)
+   .set2('\uE000','\uF8FF').set2(0xF0000,0xFFFFD).set2(0x100000,0x10FFFD)
+                   
+  // c.4 (presumably: non-character code points)
+   .set2('\uFDD0','\uFDEF').set2('\uFFFE','\uFFFF').set2(0x1FFFE,0x1FFFF)
+   .set2(0x2FFFE,0x2FFFF).set2(0x3FFFE,0x3FFFF).set2(0x4FFFE,0x4FFFF)
+   .set2(0x5FFFE,0x5FFFF).set2(0x6FFFE,0x6FFFF).set2(0x7FFFE,0x7FFFF)
+   .set2(0x8FFFE,0x8FFFF).set2(0x9FFFE,0x9FFFF).set2(0xAFFFE,0xAFFFF)
+   .set2(0xBFFFE,0xBFFFF).set2(0xCFFFE,0xCFFFF).set2(0xDFFFE,0xDFFFF)
+   .set2(0xEFFFE,0xEFFFF).set2(0xFFFFE,0xFFFFF).set2(0x10FFFE,0x10FFFF)
+   
+ // c.5 (presumably: surrogate codes)
+   .set2('\uD800','\uDFFF')
+   
+ // c.6 (presumably: inappropriate for plain text); overlaps the c.2.2 entries
+   .set2('\uFFF9','\uFFFD')
+   
+ // c.7 (presumably: inappropriate for canonical representation)
+   .set2('\u2FF0','\u2FFB')
+   
+ // c.8 (presumably: change display properties / deprecated); the last six 
+ // entries repeat code points already set under c.2.2
+   .set2('\u0340').set2('\u0341').set2('\u200E')
+   .set2('\u200F').set2('\u202A').set2('\u202B')
+   .set2('\u202C').set2('\u202D').set2('\u202E')
+   .set2('\u206A').set2('\u206B').set2('\u206C')
+   .set2('\u206D').set2('\u206E').set2('\u206F')
+   
+ // c.9 (presumably: tagging characters)
+   .set2(0xE0001).set2(0xE0020,0xE007F)
+  
+ // unassigned: NameprepCodepointIterator removes these again from its 
+ // private copy when it is created with allowunassigned == true
+   .set2(UNASSIGNED);
+  
+  
+  /**
+   * Code points that NameprepCodepointIterator treats as belonging to the
+   * "RandALCat" class in its bidirectional checks: once one of them has 
+   * been seen, no LCat code point may occur and both the first and the 
+   * last code point must be members of this set.
+   * NOTE(review): presumably RFC 3454 table D.1 (bidi categories "R" and 
+   * "AL") - confirm against the RFC.
+   */
+  public static final BitSet RandAL = new ChainableBitSet()
+   .set2('\u05BE').set2('\u05C0').set2('\u05C3')
+   .set2('\u05D0','\u05EA').set2('\u05F0','\u05F4')
+   .set2('\u061B').set2('\u061F').set2('\u0621','\u063A')
+   .set2('\u0640','\u064A').set2('\u066D','\u066F').set2('\u0671','\u06D5')
+   .set2('\u06DD').set2('\u06E5','\u06E6').set2('\u06FA','\u06FE')
+   .set2('\u0700','\u070D').set2('\u0710').set2('\u0712','\u072C')
+   .set2('\u0780','\u07A5').set2('\u07B1').set2('\u200F')
+   .set2('\uFB1D').set2('\uFB1F','\uFB28').set2('\uFB2A','\uFB36')
+   .set2('\uFB38','\uFB3C').set2('\uFB3E').set2('\uFB40','\uFB41')
+   .set2('\uFB43','\uFB44').set2('\uFB46','\uFBB1').set2('\uFBD3','\uFD3D')
+   .set2('\uFD50','\uFD8F').set2('\uFD92','\uFDC7').set2('\uFDF0','\uFDFC')
+   .set2('\uFE70','\uFE74').set2('\uFE76','\uFEFC');
+  
+  public static final BitSet LCat = new ChainableBitSet()
+    .set2('\u0041','\u005A')
+    .set2('\u0061','\u007A')
+    .set2('\u00AA')
+    .set2('\u00B5')
+    .set2('\u00BA')
+    .set2('\u00C0','\u00D6')
+    .set2('\u00D8','\u00F6')
+    .set2('\u00F8','\u0220')
+    .set2('\u0222','\u0233')
+    .set2('\u0250','\u02AD')
+    .set2('\u02B0','\u02B8')
+    .set2('\u02BB','\u02C1')
+    .set2('\u02D0','\u02D1')
+    .set2('\u02E0','\u02E4')
+    .set2('\u02EE')
+    .set2('\u037A')
+    .set2('\u0386')
+    .set2('\u0388','\u038A')
+    .set2('\u038C')
+    .set2('\u038E','\u03A1')
+    .set2('\u03A3','\u03CE')
+    .set2('\u03D0','\u03F5')
+    .set2('\u0400','\u0482')
+    .set2('\u048A','\u04CE')
+    .set2('\u04D0','\u04F5')
+    .set2('\u04F8','\u04F9')
+    .set2('\u0500','\u050F')
+    .set2('\u0531','\u0556')
+    .set2('\u0559','\u055F')
+    .set2('\u0561','\u0587')
+    .set2('\u0589')
+    .set2('\u0903')
+    .set2('\u0905','\u0939')
+    .set2('\u093D','\u0940')
+    .set2('\u0949','\u094C')
+    .set2('\u0950')
+    .set2('\u0958','\u0961')
+    .set2('\u0964','\u0970')
+    .set2('\u0982','\u0983')
+    .set2('\u0985','\u098C')
+    .set2('\u098F','\u0990')
+    .set2('\u0993','\u09A8')
+    .set2('\u09AA','\u09B0')
+    .set2('\u09B2')
+    .set2('\u09B6','\u09B9')
+    .set2('\u09BE','\u09C0')
+    .set2('\u09C7','\u09C8')
+    .set2('\u09CB','\u09CC')
+    .set2('\u09D7')
+    .set2('\u09DC','\u09DD')
+    .set2('\u09DF','\u09E1')
+    .set2('\u09E6','\u09F1')
+    .set2('\u09F4','\u09FA')
+    .set2('\u0A05','\u0A0A')
+    .set2('\u0A0F','\u0A10')
+    .set2('\u0A13','\u0A28')
+    .set2('\u0A2A','\u0A30')
+    .set2('\u0A32','\u0A33')
+    .set2('\u0A35','\u0A36')
+    .set2('\u0A38','\u0A39')
+    .set2('\u0A3E','\u0A40')
+    .set2('\u0A59','\u0A5C')
+    .set2('\u0A5E')
+    .set2('\u0A66','\u0A6F')
+    .set2('\u0A72','\u0A74')
+    .set2('\u0A83')
+    .set2('\u0A85','\u0A8B')
+    .set2('\u0A8D')
+    .set2('\u0A8F','\u0A91')
+    .set2('\u0A93','\u0AA8')
+    .set2('\u0AAA','\u0AB0')
+    .set2('\u0AB2','\u0AB3')
+    .set2('\u0AB5','\u0AB9')
+    .set2('\u0ABD','\u0AC0')
+    .set2('\u0AC9')
+    .set2('\u0ACB','\u0ACC')
+    .set2('\u0AD0')
+    .set2('\u0AE0')
+    .set2('\u0AE6','\u0AEF')
+    .set2('\u0B02','\u0B03')
+    .set2('\u0B05','\u0B0C')
+    .set2('\u0B0F','\u0B10')
+    .set2('\u0B13','\u0B28')
+    .set2('\u0B2A','\u0B30')
+    .set2('\u0B32','\u0B33')
+    .set2('\u0B36','\u0B39')
+    .set2('\u0B3D','\u0B3E')
+    .set2('\u0B40')
+    .set2('\u0B47','\u0B48')
+    .set2('\u0B4B','\u0B4C')
+    .set2('\u0B57')
+    .set2('\u0B5C','\u0B5D')
+    .set2('\u0B5F','\u0B61')
+    .set2('\u0B66','\u0B70')
+    .set2('\u0B83')
+    .set2('\u0B85','\u0B8A')
+    .set2('\u0B8E','\u0B90')
+    .set2('\u0B92','\u0B95')
+    .set2('\u0B99','\u0B9A')
+    .set2('\u0B9C')
+    .set2('\u0B9E','\u0B9F')
+    .set2('\u0BA3','\u0BA4')
+    .set2('\u0BA8','\u0BAA')
+    .set2('\u0BAE','\u0BB5')
+    .set2('\u0BB7','\u0BB9')
+    .set2('\u0BBE','\u0BBF')
+    .set2('\u0BC1','\u0BC2')
+    .set2('\u0BC6','\u0BC8')
+    .set2('\u0BCA','\u0BCC')
+    .set2('\u0BD7')
+    .set2('\u0BE7','\u0BF2')
+    .set2('\u0C01','\u0C03')
+    .set2('\u0C05','\u0C0C')
+    .set2('\u0C0E','\u0C10')
+    .set2('\u0C12','\u0C28')
+    .set2('\u0C2A','\u0C33')
+    .set2('\u0C35','\u0C39')
+    .set2('\u0C41','\u0C44')
+    .set2('\u0C60','\u0C61')
+    .set2('\u0C66','\u0C6F')
+    .set2('\u0C82','\u0C83')
+    .set2('\u0C85','\u0C8C')
+    .set2('\u0C8E','\u0C90')
+    .set2('\u0C92','\u0CA8')
+    .set2('\u0CAA','\u0CB3')
+    .set2('\u0CB5','\u0CB9')
+    .set2('\u0CBE')
+    .set2('\u0CC0','\u0CC4')
+    .set2('\u0CC7','\u0CC8')
+    .set2('\u0CCA','\u0CCB')
+    .set2('\u0CD5','\u0CD6')
+    .set2('\u0CDE')
+    .set2('\u0CE0','\u0CE1')
+    .set2('\u0CE6','\u0CEF')
+    .set2('\u0D02','\u0D03')
+    .set2('\u0D05','\u0D0C')
+    .set2('\u0D0E','\u0D10')
+    .set2('\u0D12','\u0D28')
+    .set2('\u0D2A','\u0D39')
+    .set2('\u0D3E','\u0D40')
+    .set2('\u0D46','\u0D48')
+    .set2('\u0D4A','\u0D4C')
+    .set2('\u0D57')
+    .set2('\u0D60','\u0D61')
+    .set2('\u0D66','\u0D6F')
+    .set2('\u0D82','\u0D83')
+    .set2('\u0D85','\u0D96')
+    .set2('\u0D9A','\u0DB1')
+    .set2('\u0DB3','\u0DBB')
+    .set2('\u0DBD')
+    .set2('\u0DC0','\u0DC6')
+    .set2('\u0DCF','\u0DD1')
+    .set2('\u0DD8','\u0DDF')
+    .set2('\u0DF2','\u0DF4')
+    .set2('\u0E01','\u0E30')
+    .set2('\u0E32','\u0E33')
+    .set2('\u0E40','\u0E46')
+    .set2('\u0E4F','\u0E5B')
+    .set2('\u0E81','\u0E82')
+    .set2('\u0E84')
+    .set2('\u0E87','\u0E88')
+    .set2('\u0E8A')
+    .set2('\u0E8D')
+    .set2('\u0E94','\u0E97')
+    .set2('\u0E99','\u0E9F')
+    .set2('\u0EA1','\u0EA3')
+    .set2('\u0EA5')
+    .set2('\u0EA7')
+    .set2('\u0EAA','\u0EAB')
+    .set2('\u0EAD','\u0EB0')
+    .set2('\u0EB2','\u0EB3')
+    .set2('\u0EBD')
+    .set2('\u0EC0','\u0EC4')
+    .set2('\u0EC6')
+    .set2('\u0ED0','\u0ED9')
+    .set2('\u0EDC','\u0EDD')
+    .set2('\u0F00','\u0F17')
+    .set2('\u0F1A','\u0F34')
+    .set2('\u0F36')
+    .set2('\u0F38')
+    .set2('\u0F3E','\u0F47')
+    .set2('\u0F49','\u0F6A')
+    .set2('\u0F7F')
+    .set2('\u0F85')
+    .set2('\u0F88','\u0F8B')
+    .set2('\u0FBE','\u0FC5')
+    .set2('\u0FC7','\u0FCC')
+    .set2('\u0FCF')
+    .set2('\u1000','\u1021')
+    .set2('\u1023','\u1027')
+    .set2('\u1029','\u102A')
+    .set2('\u102C')
+    .set2('\u1031')
+    .set2('\u1038')
+    .set2('\u1040','\u1057')
+    .set2('\u10A0','\u10C5')
+    .set2('\u10D0','\u10F8')
+    .set2('\u10FB')
+    .set2('\u1100','\u1159')
+    .set2('\u115F','\u11A2')
+    .set2('\u11A8','\u11F9')
+    .set2('\u1200','\u1206')
+    .set2('\u1208','\u1246')
+    .set2('\u1248')
+    .set2('\u124A','\u124D')
+    .set2('\u1250','\u1256')
+    .set2('\u1258')
+    .set2('\u125A','\u125D')
+    .set2('\u1260','\u1286')
+    .set2('\u1288')
+    .set2('\u128A','\u128D')
+    .set2('\u1290','\u12AE')
+    .set2('\u12B0')
+    .set2('\u12B2','\u12B5')
+    .set2('\u12B8','\u12BE')
+    .set2('\u12C0')
+    .set2('\u12C2','\u12C5')
+    .set2('\u12C8','\u12CE')
+    .set2('\u12D0','\u12D6')
+    .set2('\u12D8','\u12EE')
+    .set2('\u12F0','\u130E')
+    .set2('\u1310')
+    .set2('\u1312','\u1315')
+    .set2('\u1318','\u131E')
+    .set2('\u1320','\u1346')
+    .set2('\u1348','\u135A')
+    .set2('\u1361','\u137C')
+    .set2('\u13A0','\u13F4')
+    .set2('\u1401','\u1676')
+    .set2('\u1681','\u169A')
+    .set2('\u16A0','\u16F0')
+    .set2('\u1700','\u170C')
+    .set2('\u170E','\u1711')
+    .set2('\u1720','\u1731')
+    .set2('\u1735','\u1736')
+    .set2('\u1740','\u1751')
+    .set2('\u1760','\u176C')
+    .set2('\u176E','\u1770')
+    .set2('\u1780','\u17B6')
+    .set2('\u17BE','\u17C5')
+    .set2('\u17C7','\u17C8')
+    .set2('\u17D4','\u17DA')
+    .set2('\u17DC')
+    .set2('\u17E0','\u17E9')
+    .set2('\u1810','\u1819')
+    .set2('\u1820','\u1877')
+    .set2('\u1880','\u18A8')
+    .set2('\u1E00','\u1E9B')
+    .set2('\u1EA0','\u1EF9')
+    .set2('\u1F00','\u1F15')
+    .set2('\u1F18','\u1F1D')
+    .set2('\u1F20','\u1F45')
+    .set2('\u1F48','\u1F4D')
+    .set2('\u1F50','\u1F57')
+    .set2('\u1F59')
+    .set2('\u1F5B')
+    .set2('\u1F5D')
+    .set2('\u1F5F','\u1F7D')
+    .set2('\u1F80','\u1FB4')
+    .set2('\u1FB6','\u1FBC')
+    .set2('\u1FBE')
+    .set2('\u1FC2','\u1FC4')
+    .set2('\u1FC6','\u1FCC')
+    .set2('\u1FD0','\u1FD3')
+    .set2('\u1FD6','\u1FDB')
+    .set2('\u1FE0','\u1FEC')
+    .set2('\u1FF2','\u1FF4')
+    .set2('\u1FF6','\u1FFC')
+    .set2('\u200E')
+    .set2('\u2071')
+    .set2('\u207F')
+    .set2('\u2102')
+    .set2('\u2107')
+    .set2('\u210A','\u2113')
+    .set2('\u2115')
+    .set2('\u2119','\u211D')
+    .set2('\u2124')
+    .set2('\u2126')
+    .set2('\u2128')
+    .set2('\u212A','\u212D')
+    .set2('\u212F','\u2131')
+    .set2('\u2133','\u2139')
+    .set2('\u213D','\u213F')
+    .set2('\u2145','\u2149')
+    .set2('\u2160','\u2183')
+    .set2('\u2336','\u237A')
+    .set2('\u2395')
+    .set2('\u249C','\u24E9')
+    .set2('\u3005','\u3007')
+    .set2('\u3021','\u3029')
+    .set2('\u3031','\u3035')
+    .set2('\u3038','\u303C')
+    .set2('\u3041','\u3096')
+    .set2('\u309D','\u309F')
+    .set2('\u30A1','\u30FA')
+    .set2('\u30FC','\u30FF')
+    .set2('\u3105','\u312C')
+    .set2('\u3131','\u318E')
+    .set2('\u3190','\u31B7')
+    .set2('\u31F0','\u321C')
+    .set2('\u3220','\u3243')
+    .set2('\u3260','\u327B')
+    .set2('\u327F','\u32B0')
+    .set2('\u32C0','\u32CB')
+    .set2('\u32D0','\u32FE')
+    .set2('\u3300','\u3376')
+    .set2('\u337B','\u33DD')
+    .set2('\u33E0','\u33FE')
+    .set2('\u3400','\u4DB5')
+    .set2('\u4E00','\u9FA5')
+    .set2('\uA000','\uA48C')
+    .set2('\uAC00','\uD7A3')
+    .set2('\uD800','\uFA2D')
+    .set2('\uFA30','\uFA6A')
+    .set2('\uFB00','\uFB06')
+    .set2('\uFB13','\uFB17')
+    .set2('\uFF21','\uFF3A')
+    .set2('\uFF41','\uFF5A')
+    .set2('\uFF66','\uFFBE')
+    .set2('\uFFC2','\uFFC7')
+    .set2('\uFFCA','\uFFCF')
+    .set2('\uFFD2','\uFFD7')
+    .set2('\uFFDA','\uFFDC')
+    .set2(0x10300,0x1031E)
+    .set2(0x10320,0x10323)
+    .set2(0x10330,0x1034A)
+    .set2(0x10400,0x10425)
+    .set2(0x10428,0x1044D)
+    .set2(0x1D000,0x1D0F5)
+    .set2(0x1D100,0x1D126)
+    .set2(0x1D12A,0x1D166)
+    .set2(0x1D16A,0x1D172)
+    .set2(0x1D183,0x1D184)
+    .set2(0x1D18C,0x1D1A9)
+    .set2(0x1D1AE,0x1D1DD)
+    .set2(0x1D400,0x1D454)
+    .set2(0x1D456,0x1D49C)
+    .set2(0x1D49E,0x1D49F)
+    .set2(0x1D4A2)
+    .set2(0x1D4A5,0x1D4A6)
+    .set2(0x1D4A9,0x1D4AC)
+    .set2(0x1D4AE,0x1D4B9)
+    .set2(0x1D4BB)
+    .set2(0x1D4BD,0x1D4C0)
+    .set2(0x1D4C2,0x1D4C3)
+    .set2(0x1D4C5,0x1D505)
+    .set2(0x1D507,0x1D50A)
+    .set2(0x1D50D,0x1D514)
+    .set2(0x1D516,0x1D51C)
+    .set2(0x1D51E,0x1D539)
+    .set2(0x1D53B,0x1D53E)
+    .set2(0x1D540,0x1D544)
+    .set2(0x1D546)
+    .set2(0x1D54A,0x1D550)
+    .set2(0x1D552,0x1D6A3)
+    .set2(0x1D6A8,0x1D7C9)
+    .set2(0x20000,0x2A6D6)
+    .set2(0x2F800,0x2FA1D)
+    .set2(0xF0000,0xFFFFD)
+    .set2(0x100000,0x10FFFD);
+}

Added: incubator/abdera/java/trunk/core/src/main/java/org/apache/abdera/util/iri/NameprepCodepointIterator.java
URL: http://svn.apache.org/viewvc/incubator/abdera/java/trunk/core/src/main/java/org/apache/abdera/util/iri/NameprepCodepointIterator.java?view=auto&rev=448818
==============================================================================
--- incubator/abdera/java/trunk/core/src/main/java/org/apache/abdera/util/iri/NameprepCodepointIterator.java (added)
+++ incubator/abdera/java/trunk/core/src/main/java/org/apache/abdera/util/iri/NameprepCodepointIterator.java Thu Sep 21 22:40:01 2006
@@ -0,0 +1,114 @@
+/*
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements.  The ASF licenses this file to You
+* under the Apache License, Version 2.0 (the "License"); you may not
+* use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.  For additional information regarding
+* copyright in this work, please see the NOTICE file in the top level
+* directory of this distribution.
+*/
+package org.apache.abdera.util.iri;
+
+import org.apache.abdera.util.ChainableBitSet;
+import org.apache.abdera.util.io.CodepointIterator;
+import org.apache.abdera.util.io.FilterCodepointIterator;
+import org.apache.abdera.util.io.InvalidCharacterException;
+
+
+/**
+ * CodepointIterator implementation that implements the bulk of the 
+ * Nameprep details
+ */
+class NameprepCodepointIterator extends FilterCodepointIterator {
+
+  private int[] rep = null;
+  private int reppos = 0;
+  private boolean haslcat = false;
+  private boolean hasrandalcat = false;
+  private boolean firstisrandalcat = false;
+  
+  private final ChainableBitSet PROHIBITED;
+  
+  @Override
+  public boolean hasNext() {
+    return rep != null || super.hasNext();
+  }
+
+  protected NameprepCodepointIterator(
+    CodepointIterator internal) {
+      this(internal,false);        
+  }
+  
+  private boolean islcat(int r) {
+    return (Nameprep.LCat.get(r));
+  }
+  
+  private boolean israndalcat(int r) {
+    return (Nameprep.RandAL.get(r));
+  }
+  
+  protected NameprepCodepointIterator(
+  CodepointIterator internal, boolean allowunassigned) {
+    super(internal);
+    PROHIBITED = (!allowunassigned) ? 
+        Nameprep.PROHIBITED : 
+        ((ChainableBitSet)Nameprep.PROHIBITED.clone()).set2(
+            Nameprep.UNASSIGNED,false);
+  }
+
+  @Override
+  public int next() throws InvalidCharacterException {
+    int r = -1;
+    if (this.rep == null) {
+      r = super.next();
+      if (r != -1) {
+        if (islcat(r)) haslcat = true;
+        if (israndalcat(r)) {
+          hasrandalcat = true;
+          if (position() == 1) firstisrandalcat = true;
+        }
+        if (haslcat && hasrandalcat) throw new RuntimeException("Bidi Exception");
+        
+        while(r != -1 && Nameprep.B1.get(r)) { 
+          r = super.next();
+        }
+        
+        if (r != -1) {
+          if (PROHIBITED.get(r)) throw new InvalidCharacterException(r);
+          int[] rep = Nameprep.B2(r);
+          if (rep != null) {
+            if (rep.length > 1) {
+              this.rep = rep;
+              reppos = 0;
+            }
+            r = rep[0];
+          }
+        }
+      }
+    } else { 
+      r = rep[++reppos];
+      if (reppos+1 >= rep.length) rep = null;
+    }
+    if ((r == -1 || !hasNext()) && 
+        hasrandalcat && 
+        (!firstisrandalcat || 
+         !israndalcat((r ==-1)?peek(position()):r))) {
+      throw new RuntimeException("Bidi Exception");
+    }
+    return r;
+  }
+
+  @Override
+  public char[] nextChars() throws InvalidCharacterException {
+    return super.nextChars();
+  }
+
+}

Added: incubator/abdera/java/trunk/core/src/main/java/org/apache/abdera/util/iri/Punycode.java
URL: http://svn.apache.org/viewvc/incubator/abdera/java/trunk/core/src/main/java/org/apache/abdera/util/iri/Punycode.java?view=auto&rev=448818
==============================================================================
--- incubator/abdera/java/trunk/core/src/main/java/org/apache/abdera/util/iri/Punycode.java (added)
+++ incubator/abdera/java/trunk/core/src/main/java/org/apache/abdera/util/iri/Punycode.java Thu Sep 21 22:40:01 2006
@@ -0,0 +1,210 @@
+/*
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements.  The ASF licenses this file to You
+* under the Apache License, Version 2.0 (the "License"); you may not
+* use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.  For additional information regarding
+* copyright in this work, please see the NOTICE file in the top level
+* directory of this distribution.
+*/
+package org.apache.abdera.util.iri;
+
+import java.io.IOException;
+
+import org.apache.abdera.util.io.CharUtils;
+import org.apache.abdera.util.io.CodepointIterator;
+
+
+/**
+ * Implementation of the Punycode encoding scheme used by IDNA (RFC 3492).
+ * <p>
+ * The constants and helper names follow the sample implementation in
+ * RFC 3492. That sample relies on unsigned arithmetic for its range tests;
+ * Java ints are signed, so the helpers below spell the ranges out.
+ */
+public final class Punycode {
+
+  static final int base = 0x24;             //  36
+  static final int tmin = 0x01;             //   1
+  static final int tmax = 0x1A;             //  26
+  static final int skew = 0x26;             //  38
+  static final int damp = 0x02BC;           // 700
+  static final int initial_bias = 0x48;     //  72
+  static final int initial_n = 0x80;        //0x80
+  static final int delimiter = 0x2D;        //0x2D
+
+  Punycode() {}
+
+  /** Tests whether cp is a basic (below 0x80) code point. */
+  private static boolean basic(int cp) {
+    return cp < 0x80;
+  }
+
+  /** Tests whether cp is the delimiter (0x2D). */
+  private static boolean delim(int cp) {
+    return cp == delimiter;
+  }
+
+  /**
+   * Tests whether a basic code point is an uppercase ASCII letter.
+   * The range is checked on both sides; a bare "bcp - 65 &lt; 26" is true
+   * for every value below 'A' when evaluated with signed ints.
+   */
+  private static boolean flagged(int bcp) {
+    return bcp >= 'A' && bcp <= 'Z';
+  }
+
+  /**
+   * Returns the numeric value (0..base-1) of a basic code point used as a
+   * Punycode digit, or base when cp does not represent a digit.
+   * Letters (either case) are 0..25 and '0'..'9' are 26..35.
+   */
+  private static int decode_digit(int cp) {
+    if (cp >= '0' && cp <= '9') return cp - 22;
+    if (cp >= 'A' && cp <= 'Z') return cp - 65;
+    if (cp >= 'a' && cp <= 'z') return cp - 97;
+    return base;
+  }
+
+  private static int t(boolean c) {
+    return (c)?1:0;
+  }
+
+  /**
+   * Returns the basic code point for digit d (0..base-1): 0..25 become
+   * 'a'..'z' (or 'A'..'Z' when upper is set), 26..35 become '0'..'9'.
+   */
+  private static int encode_digit(int d, boolean upper) {
+    return (d + 22 + 75 * t(d<26)) - (t(upper) << 5);
+  }
+
+  /**
+   * Forces the case of an ASCII letter: uppercase when upper is set,
+   * lowercase otherwise. Other basic code points are returned unchanged.
+   */
+  private static int encode_basic(int bcp, boolean upper) {
+    if (bcp >= 'a' && bcp <= 'z') bcp -= 32;
+    if (!upper && bcp >= 'A' && bcp <= 'Z') bcp += 32;
+    return bcp;
+  }
+
+  /** Bias adaptation function of RFC 3492 section 6.1. */
+  private static int adapt(int delta, int numpoints, boolean firsttime) {
+    int k;
+    delta = (firsttime) ? delta / damp : delta >> 1;
+    delta += delta / numpoints;
+    for (k = 0;  delta > ((base - tmin) * tmax) / 2;  k += base) {
+      delta /= base - tmin;
+    }
+    return k + (base - tmin + 1) * delta / (delta + skew);
+  }
+
+  /**
+   * Encodes the given characters as Punycode.
+   *
+   * @param chars the input characters
+   * @param case_flags optional (may be null) per-input flags; when given,
+   *        a set flag requests uppercase output for the matching position.
+   *        The flag is looked up at the iterator position just past the
+   *        code point, minus one.
+   * @return a new buffer holding the encoded form
+   * @throws IOException with message "Overflow" when delta would overflow
+   */
+  public static StringBuffer encode(
+    char[] chars,
+    boolean[] case_flags) 
+      throws IOException {
+    StringBuffer buf = new StringBuffer();
+    CodepointIterator ci = CodepointIterator.forCharArray(chars);
+    int n, delta, h, b, bias, m, q, k, t;
+    n = initial_n;
+    delta = 0;
+    bias = initial_bias;
+    int i = -1;
+    // Number of code points in the input. chars.length counts UTF-16 units
+    // and is too large whenever the iterator merges a surrogate pair.
+    int total = 0;
+    while (ci.hasNext()) {
+      i = ci.next();
+      ++total;
+      if (basic(i)) {
+        // Basic code points are copied through; with case flags supplied
+        // their case is forced instead of the code point being dropped.
+        buf.append((char)((case_flags != null) ?
+          encode_basic(i, case_flags[ci.position()-1]) :
+          i));
+      }
+    }
+    h = b = buf.length();
+    if (b > 0) buf.append((char)delimiter);
+    while (h < total) {
+      // Find the smallest code point not yet handled.
+      ci.position(0);
+      i = -1;
+      m = Integer.MAX_VALUE;
+      while(ci.hasNext()) {
+        i = ci.next();
+        if (i >= n && i < m) m = i;
+      }
+      if (m - n > (Integer.MAX_VALUE - delta) / (h + 1)) 
+        throw new IOException("Overflow");
+      delta += (m-n) * (h+1);
+      n = m;
+      ci.position(0);
+      i = -1;
+      while (ci.hasNext()) {
+        i = ci.next();
+        if (i < n) {
+          // A signed int wraps to a negative value, never to zero, so the
+          // limit has to be tested before incrementing.
+          if (delta == Integer.MAX_VALUE) throw new IOException("Overflow");
+          ++delta;
+        }
+        if (i == n) {
+          // Emit delta as a generalized variable-length integer.
+          for (q = delta, k = base;; k+= base) {
+            t = k <= bias ? tmin : k >= bias + tmax ? tmax : k - bias;
+            if (q < t) break;
+            buf.append((char)encode_digit(t+(q-t)%(base-t),false));
+            q = (q-t) / (base-t);
+          }
+          buf.append((char)encode_digit(
+            q, (case_flags!=null)?case_flags[ci.position()-1]:false));
+          bias = adapt(delta,h+1,h==b);
+          delta=0;
+          ++h;
+        }
+      }
+      ++delta; ++n;
+    }
+    return buf;
+  }
+
+  /**
+   * Encodes a string as Punycode.
+   *
+   * @return the encoded string, or null when s is null or encoding fails
+   *         (the failure is printed to standard error)
+   */
+  public static String encode(String s) {
+    try {
+      if (s == null) return null;
+      return encode(s.toCharArray(),null).toString();
+    } catch (Exception e) {
+      e.printStackTrace();
+      return null;
+    }
+  }
+
+  /**
+   * Decodes a Punycode string.
+   *
+   * @return the decoded string, or null when s is null or decoding fails
+   *         (the failure is printed to standard error)
+   */
+  public static String decode(String s) {
+    try {
+      if (s == null) return null;
+      return decode(s.toCharArray(),null).toString();
+    } catch (Exception e) {
+      e.printStackTrace();
+      return null;
+    }
+  }
+
+  /**
+   * Decodes Punycode characters.
+   *
+   * @param chars the encoded input
+   * @param case_flags optional (may be null) output array, indexed by
+   *        output code point, that receives true where the input letter
+   *        carrying the position was uppercase; it must have room for one
+   *        entry per decoded code point
+   * @return a new buffer holding the decoded text
+   * @throws IOException with message "Bad Input"/"Bad input" for malformed
+   *         input and "Overflow" when a value exceeds the int range
+   */
+  public static StringBuffer decode(
+    char[] chars, 
+    boolean[] case_flags) 
+      throws IOException {
+    StringBuffer buf = new StringBuffer();
+    int n, out, i, bias, b, j, in, oldi, w, k, digit, t;
+    n = initial_n;
+    out = i = 0;
+    bias = initial_bias;
+    // b ends up as the index of the last delimiter (0 when there is none).
+    for (b = j = 0;  j < chars.length; ++j)
+      if (delim(chars[j])) b = j;
+    for (j = 0; j < b; ++j) {
+      if (!basic(chars[j])) throw new IOException("Bad Input");
+      // Each basic code point gets its own flag slot.
+      if (case_flags != null) case_flags[j] = flagged(chars[j]);
+      buf.append((char)chars[j]);
+    }
+    out = buf.length();
+    for (in = (b > 0) ? b + 1 : 0; in < chars.length; ++out) {
+      // Decode one generalized variable-length integer into i.
+      for (oldi = i, w = 1, k = base; ; k += base) {
+        // Input ended in the middle of an integer.
+        if (in >= chars.length) throw new IOException("Bad input");
+        digit = decode_digit(chars[in++]);
+        if (digit >= base) throw new IOException("Bad input");
+        if (digit > (Integer.MAX_VALUE - i) / w) throw new IOException("Overflow");
+        i += digit * w;
+        t = (k <= bias) ? 
+          tmin : 
+          (k >= bias + tmax) ? 
+            tmax : 
+            k - bias;
+        if (digit < t) break;
+        if (w > Integer.MAX_VALUE / (base - t)) throw new IOException("Overflow");
+        w *= (base - t);
+      }
+      bias = adapt(i - oldi, out + 1, oldi == 0);
+      if (i / (out + 1) > Integer.MAX_VALUE - n) throw new IOException("Overflow");
+      n += i / (out + 1);
+      i %= (out + 1);
+      // Values above the Unicode range or inside the surrogate block
+      // cannot be represented as a code point in the output.
+      if (n > Character.MAX_CODE_POINT || (n >= 0xD800 && n <= 0xDFFF))
+        throw new IOException("Bad input");
+      if (case_flags != null) {
+        // Open a one-entry gap at i for the code point being inserted;
+        // its flag comes from the last digit consumed.
+        System.arraycopy(
+          case_flags, i, 
+          case_flags, i+1, 
+          out-i);
+        case_flags[i] = flagged(chars[in-1]);
+      }
+      // i counts code points, so translate it to a char offset before
+      // inserting; the two differ once the buffer holds surrogate pairs.
+      buf.insert(buf.offsetByCodePoints(0, i++), Character.toChars(n));
+    }
+    return buf;
+  }
+  
+}

Added: incubator/abdera/java/trunk/core/src/main/java/org/apache/abdera/util/iri/Scheme.java
URL: http://svn.apache.org/viewvc/incubator/abdera/java/trunk/core/src/main/java/org/apache/abdera/util/iri/Scheme.java?view=auto&rev=448818
==============================================================================
--- incubator/abdera/java/trunk/core/src/main/java/org/apache/abdera/util/iri/Scheme.java (added)
+++ incubator/abdera/java/trunk/core/src/main/java/org/apache/abdera/util/iri/Scheme.java Thu Sep 21 22:40:01 2006
@@ -0,0 +1,32 @@
+/*
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements.  The ASF licenses this file to You
+* under the Apache License, Version 2.0 (the "License"); you may not
+* use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.  For additional information regarding
+* copyright in this work, please see the NOTICE file in the top level
+* directory of this distribution.
+*/
+package org.apache.abdera.util.iri;
+
+import org.apache.abdera.util.io.CodepointIterator;
+
+/**
+ * Interface implemented by custom IRI scheme parsers
+ */
+public interface Scheme {
+  /**
+   * Returns the scheme name. SchemeRegistry lower-cases this value and uses
+   * it as the key under which the scheme is registered.
+   */
+  String getName();
+  /**
+   * Compares two IRIs.
+   * NOTE(review): presumably returns true when the two IRIs are equivalent
+   * under this scheme's rules - confirm against the implementations.
+   */
+  boolean equivalent(IRI iri1, IRI iri2);
+  /**
+   * Parses input from the given code point iterator into the builder.
+   * NOTE(review): the meaning of the boolean result is not visible here -
+   * confirm against the implementations.
+   */
+  boolean parse(CodepointIterator reader, Builder builder);
+}

Added: incubator/abdera/java/trunk/core/src/main/java/org/apache/abdera/util/iri/SchemeRegistry.java
URL: http://svn.apache.org/viewvc/incubator/abdera/java/trunk/core/src/main/java/org/apache/abdera/util/iri/SchemeRegistry.java?view=auto&rev=448818
==============================================================================
--- incubator/abdera/java/trunk/core/src/main/java/org/apache/abdera/util/iri/SchemeRegistry.java (added)
+++ incubator/abdera/java/trunk/core/src/main/java/org/apache/abdera/util/iri/SchemeRegistry.java Thu Sep 21 22:40:01 2006
@@ -0,0 +1,75 @@
+/*
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements.  The ASF licenses this file to You
+* under the Apache License, Version 2.0 (the "License"); you may not
+* use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.  For additional information regarding
+* copyright in this work, please see the NOTICE file in the top level
+* directory of this distribution.
+*/
+package org.apache.abdera.util.iri;
+
+import java.util.HashMap;
+import java.util.Map;
+
+/**
+ * Static registry of custom IRI schemes.
+ * <p>
+ * Schemes are stored under their lower-cased name, and every access to the
+ * backing map goes through a synchronized method.
+ */
+public final class SchemeRegistry {
+
+  private static SchemeRegistry registry;
+  
+  /** Returns the shared registry, creating it on first use. */
+  public static synchronized SchemeRegistry getInstance() {
+    if (registry == null) registry = new SchemeRegistry();
+    return registry;
+  }
+  
+  private final Map<String,Scheme> schemes;
+  
+  /** Creates a registry holding an HttpScheme and an HttpsScheme. */
+  SchemeRegistry() {
+    schemes = new HashMap<String,Scheme>();
+    schemes.put(key(HttpScheme.NAME), new HttpScheme());
+    schemes.put(key(HttpsScheme.NAME), new HttpsScheme());
+  }
+  
+  /**
+   * Normalizes a scheme name into its map key. A fixed locale is used so
+   * the result does not depend on the default locale (in a Turkish locale
+   * an uppercase "I" would otherwise lower-case to a dotless i).
+   */
+  private static String key(String name) {
+    return name.toLowerCase(java.util.Locale.ENGLISH);
+  }
+  
+  /**
+   * Loads the named class, instantiates it and registers the instance.
+   *
+   * @param schemeClass fully qualified name of a Scheme implementation
+   * @return true if the scheme was added, false if its name was taken
+   */
+  @SuppressWarnings("unchecked")
+  public synchronized boolean register(
+    String schemeClass) 
+      throws ClassNotFoundException, 
+             IllegalAccessException, 
+             InstantiationException {
+    ClassLoader loader = Thread.currentThread().getContextClassLoader();
+    // A thread may have no context class loader; fall back to our own.
+    if (loader == null) loader = SchemeRegistry.class.getClassLoader();
+    Class<Scheme> klass = (Class<Scheme>) loader.loadClass(schemeClass);
+    return register(klass);
+  }
+  
+  /**
+   * Instantiates the class through its no-argument constructor and
+   * registers the instance.
+   *
+   * @return true if the scheme was added, false if its name was taken
+   */
+  public synchronized boolean register(
+    Class<Scheme> schemeClass) 
+      throws IllegalAccessException, 
+             InstantiationException {
+    Scheme scheme = schemeClass.newInstance();
+    return register(scheme);
+  }
+  
+  /**
+   * Registers the scheme unless one with the same name (ignoring case) is
+   * already present.
+   *
+   * @return true if the scheme was added, false otherwise (including when
+   *         the scheme reports a null name)
+   */
+  public synchronized boolean register(Scheme scheme) {
+    String name = scheme.getName();
+    if (name == null) return false;
+    // The existence check must use the same normalized key as the put,
+    // otherwise "HTTP" would silently replace "http".
+    String k = key(name);
+    if (schemes.containsKey(k)) return false;
+    schemes.put(k, scheme);
+    return true;
+  }
+  
+  /**
+   * Looks up a scheme by name, ignoring case.
+   *
+   * @return the registered scheme, or null when the name is null or unknown
+   */
+  public synchronized Scheme getScheme(String scheme) {
+    if (scheme == null) return null;
+    return schemes.get(key(scheme));
+  }
+  
+}

Added: incubator/abdera/java/trunk/core/src/main/java/org/apache/abdera/util/lang/InvalidLangTagSyntax.java
URL: http://svn.apache.org/viewvc/incubator/abdera/java/trunk/core/src/main/java/org/apache/abdera/util/lang/InvalidLangTagSyntax.java?view=auto&rev=448818
==============================================================================
--- incubator/abdera/java/trunk/core/src/main/java/org/apache/abdera/util/lang/InvalidLangTagSyntax.java (added)
+++ incubator/abdera/java/trunk/core/src/main/java/org/apache/abdera/util/lang/InvalidLangTagSyntax.java Thu Sep 21 22:40:01 2006
@@ -0,0 +1,40 @@
+/*
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements.  The ASF licenses this file to You
+* under the Apache License, Version 2.0 (the "License"); you may not
+* use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.  For additional information regarding
+* copyright in this work, please see the NOTICE file in the top level
+* directory of this distribution.
+*/
+package org.apache.abdera.util.lang;
+
+/**
+ * Checked exception signalling that a language tag failed syntax
+ * verification.
+ */
+public class InvalidLangTagSyntax extends Exception {
+
+  private static final long serialVersionUID = -2653819135178550519L;
+
+  /** Creates the exception with neither a message nor a cause. */
+  public InvalidLangTagSyntax() {
+  }
+
+  /** Creates the exception with the given detail message. */
+  public InvalidLangTagSyntax(String message) {
+    super(message);
+  }
+
+  /** Creates the exception wrapping the given cause. */
+  public InvalidLangTagSyntax(Throwable cause) {
+    super(cause);
+  }
+
+  /** Creates the exception with both a detail message and a cause. */
+  public InvalidLangTagSyntax(String message, Throwable cause) {
+    super(message, cause);
+  }
+
+}

Added: incubator/abdera/java/trunk/core/src/main/java/org/apache/abdera/util/lang/Lang.java
URL: http://svn.apache.org/viewvc/incubator/abdera/java/trunk/core/src/main/java/org/apache/abdera/util/lang/Lang.java?view=auto&rev=448818
==============================================================================
--- incubator/abdera/java/trunk/core/src/main/java/org/apache/abdera/util/lang/Lang.java (added)
+++ incubator/abdera/java/trunk/core/src/main/java/org/apache/abdera/util/lang/Lang.java Thu Sep 21 22:40:01 2006
@@ -0,0 +1,185 @@
+/*
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements.  The ASF licenses this file to You
+* under the Apache License, Version 2.0 (the "License"); you may not
+* use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.  For additional information regarding
+* copyright in this work, please see the NOTICE file in the top level
+* directory of this distribution.
+*/
+package org.apache.abdera.util.lang;
+
+import java.io.Serializable;
+import java.util.Iterator;
+import java.util.Locale;
+
+import org.apache.abdera.util.io.CharUtils;
+import org.apache.abdera.util.io.InvalidCharacterException;
+import org.apache.abdera.util.iri.Constants;
+
+
+/**
+ * rfc3066
+ */
+public class Lang 
+  implements Iterable<String>, 
+             Serializable, 
+             Cloneable {
+
+  public static final Lang ANY = new Lang();
+  
+  private static final long serialVersionUID = -4620499451615533855L;
+  protected final String[] tags;
+  protected final Locale locale;
+  
+  private Lang() {
+    tags = new String[] {"*"};
+    locale = null;
+  }
+  
+  public Lang(Locale locale) {
+    this.tags = locale.toString().replace("\u005F","\u002D").split("\u002D");
+    this.locale = locale;
+  }
+  
+  public Lang(String tag) throws InvalidLangTagSyntax {
+    this(parse(tag));
+  }
+  
+  public Lang(String... tags) throws InvalidLangTagSyntax {
+    verify(tags);
+    this.tags = tags;
+    this.locale = initLocale();
+  }
+
+  private Locale initLocale() {
+    Locale locale = null;
+    switch(tags.length) {
+      case 0:  break;
+      case 1:  locale = new Locale(tags[0]); break;
+      case 2:  locale = new Locale(tags[0],tags[1]); break;
+      default: locale = new Locale(tags[0],tags[1],tags[2]); break;
+    }
+    return locale;
+  }
+  
+  public String getPrimary() {
+    return tags[0];
+  }
+  
+  public String getSubtag(int n) {
+    if (n+1 > tags.length) throw new ArrayIndexOutOfBoundsException(n);
+    return tags[n+1];
+  }
+  
+  public int getSubtagCount() {
+    return tags.length-1;
+  }
+  
+  public Locale getLocale() {
+    return locale;
+  }
+
+  public String toString() {
+    StringBuffer buf = new StringBuffer();
+    for (String s: tags) {
+      if (buf.length() > 0) buf.append('\u002D');
+      buf.append(s);
+    }
+    return buf.toString();
+  }
+  
+  public static boolean matches(Lang lang, String range) throws InvalidLangTagSyntax {
+    if (range.equals("*")) return true;
+    return matches(lang, new Lang(range));
+  }
+  
+  public static boolean matches(Lang lang, Lang range) {
+    if (range.equals("*")) return true;
+    if (lang.equals(range)) return true;
+    if (lang.tags.length <= range.tags.length) return false;
+    for (int n = 0; n < range.tags.length; n++) {
+      if (!lang.tags[n].equalsIgnoreCase(range.tags[n])) return false;
+    }
+    return true;
+  }
+  
+  public boolean matches(String range) throws InvalidLangTagSyntax {
+    return matches(this,range);
+  }
+  
+  public boolean matches(Lang range) {
+    return matches(this,range);
+  }
+  
+  @Override
+  public int hashCode() {
+    final int PRIME = 31;
+    int result = 1;
+    result = PRIME * result + ((locale == null) ? 0 : locale.hashCode());
+    for (String tag: tags) {
+      result = PRIME * result + tag.hashCode();
+    }
+    return result;
+  }
+  
+  @Override
+  public boolean equals(Object obj) {
+    if (this == obj)
+      return true;
+    if (obj == null)
+      return false;
+    if (obj instanceof String) {
+      String s = (String) obj;
+      if (s.equals("*")) obj = ANY;
+      else {
+        try {
+          obj = new Lang(s);
+        } catch (Exception e) {}
+      }
+    }
+    if (getClass() != obj.getClass())
+      return false;
+    final Lang other = (Lang) obj;
+    if (tags.length != other.tags.length) return false;
+    for (int n = 0; n < tags.length; n++) {
+      if (!tags[n].equalsIgnoreCase(other.tags[n])) return false;
+    }
+    return true;
+  }
+
+  private static void verify(String[] tags) throws InvalidLangTagSyntax {
+    if (tags.length == 0) throw new InvalidLangTagSyntax();
+    String primary = tags[0];
+    try {
+      CharUtils.verify(primary,Constants.ALPHA);
+    } catch (InvalidCharacterException e) {
+      throw new InvalidLangTagSyntax();
+    }
+    for (int n = 1; n < tags.length; n++) {
+      try {
+        CharUtils.verify(tags[n],Constants.ALPHANUM);
+      } catch (InvalidCharacterException e) {
+        throw new InvalidLangTagSyntax();
+      }
+    }
+  }
+  
+  private static String[] parse(String tag) throws InvalidLangTagSyntax {
+    String[] tags = tag.split("\u002D");
+    verify(tags);
+    return tags;
+  }
+
+  public Iterator<String> iterator() {
+    return java.util.Arrays.asList(tags).iterator();
+  }
+}

Added: incubator/abdera/java/trunk/core/src/main/java/org/apache/abdera/util/unicode/Normalizer.java
URL: http://svn.apache.org/viewvc/incubator/abdera/java/trunk/core/src/main/java/org/apache/abdera/util/unicode/Normalizer.java?view=auto&rev=448818
==============================================================================
--- incubator/abdera/java/trunk/core/src/main/java/org/apache/abdera/util/unicode/Normalizer.java (added)
+++ incubator/abdera/java/trunk/core/src/main/java/org/apache/abdera/util/unicode/Normalizer.java Thu Sep 21 22:40:01 2006
@@ -0,0 +1,179 @@
+/*
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements.  The ASF licenses this file to You
+* under the Apache License, Version 2.0 (the "License"); you may not
+* use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.  For additional information regarding
+* copyright in this work, please see the NOTICE file in the top level
+* directory of this distribution.
+*/
+package org.apache.abdera.util.unicode;
+
+import java.io.IOException;
+
+import org.apache.abdera.util.io.CharUtils;
+import org.apache.abdera.util.io.CodepointIterator;
+
+
+/**
+ * Performs Unicode Normalization (Form D,C,KD and KC)
+ */
+public final class Normalizer {
+
+  /** Feature flags that make up a normalization form. */
+  public enum Mask {
+    NONE(0),
+    COMPATIBILITY(1),
+    COMPOSITION(2);
+    
+    // Explicit flag value, so the bit tests in Form do not depend on the
+    // declaration order of the constants.
+    private final int bit;
+    
+    Mask(int bit) {
+      this.bit = bit;
+    }
+  }
+  
+  /** The four normalization forms. */
+  public enum Form { 
+    D, 
+    C(Mask.COMPOSITION), 
+    KD(Mask.COMPATIBILITY), 
+    KC(Mask.COMPATIBILITY,Mask.COMPOSITION);
+    
+    private final int mask;
+
+    Form(Mask... masks) {
+      int bits = 0;
+      for (Mask m : masks) {
+        bits |= m.bit;
+      }
+      this.mask = bits;
+    }
+    
+    /** True for the forms built with Mask.COMPATIBILITY (KD, KC). */
+    public boolean isCompatibility() {
+      return (mask & Mask.COMPATIBILITY.bit) != 0;
+    }
+    
+    /** True for the forms built without Mask.COMPATIBILITY (D, C). */
+    public boolean isCanonical() {
+      return !isCompatibility();
+    }
+    
+    /** True for the forms built with Mask.COMPOSITION (C, KC). */
+    public boolean isComposition() {
+      return (mask & Mask.COMPOSITION.bit) != 0;
+    }
+  }
+  
+  private Normalizer() {}
+  
+  /**
+   * Normalize the string using NFKC
+   */
+  public static StringBuffer normalize(String source) throws IOException {
+    return normalize(source, Form.KC);
+  }
+  
+  /**
+   * Normalize the string using the specified Form
+   */
+  public static StringBuffer normalize(
+    String source, 
+    Form form) 
+      throws IOException {
+    return normalize(source, form, new StringBuffer());
+  }
+  
+  /**
+   * Normalize the string into the given StringBuffer using the given Form.
+   * The buffer is returned untouched when the source is empty or no
+   * character database instance is available.
+   */
+  public static StringBuffer normalize(
+    String source, 
+    Form form, 
+    StringBuffer buf) 
+      throws IOException {
+      UnicodeCharacterDatabase ucd = UnicodeCharacterDatabase.getInstance();
+      if (source.length() != 0 && ucd != null) {
+        decompose(ucd, source, form, buf);
+        compose(ucd, form, buf);
+      }
+      return buf;
+  }
+  
+  /**
+   * Decomposes every code point of source and inserts the resulting code
+   * points into buf at the position chosen by findInsertionPoint.
+   */
+  private static void decompose(
+    UnicodeCharacterDatabase ucd,
+    String source, 
+    Form form, 
+    StringBuffer buf) 
+      throws IOException {
+      StringBuffer internal = new StringBuffer();
+      CodepointIterator ci = CodepointIterator.forCharSequence(source);
+      boolean canonical = form.isCanonical();
+      while (ci.hasNext()) {
+        int c = ci.next();
+        internal.setLength(0);
+        ucd.decompose(c, canonical, internal);
+        CodepointIterator ii = CodepointIterator.forCharSequence(internal);
+        while(ii.hasNext()) {
+          int ch = ii.next();
+          int i = findInsertionPoint(ucd, buf, ch);
+          buf.insert(i,CharUtils.toString(ch));
+        }
+      }
+    
+  }
+  
+  /**
+   * Returns the index in buf at which c should be inserted. A code point
+   * of canonical class 0 goes at the end; otherwise the scan walks back
+   * from the end past every character whose class is greater than c's.
+   */
+  private static int findInsertionPoint(
+    UnicodeCharacterDatabase ucd, 
+    StringBuffer buf, int c) {
+    int cc = ucd.getCanonicalClass(c);
+    int i = buf.length();
+    if (cc != 0) {
+      while (i > 0) {
+        int ch = CharUtils.charAt(buf, i-1);
+        if (ucd.getCanonicalClass(ch) <= cc) break;
+        // Step back over the character just examined, not over the one
+        // being inserted; their sizes differ when only one of them lies
+        // outside the BMP.
+        // NOTE(review): assumes CharUtils.charAt yields the whole code
+        // point when i-1 falls on the second half of a surrogate pair.
+        i -= CharUtils.size(ch);
+      }
+    }
+    return i;
+  }
+  
+  /**
+   * Composes buf in place when the form asks for composition; the buffer
+   * is truncated to the composed length. Nothing is done for other forms
+   * or for an empty buffer.
+   */
+  private static void compose(
+    UnicodeCharacterDatabase ucd,
+    Form form, 
+    StringBuffer buf) 
+      throws IOException {
+    // An empty buffer has no first character to read below.
+    if (!form.isComposition() || buf.length() == 0) return;
+    int pos = 0;
+    int lc = CharUtils.charAt(buf, pos);
+    int cpos = CharUtils.size(lc);    
+    int lcc = ucd.getCanonicalClass(lc);
+    if (lcc != 0) lcc = 256;
+    int len = buf.length();
+    int c;
+    for (int dpos = cpos; dpos < buf.length(); dpos += CharUtils.size(c)) {
+      c = CharUtils.charAt(buf,dpos);
+      int cc = ucd.getCanonicalClass(c);
+      int composite = ucd.getPairComposition(lc, c);
+      // NOTE(review): 0xFFFF appears to be the "no composition" marker
+      // returned by getPairComposition - confirm.
+      if (composite != '\uFFFF' && (lcc < cc || lcc == 0)) {
+        CharUtils.setChar(buf, pos, composite);
+        lc = composite;
+      } else {
+        if (cc == 0) {
+          pos = cpos;
+          lc = c;
+        }
+        lcc = cc;
+        CharUtils.setChar(buf,cpos,c);
+        // setChar may change the buffer length; keep dpos aligned.
+        if (buf.length() != len) {
+          dpos += buf.length() - len;
+          len = buf.length();
+        }
+        cpos += CharUtils.size(c);
+      }
+    }
+    buf.setLength(cpos);
+  }
+  
+  /** Invokes UnicodeCharacterDatabase.main with the ucd.res resource path. */
+  public static void main(String... args) throws Exception {
+    
+    UnicodeCharacterDatabase.main("src/org/apache/abdera/util/unicode/data/ucd.res");
+    
+  }
+}