You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@abdera.apache.org by jm...@apache.org on 2006/09/22 07:40:03 UTC
svn commit: r448818 [2/3] - in /incubator/abdera/java/trunk/core/src:
main/java/org/apache/abdera/util/ main/java/org/apache/abdera/util/io/
main/java/org/apache/abdera/util/iri/ main/java/org/apache/abdera/util/lang/
main/java/org/apache/abdera/util/u...
Added: incubator/abdera/java/trunk/core/src/main/java/org/apache/abdera/util/iri/IRI.java
URL: http://svn.apache.org/viewvc/incubator/abdera/java/trunk/core/src/main/java/org/apache/abdera/util/iri/IRI.java?view=auto&rev=448818
==============================================================================
--- incubator/abdera/java/trunk/core/src/main/java/org/apache/abdera/util/iri/IRI.java (added)
+++ incubator/abdera/java/trunk/core/src/main/java/org/apache/abdera/util/iri/IRI.java Thu Sep 21 22:40:01 2006
@@ -0,0 +1,797 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. The ASF licenses this file to You
+ * under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License. For additional information regarding
+ * copyright in this work, please see the NOTICE file in the top level
+ * directory of this distribution.
+ */
+package org.apache.abdera.util.iri;
+
+import java.io.IOException;
+import java.io.Serializable;
+import java.net.MalformedURLException;
+import java.net.URISyntaxException;
+import java.util.BitSet;
+
+import org.apache.abdera.util.io.CharUtils;
+import org.apache.abdera.util.io.CodepointIterator;
+import org.apache.abdera.util.unicode.Normalizer;
+
+
+public class IRI
+ implements Serializable,
+ Cloneable {
+
+ private static final long serialVersionUID = -4530530782760282284L;
+ private Scheme _scheme;
+ private String scheme;
+ private String authority;
+ private String userinfo;
+ private String host;
+ private int port;
+ private String path;
+ private String query;
+ private String fragment;
+ private boolean doubleslash;
+
+ private String a_host;
+ private String a_fragment;
+ private String a_path;
+ private String a_query;
+ private String a_userinfo;
+ private String a_authority;
+
+ private String d_authority;
+ private String d_userinfo;
+ private String d_host;
+ private String d_path;
+ private String d_query;
+ private String d_fragment;
+
+ public IRI(java.net.URL url) throws IRISyntaxException, IOException {
+ this(Escaping.encode(
+ Escaping.decode(url.toString()),
+ Constants.IUNRESERVED,
+ Constants.RESERVED,
+ Constants.PCTENC));
+ }
+
+ public IRI(java.net.URI uri) throws IRISyntaxException, IOException {
+ this(Escaping.encode(
+ Escaping.decode(uri.toString()),
+ Constants.IUNRESERVED,
+ Constants.RESERVED,
+ Constants.PCTENC,
+ Constants.GENDELIMS));
+ }
+
+ public IRI(String iri) throws IRISyntaxException, IOException {
+ Builder b = new Builder();
+ parse(iri, b);
+ init(
+ b.schemeobj,
+ b.scheme,
+ b.authority,
+ b.userinfo,
+ b.host,
+ b.port,
+ b.path,
+ b.query,
+ b.fragment,
+ b.doubleslash);
+ }
+
+ public IRI(String iri, Normalizer.Form nf) throws IRISyntaxException, IOException {
+ this(Normalizer.normalize(iri,nf).toString());
+ }
+
+ public IRI(
+ String scheme,
+ String userinfo,
+ String host,
+ int port,
+ String path,
+ String query,
+ String fragment) {
+ SchemeRegistry reg = SchemeRegistry.getInstance();
+ Scheme _scheme = reg.getScheme(scheme);
+ StringBuffer buf = new StringBuffer();
+ buildAuthority(buf,userinfo, host, port);
+ String authority = (buf.length()!=0)?buf.toString():null;
+ boolean doubleslash = (authority != null);
+ init(_scheme,scheme,authority,userinfo,
+ host,port,path,query,fragment,doubleslash);
+ }
+
+ public IRI(
+ String scheme,
+ String authority,
+ String path,
+ String query,
+ String fragment) {
+ Builder builder = new Builder();
+ if (authority != null)
+ splitAuthority(authority, builder);
+ SchemeRegistry reg = SchemeRegistry.getInstance();
+ Scheme _scheme = reg.getScheme(scheme);
+ boolean doubleslash = (authority != null);
+ init(_scheme,scheme,authority,builder.userinfo,
+ builder.host,builder.port,path,query,
+ fragment,doubleslash);
+ }
+
+ public IRI(
+ String scheme,
+ String host,
+ String path,
+ String fragment) {
+ this(scheme, null, host, -1, path, null, fragment);
+ }
+
+ IRI(
+ Scheme _scheme,
+ String scheme,
+ String authority,
+ String userinfo,
+ String host,
+ int port,
+ String path,
+ String query,
+ String fragment,
+ boolean doubleslash) {
+ init(_scheme,scheme,authority,userinfo,
+ host,port,path,query,fragment,doubleslash);
+ }
+
+ // Common initializer used by every constructor: stores the raw components,
+ // then derives the decoded (d_*) and ASCII (a_*) variants from them.
+ private void init(
+ Scheme _scheme,
+ String scheme,
+ String authority,
+ String userinfo,
+ String host,
+ int port,
+ String path,
+ String query,
+ String fragment,
+ boolean doubleslash) {
+ this._scheme = _scheme;
+ this.scheme = scheme;
+ this.authority = authority;
+ this.userinfo = userinfo;
+ this.host = host;
+ this.port = port;
+ // The raw path field is never left null; a missing path becomes "".
+ this.path = (path != null) ? path : "";
+ this.query = query;
+ this.fragment = fragment;
+ this.doubleslash = doubleslash;
+
+ // Decoded forms are computed from the arguments as passed in.
+ d_authority = Escaping.decode(authority);
+ d_userinfo = Escaping.decode(userinfo);
+ // NOTE(review): this decodes the 'path' argument, not this.path, so a null
+ // argument is not replaced by "" here — confirm that is intended.
+ d_path = Escaping.decode(path);
+ d_query = Escaping.decode(query);
+ d_fragment = Escaping.decode(fragment);
+ d_host = Escaping.decode(host);
+
+ // ASCII forms are derived from the decoded values through the getters;
+ // the authority is built last because it reads a_userinfo and a_host.
+ a_host = IDNA.toASCII(d_host);
+ a_fragment = Escaping.encode(getFragment(),Constants.FRAGMENT);
+ a_path = normalize(Escaping.encode(getPath(), Constants.PATH));
+ a_query = Escaping.encode(getQuery(),Constants.QUERY);
+ a_userinfo = Escaping.encode(getUserInfo(),Constants.USERINFO);
+ a_authority = buildASCIIAuthority();
+ }
+
+ /**
+  * Combines the raw components with the usual multiply-by-31 scheme.
+  * The field order (authority, fragment, host, path, port, query, scheme,
+  * userinfo) determines the resulting value and must not be changed.
+  */
+ @Override
+ public int hashCode() {
+   int hash = 1;
+   for (String part : new String[] { authority, fragment, host, path }) {
+     hash = 31 * hash + (part == null ? 0 : part.hashCode());
+   }
+   hash = 31 * hash + port;
+   for (String part : new String[] { query, scheme, userinfo }) {
+     hash = 31 * hash + (part == null ? 0 : part.hashCode());
+   }
+   return hash;
+ }
+
+ /**
+  * Two IRIs are equal when they are of the same class and all raw
+  * components (authority, fragment, host, path, port, query, scheme and
+  * user info) match exactly; null components only match null.
+  */
+ @Override
+ public boolean equals(Object obj) {
+   if (this == obj) {
+     return true;
+   }
+   if (obj == null || getClass() != obj.getClass()) {
+     return false;
+   }
+   final IRI that = (IRI) obj;
+   return port == that.port
+       && (authority == null ? that.authority == null : authority.equals(that.authority))
+       && (fragment == null ? that.fragment == null : fragment.equals(that.fragment))
+       && (host == null ? that.host == null : host.equals(that.host))
+       && (path == null ? that.path == null : path.equals(that.path))
+       && (query == null ? that.query == null : query.equals(that.query))
+       && (scheme == null ? that.scheme == null : scheme.equals(that.scheme))
+       && (userinfo == null ? that.userinfo == null : userinfo.equals(that.userinfo));
+ }
+
+ public boolean equivalent(IRI uri) {
+ if (_scheme != null) return _scheme.equivalent(this, uri);
+ else {
+ String s2 = uri.normalize().toASCIIString();
+ String s1 = this.normalize().toASCIIString();
+ return s1.compareTo(s2) == 0;
+ }
+ }
+
+ public String getAuthority() {
+ return d_authority;
+ }
+
+ public String getFragment() {
+ return d_fragment;
+ }
+
+ public String getHost() {
+ return d_host;
+ }
+
+ public IDNA getIDN() {
+ return new IDNA(d_host);
+ }
+
+ public String getASCIIHost() {
+ return a_host;
+ }
+
+ public String getPath() {
+ return d_path;
+ }
+
+ public int getPort() {
+ return port;
+ }
+
+ public String getQuery() {
+ return d_query;
+ }
+
+ public String getScheme() {
+ return (scheme != null) ? scheme.toLowerCase() : null;
+ }
+
+ public String getSchemeSpecificPart() {
+ return buildSchemeSpecificPart(
+ userinfo,
+ host,
+ port,
+ path,
+ query,
+ fragment);
+ }
+
+ public String getUserInfo() {
+ return d_userinfo;
+ }
+
+ public String getRawAuthority() {
+ return authority;
+ }
+
+ public String getRawFragment() {
+ return fragment;
+ }
+
+ public String getRawPath() {
+ return path;
+ }
+
+ public String getRawQuery() {
+ return query;
+ }
+
+ public String getRawSchemeSpecificPart() {
+ return buildSchemeSpecificPart(
+ userinfo,
+ host,
+ port,
+ path,
+ query,
+ fragment);
+ }
+
+ public String getRawUserInfo() {
+ return userinfo;
+ }
+
+ private void buildAuthority(
+ StringBuffer buf,
+ String aui,
+ String ah,
+ int port) {
+ if (aui != null && aui.length() != 0) {
+ buf.append(aui);
+ buf.append('@');
+ }
+ if (ah != null && ah.length() != 0) {
+ buf.append(ah);
+ }
+ if (port != -1) {
+ buf.append(':');
+ buf.append(port);
+ }
+ }
+
+ private String buildASCIIAuthority() {
+ StringBuffer buf = new StringBuffer();
+ String aui = getASCIIUserInfo();
+ String ah = getASCIIHost();
+ int port = getPort();
+ buildAuthority(buf,aui,ah,port);
+ return buf.toString();
+ }
+
+ public String getASCIIAuthority() {
+ return a_authority;
+ }
+
+ public String getASCIIFragment() {
+ return a_fragment;
+ }
+
+ public String getASCIIPath() {
+ return a_path;
+ }
+
+ public String getASCIIQuery() {
+ return a_query;
+ }
+
+ public String getASCIIUserInfo() {
+ return a_userinfo;
+ }
+
+ public String getASCIISchemeSpecificPart() {
+ return buildSchemeSpecificPart(
+ getASCIIUserInfo(),
+ getASCIIHost(),
+ getPort(),
+ getASCIIPath(),
+ getASCIIQuery(),
+ getASCIIFragment());
+ }
+
+ private String buildSchemeSpecificPart(
+ String userinfo,
+ String host,
+ int port,
+ String path,
+ String query,
+ String fragment) {
+ StringBuffer buf = new StringBuffer();
+ if (doubleslash) buf.append("//");
+ buildAuthority(buf, userinfo, host, port);
+ if (path != null && path.length() != 0) {
+ buf.append(path);
+ }
+ if (query != null && query.length() != 0) {
+ buf.append('?');
+ buf.append(query);
+ }
+ if (fragment != null && fragment.length() != 0) {
+ buf.append('#');
+ buf.append(fragment);
+ }
+ return buf.toString();
+ }
+
+ public Object clone() throws CloneNotSupportedException {
+ return super.clone();
+ }
+
+ public boolean isAbsolute() {
+ return scheme != null;
+ }
+
+ // Reports whether this IRI has no raw path at all.
+ // NOTE(review): init() replaces a null path with "" before storing it, so for
+ // instances set up through init() this appears to always return false —
+ // confirm whether opaque IRIs are meant to be detected some other way.
+ public boolean isOpaque() {
+ return path == null;
+ }
+
+ /**
+  * Expresses {@code c} relative to the base {@code b} where that is possible.
+  * <p>
+  * {@code c} is returned unchanged when either IRI is opaque, when the
+  * schemes differ (compared case-insensitively), when the raw authorities
+  * differ, or when the normalized path of {@code c} does not lie underneath
+  * the normalized path of {@code b}. Otherwise a new IRI is returned that
+  * carries only the remaining path plus the query and fragment of {@code c}.
+  *
+  * @param b the base IRI
+  * @param c the IRI to relativize
+  * @return the relative IRI, or {@code c} itself when it cannot be relativized
+  */
+ public static IRI relativize(IRI b, IRI c) {
+   if (c.isOpaque() || b.isOpaque()) return c;
+   final boolean sameScheme = (b.scheme == null)
+       ? c.scheme == null
+       : b.scheme.equalsIgnoreCase(c.scheme);
+   if (!sameScheme) return c;
+   // A relative reference only makes sense within one authority; returning
+   // c unchanged is always a safe answer when they differ.
+   final boolean sameAuthority = (b.authority == null)
+       ? c.authority == null
+       : b.authority.equals(c.authority);
+   if (!sameAuthority) return c;
+   // normalize(String) never returns null, so no fallback is needed here.
+   String bpath = normalize(b.getPath());
+   final String cpath = normalize(c.getPath());
+   if (!bpath.equals(cpath)) {
+     // endsWith copes with an empty base path, where charAt(length-1) would throw.
+     if (!bpath.endsWith("/")) bpath += "/";
+     if (!cpath.startsWith(bpath)) return c;
+   }
+   // cpath is already normalized, so its remainder is used as is; running it
+   // through normalize again would prefix multi-segment remainders with "/".
+   final String remainder = cpath.substring(bpath.length());
+   return new IRI(
+     (Scheme) null,
+     null, null, null, null, -1,
+     remainder,
+     c.getQuery(),
+     c.getFragment(),
+     false);
+ }
+
+ public IRI relativize(IRI iri) {
+ return relativize(this, iri);
+ }
+
+ public boolean isPathAbsolute() {
+ String path = getPath();
+ return (path != null) && path.length() > 0 && path.charAt(0) == '/';
+ }
+
+ public boolean isSameDocumentReference() {
+ return scheme == null &&
+ authority == null &&
+ (path == null ||
+ path.length() == 0 ||
+ path.equals(".")) &&
+ query == null;
+ }
+
+ public static IRI resolve(IRI b, String c) throws IRISyntaxException, IOException {
+ return resolve(b, IRI.create(c));
+ }
+
+ // Resolves the reference c against the base b and returns the resulting IRI.
+ public static IRI resolve(IRI b, IRI c) {
+ // Opaque IRIs cannot take part in resolution; the reference is returned as is.
+ if (c.isOpaque() || b.isOpaque()) return c;
+ if (c.isSameDocumentReference()) {
+ String cfragment = c.getFragment();
+ String bfragment = b.getFragment();
+ // Same document and same fragment: the result is simply a copy of the base.
+ if ((cfragment == null && bfragment == null) ||
+ (cfragment != null && cfragment.equals(bfragment))) {
+ try {
+ return (IRI) b.clone();
+ } catch (Exception e) {
+ return null; // Not going to happen
+ }
+ } else {
+ // Same document, different fragment: base components with c's fragment.
+ // NOTE(review): the decoded getters of b are passed where the constructor
+ // stores raw values — confirm this mix is intended.
+ return new IRI(
+ b._scheme,
+ b.getScheme(),
+ b.getAuthority(),
+ b.getUserInfo(),
+ b.getHost(),
+ b.getPort(),
+ normalize(b.getPath()),
+ b.getQuery(),
+ cfragment,
+ b.doubleslash
+ );
+ }
+ }
+ // An absolute reference already is its own resolution.
+ if (c.isAbsolute()) return c;
+
+ // Relative reference: scheme and "//" marker come from the base,
+ // query and fragment from the reference.
+ Scheme _scheme = b._scheme;
+ String scheme = b.scheme;
+ boolean ds = b.doubleslash;
+ String query = c.getQuery();
+ String fragment = c.getFragment();
+ String userinfo = null;
+ String authority = null;
+ String host = null;
+ int port = -1;
+ String path = null;
+ if (c.getAuthority() == null) {
+ // No authority on the reference: inherit the base authority and merge paths.
+ authority = b.getAuthority();
+ userinfo = b.getUserInfo();
+ host = b.getHost();
+ port = b.getPort();
+ path = c.isPathAbsolute() ? normalize(c.getPath()) : resolve(b.getPath(),c.getPath());
+ } else {
+ // The reference carries its own authority: use it together with its path.
+ authority = c.getAuthority();
+ userinfo = c.getUserInfo();
+ host = c.getHost();
+ port = c.getPort();
+ path = normalize(c.getPath());
+ }
+ return new IRI(_scheme,scheme,authority,userinfo,host,port,path,query,fragment,ds);
+ }
+
+ public IRI normalize() {
+ return normalize(this);
+ }
+
+ public static IRI normalize(IRI iri) {
+ if (iri.isOpaque() || iri.getPath() == null) return iri;
+ return new IRI(
+ iri._scheme,
+ iri.getScheme(),
+ iri.getAuthority(),
+ iri.getUserInfo(),
+ iri.getHost(),
+ iri.getPort(),
+ normalize(iri.getPath()),
+ iri.getQuery(),
+ iri.getFragment(),
+ iri.doubleslash
+ );
+ }
+
+ private static String normalize(String path) {
+ if (path == null) return "/";
+ String[] segments = path.split("/");
+ if (segments.length < 2) return path;
+ StringBuffer buf = new StringBuffer("/");
+ for (int n = 0; n < segments.length; n++) {
+ String segment = segments[n].intern();
+ if (segment == ".") {
+ segments[n] = null;
+ } else if (segment == "..") {
+ segments[n] = null;
+ int i = n;
+ while(--i > -1) {
+ if (segments[i] != null) break;
+ }
+ if (i > -1) segments[i] = null;
+ }
+ }
+ for (int n = 0; n < segments.length; n++) {
+ if (segments[n] != null) {
+ if (buf.length() > 1) buf.append('/');
+ buf.append(segments[n]);
+ }
+ }
+ if (path.charAt(path.length()-1) == '/') buf.append('/');
+ return buf.toString();
+ }
+
+ private static String resolve(String bpath, String cpath) {
+ if (bpath == null && cpath == null) return null;
+ if (bpath == null && cpath != null) return cpath;
+ if (bpath != null && cpath == null) return bpath;
+ StringBuffer buf = new StringBuffer("");
+ int n = bpath.lastIndexOf('/');
+ if (n > -1) buf.append(bpath.substring(0,n+1));
+ if (cpath.length() != 0) buf.append(cpath);
+ return normalize(buf.toString());
+ }
+
+ public IRI resolve(IRI iri) {
+ return resolve(this,iri);
+ }
+
+ public IRI resolve(String iri) throws IRISyntaxException, IOException {
+ return resolve(this,IRI.create(iri));
+ }
+
+ public String toString() {
+ StringBuffer buf = new StringBuffer();
+ String scheme = getScheme();
+ if (scheme != null && scheme.length() != 0) {
+ buf.append(scheme);
+ buf.append(':');
+ }
+ buf.append(getSchemeSpecificPart());
+ return buf.toString();
+ }
+
+ public String toASCIIString() {
+ StringBuffer buf = new StringBuffer();
+ String scheme = getScheme();
+ if (scheme != null && scheme.length() != 0) {
+ buf.append(scheme);
+ buf.append(':');
+ }
+ buf.append(getASCIISchemeSpecificPart());
+ return buf.toString();
+ }
+
+ /**
+  * Returns the string form of this IRI wrapped in the U+202A / U+202C
+  * directional formatting characters. A character that is already present
+  * at the respective end is not added again; an empty string stays empty.
+  */
+ public String toBIDIString() {
+   final String plain = toString();
+   if (plain.length() == 0) {
+     return plain;
+   }
+   final StringBuffer wrapped = new StringBuffer(plain);
+   if (wrapped.charAt(0) != '\u202A') {
+     wrapped.insert(0, '\u202A');
+   }
+   if (wrapped.charAt(wrapped.length() - 1) != '\u202C') {
+     wrapped.append('\u202C');
+   }
+   return wrapped.toString();
+ }
+
+ public java.net.URI toURI() throws URISyntaxException {
+ return new java.net.URI(toASCIIString());
+ }
+
+ public java.net.URL toURL() throws MalformedURLException, URISyntaxException {
+ return toURI().toURL();
+ }
+
+ ////////// parse implementation
+
+ private static void parse(String uri, Builder builder) throws IRISyntaxException, IOException {
+ SchemeRegistry reg = SchemeRegistry.getInstance();
+ builder.chars = uri.toCharArray();
+ CodepointIterator ci = CodepointIterator.forCharArray(builder.chars);
+ Parser.parse(ci, builder, reg);
+ }
+
+ public static IRI create(String iri) throws IRISyntaxException, IOException {
+ return new IRI(iri);
+ }
+
+ public static IRI create(String iri, Normalizer.Form nf) throws IRISyntaxException, IOException {
+ return new IRI(iri,nf);
+ }
+
+ static void splitAuthority(String authority, Builder builder) {
+ if (authority != null) {
+ int n = authority.indexOf('@');
+ if (n > -1) builder.userinfo = authority.substring(0,n);
+ int a = authority.indexOf('[',n);
+ if (a > -1) {
+ int m = authority.indexOf(']',a);
+ if (m > -1) a = m;
+ a = authority.indexOf(':',a);
+ } else
+ a = authority.indexOf(':',n);
+ if (a > -1) {
+ builder.host = authority.substring(n+1,a);
+ String p = authority.substring(a+1);
+ if (p.length() > 0) {
+ try {
+ builder.port = Integer.parseInt(p);
+ } catch (Exception e) {}
+ }
+ } else builder.host = authority.substring(n+1);
+ }
+ }
+
+ static class Builder implements org.apache.abdera.util.iri.Builder {
+ private Scheme schemeobj;
+ private char[] chars;
+ private String scheme;
+ private String authority;
+ private String userinfo;
+ private String host;
+ private int port = -1;
+ private String path;
+ private String query;
+ private String fragment;
+ private boolean doubleslash;
+
+ private void setScheme(Scheme scheme) {
+ this.schemeobj = scheme;
+ }
+
+ public void scheme(int s, int l) {
+ scheme = (l > 0) ? new String(chars,s,l).toLowerCase() : null;
+ }
+ public void authority(int s, int l) {
+ authority = (l > 0) ? new String(chars,s,l) : null;
+ splitAuthority(authority, this);
+ }
+ public void path(int s, int l) {
+ path = (l > 0) ? new String(chars,s,l) : null;
+ }
+ public void query(int s, int l) {
+ query = (l > 0) ? new String(chars,s,l) : null;
+ }
+ public void fragment(int s, int l) {
+ fragment = (l > 0) ? new String(chars,s,l) : null;
+ }
+
+ public IRI getAtomURI() {
+ return new IRI(
+ schemeobj,
+ scheme,authority,userinfo,
+ host,port,path,query,fragment,
+ doubleslash);
+ }
+ }
+
+ // Default parser: walks the codepoint iterator once and reports the
+ // scheme, authority, path, query and fragment ranges to the builder.
+ static class Parser {
+ static void parse(CodepointIterator ci, Builder builder, SchemeRegistry reg)
+ throws IRISyntaxException,
+ IOException {
+ int e = ci.position();
+ // A scheme is only recorded when the scanned run is followed by ':'.
+ scan(ci,Constants.SCHEME,-1);
+ if (ci.peek() == ':')
+ builder.scheme(e,ci.position()-e);
+ Scheme _scheme = null;
+ if (builder.scheme != null && builder.scheme.length() != 0)
+ _scheme = reg.getScheme(builder.scheme);
+ if (_scheme != null) {
+ // allow for scheme specific parsing. if the resolved scheme
+ // does parse the result, skip the rest, otherwise, do the
+ // default parsing
+ builder.setScheme(_scheme);
+ if (_scheme.parse(ci, builder)) return;
+ }
+ // default parsing. works for most common schemes
+ // NOTE(review): this else belongs to the (_scheme != null) test, so the
+ // position is rewound whenever no scheme object was resolved, including
+ // when a scheme was recorded above — verify how unregistered schemes behave.
+ else ci.position(e);
+ scan(ci, Constants.COLON,1);
+ e = ci.position();
+ // Remember whether the authority was introduced by "//".
+ if (ci.peek() == '/' &&
+ ci.peek(ci.position() + 1) == '/') {
+ scan(ci,Constants.SLASH,2);
+ builder.doubleslash = true;
+ }
+ e = ci.position();
+ int f = find(ci,Constants.SEPS);
+ if(f != 0) {
+ // Tentatively read an authority; it only counts when it is followed by
+ // the end of input or a separator, otherwise rewind and treat it as path.
+ scan(ci,Constants.ISERVER,-1);
+ if (ci.peek() == -1 || CharUtils.isSet(ci.peek(), Constants.SEPS)) {
+ builder.authority(e,ci.position()-e);
+ }
+ else ci.position(e);
+ e = ci.position();
+ }
+ // Path, then the query after its marker, then the fragment after its marker.
+ scan(ci,Constants.IPATH,-1);
+ builder.path(e,ci.position()-e);
+ scan(ci,Constants.QUERYMARK,-1);
+ e = ci.position();
+ scan(ci,Constants.IQUERY,-1);
+ builder.query(e,ci.position()-e);
+ scan(ci,Constants.HASH,-1);
+ e = ci.position();
+ scan(ci,Constants.IFRAGMENT,-1);
+ builder.fragment(e,ci.position()-e);
+ }
+ }
+
+ // Looks ahead from the current position, without consuming input, while the
+ // peeked code points are members of the given set, and returns an index
+ // derived from where that look-ahead stopped.
+ // NOTE(review): n is incremented both in the loop condition and in the loop
+ // body, so only every other index is examined — confirm whether that is intended.
+ private static int find(CodepointIterator ci, BitSet set) throws IOException {
+ int n = ci.position();
+ int c = -1;
+ while((c = ci.peek(n++)) != -1 && set.get(c)) { n++; }
+ return n-1;
+ }
+
+ // Consumes code points from the iterator for as long as the next one is a
+ // member of the given set. The return value is always -1.
+ // NOTE(review): the count parameter is never read.
+ private static int scan(CodepointIterator ci, BitSet set, int count) throws IOException, IRISyntaxException {
+ while (ci.hasNext() && ci.peek() != -1 && set.get(ci.peek())){
+ int p = ci.next();
+ // NOTE(review): the loop condition already required the peeked code point to
+ // be in the set, so this branch looks unreachable provided next() returns the
+ // code point that peek() reported — confirm against CodepointIterator.
+ if (!set.get(p)) {
+ if (!CharUtils.isSet(p, Constants.RESERVED, Constants.IUNRESERVED, Constants.HASH))
+ throw new IRISyntaxException("Invalid Character (0x" + Integer.toHexString(p) + ") In URI");
+ return -1;
+ }
+ }
+ return -1;
+ }
+}
Added: incubator/abdera/java/trunk/core/src/main/java/org/apache/abdera/util/iri/IRISyntaxException.java
URL: http://svn.apache.org/viewvc/incubator/abdera/java/trunk/core/src/main/java/org/apache/abdera/util/iri/IRISyntaxException.java?view=auto&rev=448818
==============================================================================
--- incubator/abdera/java/trunk/core/src/main/java/org/apache/abdera/util/iri/IRISyntaxException.java (added)
+++ incubator/abdera/java/trunk/core/src/main/java/org/apache/abdera/util/iri/IRISyntaxException.java Thu Sep 21 22:40:01 2006
@@ -0,0 +1,28 @@
+/*
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements. The ASF licenses this file to You
+* under the Apache License, Version 2.0 (the "License"); you may not
+* use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License. For additional information regarding
+* copyright in this work, please see the NOTICE file in the top level
+* directory of this distribution.
+*/
+package org.apache.abdera.util.iri;
+
+import java.io.IOException;
+
+/**
+ * Signals that a string could not be processed as an IRI.
+ * <p>
+ * The class and its constructor are public because public constructors and
+ * factory methods of {@code IRI} declare this exception; callers outside the
+ * package must be able to name it in a catch clause.
+ */
+public class IRISyntaxException extends IOException {
+ private static final long serialVersionUID = 5177739661976965423L;
+
+ /**
+  * @param message description of the syntax problem
+  */
+ public IRISyntaxException(String message) {
+   super(message);
+ }
+}
\ No newline at end of file
Added: incubator/abdera/java/trunk/core/src/main/java/org/apache/abdera/util/iri/Nameprep.java
URL: http://svn.apache.org/viewvc/incubator/abdera/java/trunk/core/src/main/java/org/apache/abdera/util/iri/Nameprep.java?view=auto&rev=448818
==============================================================================
--- incubator/abdera/java/trunk/core/src/main/java/org/apache/abdera/util/iri/Nameprep.java (added)
+++ incubator/abdera/java/trunk/core/src/main/java/org/apache/abdera/util/iri/Nameprep.java Thu Sep 21 22:40:01 2006
@@ -0,0 +1,922 @@
+/*
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements. The ASF licenses this file to You
+* under the Apache License, Version 2.0 (the "License"); you may not
+* use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License. For additional information regarding
+* copyright in this work, please see the NOTICE file in the top level
+* directory of this distribution.
+*/
+package org.apache.abdera.util.iri;
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.util.BitSet;
+
+import org.apache.abdera.util.ChainableBitSet;
+import org.apache.abdera.util.io.CodepointIterator;
+import org.apache.abdera.util.unicode.Normalizer;
+
+
+/**
+ * Implements the Nameprep protocol
+ */
+public class Nameprep {
+
+ /**
+  * Applies the Nameprep iterator to the input and returns the result in
+  * normalization form KC.
+  * <p>
+  * Values returned by the iterator are appended as full code points, so
+  * characters outside the Basic Multilingual Plane are written as surrogate
+  * pairs instead of being truncated to a single char.
+  *
+  * @param s               the string to prepare
+  * @param allowunassigned passed through to the Nameprep iterator
+  * @return the prepared string
+  * @throws RuntimeException wrapping any failure raised during processing
+  */
+ public static String prep(String s, boolean allowunassigned) {
+   try {
+     StringBuffer buf = new StringBuffer();
+     CodepointIterator ci = CodepointIterator.forCharSequence(s);
+     NameprepCodepointIterator r = new NameprepCodepointIterator(ci, allowunassigned);
+     while (r.hasNext()) {
+       int i = r.next();
+       // -1 is skipped; everything else is a code point, not a char.
+       if (i != -1)
+         buf.appendCodePoint(i);
+     }
+     return Normalizer.normalize(buf.toString(), Normalizer.Form.KC).toString();
+   } catch (Throwable e) {
+     throw new RuntimeException(e);
+   }
+ }
+
+ public static String prep(String s) {
+ return prep(s,false);
+ }
+
+ public static final BitSet B1 = new ChainableBitSet()
+ .set2('\u00AD','\u034F','\u1806','\u180B','\u180C','\u180D','\u200B',
+ '\u200C','\u200D','\u2060','\uFE00','\uFE01','\uFE02','\uFE03',
+ '\uFE04','\uFE05','\uFE06','\uFE07','\uFE08','\uFE09','\uFE0A',
+ '\uFE0B','\uFE0C','\uFE0D','\uFE0E','\uFE0F','\uFEFF',
+ // Not listed in B1, but treated as map-to-nothing by others
+ 0x8F,0xA0,0x86,0x8b,0x80,0x81,0x88
+ );
+
+ public static final int[][] B2 = loadb2();
+
+ /**
+  * Looks up the B2 table entry for a code point.
+  *
+  * @param c the code point to look up
+  * @return the table entry, or null when the table has no entry for {@code c}
+  *         or {@code c} lies outside the table
+  */
+ public static final int[] B2(int c) {
+   // Code points beyond the table are valid input and simply have no entry.
+   return (c >= 0 && c < B2.length) ? B2[c] : null;
+ }
+
+ private static int[] parse(String rep) {
+ String[] tokens = rep.trim().split(" ");
+ int[] i = new int[tokens.length];
+ for (int n = 0; n < tokens.length; n++) {
+ i[n]=Integer.parseInt(tokens[n],16);
+ }
+ return i;
+ }
+
+ private static final String B2DAT = "/org/apache/abdera/util/iri/data/B2.dat";
+
+ /**
+  * Reads the B2 table from the classpath resource named by {@code B2DAT}.
+  * Each line has the form {@code <hex index> ; <hex values separated by spaces>};
+  * the values are stored in the returned array at the given index.
+  * <p>
+  * An I/O failure while reading is ignored and leaves the remaining entries
+  * null. The resource stream is always closed.
+  *
+  * @return the table; slots without data are null
+  */
+ public static int[][] loadb2() {
+   int[][] map = new int[120764][];
+   InputStream in = null;
+   try {
+     in = Nameprep.class.getResourceAsStream(B2DAT);
+     // Fixed charset so that reading does not depend on the platform default.
+     BufferedReader bufr = new BufferedReader(new InputStreamReader(in, "UTF-8"));
+     String line = null;
+     while ((line = bufr.readLine()) != null) {
+       String[] tokens = line.trim().split("\\s*;\\s*");
+       int n = Integer.parseInt(tokens[0], 16);
+       map[n] = parse(tokens[1]);
+     }
+   } catch (IOException e) {
+     // Best effort, as before: return whatever was read up to the failure.
+   } finally {
+     if (in != null) {
+       try {
+         in.close();
+       } catch (IOException ignored) {
+         // Nothing useful can be done if closing fails.
+       }
+     }
+   }
+   return map;
+ }
+
+ public static final BitSet UNASSIGNED = new ChainableBitSet()
+ .set2('\u0221')
+ .set2('\u0234','\u024F')
+ .set2('\u02AE','\u02AF')
+ .set2('\u02EF','\u02FF')
+ .set2('\u0350','\u035F')
+ .set2('\u0370','\u0373')
+ .set2('\u0376','\u0379')
+ .set2('\u037B','\u037D')
+ .set2('\u037F','\u0383')
+ .set2('\u038B')
+ .set2('\u038D')
+ .set2('\u03A2')
+ .set2('\u03CF')
+ .set2('\u03F7','\u03FF')
+ .set2('\u0487')
+ .set2('\u04CF')
+ .set2('\u04F6','\u04F7')
+ .set2('\u04FA','\u04FF')
+ .set2('\u0510','\u0530')
+ .set2('\u0557','\u0558')
+ .set2('\u0560')
+ .set2('\u0588')
+ .set2('\u058B','\u0590')
+ .set2('\u05A2')
+ .set2('\u05BA')
+ .set2('\u05C5','\u05CF')
+ .set2('\u05EB','\u05EF')
+ .set2('\u05F5','\u060B')
+ .set2('\u060D','\u061A')
+ .set2('\u061C','\u061E')
+ .set2('\u0620')
+ .set2('\u063B','\u063F')
+ .set2('\u0656','\u065F')
+ .set2('\u06EE','\u06EF')
+ .set2('\u06FF')
+ .set2('\u070E')
+ .set2('\u072D','\u072F')
+ .set2('\u074B','\u077F')
+ .set2('\u07B2','\u0900')
+ .set2('\u0904')
+ .set2('\u093A','\u093B')
+ .set2('\u094E','\u094F')
+ .set2('\u0955','\u0957')
+ .set2('\u0971','\u0980')
+ .set2('\u0984')
+ .set2('\u098D','\u098E')
+ .set2('\u0991','\u0992')
+ .set2('\u09A9')
+ .set2('\u09B1')
+ .set2('\u09B3','\u09B5')
+ .set2('\u09BA','\u09BB')
+ .set2('\u09BD')
+ .set2('\u09C5','\u09C6')
+ .set2('\u09C9','\u09CA')
+ .set2('\u09CE','\u09D6')
+ .set2('\u09D8','\u09DB')
+ .set2('\u09DE')
+ .set2('\u09E4','\u09E5')
+ .set2('\u09FB','\u0A01')
+ .set2('\u0A03','\u0A04')
+ .set2('\u0A0B','\u0A0E')
+ .set2('\u0A11','\u0A12')
+ .set2('\u0A29')
+ .set2('\u0A31')
+ .set2('\u0A34')
+ .set2('\u0A37')
+ .set2('\u0A3A','\u0A3B')
+ .set2('\u0A3D')
+ .set2('\u0A43','\u0A46')
+ .set2('\u0A49','\u0A4A')
+ .set2('\u0A4E','\u0A58')
+ .set2('\u0A5D')
+ .set2('\u0A5F','\u0A65')
+ .set2('\u0A75','\u0A80')
+ .set2('\u0A84')
+ .set2('\u0A8C')
+ .set2('\u0A8E')
+ .set2('\u0A92')
+ .set2('\u0AA9')
+ .set2('\u0AB1')
+ .set2('\u0AB4')
+ .set2('\u0ABA','\u0ABB')
+ .set2('\u0AC6')
+ .set2('\u0ACA')
+ .set2('\u0ACE','\u0ACF')
+ .set2('\u0AD1','\u0ADF')
+ .set2('\u0AE1','\u0AE5')
+ .set2('\u0AF0','\u0B00')
+ .set2('\u0B04')
+ .set2('\u0B0D','\u0B0E')
+ .set2('\u0B11','\u0B12')
+ .set2('\u0B29')
+ .set2('\u0B31')
+ .set2('\u0B34','\u0B35')
+ .set2('\u0B3A','\u0B3B')
+ .set2('\u0B44','\u0B46')
+ .set2('\u0B49','\u0B4A')
+ .set2('\u0B4E','\u0B55')
+ .set2('\u0B58','\u0B5B')
+ .set2('\u0B5E')
+ .set2('\u0B62','\u0B65')
+ .set2('\u0B71','\u0B81')
+ .set2('\u0B84')
+ .set2('\u0B8B','\u0B8D')
+ .set2('\u0B91')
+ .set2('\u0B96','\u0B98')
+ .set2('\u0B9B')
+ .set2('\u0B9D')
+ .set2('\u0BA0','\u0BA2')
+ .set2('\u0BA5','\u0BA7')
+ .set2('\u0BAB','\u0BAD')
+ .set2('\u0BB6')
+ .set2('\u0BBA','\u0BBD')
+ .set2('\u0BC3','\u0BC5')
+ .set2('\u0BC9')
+ .set2('\u0BCE','\u0BD6')
+ .set2('\u0BD8','\u0BE6')
+ .set2('\u0BF3','\u0C00')
+ .set2('\u0C04')
+ .set2('\u0C0D')
+ .set2('\u0C11')
+ .set2('\u0C29')
+ .set2('\u0C34')
+ .set2('\u0C3A','\u0C3D')
+ .set2('\u0C45')
+ .set2('\u0C49')
+ .set2('\u0C4E','\u0C54')
+ .set2('\u0C57','\u0C5F')
+ .set2('\u0C62','\u0C65')
+ .set2('\u0C70','\u0C81')
+ .set2('\u0C84')
+ .set2('\u0C8D')
+ .set2('\u0C91')
+ .set2('\u0CA9')
+ .set2('\u0CB4')
+ .set2('\u0CBA','\u0CBD')
+ .set2('\u0CC5')
+ .set2('\u0CC9')
+ .set2('\u0CCE','\u0CD4')
+ .set2('\u0CD7','\u0CDD')
+ .set2('\u0CDF')
+ .set2('\u0CE2','\u0CE5')
+ .set2('\u0CF0','\u0D01')
+ .set2('\u0D04')
+ .set2('\u0D0D')
+ .set2('\u0D11')
+ .set2('\u0D29')
+ .set2('\u0D3A','\u0D3D')
+ .set2('\u0D44','\u0D45')
+ .set2('\u0D49')
+ .set2('\u0D4E','\u0D56')
+ .set2('\u0D58','\u0D5F')
+ .set2('\u0D62','\u0D65')
+ .set2('\u0D70','\u0D81')
+ .set2('\u0D84')
+ .set2('\u0D97','\u0D99')
+ .set2('\u0DB2')
+ .set2('\u0DBC')
+ .set2('\u0DBE','\u0DBF')
+ .set2('\u0DC7','\u0DC9')
+ .set2('\u0DCB','\u0DCE')
+ .set2('\u0DD5')
+ .set2('\u0DD7')
+ .set2('\u0DE0','\u0DF1')
+ .set2('\u0DF5','\u0E00')
+ .set2('\u0E3B','\u0E3E')
+ .set2('\u0E5C','\u0E80')
+ .set2('\u0E83')
+ .set2('\u0E85','\u0E86')
+ .set2('\u0E89')
+ .set2('\u0E8B','\u0E8C')
+ .set2('\u0E8E','\u0E93')
+ .set2('\u0E98')
+ .set2('\u0EA0')
+ .set2('\u0EA4')
+ .set2('\u0EA6')
+ .set2('\u0EA8','\u0EA9')
+ .set2('\u0EAC')
+ .set2('\u0EBA')
+ .set2('\u0EBE','\u0EBF')
+ .set2('\u0EC5')
+ .set2('\u0EC7')
+ .set2('\u0ECE','\u0ECF')
+ .set2('\u0EDA','\u0EDB')
+ .set2('\u0EDE','\u0EFF')
+ .set2('\u0F48')
+ .set2('\u0F6B','\u0F70')
+ .set2('\u0F8C','\u0F8F')
+ .set2('\u0F98')
+ .set2('\u0FBD')
+ .set2('\u0FCD','\u0FCE')
+ .set2('\u0FD0','\u0FFF')
+ .set2('\u1022')
+ .set2('\u1028')
+ .set2('\u102B')
+ .set2('\u1033','\u1035')
+ .set2('\u103A','\u103F')
+ .set2('\u105A','\u109F')
+ .set2('\u10C6','\u10CF')
+ .set2('\u10F9','\u10FA')
+ .set2('\u10FC','\u10FF')
+ .set2('\u115A','\u115E')
+ .set2('\u11A3','\u11A7')
+ .set2('\u11FA','\u11FF')
+ .set2('\u1207')
+ .set2('\u1247')
+ .set2('\u1249')
+ .set2('\u124E','\u124F')
+ .set2('\u1257')
+ .set2('\u1259')
+ .set2('\u125E','\u125F')
+ .set2('\u1287')
+ .set2('\u1289')
+ .set2('\u128E','\u128F')
+ .set2('\u12AF')
+ .set2('\u12B1')
+ .set2('\u12B6','\u12B7')
+ .set2('\u12BF')
+ .set2('\u12C1')
+ .set2('\u12C6','\u12C7')
+ .set2('\u12CF')
+ .set2('\u12D7')
+ .set2('\u12EF')
+ .set2('\u130F')
+ .set2('\u1311')
+ .set2('\u1316','\u1317')
+ .set2('\u131F')
+ .set2('\u1347')
+ .set2('\u135B','\u1360')
+ .set2('\u137D','\u139F')
+ .set2('\u13F5','\u1400')
+ .set2('\u1677','\u167F')
+ .set2('\u169D','\u169F')
+ .set2('\u16F1','\u16FF')
+ .set2('\u170D')
+ .set2('\u1715','\u171F')
+ .set2('\u1737','\u173F')
+ .set2('\u1754','\u175F')
+ .set2('\u176D')
+ .set2('\u1771')
+ .set2('\u1774','\u177F')
+ .set2('\u17DD','\u17DF')
+ .set2('\u17EA','\u17FF')
+ .set2('\u180F')
+ .set2('\u181A','\u181F')
+ .set2('\u1878','\u187F')
+ .set2('\u18AA','\u1DFF')
+ .set2('\u1E9C','\u1E9F')
+ .set2('\u1EFA','\u1EFF')
+ .set2('\u1F16','\u1F17')
+ .set2('\u1F1E','\u1F1F')
+ .set2('\u1F46','\u1F47')
+ .set2('\u1F4E','\u1F4F')
+ .set2('\u1F58')
+ .set2('\u1F5A')
+ .set2('\u1F5C')
+ .set2('\u1F5E')
+ .set2('\u1F7E','\u1F7F')
+ .set2('\u1FB5')
+ .set2('\u1FC5')
+ .set2('\u1FD4','\u1FD5')
+ .set2('\u1FDC')
+ .set2('\u1FF0','\u1FF1')
+ .set2('\u1FF5')
+ .set2('\u1FFF')
+ .set2('\u2053','\u2056')
+ .set2('\u2058','\u205E')
+ .set2('\u2064','\u2069')
+ .set2('\u2072','\u2073')
+ .set2('\u208F','\u209F')
+ .set2('\u20B2','\u20CF')
+ .set2('\u20EB','\u20FF')
+ .set2('\u213B','\u213C')
+ .set2('\u214C','\u2152')
+ .set2('\u2184','\u218F')
+ .set2('\u23CF','\u23FF')
+ .set2('\u2427','\u243F')
+ .set2('\u244B','\u245F')
+ .set2('\u24FF')
+ .set2('\u2614','\u2615')
+ .set2('\u2618')
+ .set2('\u267E','\u267F')
+ .set2('\u268A','\u2700')
+ .set2('\u2705')
+ .set2('\u270A','\u270B')
+ .set2('\u2728')
+ .set2('\u274C')
+ .set2('\u274E')
+ .set2('\u2753','\u2755')
+ .set2('\u2757')
+ .set2('\u275F','\u2760')
+ .set2('\u2795','\u2797')
+ .set2('\u27B0')
+ .set2('\u27BF','\u27CF')
+ .set2('\u27EC','\u27EF')
+ .set2('\u2B00','\u2E7F')
+ .set2('\u2E9A')
+ .set2('\u2EF4','\u2EFF')
+ .set2('\u2FD6','\u2FEF')
+ .set2('\u2FFC','\u2FFF')
+ .set2('\u3040')
+ .set2('\u3097','\u3098')
+ .set2('\u3100','\u3104')
+ .set2('\u312D','\u3130')
+ .set2('\u318F')
+ .set2('\u31B8','\u31EF')
+ .set2('\u321D','\u321F')
+ .set2('\u3244','\u3250')
+ .set2('\u327C','\u327E')
+ .set2('\u32CC','\u32CF')
+ .set2('\u32FF')
+ .set2('\u3377','\u337A')
+ .set2('\u33DE','\u33DF')
+ .set2('\u33FF')
+ .set2('\u4DB6','\u4DFF')
+ .set2('\u9FA6','\u9FFF')
+ .set2('\uA48D','\uA48F')
+ .set2('\uA4C7','\uABFF')
+ .set2('\uD7A4','\uD7FF')
+ .set2('\uFA2E','\uFA2F')
+ .set2('\uFA6B','\uFAFF')
+ .set2('\uFB07','\uFB12')
+ .set2('\uFB18','\uFB1C')
+ .set2('\uFB37')
+ .set2('\uFB3D')
+ .set2('\uFB3F')
+ .set2('\uFB42')
+ .set2('\uFB45')
+ .set2('\uFBB2','\uFBD2')
+ .set2('\uFD40','\uFD4F')
+ .set2('\uFD90','\uFD91')
+ .set2('\uFDC8','\uFDCF')
+ .set2('\uFDFD','\uFDFF')
+ .set2('\uFE10','\uFE1F')
+ .set2('\uFE24','\uFE2F')
+ .set2('\uFE47','\uFE48')
+ .set2('\uFE53')
+ .set2('\uFE67')
+ .set2('\uFE6C','\uFE6F')
+ .set2('\uFE75')
+ .set2('\uFEFD','\uFEFE')
+ .set2('\uFF00')
+ .set2('\uFFBF','\uFFC1')
+ .set2('\uFFC8','\uFFC9')
+ .set2('\uFFD0','\uFFD1')
+ .set2('\uFFD8','\uFFD9')
+ .set2('\uFFDD','\uFFDF')
+ .set2('\uFFE7')
+ .set2('\uFFEF','\uFFF8')
+ .set2(0x10000,0x102FF)
+ .set2(0x1031F)
+ .set2(0x10324,0x1032F)
+ .set2(0x1034B,0x103FF)
+ .set2(0x10426,0x10427)
+ .set2(0x1044E,0x1CFFF)
+ .set2(0x1D0F6,0x1D0FF)
+ .set2(0x1D127,0x1D129)
+ .set2(0x1D1DE,0x1D3FF)
+ .set2(0x1D455)
+ .set2(0x1D49D)
+ .set2(0x1D4A0,0x1D4A1)
+ .set2(0x1D4A3,0x1D4A4)
+ .set2(0x1D4A7,0x1D4A8)
+ .set2(0x1D4AD)
+ .set2(0x1D4BA)
+ .set2(0x1D4BC)
+ .set2(0x1D4C1)
+ .set2(0x1D4C4)
+ .set2(0x1D506)
+ .set2(0x1D50B,0x1D50C)
+ .set2(0x1D515)
+ .set2(0x1D51D)
+ .set2(0x1D53A)
+ .set2(0x1D53F)
+ .set2(0x1D545)
+ .set2(0x1D547,0x1D549)
+ .set2(0x1D551)
+ .set2(0x1D6A4,0x1D6A7)
+ .set2(0x1D7CA,0x1D7CD)
+ .set2(0x1D800,0x1FFFD)
+ .set2(0x2A6D7,0x2F7FF)
+ .set2(0x2FA1E,0x2FFFD)
+ .set2(0x30000,0x3FFFD)
+ .set2(0x40000,0x4FFFD)
+ .set2(0x50000,0x5FFFD)
+ .set2(0x60000,0x6FFFD)
+ .set2(0x70000,0x7FFFD)
+ .set2(0x80000,0x8FFFD)
+ .set2(0x90000,0x9FFFD)
+ .set2(0xA0000,0xAFFFD)
+ .set2(0xB0000,0xBFFFD)
+ .set2(0xC0000,0xCFFFD)
+ .set2(0xD0000,0xDFFFD)
+ .set2(0xE0000)
+ .set2(0xE0002,0xE001F)
+ .set2(0xE0080,0xEFFFD);
+
+ /**
+  * Code points that may not appear in the output. NameprepCodepointIterator
+  * throws InvalidCharacterException for any code point whose bit is set here;
+  * when it is constructed with allowunassigned=true it works on a clone of
+  * this set with the UNASSIGNED bits cleared again.
+  * NOTE(review): the "c.N" section labels below appear to refer to the
+  * prohibition tables in RFC 3454 appendix C -- confirm before editing.
+  * This set is public and mutable; callers must not modify it in place.
+  */
+ public static final ChainableBitSet PROHIBITED = new ChainableBitSet()
+
+ // c.1.2 -- non-ASCII space characters
+ .set2('\u00A0').set2('\u1680').set2('\u2000','\u200B')
+ .set2('\u202F').set2('\u205F').set2('\u3000')
+
+ // c.2.2 -- non-ASCII control characters
+ .set2('\u0080','\u009F').set2('\u06DD').set2('\u070F')
+ .set2('\u180E').set2('\u200C').set2('\u200D').set2('\u2028')
+ .set2('\u2029').set2('\u2060').set2('\u2061').set2('\u2062')
+ .set2('\u2063').set2('\u206A','\u206F').set2('\uFEFF')
+ .set2('\uFFF9','\uFFFC').set2(0x1D173,0x1D17A)
+
+ // c.3 -- private use (BMP private use area plus planes 15 and 16)
+ .set2('\uE000','\uF8FF').set2(0xF0000,0xFFFFD).set2(0x100000,0x10FFFD)
+
+ // c.4 -- non-character code points (U+FDD0..U+FDEF and the last two code points of every plane)
+ .set2('\uFDD0','\uFDEF').set2('\uFFFE','\uFFFF').set2(0x1FFFE,0x1FFFF)
+ .set2(0x2FFFE,0x2FFFF).set2(0x3FFFE,0x3FFFF).set2(0x4FFFE,0x4FFFF)
+ .set2(0x5FFFE,0x5FFFF).set2(0x6FFFE,0x6FFFF).set2(0x7FFFE,0x7FFFF)
+ .set2(0x8FFFE,0x8FFFF).set2(0x9FFFE,0x9FFFF).set2(0xAFFFE,0xAFFFF)
+ .set2(0xBFFFE,0xBFFFF).set2(0xCFFFE,0xCFFFF).set2(0xDFFFE,0xDFFFF)
+ .set2(0xEFFFE,0xEFFFF).set2(0xFFFFE,0xFFFFF).set2(0x10FFFE,0x10FFFF)
+
+ // c.5 -- surrogate code points
+ .set2('\uD800','\uDFFF')
+
+ // c.6 -- inappropriate for plain text (overlaps the U+FFF9..U+FFFC range already set under c.2.2)
+ .set2('\uFFF9','\uFFFD')
+
+ // c.7 -- inappropriate for canonical representation
+ .set2('\u2FF0','\u2FFB')
+
+ // c.8 -- change display properties or are deprecated (U+206A..U+206F repeat the c.2.2 range)
+ .set2('\u0340').set2('\u0341').set2('\u200E')
+ .set2('\u200F').set2('\u202A').set2('\u202B')
+ .set2('\u202C').set2('\u202D').set2('\u202E')
+ .set2('\u206A').set2('\u206B').set2('\u206C')
+ .set2('\u206D').set2('\u206E').set2('\u206F')
+
+ // c.9 -- tagging characters
+ .set2(0xE0001).set2(0xE0020,0xE007F)
+
+ // unassigned -- merges in the whole UNASSIGNED set declared above
+ .set2(UNASSIGNED);
+
+ /**
+  * Code points tested by NameprepCodepointIterator.israndalcat(); a match
+  * sets that iterator's "hasrandalcat" flag, which takes part in its
+  * "Bidi Exception" checks.
+  * NOTE(review): the ranges appear to mirror RFC 3454 table D.1 (characters
+  * with bidirectional property "R" or "AL") -- confirm before editing.
+  * Two-argument set2 calls are presumably inclusive ranges; verify against
+  * ChainableBitSet.
+  */
+ public static final BitSet RandAL = new ChainableBitSet()
+ .set2('\u05BE').set2('\u05C0').set2('\u05C3')
+ .set2('\u05D0','\u05EA').set2('\u05F0','\u05F4')
+ .set2('\u061B').set2('\u061F').set2('\u0621','\u063A')
+ .set2('\u0640','\u064A').set2('\u066D','\u066F').set2('\u0671','\u06D5')
+ .set2('\u06DD').set2('\u06E5','\u06E6').set2('\u06FA','\u06FE')
+ .set2('\u0700','\u070D').set2('\u0710').set2('\u0712','\u072C')
+ .set2('\u0780','\u07A5').set2('\u07B1').set2('\u200F')
+ .set2('\uFB1D').set2('\uFB1F','\uFB28').set2('\uFB2A','\uFB36')
+ .set2('\uFB38','\uFB3C').set2('\uFB3E').set2('\uFB40','\uFB41')
+ .set2('\uFB43','\uFB44').set2('\uFB46','\uFBB1').set2('\uFBD3','\uFD3D')
+ .set2('\uFD50','\uFD8F').set2('\uFD92','\uFDC7').set2('\uFDF0','\uFDFC')
+ .set2('\uFE70','\uFE74').set2('\uFE76','\uFEFC');
+ /**
+  * Code points tested by NameprepCodepointIterator.islcat(); a match sets
+  * that iterator's "haslcat" flag, and the iterator throws its
+  * "Bidi Exception" once both "haslcat" and "hasrandalcat" are set.
+  * NOTE(review): the ranges appear to mirror RFC 3454 table D.2 (characters
+  * with bidirectional property "L") -- confirm before editing.
+  * Two-argument set2 calls are presumably inclusive ranges; verify against
+  * ChainableBitSet. Entries written as 0x... literals lie outside the BMP.
+  */
+ public static final BitSet LCat = new ChainableBitSet()
+ .set2('\u0041','\u005A')
+ .set2('\u0061','\u007A')
+ .set2('\u00AA')
+ .set2('\u00B5')
+ .set2('\u00BA')
+ .set2('\u00C0','\u00D6')
+ .set2('\u00D8','\u00F6')
+ .set2('\u00F8','\u0220')
+ .set2('\u0222','\u0233')
+ .set2('\u0250','\u02AD')
+ .set2('\u02B0','\u02B8')
+ .set2('\u02BB','\u02C1')
+ .set2('\u02D0','\u02D1')
+ .set2('\u02E0','\u02E4')
+ .set2('\u02EE')
+ .set2('\u037A')
+ .set2('\u0386')
+ .set2('\u0388','\u038A')
+ .set2('\u038C')
+ .set2('\u038E','\u03A1')
+ .set2('\u03A3','\u03CE')
+ .set2('\u03D0','\u03F5')
+ .set2('\u0400','\u0482')
+ .set2('\u048A','\u04CE')
+ .set2('\u04D0','\u04F5')
+ .set2('\u04F8','\u04F9')
+ .set2('\u0500','\u050F')
+ .set2('\u0531','\u0556')
+ .set2('\u0559','\u055F')
+ .set2('\u0561','\u0587')
+ .set2('\u0589')
+ .set2('\u0903')
+ .set2('\u0905','\u0939')
+ .set2('\u093D','\u0940')
+ .set2('\u0949','\u094C')
+ .set2('\u0950')
+ .set2('\u0958','\u0961')
+ .set2('\u0964','\u0970')
+ .set2('\u0982','\u0983')
+ .set2('\u0985','\u098C')
+ .set2('\u098F','\u0990')
+ .set2('\u0993','\u09A8')
+ .set2('\u09AA','\u09B0')
+ .set2('\u09B2')
+ .set2('\u09B6','\u09B9')
+ .set2('\u09BE','\u09C0')
+ .set2('\u09C7','\u09C8')
+ .set2('\u09CB','\u09CC')
+ .set2('\u09D7')
+ .set2('\u09DC','\u09DD')
+ .set2('\u09DF','\u09E1')
+ .set2('\u09E6','\u09F1')
+ .set2('\u09F4','\u09FA')
+ .set2('\u0A05','\u0A0A')
+ .set2('\u0A0F','\u0A10')
+ .set2('\u0A13','\u0A28')
+ .set2('\u0A2A','\u0A30')
+ .set2('\u0A32','\u0A33')
+ .set2('\u0A35','\u0A36')
+ .set2('\u0A38','\u0A39')
+ .set2('\u0A3E','\u0A40')
+ .set2('\u0A59','\u0A5C')
+ .set2('\u0A5E')
+ .set2('\u0A66','\u0A6F')
+ .set2('\u0A72','\u0A74')
+ .set2('\u0A83')
+ .set2('\u0A85','\u0A8B')
+ .set2('\u0A8D')
+ .set2('\u0A8F','\u0A91')
+ .set2('\u0A93','\u0AA8')
+ .set2('\u0AAA','\u0AB0')
+ .set2('\u0AB2','\u0AB3')
+ .set2('\u0AB5','\u0AB9')
+ .set2('\u0ABD','\u0AC0')
+ .set2('\u0AC9')
+ .set2('\u0ACB','\u0ACC')
+ .set2('\u0AD0')
+ .set2('\u0AE0')
+ .set2('\u0AE6','\u0AEF')
+ .set2('\u0B02','\u0B03')
+ .set2('\u0B05','\u0B0C')
+ .set2('\u0B0F','\u0B10')
+ .set2('\u0B13','\u0B28')
+ .set2('\u0B2A','\u0B30')
+ .set2('\u0B32','\u0B33')
+ .set2('\u0B36','\u0B39')
+ .set2('\u0B3D','\u0B3E')
+ .set2('\u0B40')
+ .set2('\u0B47','\u0B48')
+ .set2('\u0B4B','\u0B4C')
+ .set2('\u0B57')
+ .set2('\u0B5C','\u0B5D')
+ .set2('\u0B5F','\u0B61')
+ .set2('\u0B66','\u0B70')
+ .set2('\u0B83')
+ .set2('\u0B85','\u0B8A')
+ .set2('\u0B8E','\u0B90')
+ .set2('\u0B92','\u0B95')
+ .set2('\u0B99','\u0B9A')
+ .set2('\u0B9C')
+ .set2('\u0B9E','\u0B9F')
+ .set2('\u0BA3','\u0BA4')
+ .set2('\u0BA8','\u0BAA')
+ .set2('\u0BAE','\u0BB5')
+ .set2('\u0BB7','\u0BB9')
+ .set2('\u0BBE','\u0BBF')
+ .set2('\u0BC1','\u0BC2')
+ .set2('\u0BC6','\u0BC8')
+ .set2('\u0BCA','\u0BCC')
+ .set2('\u0BD7')
+ .set2('\u0BE7','\u0BF2')
+ .set2('\u0C01','\u0C03')
+ .set2('\u0C05','\u0C0C')
+ .set2('\u0C0E','\u0C10')
+ .set2('\u0C12','\u0C28')
+ .set2('\u0C2A','\u0C33')
+ .set2('\u0C35','\u0C39')
+ .set2('\u0C41','\u0C44')
+ .set2('\u0C60','\u0C61')
+ .set2('\u0C66','\u0C6F')
+ .set2('\u0C82','\u0C83')
+ .set2('\u0C85','\u0C8C')
+ .set2('\u0C8E','\u0C90')
+ .set2('\u0C92','\u0CA8')
+ .set2('\u0CAA','\u0CB3')
+ .set2('\u0CB5','\u0CB9')
+ .set2('\u0CBE')
+ .set2('\u0CC0','\u0CC4')
+ .set2('\u0CC7','\u0CC8')
+ .set2('\u0CCA','\u0CCB')
+ .set2('\u0CD5','\u0CD6')
+ .set2('\u0CDE')
+ .set2('\u0CE0','\u0CE1')
+ .set2('\u0CE6','\u0CEF')
+ .set2('\u0D02','\u0D03')
+ .set2('\u0D05','\u0D0C')
+ .set2('\u0D0E','\u0D10')
+ .set2('\u0D12','\u0D28')
+ .set2('\u0D2A','\u0D39')
+ .set2('\u0D3E','\u0D40')
+ .set2('\u0D46','\u0D48')
+ .set2('\u0D4A','\u0D4C')
+ .set2('\u0D57')
+ .set2('\u0D60','\u0D61')
+ .set2('\u0D66','\u0D6F')
+ .set2('\u0D82','\u0D83')
+ .set2('\u0D85','\u0D96')
+ .set2('\u0D9A','\u0DB1')
+ .set2('\u0DB3','\u0DBB')
+ .set2('\u0DBD')
+ .set2('\u0DC0','\u0DC6')
+ .set2('\u0DCF','\u0DD1')
+ .set2('\u0DD8','\u0DDF')
+ .set2('\u0DF2','\u0DF4')
+ .set2('\u0E01','\u0E30')
+ .set2('\u0E32','\u0E33')
+ .set2('\u0E40','\u0E46')
+ .set2('\u0E4F','\u0E5B')
+ .set2('\u0E81','\u0E82')
+ .set2('\u0E84')
+ .set2('\u0E87','\u0E88')
+ .set2('\u0E8A')
+ .set2('\u0E8D')
+ .set2('\u0E94','\u0E97')
+ .set2('\u0E99','\u0E9F')
+ .set2('\u0EA1','\u0EA3')
+ .set2('\u0EA5')
+ .set2('\u0EA7')
+ .set2('\u0EAA','\u0EAB')
+ .set2('\u0EAD','\u0EB0')
+ .set2('\u0EB2','\u0EB3')
+ .set2('\u0EBD')
+ .set2('\u0EC0','\u0EC4')
+ .set2('\u0EC6')
+ .set2('\u0ED0','\u0ED9')
+ .set2('\u0EDC','\u0EDD')
+ .set2('\u0F00','\u0F17')
+ .set2('\u0F1A','\u0F34')
+ .set2('\u0F36')
+ .set2('\u0F38')
+ .set2('\u0F3E','\u0F47')
+ .set2('\u0F49','\u0F6A')
+ .set2('\u0F7F')
+ .set2('\u0F85')
+ .set2('\u0F88','\u0F8B')
+ .set2('\u0FBE','\u0FC5')
+ .set2('\u0FC7','\u0FCC')
+ .set2('\u0FCF')
+ .set2('\u1000','\u1021')
+ .set2('\u1023','\u1027')
+ .set2('\u1029','\u102A')
+ .set2('\u102C')
+ .set2('\u1031')
+ .set2('\u1038')
+ .set2('\u1040','\u1057')
+ .set2('\u10A0','\u10C5')
+ .set2('\u10D0','\u10F8')
+ .set2('\u10FB')
+ .set2('\u1100','\u1159')
+ .set2('\u115F','\u11A2')
+ .set2('\u11A8','\u11F9')
+ .set2('\u1200','\u1206')
+ .set2('\u1208','\u1246')
+ .set2('\u1248')
+ .set2('\u124A','\u124D')
+ .set2('\u1250','\u1256')
+ .set2('\u1258')
+ .set2('\u125A','\u125D')
+ .set2('\u1260','\u1286')
+ .set2('\u1288')
+ .set2('\u128A','\u128D')
+ .set2('\u1290','\u12AE')
+ .set2('\u12B0')
+ .set2('\u12B2','\u12B5')
+ .set2('\u12B8','\u12BE')
+ .set2('\u12C0')
+ .set2('\u12C2','\u12C5')
+ .set2('\u12C8','\u12CE')
+ .set2('\u12D0','\u12D6')
+ .set2('\u12D8','\u12EE')
+ .set2('\u12F0','\u130E')
+ .set2('\u1310')
+ .set2('\u1312','\u1315')
+ .set2('\u1318','\u131E')
+ .set2('\u1320','\u1346')
+ .set2('\u1348','\u135A')
+ .set2('\u1361','\u137C')
+ .set2('\u13A0','\u13F4')
+ .set2('\u1401','\u1676')
+ .set2('\u1681','\u169A')
+ .set2('\u16A0','\u16F0')
+ .set2('\u1700','\u170C')
+ .set2('\u170E','\u1711')
+ .set2('\u1720','\u1731')
+ .set2('\u1735','\u1736')
+ .set2('\u1740','\u1751')
+ .set2('\u1760','\u176C')
+ .set2('\u176E','\u1770')
+ .set2('\u1780','\u17B6')
+ .set2('\u17BE','\u17C5')
+ .set2('\u17C7','\u17C8')
+ .set2('\u17D4','\u17DA')
+ .set2('\u17DC')
+ .set2('\u17E0','\u17E9')
+ .set2('\u1810','\u1819')
+ .set2('\u1820','\u1877')
+ .set2('\u1880','\u18A8')
+ .set2('\u1E00','\u1E9B')
+ .set2('\u1EA0','\u1EF9')
+ .set2('\u1F00','\u1F15')
+ .set2('\u1F18','\u1F1D')
+ .set2('\u1F20','\u1F45')
+ .set2('\u1F48','\u1F4D')
+ .set2('\u1F50','\u1F57')
+ .set2('\u1F59')
+ .set2('\u1F5B')
+ .set2('\u1F5D')
+ .set2('\u1F5F','\u1F7D')
+ .set2('\u1F80','\u1FB4')
+ .set2('\u1FB6','\u1FBC')
+ .set2('\u1FBE')
+ .set2('\u1FC2','\u1FC4')
+ .set2('\u1FC6','\u1FCC')
+ .set2('\u1FD0','\u1FD3')
+ .set2('\u1FD6','\u1FDB')
+ .set2('\u1FE0','\u1FEC')
+ .set2('\u1FF2','\u1FF4')
+ .set2('\u1FF6','\u1FFC')
+ .set2('\u200E')
+ .set2('\u2071')
+ .set2('\u207F')
+ .set2('\u2102')
+ .set2('\u2107')
+ .set2('\u210A','\u2113')
+ .set2('\u2115')
+ .set2('\u2119','\u211D')
+ .set2('\u2124')
+ .set2('\u2126')
+ .set2('\u2128')
+ .set2('\u212A','\u212D')
+ .set2('\u212F','\u2131')
+ .set2('\u2133','\u2139')
+ .set2('\u213D','\u213F')
+ .set2('\u2145','\u2149')
+ .set2('\u2160','\u2183')
+ .set2('\u2336','\u237A')
+ .set2('\u2395')
+ .set2('\u249C','\u24E9')
+ .set2('\u3005','\u3007')
+ .set2('\u3021','\u3029')
+ .set2('\u3031','\u3035')
+ .set2('\u3038','\u303C')
+ .set2('\u3041','\u3096')
+ .set2('\u309D','\u309F')
+ .set2('\u30A1','\u30FA')
+ .set2('\u30FC','\u30FF')
+ .set2('\u3105','\u312C')
+ .set2('\u3131','\u318E')
+ .set2('\u3190','\u31B7')
+ .set2('\u31F0','\u321C')
+ .set2('\u3220','\u3243')
+ .set2('\u3260','\u327B')
+ .set2('\u327F','\u32B0')
+ .set2('\u32C0','\u32CB')
+ .set2('\u32D0','\u32FE')
+ .set2('\u3300','\u3376')
+ .set2('\u337B','\u33DD')
+ .set2('\u33E0','\u33FE')
+ .set2('\u3400','\u4DB5')
+ .set2('\u4E00','\u9FA5')
+ .set2('\uA000','\uA48C')
+ .set2('\uAC00','\uD7A3')
+ .set2('\uD800','\uFA2D')
+ .set2('\uFA30','\uFA6A')
+ .set2('\uFB00','\uFB06')
+ .set2('\uFB13','\uFB17')
+ .set2('\uFF21','\uFF3A')
+ .set2('\uFF41','\uFF5A')
+ .set2('\uFF66','\uFFBE')
+ .set2('\uFFC2','\uFFC7')
+ .set2('\uFFCA','\uFFCF')
+ .set2('\uFFD2','\uFFD7')
+ .set2('\uFFDA','\uFFDC')
+ .set2(0x10300,0x1031E)
+ .set2(0x10320,0x10323)
+ .set2(0x10330,0x1034A)
+ .set2(0x10400,0x10425)
+ .set2(0x10428,0x1044D)
+ .set2(0x1D000,0x1D0F5)
+ .set2(0x1D100,0x1D126)
+ .set2(0x1D12A,0x1D166)
+ .set2(0x1D16A,0x1D172)
+ .set2(0x1D183,0x1D184)
+ .set2(0x1D18C,0x1D1A9)
+ .set2(0x1D1AE,0x1D1DD)
+ .set2(0x1D400,0x1D454)
+ .set2(0x1D456,0x1D49C)
+ .set2(0x1D49E,0x1D49F)
+ .set2(0x1D4A2)
+ .set2(0x1D4A5,0x1D4A6)
+ .set2(0x1D4A9,0x1D4AC)
+ .set2(0x1D4AE,0x1D4B9)
+ .set2(0x1D4BB)
+ .set2(0x1D4BD,0x1D4C0)
+ .set2(0x1D4C2,0x1D4C3)
+ .set2(0x1D4C5,0x1D505)
+ .set2(0x1D507,0x1D50A)
+ .set2(0x1D50D,0x1D514)
+ .set2(0x1D516,0x1D51C)
+ .set2(0x1D51E,0x1D539)
+ .set2(0x1D53B,0x1D53E)
+ .set2(0x1D540,0x1D544)
+ .set2(0x1D546)
+ .set2(0x1D54A,0x1D550)
+ .set2(0x1D552,0x1D6A3)
+ .set2(0x1D6A8,0x1D7C9)
+ .set2(0x20000,0x2A6D6)
+ .set2(0x2F800,0x2FA1D)
+ .set2(0xF0000,0xFFFFD)
+ .set2(0x100000,0x10FFFD);
+}
Added: incubator/abdera/java/trunk/core/src/main/java/org/apache/abdera/util/iri/NameprepCodepointIterator.java
URL: http://svn.apache.org/viewvc/incubator/abdera/java/trunk/core/src/main/java/org/apache/abdera/util/iri/NameprepCodepointIterator.java?view=auto&rev=448818
==============================================================================
--- incubator/abdera/java/trunk/core/src/main/java/org/apache/abdera/util/iri/NameprepCodepointIterator.java (added)
+++ incubator/abdera/java/trunk/core/src/main/java/org/apache/abdera/util/iri/NameprepCodepointIterator.java Thu Sep 21 22:40:01 2006
@@ -0,0 +1,114 @@
+/*
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements. The ASF licenses this file to You
+* under the Apache License, Version 2.0 (the "License"); you may not
+* use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License. For additional information regarding
+* copyright in this work, please see the NOTICE file in the top level
+* directory of this distribution.
+*/
+package org.apache.abdera.util.iri;
+
+import org.apache.abdera.util.ChainableBitSet;
+import org.apache.abdera.util.io.CodepointIterator;
+import org.apache.abdera.util.io.FilterCodepointIterator;
+import org.apache.abdera.util.io.InvalidCharacterException;
+
+
+/**
+ * CodepointIterator implementation that implements the bulk of the
+ * Nameprep details
+ */
+class NameprepCodepointIterator extends FilterCodepointIterator {
+
+ private int[] rep = null;
+ private int reppos = 0;
+ private boolean haslcat = false;
+ private boolean hasrandalcat = false;
+ private boolean firstisrandalcat = false;
+
+ private final ChainableBitSet PROHIBITED;
+
+ @Override
+ public boolean hasNext() {
+ return rep != null || super.hasNext();
+ }
+
+ protected NameprepCodepointIterator(
+ CodepointIterator internal) {
+ this(internal,false);
+ }
+
+ private boolean islcat(int r) {
+ return (Nameprep.LCat.get(r));
+ }
+
+ private boolean israndalcat(int r) {
+ return (Nameprep.RandAL.get(r));
+ }
+
+ protected NameprepCodepointIterator(
+ CodepointIterator internal, boolean allowunassigned) {
+ super(internal);
+ PROHIBITED = (!allowunassigned) ?
+ Nameprep.PROHIBITED :
+ ((ChainableBitSet)Nameprep.PROHIBITED.clone()).set2(
+ Nameprep.UNASSIGNED,false);
+ }
+
+ @Override
+ public int next() throws InvalidCharacterException {
+ int r = -1;
+ if (this.rep == null) {
+ r = super.next();
+ if (r != -1) {
+ if (islcat(r)) haslcat = true;
+ if (israndalcat(r)) {
+ hasrandalcat = true;
+ if (position() == 1) firstisrandalcat = true;
+ }
+ if (haslcat && hasrandalcat) throw new RuntimeException("Bidi Exception");
+
+ while(r != -1 && Nameprep.B1.get(r)) {
+ r = super.next();
+ }
+
+ if (r != -1) {
+ if (PROHIBITED.get(r)) throw new InvalidCharacterException(r);
+ int[] rep = Nameprep.B2(r);
+ if (rep != null) {
+ if (rep.length > 1) {
+ this.rep = rep;
+ reppos = 0;
+ }
+ r = rep[0];
+ }
+ }
+ }
+ } else {
+ r = rep[++reppos];
+ if (reppos+1 >= rep.length) rep = null;
+ }
+ if ((r == -1 || !hasNext()) &&
+ hasrandalcat &&
+ (!firstisrandalcat ||
+ !israndalcat((r ==-1)?peek(position()):r))) {
+ throw new RuntimeException("Bidi Exception");
+ }
+ return r;
+ }
+
+ @Override
+ public char[] nextChars() throws InvalidCharacterException {
+ return super.nextChars();
+ }
+
+}
Added: incubator/abdera/java/trunk/core/src/main/java/org/apache/abdera/util/iri/Punycode.java
URL: http://svn.apache.org/viewvc/incubator/abdera/java/trunk/core/src/main/java/org/apache/abdera/util/iri/Punycode.java?view=auto&rev=448818
==============================================================================
--- incubator/abdera/java/trunk/core/src/main/java/org/apache/abdera/util/iri/Punycode.java (added)
+++ incubator/abdera/java/trunk/core/src/main/java/org/apache/abdera/util/iri/Punycode.java Thu Sep 21 22:40:01 2006
@@ -0,0 +1,210 @@
+/*
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements. The ASF licenses this file to You
+* under the Apache License, Version 2.0 (the "License"); you may not
+* use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License. For additional information regarding
+* copyright in this work, please see the NOTICE file in the top level
+* directory of this distribution.
+*/
+package org.apache.abdera.util.iri;
+
+import java.io.IOException;
+
+import org.apache.abdera.util.io.CharUtils;
+import org.apache.abdera.util.io.CodepointIterator;
+
+
+/**
+ * Implementation of the Punycode encoding scheme used by IDNA.
+ *
+ * The algorithm works on Unicode code points, not UTF-16 code units, so
+ * supplementary characters (surrogate pairs) are handled as single symbols.
+ * Optional case flag arrays hold one entry per code point.
+ */
+public final class Punycode {
+
+  static final int base = 0x24; // 36
+  static final int tmin = 0x01; // 1
+  static final int tmax = 0x1A; // 26
+  static final int skew = 0x26; // 38
+  static final int damp = 0x02BC; // 700
+  static final int initial_bias = 0x48; // 72
+  static final int initial_n = 0x80; // first non-basic code point
+  static final int delimiter = 0x2D; // '-'
+
+  Punycode() {}
+
+  /** True for code points that are copied literally (below 0x80). */
+  private static boolean basic(int cp) {
+    return cp < 0x80;
+  }
+
+  private static boolean delim(int cp) {
+    return cp == delimiter;
+  }
+
+  /** True when the basic code point is an upper case ASCII letter. */
+  private static boolean flagged(int bcp) {
+    // Explicit bounds: the C idiom "(bcp - 65) < 26" relies on unsigned
+    // arithmetic and is true for every value below 'A' with signed ints.
+    return bcp >= 'A' && bcp <= 'Z';
+  }
+
+  /** Returns the digit value (0..35) of a basic code point, or base if it is not a digit. */
+  private static int decode_digit(int cp) {
+    if (cp >= '0' && cp <= '9') return cp - 22;   // '0'..'9' -> 26..35
+    if (cp >= 'A' && cp <= 'Z') return cp - 'A';  // 'A'..'Z' -> 0..25
+    if (cp >= 'a' && cp <= 'z') return cp - 'a';  // 'a'..'z' -> 0..25
+    return base;
+  }
+
+  /** Returns the basic code point for digit d (0..35); letters are upper case when upper is set. */
+  private static int encode_digit(int d, boolean upper) {
+    return d + 22 + (d < 26 ? 75 : 0) - (upper ? 32 : 0);
+  }
+
+  /** Forces an ASCII letter to upper case when upper is set, to lower case otherwise. */
+  private static int encode_basic(int bcp, boolean upper) {
+    if (bcp >= 'a' && bcp <= 'z') bcp -= 32;
+    if (!upper && bcp >= 'A' && bcp <= 'Z') bcp += 32;
+    return bcp;
+  }
+
+  /** Threshold for the digit at weight position k under the current bias. */
+  private static int threshold(int k, int bias) {
+    return (k <= bias) ? tmin : (k >= bias + tmax) ? tmax : k - bias;
+  }
+
+  /** Bias adaptation function. */
+  private static int adapt(int delta, int numpoints, boolean firsttime) {
+    int k;
+    delta = (firsttime) ? delta / damp : delta >> 1;
+    delta += delta / numpoints;
+    for (k = 0; delta > ((base - tmin) * tmax) / 2; k += base) {
+      delta /= base - tmin;
+    }
+    return k + (base - tmin + 1) * delta / (delta + skew);
+  }
+
+  /** Converts UTF-16 code units to code points, rejecting unpaired surrogates. */
+  private static int[] codepoints(char[] chars) throws IOException {
+    int[] cps = new int[chars.length];
+    int count = 0;
+    for (int i = 0; i < chars.length; ) {
+      int cp = Character.codePointAt(chars, i);
+      if (cp >= 0xD800 && cp <= 0xDFFF) throw new IOException("Bad Input");
+      cps[count++] = cp;
+      i += Character.charCount(cp);
+    }
+    int[] result = new int[count];
+    System.arraycopy(cps, 0, result, 0, count);
+    return result;
+  }
+
+  /**
+   * Encodes a sequence of UTF-16 code units as Punycode.
+   *
+   * @param chars the text to encode
+   * @param case_flags optional; one entry per input code point. A set flag
+   *        requests upper case for that code point's output letter. May be
+   *        null, in which case basic code points are copied unchanged.
+   * @return the encoded text (without any "xn--" prefix)
+   * @throws IOException on arithmetic overflow or an unpaired surrogate
+   */
+  public static StringBuffer encode(
+    char[] chars,
+    boolean[] case_flags)
+      throws IOException {
+    int[] input = codepoints(chars);
+    StringBuffer buf = new StringBuffer();
+    int n = initial_n;
+    int delta = 0;
+    int bias = initial_bias;
+
+    // Basic code points are emitted first, in order.
+    for (int j = 0; j < input.length; j++) {
+      if (basic(input[j])) {
+        buf.append((char)((case_flags != null) ?
+          encode_basic(input[j], case_flags[j]) : input[j]));
+      }
+    }
+    int b = buf.length();
+    int h = b;  // number of code points handled so far
+    if (b > 0) buf.append((char)delimiter);
+
+    // Compared against the code point count, not chars.length, so inputs
+    // containing surrogate pairs terminate correctly.
+    while (h < input.length) {
+      int m = Integer.MAX_VALUE;
+      for (int j = 0; j < input.length; j++) {
+        if (input[j] >= n && input[j] < m) m = input[j];
+      }
+      if (m - n > (Integer.MAX_VALUE - delta) / (h + 1))
+        throw new IOException("Overflow");
+      delta += (m - n) * (h + 1);
+      n = m;
+      for (int j = 0; j < input.length; j++) {
+        int cp = input[j];
+        if (cp < n) {
+          // Signed ints wrap to a negative value, never to 0.
+          if (delta == Integer.MAX_VALUE) throw new IOException("Overflow");
+          ++delta;
+        }
+        if (cp == n) {
+          // Emit delta as a generalized variable-length integer.
+          int q = delta;
+          for (int k = base; ; k += base) {
+            int t = threshold(k, bias);
+            if (q < t) break;
+            buf.append((char)encode_digit(t + (q - t) % (base - t), false));
+            q = (q - t) / (base - t);
+          }
+          buf.append((char)encode_digit(
+            q, (case_flags != null) ? case_flags[j] : false));
+          bias = adapt(delta, h + 1, h == b);
+          delta = 0;
+          ++h;
+        }
+      }
+      ++delta; ++n;
+    }
+    return buf;
+  }
+
+  /**
+   * Encodes a string as Punycode.
+   *
+   * @return the encoded text, or null if s is null or cannot be encoded
+   */
+  public static String encode(String s) {
+    try {
+      if (s == null) return null;
+      return encode(s.toCharArray(),null).toString();
+    } catch (Exception e) {
+      // Deliberate best effort: failures are reported as a null result.
+      e.printStackTrace();
+      return null;
+    }
+  }
+
+  /**
+   * Decodes a Punycode string.
+   *
+   * @return the decoded text, or null if s is null or is not valid Punycode
+   */
+  public static String decode(String s) {
+    try {
+      if (s == null) return null;
+      return decode(s.toCharArray(),null).toString();
+    } catch (Exception e) {
+      // Deliberate best effort: failures are reported as a null result.
+      e.printStackTrace();
+      return null;
+    }
+  }
+
+  /**
+   * Decodes Punycode text.
+   *
+   * @param chars the encoded text (without any "xn--" prefix)
+   * @param case_flags optional output; when not null it must hold at least
+   *        as many entries as the result has code points, and receives one
+   *        flag per decoded code point (set when the corresponding input
+   *        letter was upper case)
+   * @return the decoded text
+   * @throws IOException if the input is malformed or truncated, decodes to
+   *         an invalid code point, or overflows
+   */
+  public static StringBuffer decode(
+    char[] chars,
+    boolean[] case_flags)
+      throws IOException {
+    StringBuffer buf = new StringBuffer();
+    int n = initial_n;
+    int i = 0;
+    int bias = initial_bias;
+
+    // Everything before the last delimiter is literal basic text.
+    int b = 0;
+    for (int j = 0; j < chars.length; ++j)
+      if (delim(chars[j])) b = j;
+    for (int j = 0; j < b; ++j) {
+      if (!basic(chars[j])) throw new IOException("Bad Input");
+      if (case_flags != null) case_flags[j] = flagged(chars[j]);
+      buf.append(chars[j]);
+    }
+
+    int out = b;  // number of code points decoded so far
+    for (int in = (b > 0) ? b + 1 : 0; in < chars.length; ++out) {
+      int oldi = i;
+      int w = 1;
+      for (int k = base; ; k += base) {
+        if (in >= chars.length) throw new IOException("Bad input");
+        int digit = decode_digit(chars[in++]);
+        if (digit >= base) throw new IOException("Bad input");
+        if (digit > (Integer.MAX_VALUE - i) / w) throw new IOException("Overflow");
+        i += digit * w;
+        int t = threshold(k, bias);
+        if (digit < t) break;
+        if (w > Integer.MAX_VALUE / (base - t)) throw new IOException("Overflow");
+        w *= (base - t);
+      }
+      bias = adapt(i - oldi, out + 1, oldi == 0);
+      if (i / (out + 1) > Integer.MAX_VALUE - n) throw new IOException("Overflow");
+      n += i / (out + 1);
+      i %= (out + 1);
+      if (n > 0x10FFFF || (n >= 0xD800 && n <= 0xDFFF))
+        throw new IOException("Bad input");
+      if (case_flags != null) {
+        // Open a slot at i for the new code point's flag.
+        System.arraycopy(case_flags, i, case_flags, i + 1, out - i);
+        case_flags[i] = flagged(chars[in - 1]);
+      }
+      // i counts code points; translate it to a UTF-16 index for the buffer.
+      buf.insert(buf.offsetByCodePoints(0, i), Character.toChars(n));
+      i++;
+    }
+    return buf;
+  }
+
+}
Added: incubator/abdera/java/trunk/core/src/main/java/org/apache/abdera/util/iri/Scheme.java
URL: http://svn.apache.org/viewvc/incubator/abdera/java/trunk/core/src/main/java/org/apache/abdera/util/iri/Scheme.java?view=auto&rev=448818
==============================================================================
--- incubator/abdera/java/trunk/core/src/main/java/org/apache/abdera/util/iri/Scheme.java (added)
+++ incubator/abdera/java/trunk/core/src/main/java/org/apache/abdera/util/iri/Scheme.java Thu Sep 21 22:40:01 2006
@@ -0,0 +1,32 @@
+/*
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements. The ASF licenses this file to You
+* under the Apache License, Version 2.0 (the "License"); you may not
+* use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License. For additional information regarding
+* copyright in this work, please see the NOTICE file in the top level
+* directory of this distribution.
+*/
+package org.apache.abdera.util.iri;
+
+import org.apache.abdera.util.io.CodepointIterator;
+
+/**
+ * Interface implemented by custom IRI scheme parsers. Instances are held by
+ * SchemeRegistry, which files each one under the value of getName().
+ */
+public interface Scheme {
+ /** Returns the scheme name that SchemeRegistry uses as this parser's key. */
+ String getName();
+ /**
+  * Compares two IRIs according to this scheme.
+  * NOTE(review): presumably returns true when the scheme considers both
+  * IRIs to identify the same resource -- confirm against the implementations.
+  */
+ boolean equivalent(IRI iri1, IRI iri2);
+ /**
+  * Parses scheme-specific content from the reader into the builder.
+  * NOTE(review): the meaning of the boolean result is not visible here;
+  * presumably it reports whether this scheme handled the input -- confirm.
+  */
+ boolean parse(CodepointIterator reader, Builder builder);
+}
Added: incubator/abdera/java/trunk/core/src/main/java/org/apache/abdera/util/iri/SchemeRegistry.java
URL: http://svn.apache.org/viewvc/incubator/abdera/java/trunk/core/src/main/java/org/apache/abdera/util/iri/SchemeRegistry.java?view=auto&rev=448818
==============================================================================
--- incubator/abdera/java/trunk/core/src/main/java/org/apache/abdera/util/iri/SchemeRegistry.java (added)
+++ incubator/abdera/java/trunk/core/src/main/java/org/apache/abdera/util/iri/SchemeRegistry.java Thu Sep 21 22:40:01 2006
@@ -0,0 +1,75 @@
+/*
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements. The ASF licenses this file to You
+* under the Apache License, Version 2.0 (the "License"); you may not
+* use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License. For additional information regarding
+* copyright in this work, please see the NOTICE file in the top level
+* directory of this distribution.
+*/
+package org.apache.abdera.util.iri;
+
+import java.util.HashMap;
+import java.util.Map;
+
+/**
+ * Static registry of custom IRI schemes.
+ *
+ * Scheme names are case-insensitive: they are folded to lower case for both
+ * registration and lookup. All access to the underlying map is synchronized
+ * on the registry instance.
+ */
+public final class SchemeRegistry {
+
+  private static SchemeRegistry registry;
+
+  /** Returns the shared registry, creating it on first use. */
+  public static synchronized SchemeRegistry getInstance() {
+    if (registry == null) registry = new SchemeRegistry();
+    return registry;
+  }
+
+  private final Map<String,Scheme> schemes;
+
+  SchemeRegistry() {
+    schemes = new HashMap<String,Scheme>();
+    schemes.put(key(HttpScheme.NAME), new HttpScheme());
+    schemes.put(key(HttpsScheme.NAME), new HttpsScheme());
+  }
+
+  /**
+   * Folds a scheme name to the form used as the map key. A fixed locale is
+   * used so that the result does not depend on the default locale (for
+   * example the Turkish dotless i).
+   */
+  private static String key(String name) {
+    return name.toLowerCase(java.util.Locale.ENGLISH);
+  }
+
+  /**
+   * Loads, instantiates and registers the named Scheme implementation.
+   *
+   * @param schemeClass fully qualified name of a class implementing Scheme
+   * @return true if the scheme was added, false if its name was already taken
+   * @throws ClassCastException if the class does not implement Scheme
+   */
+  @SuppressWarnings("unchecked")
+  public synchronized boolean register(
+    String schemeClass)
+      throws ClassNotFoundException,
+             IllegalAccessException,
+             InstantiationException {
+    ClassLoader loader = Thread.currentThread().getContextClassLoader();
+    // The context class loader may be null; fall back to our own loader.
+    if (loader == null) loader = SchemeRegistry.class.getClassLoader();
+    Class<Scheme> klass = (Class<Scheme>) loader.loadClass(schemeClass);
+    return register(klass);
+  }
+
+  /**
+   * Instantiates the class through its no-argument constructor and registers it.
+   *
+   * @return true if the scheme was added, false if its name was already taken
+   */
+  public synchronized boolean register(
+    Class<Scheme> schemeClass)
+      throws IllegalAccessException,
+             InstantiationException {
+    Scheme scheme = schemeClass.newInstance();
+    return register(scheme);
+  }
+
+  /**
+   * Registers a scheme under its lower-cased name. An existing registration
+   * is never replaced.
+   *
+   * @return true if the scheme was added, false if its name was already taken
+   */
+  public synchronized boolean register(Scheme scheme) {
+    // Look up with the same folded key that is stored, so that "HTTP"
+    // cannot displace the entry registered as "http".
+    String name = key(scheme.getName());
+    if (schemes.containsKey(name)) return false;
+    schemes.put(name, scheme);
+    return true;
+  }
+
+  /**
+   * Looks up a scheme by name, ignoring case.
+   *
+   * @return the registered scheme, or null if there is none or the name is null
+   */
+  public synchronized Scheme getScheme(String scheme) {
+    if (scheme == null) return null;
+    return schemes.get(key(scheme));
+  }
+
+}
Added: incubator/abdera/java/trunk/core/src/main/java/org/apache/abdera/util/lang/InvalidLangTagSyntax.java
URL: http://svn.apache.org/viewvc/incubator/abdera/java/trunk/core/src/main/java/org/apache/abdera/util/lang/InvalidLangTagSyntax.java?view=auto&rev=448818
==============================================================================
--- incubator/abdera/java/trunk/core/src/main/java/org/apache/abdera/util/lang/InvalidLangTagSyntax.java (added)
+++ incubator/abdera/java/trunk/core/src/main/java/org/apache/abdera/util/lang/InvalidLangTagSyntax.java Thu Sep 21 22:40:01 2006
@@ -0,0 +1,40 @@
+/*
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements. The ASF licenses this file to You
+* under the Apache License, Version 2.0 (the "License"); you may not
+* use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License. For additional information regarding
+* copyright in this work, please see the NOTICE file in the top level
+* directory of this distribution.
+*/
+package org.apache.abdera.util.lang;
+
+/**
+ * Checked exception declared by the Lang constructors that accept language
+ * tags in string form.
+ */
+public class InvalidLangTagSyntax extends Exception {
+
+  private static final long serialVersionUID = -2653819135178550519L;
+
+  /** Creates an exception with neither a detail message nor a cause. */
+  public InvalidLangTagSyntax() {
+  }
+
+  /**
+   * @param message the detail message
+   */
+  public InvalidLangTagSyntax(String message) {
+    super(message);
+  }
+
+  /**
+   * @param cause the underlying failure; its description becomes the detail message
+   */
+  public InvalidLangTagSyntax(Throwable cause) {
+    super(cause);
+  }
+
+  /**
+   * @param message the detail message
+   * @param cause the underlying failure
+   */
+  public InvalidLangTagSyntax(String message, Throwable cause) {
+    super(message, cause);
+  }
+
+}
Added: incubator/abdera/java/trunk/core/src/main/java/org/apache/abdera/util/lang/Lang.java
URL: http://svn.apache.org/viewvc/incubator/abdera/java/trunk/core/src/main/java/org/apache/abdera/util/lang/Lang.java?view=auto&rev=448818
==============================================================================
--- incubator/abdera/java/trunk/core/src/main/java/org/apache/abdera/util/lang/Lang.java (added)
+++ incubator/abdera/java/trunk/core/src/main/java/org/apache/abdera/util/lang/Lang.java Thu Sep 21 22:40:01 2006
@@ -0,0 +1,185 @@
+/*
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements. The ASF licenses this file to You
+* under the Apache License, Version 2.0 (the "License"); you may not
+* use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License. For additional information regarding
+* copyright in this work, please see the NOTICE file in the top level
+* directory of this distribution.
+*/
+package org.apache.abdera.util.lang;
+
+import java.io.Serializable;
+import java.util.Iterator;
+import java.util.Locale;
+
+import org.apache.abdera.util.io.CharUtils;
+import org.apache.abdera.util.io.InvalidCharacterException;
+import org.apache.abdera.util.iri.Constants;
+
+
+/**
+ * A language tag in the style of RFC 3066: a primary subtag followed by
+ * zero or more subtags, written with hyphens between them (for example
+ * <code>en-US</code>).
+ *
+ * <p>Tags compare case-insensitively. Instances created through the public
+ * constructors keep a private copy of their subtags. The special instance
+ * {@link #ANY} represents the wildcard range <code>*</code>.</p>
+ */
+public class Lang
+  implements Iterable<String>,
+             Serializable,
+             Cloneable {
+
+  /** The wildcard language range ("*"); it has no associated Locale. */
+  public static final Lang ANY = new Lang();
+
+  private static final long serialVersionUID = -4620499451615533855L;
+  protected final String[] tags;
+  protected final Locale locale;
+
+  /** Builds the wildcard instance; only used for {@link #ANY}. */
+  private Lang() {
+    tags = new String[] {"*"};
+    locale = null;
+  }
+
+  /**
+   * Creates a language tag from a Locale. The subtags are taken from
+   * <code>locale.toString()</code>, splitting on both '_' and '-'.
+   * No syntax verification is applied to the result.
+   *
+   * @param locale the locale to represent; must not be null
+   */
+  public Lang(Locale locale) {
+    this.tags = locale.toString().replace('_', '-').split("-");
+    this.locale = locale;
+  }
+
+  /**
+   * Creates a language tag by splitting the given string on '-'.
+   *
+   * @param tag the hyphen separated tag, e.g. "en-US"
+   * @throws InvalidLangTagSyntax if any subtag is empty or contains a
+   *         character that is not allowed in its position
+   */
+  public Lang(String tag) throws InvalidLangTagSyntax {
+    this(parse(tag));
+  }
+
+  /**
+   * Creates a language tag from already separated subtags.
+   *
+   * @param tags the primary subtag followed by any further subtags
+   * @throws InvalidLangTagSyntax if no subtag is given, or any subtag is
+   *         null, empty or contains a character not allowed in its position
+   */
+  public Lang(String... tags) throws InvalidLangTagSyntax {
+    // Copy before verifying so later changes to the caller's array cannot
+    // alter (or invalidate) this instance.
+    String[] copy = tags.clone();
+    verify(copy);
+    this.tags = copy;
+    this.locale = initLocale();
+  }
+
+  /** Derives a Locale from at most the first three subtags. */
+  private Locale initLocale() {
+    switch (tags.length) {
+      case 0:  return null;
+      case 1:  return new Locale(tags[0]);
+      case 2:  return new Locale(tags[0], tags[1]);
+      default: return new Locale(tags[0], tags[1], tags[2]);
+    }
+  }
+
+  /**
+   * @return the primary (first) subtag
+   */
+  public String getPrimary() {
+    return tags[0];
+  }
+
+  /**
+   * Returns a subtag that follows the primary subtag.
+   *
+   * @param n zero-based index into the subtags after the primary one
+   * @return the requested subtag
+   * @throws ArrayIndexOutOfBoundsException if n is negative or not less
+   *         than {@link #getSubtagCount()}
+   */
+  public String getSubtag(int n) {
+    // Written as "n >= length - 1" so that n + 1 can never overflow.
+    if (n < 0 || n >= tags.length - 1) {
+      throw new ArrayIndexOutOfBoundsException(n);
+    }
+    return tags[n + 1];
+  }
+
+  /**
+   * @return the number of subtags, not counting the primary subtag
+   */
+  public int getSubtagCount() {
+    return tags.length - 1;
+  }
+
+  /**
+   * @return the Locale associated with this tag; null for {@link #ANY}
+   */
+  public Locale getLocale() {
+    return locale;
+  }
+
+  /**
+   * @return the subtags joined with '-'
+   */
+  @Override
+  public String toString() {
+    StringBuilder buf = new StringBuilder();
+    for (String s : tags) {
+      if (buf.length() > 0) {
+        buf.append('-');
+      }
+      buf.append(s);
+    }
+    return buf.toString();
+  }
+
+  /**
+   * Tests a language tag against a language range given as a string.
+   *
+   * @param lang the tag to test
+   * @param range the range; "*" matches everything
+   * @return true if the tag falls within the range
+   * @throws InvalidLangTagSyntax if the range is not "*" and is not a
+   *         syntactically valid tag
+   */
+  public static boolean matches(Lang lang, String range) throws InvalidLangTagSyntax {
+    if (range.equals("*")) {
+      return true;
+    }
+    return matches(lang, new Lang(range));
+  }
+
+  /**
+   * Tests a language tag against a language range. The tag matches when
+   * the range is the wildcard, when both are equal, or when the range's
+   * subtags are a proper, case-insensitive prefix of the tag's subtags.
+   *
+   * @param lang the tag to test
+   * @param range the range to test against
+   * @return true if the tag falls within the range
+   */
+  public static boolean matches(Lang lang, Lang range) {
+    if (range.isWildcard()) {
+      return true;
+    }
+    if (lang.equals(range)) {
+      return true;
+    }
+    // Not equal, so only a strictly longer tag can still match by prefix.
+    if (lang.tags.length <= range.tags.length) {
+      return false;
+    }
+    for (int n = 0; n < range.tags.length; n++) {
+      if (!lang.tags[n].equalsIgnoreCase(range.tags[n])) {
+        return false;
+      }
+    }
+    return true;
+  }
+
+  /**
+   * Instance form of {@link #matches(Lang, String)}.
+   */
+  public boolean matches(String range) throws InvalidLangTagSyntax {
+    return matches(this, range);
+  }
+
+  /**
+   * Instance form of {@link #matches(Lang, Lang)}.
+   */
+  public boolean matches(Lang range) {
+    return matches(this, range);
+  }
+
+  /** True when this instance consists of the single subtag "*". */
+  private boolean isWildcard() {
+    return tags.length == 1 && "*".equals(tags[0]);
+  }
+
+  /**
+   * Hash code consistent with {@link #equals(Object)}: it is computed
+   * from the subtags only, folding case the same way
+   * {@link String#equalsIgnoreCase(String)} does. The locale is left out
+   * on purpose, because two instances with equal subtags may carry
+   * different Locale objects.
+   */
+  @Override
+  public int hashCode() {
+    final int PRIME = 31;
+    int result = 1;
+    for (String tag : tags) {
+      int h = 0;
+      for (int i = 0; i < tag.length(); i++) {
+        char folded = Character.toLowerCase(Character.toUpperCase(tag.charAt(i)));
+        h = PRIME * h + folded;
+      }
+      result = PRIME * result + h;
+    }
+    return result;
+  }
+
+  /**
+   * Two language tags are equal when they are of the same class and have
+   * the same subtags, ignoring case.
+   *
+   * <p>For backwards compatibility a String argument is also accepted and
+   * is parsed as a tag ("*" stands for {@link #ANY}) before comparing.
+   * Note that this is one-directional: <code>"en".equals(lang)</code> is
+   * always false.</p>
+   */
+  @Override
+  public boolean equals(Object obj) {
+    if (this == obj) {
+      return true;
+    }
+    if (obj == null) {
+      return false;
+    }
+    if (obj instanceof String) {
+      String s = (String) obj;
+      if (s.equals("*")) {
+        obj = ANY;
+      } else {
+        try {
+          obj = new Lang(s);
+        } catch (Exception ignored) {
+          // equals() must never throw: a string that cannot be parsed as
+          // a tag simply is not equal to any Lang.
+          return false;
+        }
+      }
+    }
+    if (getClass() != obj.getClass()) {
+      return false;
+    }
+    final Lang other = (Lang) obj;
+    if (tags.length != other.tags.length) {
+      return false;
+    }
+    for (int n = 0; n < tags.length; n++) {
+      if (!tags[n].equalsIgnoreCase(other.tags[n])) {
+        return false;
+      }
+    }
+    return true;
+  }
+
+  /**
+   * Checks the subtags: there must be at least one, none may be null or
+   * empty, the primary subtag is checked against Constants.ALPHA and all
+   * others against Constants.ALPHANUM.
+   *
+   * @throws InvalidLangTagSyntax describing the first problem found
+   */
+  private static void verify(String[] tags) throws InvalidLangTagSyntax {
+    if (tags.length == 0) {
+      throw new InvalidLangTagSyntax("A language tag requires at least a primary subtag");
+    }
+    for (int n = 0; n < tags.length; n++) {
+      // Empty subtags arise from inputs such as "" or "en--us".
+      if (tags[n] == null || tags[n].length() == 0) {
+        throw new InvalidLangTagSyntax("Missing or empty subtag at position " + n);
+      }
+    }
+    try {
+      CharUtils.verify(tags[0], Constants.ALPHA);
+    } catch (InvalidCharacterException e) {
+      throw new InvalidLangTagSyntax(
+        "Invalid character in primary subtag '" + tags[0] + "'", e);
+    }
+    for (int n = 1; n < tags.length; n++) {
+      try {
+        CharUtils.verify(tags[n], Constants.ALPHANUM);
+      } catch (InvalidCharacterException e) {
+        throw new InvalidLangTagSyntax(
+          "Invalid character in subtag '" + tags[n] + "'", e);
+      }
+    }
+  }
+
+  /**
+   * Splits a tag on '-'. Verification is left to the constructor this
+   * result is handed to, so the subtags are only checked once.
+   */
+  private static String[] parse(String tag) throws InvalidLangTagSyntax {
+    return tag.split("-");
+  }
+
+  /**
+   * @return an iterator over all subtags, starting with the primary one
+   */
+  public Iterator<String> iterator() {
+    return java.util.Arrays.asList(tags).iterator();
+  }
+}
Added: incubator/abdera/java/trunk/core/src/main/java/org/apache/abdera/util/unicode/Normalizer.java
URL: http://svn.apache.org/viewvc/incubator/abdera/java/trunk/core/src/main/java/org/apache/abdera/util/unicode/Normalizer.java?view=auto&rev=448818
==============================================================================
--- incubator/abdera/java/trunk/core/src/main/java/org/apache/abdera/util/unicode/Normalizer.java (added)
+++ incubator/abdera/java/trunk/core/src/main/java/org/apache/abdera/util/unicode/Normalizer.java Thu Sep 21 22:40:01 2006
@@ -0,0 +1,179 @@
+/*
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements. The ASF licenses this file to You
+* under the Apache License, Version 2.0 (the "License"); you may not
+* use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License. For additional information regarding
+* copyright in this work, please see the NOTICE file in the top level
+* directory of this distribution.
+*/
+package org.apache.abdera.util.unicode;
+
+import java.io.IOException;
+
+import org.apache.abdera.util.io.CharUtils;
+import org.apache.abdera.util.io.CodepointIterator;
+
+
+/**
+ * Performs Unicode Normalization (Form D, C, KD and KC).
+ *
+ * <p>Normalization is done in two passes over a StringBuffer: the source
+ * is first decomposed, with each resulting code point inserted according
+ * to its canonical class, and, for the composing forms, adjacent code
+ * points are then recombined in place.</p>
+ */
+public final class Normalizer {
+
+  /** Properties that may be combined to describe a normalization form. */
+  public enum Mask {
+    NONE,
+    COMPATIBILITY,
+    COMPOSITION
+  }
+
+  /** The four normalization forms. */
+  public enum Form {
+    D,
+    C(Mask.COMPOSITION),
+    KD(Mask.COMPATIBILITY),
+    KC(Mask.COMPATIBILITY,Mask.COMPOSITION);
+
+    // Explicit flags instead of a bitmask built from Mask.ordinal(): the
+    // ordinal based mask only worked because the ordinals happened to be
+    // 0, 1 and 2 and would break as soon as Mask gained another constant.
+    private final boolean compatibility;
+    private final boolean composition;
+
+    Form(Mask... masks) {
+      boolean compat = false;
+      boolean compose = false;
+      for (Mask mask : masks) {
+        if (mask == Mask.COMPATIBILITY) {
+          compat = true;
+        } else if (mask == Mask.COMPOSITION) {
+          compose = true;
+        }
+      }
+      this.compatibility = compat;
+      this.composition = compose;
+    }
+
+    /** @return true for the compatibility forms (KD, KC) */
+    public boolean isCompatibility() {
+      return compatibility;
+    }
+
+    /** @return true for the canonical forms (D, C) */
+    public boolean isCanonical() {
+      return !isCompatibility();
+    }
+
+    /** @return true for the composing forms (C, KC) */
+    public boolean isComposition() {
+      return composition;
+    }
+  }
+
+  /**
+   * Value compared against the result of getPairComposition to detect
+   * "no composition"; same value as the char literal U+FFFF.
+   */
+  private static final int NOT_COMPOSITE = 0xFFFF;
+
+  private Normalizer() {}
+
+  /**
+   * Normalize the string using NFKC
+   *
+   * @param source the text to normalize
+   * @return a new StringBuffer holding the result
+   */
+  public static StringBuffer normalize(String source) throws IOException {
+    return normalize(source, Form.KC);
+  }
+
+  /**
+   * Normalize the string using the specified Form
+   *
+   * @param source the text to normalize
+   * @param form the normalization form to apply
+   * @return a new StringBuffer holding the result
+   */
+  public static StringBuffer normalize(
+    String source,
+    Form form)
+      throws IOException {
+    return normalize(source, form, new StringBuffer());
+  }
+
+  /**
+   * Normalize the string into the given StringBuffer using the given Form
+   *
+   * <p>If the source is empty, or no UnicodeCharacterDatabase instance is
+   * available, the buffer is returned untouched.</p>
+   *
+   * @param source the text to normalize
+   * @param form the normalization form to apply
+   * @param buf the buffer that receives the result
+   * @return buf
+   */
+  public static StringBuffer normalize(
+    String source,
+    Form form,
+    StringBuffer buf)
+      throws IOException {
+    UnicodeCharacterDatabase ucd = UnicodeCharacterDatabase.getInstance();
+    if (source.length() != 0 && ucd != null) {
+      decompose(ucd, source, form, buf);
+      compose(ucd, form, buf);
+    }
+    return buf;
+  }
+
+  /**
+   * Decomposes every code point of source and inserts the resulting code
+   * points into buf at the position chosen by findInsertionPoint.
+   */
+  private static void decompose(
+    UnicodeCharacterDatabase ucd,
+    String source,
+    Form form,
+    StringBuffer buf)
+      throws IOException {
+    StringBuffer internal = new StringBuffer();
+    CodepointIterator ci = CodepointIterator.forCharSequence(source);
+    boolean canonical = form.isCanonical();
+    while (ci.hasNext()) {
+      int c = ci.next();
+      // scratch buffer is reused for the decomposition of each code point
+      internal.setLength(0);
+      ucd.decompose(c, canonical, internal);
+      CodepointIterator ii = CodepointIterator.forCharSequence(internal);
+      while (ii.hasNext()) {
+        int ch = ii.next();
+        int i = findInsertionPoint(ucd, buf, ch);
+        buf.insert(i, CharUtils.toString(ch));
+      }
+    }
+  }
+
+  /**
+   * Finds the index in buf at which c has to be inserted. A code point
+   * with canonical class 0 always goes at the end. Any other code point
+   * is moved backwards past every trailing code point whose canonical
+   * class is greater than its own.
+   */
+  private static int findInsertionPoint(
+    UnicodeCharacterDatabase ucd,
+    StringBuffer buf, int c) {
+    int cc = ucd.getCanonicalClass(c);
+    int i = buf.length();
+    if (cc != 0) {
+      int ch;
+      // Step back by the size of the code point that was just examined
+      // (ch), not by the size of the one being inserted (c); the two can
+      // differ in length.
+      for (; i > 0; i -= CharUtils.size(ch)) {
+        ch = CharUtils.charAt(buf, i-1);
+        if (ucd.getCanonicalClass(ch) <= cc) break;
+      }
+    }
+    return i;
+  }
+
+  /**
+   * Recombines the decomposed content of buf in place. Does nothing for
+   * non-composing forms or an empty buffer.
+   *
+   * <p>pos/lc track the last code point with canonical class 0 seen so far
+   * (the candidate to compose with), cpos is the write position of the
+   * compacted output and dpos the read position.</p>
+   */
+  private static void compose(
+    UnicodeCharacterDatabase ucd,
+    Form form,
+    StringBuffer buf)
+      throws IOException {
+    if (!form.isComposition()) return;
+    // nothing to read at index 0 in an empty buffer
+    if (buf.length() == 0) return;
+    int pos = 0;
+    int lc = CharUtils.charAt(buf, pos);
+    int cpos = CharUtils.size(lc);
+    int lcc = ucd.getCanonicalClass(lc);
+    // a leading code point with a non-zero class gets 256 so that the
+    // "lcc < cc" test below cannot succeed against it
+    if (lcc != 0) lcc = 256;
+    int len = buf.length();
+    int c;
+    for (int dpos = cpos; dpos < buf.length(); dpos += CharUtils.size(c)) {
+      c = CharUtils.charAt(buf,dpos);
+      int cc = ucd.getCanonicalClass(c);
+      int composite = ucd.getPairComposition(lc, c);
+      if (composite != NOT_COMPOSITE && (lcc < cc || lcc == 0)) {
+        // c combines with the current starter: overwrite the starter
+        CharUtils.setChar(buf, pos, composite);
+        lc = composite;
+      } else {
+        if (cc == 0) {
+          // c becomes the new starter
+          pos = cpos;
+          lc = c;
+        }
+        lcc = cc;
+        CharUtils.setChar(buf,cpos,c);
+        // if writing changed the buffer length, shift the read position
+        // by the same amount
+        if (buf.length() != len) {
+          dpos += buf.length() - len;
+          len = buf.length();
+        }
+        cpos += CharUtils.size(c);
+      }
+    }
+    // drop whatever is left behind the compacted output
+    buf.setLength(cpos);
+  }
+
+  /**
+   * Delegates to UnicodeCharacterDatabase.main with a fixed resource path.
+   */
+  public static void main(String... args) throws Exception {
+
+    UnicodeCharacterDatabase.main("src/org/apache/abdera/util/unicode/data/ucd.res");
+
+  }
+}