You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@abdera.apache.org by jm...@apache.org on 2006/09/22 07:40:03 UTC

svn commit: r448818 [1/3] - in /incubator/abdera/java/trunk/core/src: main/java/org/apache/abdera/util/ main/java/org/apache/abdera/util/io/ main/java/org/apache/abdera/util/iri/ main/java/org/apache/abdera/util/lang/ main/java/org/apache/abdera/util/u...

Author: jmsnell
Date: Thu Sep 21 22:40:01 2006
New Revision: 448818

URL: http://svn.apache.org/viewvc?view=rev&rev=448818
Log:
Initial check in of the IRI/IDN support.  I had originally thought this would land in the parser project,
but then I remembered that we'll likely need to surface the IRI and Lang objects provided by this package
in the core model api... at least at some point.  Not sure how we should ultimately package this, but
we can work on those details later.

* Unicode 3.2.0 Normalization implementation.  We likely need to update to Unicode 4.0, or at least check
  to make sure this code will work with either version.

* Punycode/Nameprep/IDNA implementation.  This is fundamental to IRI support and the most significant
  hole in the java.net.* stuff.  This code implements support for both the ToASCII and ToUnicode algorithms

* IRI implementation. API is similar to java.net.URI, provides conversion to and from URI/URL, provides
  custom scheme support, scheme specific equivalence checking, bunch of other stuff.

* Language tag implementation, e.g. parsing and validating the syntax of things like en-US

* Bunch of IO utility classes including a CodepointIterator that converts surrogate chars into 
  supplementary codepoints from byte arrays, char arrays, charsequences, charbuffers, etc. Also 
  some InputStream implementations that extend things like PushbackInputStream with dynamic buffer
  management, etc. Some of these currently exist in the parser module, but it made sense to move
  them around and keep them with the other new io stuff.

Test cases are included.

The org.apache.abdera.util.unicode.data package contains the gzip compressed binary representation of the
Unicode Character Database (version 3.2.0).  The Unicode license forbids us from storing the source
text files but we can store a binary representation used to enable our impl.  I'll be working a process
into the build that will download the source files and generate the ucd.res file during the build process.

None of the Abdera code has been modified to use these new classes yet

Added:
    incubator/abdera/java/trunk/core/src/main/java/org/apache/abdera/util/ChainableBitSet.java
    incubator/abdera/java/trunk/core/src/main/java/org/apache/abdera/util/io/
    incubator/abdera/java/trunk/core/src/main/java/org/apache/abdera/util/io/ByteArrayCodepointIterator.java
    incubator/abdera/java/trunk/core/src/main/java/org/apache/abdera/util/io/CharArrayCodepointIterator.java
    incubator/abdera/java/trunk/core/src/main/java/org/apache/abdera/util/io/CharBufferCodepointIterator.java
    incubator/abdera/java/trunk/core/src/main/java/org/apache/abdera/util/io/CharSequenceCodepointIterator.java
    incubator/abdera/java/trunk/core/src/main/java/org/apache/abdera/util/io/CharUtils.java
    incubator/abdera/java/trunk/core/src/main/java/org/apache/abdera/util/io/CodepointIterator.java
    incubator/abdera/java/trunk/core/src/main/java/org/apache/abdera/util/io/DynamicPushbackInputStream.java
    incubator/abdera/java/trunk/core/src/main/java/org/apache/abdera/util/io/FilterCodepointIterator.java
    incubator/abdera/java/trunk/core/src/main/java/org/apache/abdera/util/io/InvalidCharacterException.java
    incubator/abdera/java/trunk/core/src/main/java/org/apache/abdera/util/io/PeekAheadInputStream.java
    incubator/abdera/java/trunk/core/src/main/java/org/apache/abdera/util/io/ReadWriteByteChannel.java
    incubator/abdera/java/trunk/core/src/main/java/org/apache/abdera/util/io/RestrictedCodepointIterator.java
    incubator/abdera/java/trunk/core/src/main/java/org/apache/abdera/util/io/RewindableInputStream.java
    incubator/abdera/java/trunk/core/src/main/java/org/apache/abdera/util/iri/
    incubator/abdera/java/trunk/core/src/main/java/org/apache/abdera/util/iri/AbstractScheme.java
    incubator/abdera/java/trunk/core/src/main/java/org/apache/abdera/util/iri/Builder.java
    incubator/abdera/java/trunk/core/src/main/java/org/apache/abdera/util/iri/Constants.java
    incubator/abdera/java/trunk/core/src/main/java/org/apache/abdera/util/iri/Escaping.java
    incubator/abdera/java/trunk/core/src/main/java/org/apache/abdera/util/iri/HttpScheme.java
    incubator/abdera/java/trunk/core/src/main/java/org/apache/abdera/util/iri/HttpsScheme.java
    incubator/abdera/java/trunk/core/src/main/java/org/apache/abdera/util/iri/IDNA.java
    incubator/abdera/java/trunk/core/src/main/java/org/apache/abdera/util/iri/IRI.java
    incubator/abdera/java/trunk/core/src/main/java/org/apache/abdera/util/iri/IRISyntaxException.java
    incubator/abdera/java/trunk/core/src/main/java/org/apache/abdera/util/iri/Nameprep.java
    incubator/abdera/java/trunk/core/src/main/java/org/apache/abdera/util/iri/NameprepCodepointIterator.java
    incubator/abdera/java/trunk/core/src/main/java/org/apache/abdera/util/iri/Punycode.java
    incubator/abdera/java/trunk/core/src/main/java/org/apache/abdera/util/iri/Scheme.java
    incubator/abdera/java/trunk/core/src/main/java/org/apache/abdera/util/iri/SchemeRegistry.java
    incubator/abdera/java/trunk/core/src/main/java/org/apache/abdera/util/lang/
    incubator/abdera/java/trunk/core/src/main/java/org/apache/abdera/util/lang/InvalidLangTagSyntax.java
    incubator/abdera/java/trunk/core/src/main/java/org/apache/abdera/util/lang/Lang.java
    incubator/abdera/java/trunk/core/src/main/java/org/apache/abdera/util/unicode/
    incubator/abdera/java/trunk/core/src/main/java/org/apache/abdera/util/unicode/Normalizer.java
    incubator/abdera/java/trunk/core/src/main/java/org/apache/abdera/util/unicode/UnicodeCharacterDatabase.java
    incubator/abdera/java/trunk/core/src/main/resources/org/
    incubator/abdera/java/trunk/core/src/main/resources/org/apache/
    incubator/abdera/java/trunk/core/src/main/resources/org/apache/abdera/
    incubator/abdera/java/trunk/core/src/main/resources/org/apache/abdera/util/
    incubator/abdera/java/trunk/core/src/main/resources/org/apache/abdera/util/iri/
    incubator/abdera/java/trunk/core/src/main/resources/org/apache/abdera/util/iri/data/
    incubator/abdera/java/trunk/core/src/main/resources/org/apache/abdera/util/iri/data/B2.dat
    incubator/abdera/java/trunk/core/src/main/resources/org/apache/abdera/util/unicode/
    incubator/abdera/java/trunk/core/src/main/resources/org/apache/abdera/util/unicode/data/
    incubator/abdera/java/trunk/core/src/main/resources/org/apache/abdera/util/unicode/data/ucd.res   (with props)
    incubator/abdera/java/trunk/core/src/test/java/org/apache/abdera/test/iri/
    incubator/abdera/java/trunk/core/src/test/java/org/apache/abdera/test/iri/TestBase.java
    incubator/abdera/java/trunk/core/src/test/java/org/apache/abdera/test/iri/TestIDNA.java
    incubator/abdera/java/trunk/core/src/test/java/org/apache/abdera/test/iri/TestIRI.java
    incubator/abdera/java/trunk/core/src/test/java/org/apache/abdera/test/iri/TestLang.java
    incubator/abdera/java/trunk/core/src/test/java/org/apache/abdera/test/iri/TestNFKC.java
    incubator/abdera/java/trunk/core/src/test/java/org/apache/abdera/test/iri/TestNameprep.java
    incubator/abdera/java/trunk/core/src/test/java/org/apache/abdera/test/iri/TestPunycode.java

Added: incubator/abdera/java/trunk/core/src/main/java/org/apache/abdera/util/ChainableBitSet.java
URL: http://svn.apache.org/viewvc/incubator/abdera/java/trunk/core/src/main/java/org/apache/abdera/util/ChainableBitSet.java?view=auto&rev=448818
==============================================================================
--- incubator/abdera/java/trunk/core/src/main/java/org/apache/abdera/util/ChainableBitSet.java (added)
+++ incubator/abdera/java/trunk/core/src/main/java/org/apache/abdera/util/ChainableBitSet.java Thu Sep 21 22:40:01 2006
@@ -0,0 +1,119 @@
+/*
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements.  The ASF licenses this file to You
+* under the Apache License, Version 2.0 (the "License"); you may not
+* use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.  For additional information regarding
+* copyright in this work, please see the NOTICE file in the top level
+* directory of this distribution.
+*/
+package org.apache.abdera.util;
+
+import java.util.BitSet;
+
+/**
+ * Extension to java.util.BitSet whose "2"-suffixed mutators return the bit
+ * set itself so that calls can be chained, e.g.
+ * <code>new ChainableBitSet().set2('a','z').set2("-._~").clear2('q')</code>,
+ * making it easier to define a complex bit set in a single declaration.
+ * The inherited BitSet mutators (set, clear, flip, ...) return void and
+ * therefore cannot be used in the middle of a chain.
+ */
+public class ChainableBitSet 
+  extends BitSet {
+
+  private static final long serialVersionUID = -1105957441212997513L;
+
+  /**
+   * Chainable form of {@link BitSet#and(BitSet)}.
+   * @return this bit set
+   */
+  public ChainableBitSet and2(BitSet set) {
+    and(set);
+    return this;
+  }
+  
+  /**
+   * Chainable form of {@link BitSet#andNot(BitSet)}: clears every bit that
+   * is set in the given bit set.
+   * @return this bit set
+   */
+  public ChainableBitSet andNot2(BitSet set) {
+    andNot(set);
+    return this;
+  }
+  
+  /**
+   * Misspelled alias of {@link #andNot2(BitSet)}, kept so that existing
+   * callers continue to compile.
+   * @deprecated use {@link #andNot2(BitSet)}
+   * @return this bit set
+   */
+  @Deprecated
+  public ChainableBitSet addNot2(BitSet set) {
+    return andNot2(set);
+  }
+  
+  /**
+   * Chainable form of {@link BitSet#clear(int)}.
+   * @return this bit set
+   */
+  public ChainableBitSet clear2(int index) {
+    clear(index);
+    return this;
+  }
+  
+  /**
+   * Clears each of the listed bits.  Note that a call with exactly two int
+   * arguments resolves to {@link #clear2(int, int)} (a range), not to this
+   * varargs overload.
+   * @return this bit set
+   */
+  public ChainableBitSet clear2(int... indexes) {
+    for (int i : indexes) clear(i);
+    return this;
+  }
+  
+  /**
+   * Chainable form of {@link BitSet#clear(int, int)}; endIndex is exclusive.
+   * @return this bit set
+   */
+  public ChainableBitSet clear2(int startIndex, int endIndex) {
+    clear(startIndex, endIndex);
+    return this;
+  }
+  
+  /**
+   * Chainable form of {@link BitSet#flip(int)}.
+   * @return this bit set
+   */
+  public ChainableBitSet flip2(int index) {
+    flip(index);
+    return this;
+  }
+  
+  /**
+   * Flips each of the listed bits.  Note that a call with exactly two int
+   * arguments resolves to {@link #flip2(int, int)} (a range), not to this
+   * varargs overload.
+   * @return this bit set
+   */
+  public ChainableBitSet flip2(int... indexes) {
+    for (int i : indexes) flip(i);
+    return this;
+  }
+  
+  /**
+   * Chainable form of {@link BitSet#flip(int, int)}; endIndex is exclusive.
+   * @return this bit set
+   */
+  public ChainableBitSet flip2(int startIndex, int endIndex) {
+    flip(startIndex,endIndex);
+    return this;
+  }
+  
+  /**
+   * Chainable form of {@link BitSet#or(BitSet)}.
+   * @return this bit set
+   */
+  public ChainableBitSet or2(BitSet set) {
+    or(set);
+    return this;
+  }
+  
+  /**
+   * Chainable form of {@link BitSet#xor(BitSet)}.
+   * @return this bit set
+   */
+  public ChainableBitSet xor2(BitSet set) {
+    xor(set);
+    return this;
+  }
+  
+  /**
+   * Sets the bit for every char (UTF-16 code unit) of the given string.
+   * @return this bit set
+   */
+  public ChainableBitSet set2(String s) {
+    for (int i = 0; i < s.length(); i++) set(s.charAt(i));
+    return this;
+  }
+  
+  /**
+   * Sets every bit that is set in the given bit set (same as or2).
+   * @return this bit set
+   */
+  public ChainableBitSet set2(BitSet set) {
+    this.or(set);
+    return this;
+  }
+  
+  /**
+   * Sets each of the listed bits.  Note that a call with exactly two int
+   * arguments resolves to {@link #set2(int, int)} (a range), not to this
+   * varargs overload.
+   * @return this bit set
+   */
+  public ChainableBitSet set2(int ... bits) {
+    for (int n : bits) set(n);
+    return this;
+  }
+  
+  /**
+   * Sets the bits from fromIndex to toIndex, BOTH INCLUSIVE.  This differs
+   * from {@link BitSet#set(int, int)} and from clear2/flip2 in this class,
+   * whose end index is exclusive; the inclusive form allows ranges to be 
+   * written as e.g. <code>set2('a','z')</code>.
+   * @return this bit set
+   */
+  public ChainableBitSet set2(int fromIndex, int toIndex) {
+    super.set(fromIndex, toIndex+1);
+    return this;
+  }
+
+  /**
+   * Chainable form of {@link BitSet#set(int)}.
+   * @return this bit set
+   */
+  public ChainableBitSet set2(int bitIndex) {
+    super.set(bitIndex);
+    return this;
+  }
+
+  /**
+   * Chainable form of {@link BitSet#set(int, boolean)}.
+   * @return this bit set
+   */
+  public ChainableBitSet set2(int bitIndex, boolean value) {
+    super.set(bitIndex, value);
+    return this;
+  }
+  
+  /**
+   * Sets (value == true) or clears (value == false) every bit that is set
+   * in the given bit set.
+   * @return this bit set
+   */
+  public ChainableBitSet set2(BitSet set, boolean value) {
+    return value ? set2(set) : andNot2(set);
+  }
+  
+}

Added: incubator/abdera/java/trunk/core/src/main/java/org/apache/abdera/util/io/ByteArrayCodepointIterator.java
URL: http://svn.apache.org/viewvc/incubator/abdera/java/trunk/core/src/main/java/org/apache/abdera/util/io/ByteArrayCodepointIterator.java?view=auto&rev=448818
==============================================================================
--- incubator/abdera/java/trunk/core/src/main/java/org/apache/abdera/util/io/ByteArrayCodepointIterator.java (added)
+++ incubator/abdera/java/trunk/core/src/main/java/org/apache/abdera/util/io/ByteArrayCodepointIterator.java Thu Sep 21 22:40:01 2006
@@ -0,0 +1,45 @@
+/*
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements.  The ASF licenses this file to You
+* under the Apache License, Version 2.0 (the "License"); you may not
+* use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.  For additional information regarding
+* copyright in this work, please see the NOTICE file in the top level
+* directory of this distribution.
+*/
+package org.apache.abdera.util.io;
+
+import java.nio.ByteBuffer;
+import java.nio.CharBuffer;
+import java.nio.charset.Charset;
+
+/**
+ * Codepoint iterator over the characters obtained by decoding a byte array
+ * with a given charset.  The bytes are decoded once, up front, and the
+ * resulting chars are then walked by the inherited char array logic.
+ */
+public class ByteArrayCodepointIterator 
+  extends CharArrayCodepointIterator {
+  
+  /**
+   * Decodes the bytes using the platform default charset.
+   */
+  public ByteArrayCodepointIterator(byte[] bytes) {
+    this(bytes,Charset.defaultCharset());
+  }
+  
+  /**
+   * Decodes the bytes using the charset with the given name, as resolved
+   * by Charset.forName.
+   */
+  public ByteArrayCodepointIterator(byte[] bytes, String charset) {
+    this(bytes,Charset.forName(charset));
+  }
+  
+  /**
+   * Decodes the bytes using the given charset and iterates over the
+   * decoded characters.
+   */
+  public ByteArrayCodepointIterator(byte[] bytes, Charset charset) {
+    final ByteBuffer encoded = ByteBuffer.wrap(bytes);
+    final CharBuffer decoded = charset.decode(encoded);
+    // adopt the decoder's backing array and its window as our own state
+    this.buffer = decoded.array();
+    this.position = decoded.position();
+    this.limit = decoded.limit();
+  }
+
+}

Added: incubator/abdera/java/trunk/core/src/main/java/org/apache/abdera/util/io/CharArrayCodepointIterator.java
URL: http://svn.apache.org/viewvc/incubator/abdera/java/trunk/core/src/main/java/org/apache/abdera/util/io/CharArrayCodepointIterator.java?view=auto&rev=448818
==============================================================================
--- incubator/abdera/java/trunk/core/src/main/java/org/apache/abdera/util/io/CharArrayCodepointIterator.java (added)
+++ incubator/abdera/java/trunk/core/src/main/java/org/apache/abdera/util/io/CharArrayCodepointIterator.java Thu Sep 21 22:40:01 2006
@@ -0,0 +1,50 @@
+/*
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements.  The ASF licenses this file to You
+* under the Apache License, Version 2.0 (the "License"); you may not
+* use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.  For additional information regarding
+* copyright in this work, please see the NOTICE file in the top level
+* directory of this distribution.
+*/
+package org.apache.abdera.util.io;
+
+/**
+ * Iterate over Unicode codepoints contained in a char array
+ */
+public class CharArrayCodepointIterator 
+  extends CodepointIterator {
+
+  /** The chars being iterated; subclasses assign this directly. */
+  protected char[] buffer;
+  
+  /**
+   * For subclasses that initialize buffer, position and limit themselves.
+   */
+  protected CharArrayCodepointIterator() {}
+  
+  /**
+   * Iterates over the whole array.
+   */
+  public CharArrayCodepointIterator(char[] buffer) {
+    this(buffer,0,buffer.length);
+  }
+  
+  /**
+   * Iterates over a window of the array.
+   * 
+   * @param buffer the chars to iterate over (not copied)
+   * @param n index of the first char to return
+   * @param e index one past the last char to return; values larger than
+   *          buffer.length are clamped to buffer.length
+   */
+  public CharArrayCodepointIterator(char[] buffer, int n, int e) {
+    this.buffer = buffer;
+    this.position = n;
+    // The limit is an absolute index into the array.  It used to be 
+    // computed as min(buffer.length-n,e), which is only right for n == 0;
+    // for n > 0 it could end up below the start position so that nothing 
+    // was iterated at all.
+    this.limit = Math.min(buffer.length,e);
+  }
+  
+  /**
+   * Returns the char at the current position and advances, or (char)-1 
+   * (0xFFFF) once the limit has been reached.
+   */
+  protected char get() {
+    return (position < limit) ? buffer[position++] : (char)-1;
+  }
+  
+  /**
+   * Returns the char at the given absolute index without moving the 
+   * position.
+   * 
+   * @throws ArrayIndexOutOfBoundsException if index is negative or not
+   *         below the limit
+   */
+  protected char get(int index) {
+    if (index < 0 || index >= limit) 
+      throw new ArrayIndexOutOfBoundsException(index);
+    return buffer[index];
+  }
+  
+}

Added: incubator/abdera/java/trunk/core/src/main/java/org/apache/abdera/util/io/CharBufferCodepointIterator.java
URL: http://svn.apache.org/viewvc/incubator/abdera/java/trunk/core/src/main/java/org/apache/abdera/util/io/CharBufferCodepointIterator.java?view=auto&rev=448818
==============================================================================
--- incubator/abdera/java/trunk/core/src/main/java/org/apache/abdera/util/io/CharBufferCodepointIterator.java (added)
+++ incubator/abdera/java/trunk/core/src/main/java/org/apache/abdera/util/io/CharBufferCodepointIterator.java Thu Sep 21 22:40:01 2006
@@ -0,0 +1,34 @@
+/*
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements.  The ASF licenses this file to You
+* under the Apache License, Version 2.0 (the "License"); you may not
+* use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.  For additional information regarding
+* copyright in this work, please see the NOTICE file in the top level
+* directory of this distribution.
+*/
+package org.apache.abdera.util.io;
+
+import java.nio.CharBuffer;
+
+/**
+ * Iterate over Unicode codepoints in a java.nio.CharBuffer
+ */
+public class CharBufferCodepointIterator 
+  extends CharArrayCodepointIterator {
+
+  public CharBufferCodepointIterator(CharBuffer cb) {
+    buffer = cb.array();
+    position = cb.position();
+    limit = cb.limit();
+  }
+  
+}

Added: incubator/abdera/java/trunk/core/src/main/java/org/apache/abdera/util/io/CharSequenceCodepointIterator.java
URL: http://svn.apache.org/viewvc/incubator/abdera/java/trunk/core/src/main/java/org/apache/abdera/util/io/CharSequenceCodepointIterator.java?view=auto&rev=448818
==============================================================================
--- incubator/abdera/java/trunk/core/src/main/java/org/apache/abdera/util/io/CharSequenceCodepointIterator.java (added)
+++ incubator/abdera/java/trunk/core/src/main/java/org/apache/abdera/util/io/CharSequenceCodepointIterator.java Thu Sep 21 22:40:01 2006
@@ -0,0 +1,46 @@
+/*
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements.  The ASF licenses this file to You
+* under the Apache License, Version 2.0 (the "License"); you may not
+* use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.  For additional information regarding
+* copyright in this work, please see the NOTICE file in the top level
+* directory of this distribution.
+*/
+package org.apache.abdera.util.io;
+
+/**
+ * Iterate over Unicode codepoints in a CharSequence (e.g. String, StringBuffer, etc)
+ */
+public class CharSequenceCodepointIterator 
+  extends CodepointIterator {
+
+  private CharSequence buffer;
+  
+  /**
+   * Iterates over the whole sequence.
+   */
+  public CharSequenceCodepointIterator(CharSequence buffer) {
+    this(buffer,0,buffer.length());
+  }
+  
+  /**
+   * Iterates over a window of the sequence.
+   * 
+   * @param buffer the chars to iterate over (not copied)
+   * @param n index of the first char to return
+   * @param e index one past the last char to return; values larger than
+   *          buffer.length() are clamped to buffer.length()
+   */
+  public CharSequenceCodepointIterator(CharSequence buffer, int n, int e) {
+    this.buffer = buffer;
+    this.position = n;
+    // The limit is an absolute index.  It used to be computed as 
+    // min(buffer.length()-n,e), which is only right for n == 0.
+    this.limit = Math.min(buffer.length(),e);
+  }
+  
+  /**
+   * Returns the char at the current position and advances, or (char)-1
+   * (0xFFFF) once the limit has been reached, matching the behavior of
+   * CharArrayCodepointIterator.
+   */
+  protected char get() {
+    return (position < limit) ? buffer.charAt(position++) : (char)-1;
+  }
+
+  /**
+   * Returns the char at the given absolute index without moving the 
+   * position.  Bounds checking is left to CharSequence.charAt.
+   */
+  protected char get(int index) {
+    return buffer.charAt(index);
+  }
+  
+}

Added: incubator/abdera/java/trunk/core/src/main/java/org/apache/abdera/util/io/CharUtils.java
URL: http://svn.apache.org/viewvc/incubator/abdera/java/trunk/core/src/main/java/org/apache/abdera/util/io/CharUtils.java?view=auto&rev=448818
==============================================================================
--- incubator/abdera/java/trunk/core/src/main/java/org/apache/abdera/util/io/CharUtils.java (added)
+++ incubator/abdera/java/trunk/core/src/main/java/org/apache/abdera/util/io/CharUtils.java Thu Sep 21 22:40:01 2006
@@ -0,0 +1,235 @@
+/*
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements.  The ASF licenses this file to You
+* under the Apache License, Version 2.0 (the "License"); you may not
+* use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.  For additional information regarding
+* copyright in this work, please see the NOTICE file in the top level
+* directory of this distribution.
+*/
+package org.apache.abdera.util.io;
+
+import java.util.BitSet;
+
+/**
+ * General utilities for dealing with Unicode characters
+ */
+public final class CharUtils {
+
+  private CharUtils() {}
+ 
+  public static int scanNot(CodepointIterator ci, BitSet set) throws InvalidCharacterException {
+    RestrictedCodepointIterator rci = new RestrictedCodepointIterator(ci,set,true,true);
+    while (rci.hasNext()) rci.next();
+    return rci.position;
+  }
+  
+  public static int scanNot(char[] array, BitSet set) throws InvalidCharacterException {
+    CodepointIterator ci = CodepointIterator.forCharArray(array);
+    RestrictedCodepointIterator rci = new RestrictedCodepointIterator(ci,set,true,true);
+    while (rci.hasNext()) rci.next();
+    return rci.position;
+  }
+  
+  public static int scan(CodepointIterator ci, BitSet set) throws InvalidCharacterException {
+    RestrictedCodepointIterator rci = new RestrictedCodepointIterator(ci,set,true);
+    while (rci.hasNext()) rci.next();
+    return rci.position();
+  }
+  
+  public static int scan(char[] array, BitSet set) throws InvalidCharacterException {
+    CodepointIterator ci = CodepointIterator.forCharArray(array);
+    RestrictedCodepointIterator rci = new RestrictedCodepointIterator(ci,set,true);
+    while (rci.hasNext()) rci.next();
+    return rci.position();
+  }
+  
+  public static int scan(String s, BitSet set) throws InvalidCharacterException {
+    CodepointIterator ci = CodepointIterator.forCharSequence(s);
+    RestrictedCodepointIterator rci = new RestrictedCodepointIterator(ci,set,true);
+    while (rci.hasNext()) rci.next();
+    return rci.position;
+  }
+  
+  public static void verifyNot(CodepointIterator ci, BitSet set) throws InvalidCharacterException {
+    RestrictedCodepointIterator rci = new RestrictedCodepointIterator(ci,set,false,true);
+    while (rci.hasNext()) rci.next();
+  }
+  
+  public static void verifyNot(char[] array, BitSet set) throws InvalidCharacterException {
+    CodepointIterator ci = CodepointIterator.forCharArray(array);
+    RestrictedCodepointIterator rci = new RestrictedCodepointIterator(ci,set,false,true);
+    while (rci.hasNext()) rci.next();
+  }
+  
+  public static void verify(CodepointIterator ci, BitSet set) throws InvalidCharacterException {
+    RestrictedCodepointIterator rci = new RestrictedCodepointIterator(ci,set,false);
+    while (rci.hasNext()) rci.next();
+  }
+  
+  public static void verify(char[] array, BitSet set) throws InvalidCharacterException {
+    CodepointIterator ci = CodepointIterator.forCharArray(array);
+    RestrictedCodepointIterator rci = new RestrictedCodepointIterator(ci,set,false);
+    while (rci.hasNext()) rci.next();
+  }
+  
+  public static void verify(String s, BitSet set) throws InvalidCharacterException {
+    CodepointIterator ci = CodepointIterator.forCharSequence(s);
+    RestrictedCodepointIterator rci = new RestrictedCodepointIterator(ci,set,false);
+    while (rci.hasNext()) rci.next();
+  }
+  
+  public static boolean inRange(char[] chars, char low, char high) {
+    for (int i = 0; i < chars.length; i++)
+      if (chars[i] < low || chars[i] > high) return false;
+    return true;
+  }
+
+  public static boolean inRange(char[] chars, int low, int high) {
+    for (int i = 0; i < chars.length; i++) {
+      char n = chars[i];
+      int c = (isHighSurrogate(n) && 
+               i + 1 < chars.length && 
+               isLowSurrogate(chars[i+1])) ? toCodePoint(n,chars[i++]) : n;
+      if (c < low || c > high) return false;
+    }
+    return true;
+  }
+  
+  public static boolean isSet(int n, BitSet... sets) {
+    if (n == -1) return false;
+    BitSet set = new BitSet();
+    for (BitSet s : sets) set.or(s);
+    return set.get(n);
+  }
+  
+  public static void append(StringBuffer buf, int c) {
+    if (isSupplementary(c)) {
+      buf.append(getHighSurrogate(c));
+      buf.append(getLowSurrogate(c));
+    } else buf.append((char)c);
+  }
+  
+  public static char getHighSurrogate(int c) {
+    return (c >= 0x10000) ?
+       (char)((0xD800 - (0x10000 >> 10)) + (c >> 10)) : 0;
+  }
+
+  public static char getLowSurrogate(int c) {    
+    return (c >= 0x10000) ?
+        (char)(0xDC00 + (c & 0x3FF)) : (char)c;
+  }
+  
+  public static boolean isHighSurrogate(char c) {
+    return c <= '\uDBFF' && c >= '\uD800';
+  }
+
+  public static boolean isLowSurrogate(char c) {
+    return c <= '\uDFFF' && c >= '\uDC00';
+  }
+  
+  public static boolean isSupplementary(int c) {
+    return c <= 0x10ffff && c >= 0x010000;
+  }
+  
+  public static boolean isSurrogatePair(char high, char low) {
+    return isHighSurrogate(high) && isLowSurrogate(low);
+  }
+  
+  public static int toCodePoint(char[] chars) {
+    return toCodePoint(chars[0],chars[1]);
+  }
+  
+  public static int toCodePoint(char high, char low) {
+    return ((high - '\uD800') << 10) + (low - '\uDC00') + 0x010000;    
+  }
+
+  public static int charAt(String s, int i) {
+    char c = s.charAt(i);
+    if (c < 0xD800 || c > 0xDFFF) return c;
+    if (isHighSurrogate(c)) {
+      if (s.length() != i) {
+        char low = s.charAt(i+1);
+        if (isLowSurrogate(low)) return toCodePoint(c,low);
+      }
+    } else if (isLowSurrogate(c)) {
+      if (i >= 1) {
+        char high = s.charAt(i-1);
+        if (isHighSurrogate(high)) return toCodePoint(high,c);
+      }
+    }
+    return c;
+  }
+  
+  public static int charAt(StringBuffer s, int i) {
+    char c = s.charAt(i);
+    if (c < 0xD800 || c > 0xDFFF) return c;
+    if (isHighSurrogate(c)) {
+      if (s.length() != i) {
+        char low = s.charAt(i+1);
+        if (isLowSurrogate(low)) return toCodePoint(c,low);
+      }
+    } else if (isLowSurrogate(c)) {
+      if (i >= 1) {
+        char high = s.charAt(i-1);
+        if (isHighSurrogate(high)) return toCodePoint(high,c);
+      }
+    }
+    return c;
+  }
+  
+  public static void insert(StringBuffer s, int i, int c) {
+    if (i > 0 && i < s.length()) {
+      char ch = s.charAt(i);
+      boolean low = isLowSurrogate(ch);
+      if (low) {
+        if (low && isHighSurrogate(s.charAt(i-1))) {
+          i--;
+        }
+      }
+    }
+    s.insert(i, toString(c));
+  }
+  
+  public static void setChar(StringBuffer s, int i, int c) {
+    int l = 1;
+    char ch = s.charAt(i);
+    boolean high = isHighSurrogate(ch);
+    boolean low = isLowSurrogate(ch);
+    if (high || low) {
+      if (high && (i+1) < s.length() && isLowSurrogate(s.charAt(i+1))) l++;
+      else {
+        if (low && i > 0 && isHighSurrogate(s.charAt(i-1))) {
+          i--; l++;
+        }
+      }
+    }
+    s.replace(i, i+l, toString(c));
+  }
+  
+  public static int size(int c) {
+    return (isSupplementary(c)) ? 2 : 1;
+  }
+  
+  private static String supplementaryToString(int c) {
+    StringBuffer buf = new StringBuffer();
+    buf.append((char)getHighSurrogate(c));
+    buf.append((char)getLowSurrogate(c));
+    return buf.toString();
+  }
+  
+  public static String toString(int c) {
+    return (isSupplementary(c)) ? 
+      supplementaryToString(c) : 
+      String.valueOf((char)c);
+  }
+  
+}

Added: incubator/abdera/java/trunk/core/src/main/java/org/apache/abdera/util/io/CodepointIterator.java
URL: http://svn.apache.org/viewvc/incubator/abdera/java/trunk/core/src/main/java/org/apache/abdera/util/io/CodepointIterator.java?view=auto&rev=448818
==============================================================================
--- incubator/abdera/java/trunk/core/src/main/java/org/apache/abdera/util/io/CodepointIterator.java (added)
+++ incubator/abdera/java/trunk/core/src/main/java/org/apache/abdera/util/io/CodepointIterator.java Thu Sep 21 22:40:01 2006
@@ -0,0 +1,167 @@
+/*
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements.  The ASF licenses this file to You
+* under the Apache License, Version 2.0 (the "License"); you may not
+* use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.  For additional information regarding
+* copyright in this work, please see the NOTICE file in the top level
+* directory of this distribution.
+*/
+package org.apache.abdera.util.io;
+
+import java.nio.CharBuffer;
+
+/**
+ * Iterate over Unicode codepoints 
+ */
+public abstract class CodepointIterator {
+
+  public static CodepointIterator forCharArray(char[] array) {
+    return new CharArrayCodepointIterator(array);
+  }
+  
+  public static CodepointIterator forCharSequence(CharSequence seq) {
+    return new CharSequenceCodepointIterator(seq);
+  }
+  
+  public static CodepointIterator forByteArray(byte[] array) {
+    return new ByteArrayCodepointIterator(array);
+  }
+  
+  public static CodepointIterator forCharBuffer(CharBuffer buffer) {
+    return new CharBufferCodepointIterator(buffer);
+  }
+  
+  protected int position = -1;
+  protected int limit = -1;
+  
+  protected abstract char get();
+  
+  protected abstract char get(int index);
+  
+  public boolean hasNext() {
+    return remaining() > 0;
+  }
+
+  public int last() {
+    return (position() > 0) ? get(position() - 1) : -1;
+  }
+  
+  public int lastPosition() {
+    int p = position();
+    return (p > -1) ? 
+      (p >= limit()) ? p : p - 1 : -1;
+  }
+  
+  public char[] nextChars() throws InvalidCharacterException {
+    if (hasNext()) {
+      if (isNextSurrogate()) {
+        char c1 = get();
+        if (CharUtils.isHighSurrogate(c1) && position() < limit()) {
+          char c2 = get();
+          if (CharUtils.isLowSurrogate(c2)) {
+            return new char[] {c1,c2};
+          } else {
+            throw new InvalidCharacterException(c2);
+          }
+        } else if (CharUtils.isLowSurrogate(c1) && position() > 0) {
+          char c2 = get(position()-2);
+          if (CharUtils.isHighSurrogate(c2)) {
+            return new char[] {c1,c2};
+          } else {
+            throw new InvalidCharacterException(c2);
+          }
+        }
+      }
+      return new char[] {get()}; 
+    } 
+    return null;
+  }
+  
+  public char[] peekChars() throws InvalidCharacterException {
+    return peekChars(position());
+  }
+  
+  private char[] peekChars(int pos) throws InvalidCharacterException {
+    if (pos < 0 || pos >= limit()) return null;
+    char c1 = get(pos);
+    if (CharUtils.isHighSurrogate(c1) && pos < limit()) {
+      char c2 = get(pos+1);
+      if (CharUtils.isLowSurrogate(c2)) {
+        return new char[] {c1,c2};
+      } else {
+        throw new InvalidCharacterException(c2);
+      }
+    } else if (CharUtils.isLowSurrogate(c1) && pos > 1) {
+      char c2 = get(pos-1);
+      if (CharUtils.isHighSurrogate(c2)) {
+        return new char[] {c2,c1};
+      } else {
+        throw new InvalidCharacterException(c2);
+      }
+    } else  return new char[] {c1}; 
+  }
+  
+  public int next() throws InvalidCharacterException {
+    char[] chars = nextChars();
+    return (chars == null) ? -1 :
+      (chars.length == 1) ? chars[0] :
+      CharUtils.toCodePoint(chars[0], chars[1]);
+  }
+
+  public int peek() throws InvalidCharacterException {
+    char[] chars = peekChars();
+    return (chars == null) ? -1 :
+      (chars.length == 1) ? chars[0] :
+      CharUtils.toCodePoint(chars[0], chars[1]);
+  }
+  
+  public int peek(int index) throws InvalidCharacterException {
+    char[] chars = peekChars(index);
+    return (chars == null) ? -1 :
+      (chars.length == 1) ? chars[0] :
+      CharUtils.toCodePoint(chars[0], chars[1]);
+  }
+  
+  public void position(int n) {
+    if (n < 0 || n > limit()) throw new ArrayIndexOutOfBoundsException(n);
+    position = n;
+  }
+  
+  public int position() {
+    return position;
+  }
+
+  public int limit() {
+    return limit;
+  }
+  
+  public int remaining() {
+    return limit - position();
+  }
+  
+  private boolean isNextSurrogate() {
+    if (!hasNext()) return false;
+    char c = get(position());
+    return CharUtils.isHighSurrogate(c) || CharUtils.isLowSurrogate(c);
+  }
+
+  public boolean isHigh(int index) {
+    if (index < 0 || index > limit()) throw new ArrayIndexOutOfBoundsException(index);
+    return CharUtils.isHighSurrogate(get(index));
+  }
+
+  public boolean isLow(int index) {
+    if (index < 0 || index > limit()) throw new ArrayIndexOutOfBoundsException(index);
+    return CharUtils.isLowSurrogate(get(index));
+  }
+  
+}

Added: incubator/abdera/java/trunk/core/src/main/java/org/apache/abdera/util/io/DynamicPushbackInputStream.java
URL: http://svn.apache.org/viewvc/incubator/abdera/java/trunk/core/src/main/java/org/apache/abdera/util/io/DynamicPushbackInputStream.java?view=auto&rev=448818
==============================================================================
--- incubator/abdera/java/trunk/core/src/main/java/org/apache/abdera/util/io/DynamicPushbackInputStream.java (added)
+++ incubator/abdera/java/trunk/core/src/main/java/org/apache/abdera/util/io/DynamicPushbackInputStream.java Thu Sep 21 22:40:01 2006
@@ -0,0 +1,112 @@
+/*
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements.  The ASF licenses this file to You
+* under the Apache License, Version 2.0 (the "License"); you may not
+* use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.  For additional information regarding
+* copyright in this work, please see the NOTICE file in the top level
+* directory of this distribution.
+*/
+package org.apache.abdera.util.io;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.PushbackInputStream;
+
+/**
+ * PushbackInputStream implementation that performs dynamic resizing of
+ * the unread buffer.  Unlike the base class, unread() never fails because
+ * the pushback buffer is full: the buffer is enlarged at its front as
+ * needed, and it is shrunk back towards its original size once all of the
+ * pushed back bytes have been consumed again.
+ */
+public class DynamicPushbackInputStream extends PushbackInputStream {
+
+  /** Size the pushback buffer is restored to by clear() and shrink(). */
+  private final int origsize;
+  
+  /**
+   * Creates a stream with a one byte pushback buffer.
+   */
+  public DynamicPushbackInputStream(InputStream in) {
+    super(in);
+    this.origsize = 1;
+  }
+
+  /**
+   * Creates a stream whose pushback buffer initially holds initialSize
+   * bytes.
+   */
+  public DynamicPushbackInputStream(InputStream in, int initialSize) {
+    super(in,initialSize);
+    this.origsize = initialSize;
+  }
+
+  /**
+   * Discards every pushed back byte and restores the buffer to its
+   * original size.
+   *
+   * @return the length of the buffer before it was replaced
+   */
+  public synchronized int clear() {
+    int m = buf.length;
+    buf = new byte[origsize];
+    pos = origsize;
+    return m;
+  }
+  
+  /**
+   * Drops the unused front part of the pushback buffer while keeping the
+   * bytes that are still pushed back.  The buffer never becomes smaller
+   * than its original size.
+   *
+   * @return the number of bytes the buffer length was reduced by
+   */
+  public synchronized int shrink() {
+    byte[] old = buf;
+    if (pos == 0) return 0; // nothing to do
+    int n = old.length - pos; // bytes still pushed back
+    int m, p,s,l;
+    if (n < origsize) {
+      // pending bytes fit into a buffer of the original size; they are
+      // right-aligned because the pushback buffer is consumed front to back
+      buf = new byte[origsize];
+      p = pos;
+      s = origsize - n;
+      l = old.length-p;
+      m = old.length - origsize;
+      pos = s;
+    } else {
+      // keep exactly the pending bytes
+      buf = new byte[n];
+      p = pos;
+      s = 0;
+      l = n;
+      m = old.length - l;
+      pos = 0;
+    }
+    System.arraycopy(old, p, buf, s, l);
+    return m;
+  }
+  
+  /**
+   * Enlarges the buffer by len bytes at its front.  The existing content
+   * moves towards the end, so the caller has to advance pos by len.
+   */
+  private void resize(int len) {
+    byte[] old = buf;
+    buf = new byte[old.length + len];
+    System.arraycopy(old, 0, buf, len, old.length);
+  }
+
+  /**
+   * Pushes back len bytes of b starting at off, growing the buffer when
+   * fewer than len free slots are available.
+   */
+  public synchronized void unread(byte[] b, int off, int len) throws IOException {
+    // The base class needs len free slots in front of pos.  Whenever they
+    // are missing the buffer must grow, regardless of its current length;
+    // the former extra "pos + len > buf.length" test skipped the resize for
+    // a partially drained, previously enlarged buffer and made the base
+    // class fail with "Push back buffer is full".
+    // buf is null once the stream is closed; let the base class report that.
+    if (buf != null && len > pos) {
+      int shortfall = len - pos;
+      resize(shortfall);
+      pos += shortfall;
+    }
+    super.unread(b, off, len);
+  }
+
+  /**
+   * Pushes back a single byte, growing the buffer by one slot when it is
+   * full.
+   */
+  public synchronized void unread(int b) throws IOException {
+    if (buf != null && pos == 0) {
+      resize(1);
+      pos++;
+    }
+    super.unread(b);
+  }
+  
+  /**
+   * Reads one byte and shrinks an enlarged buffer once it has been
+   * drained.
+   */
+  public synchronized int read() throws IOException {
+    int m = super.read();
+    if (pos >= buf.length && buf.length > origsize) shrink();
+    return m;
+  }
+
+  /**
+   * Reads up to len bytes into b and shrinks an enlarged buffer once it
+   * has been drained.
+   */
+  public synchronized int read(byte[] b, int off, int len) throws IOException {
+    this.available(); // workaround for a problem in PushbackInputStream, without this, the amount of bytes read from some streams will be incorrect
+    int r = super.read(b, off, len);
+    if (pos >= buf.length && buf.length > origsize) shrink();
+    return r;
+  }
+
+  /**
+   * Skips up to n bytes and shrinks an enlarged buffer once it has been
+   * drained.
+   */
+  public synchronized long skip(long n) throws IOException {
+    long r = super.skip(n);
+    if (pos >= buf.length && buf.length > origsize) shrink();
+    return r;
+  }
+}

Added: incubator/abdera/java/trunk/core/src/main/java/org/apache/abdera/util/io/FilterCodepointIterator.java
URL: http://svn.apache.org/viewvc/incubator/abdera/java/trunk/core/src/main/java/org/apache/abdera/util/io/FilterCodepointIterator.java?view=auto&rev=448818
==============================================================================
--- incubator/abdera/java/trunk/core/src/main/java/org/apache/abdera/util/io/FilterCodepointIterator.java (added)
+++ incubator/abdera/java/trunk/core/src/main/java/org/apache/abdera/util/io/FilterCodepointIterator.java Thu Sep 21 22:40:01 2006
@@ -0,0 +1,103 @@
+/*
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements.  The ASF licenses this file to You
+* under the Apache License, Version 2.0 (the "License"); you may not
+* use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.  For additional information regarding
+* copyright in this work, please see the NOTICE file in the top level
+* directory of this distribution.
+*/
+package org.apache.abdera.util.io;
+
+/**
+ * Base implementation of a CodepointIterator that filters the output of
+ * another CodepointIterator.  Every operation overridden here is forwarded
+ * unchanged to the wrapped iterator; subclasses override the operations
+ * they want to filter.
+ */
+public abstract class FilterCodepointIterator 
+  extends CodepointIterator {
+
+  /** The wrapped iterator all calls are forwarded to. */
+  private final CodepointIterator delegate;
+  
+  /**
+   * @param internal the iterator to wrap
+   */
+  protected FilterCodepointIterator(CodepointIterator internal) {
+    this.delegate = internal;
+  }
+  
+  /** Forwards to the wrapped iterator. */
+  @Override
+  protected char get() {
+    return delegate.get();
+  }
+
+  /** Forwards to the wrapped iterator. */
+  @Override
+  protected char get(int index) {
+    return delegate.get(index);
+  }
+
+  /** Forwards to the wrapped iterator. */
+  @Override
+  public boolean hasNext() {
+    return delegate.hasNext();
+  }
+
+  /** Forwards to the wrapped iterator. */
+  @Override
+  public boolean isHigh(int index) {
+    return delegate.isHigh(index);
+  }
+
+  /** Forwards to the wrapped iterator. */
+  @Override
+  public boolean isLow(int index) {
+    return delegate.isLow(index);
+  }
+
+  /** Forwards to the wrapped iterator. */
+  @Override
+  public int limit() {
+    return delegate.limit();
+  }
+
+  /** Forwards to the wrapped iterator. */
+  @Override
+  public int next() throws InvalidCharacterException {
+    return delegate.next();
+  }
+
+  /** Forwards to the wrapped iterator. */
+  @Override
+  public char[] nextChars() throws InvalidCharacterException {
+    return delegate.nextChars();
+  }
+
+  /** Forwards to the wrapped iterator. */
+  @Override
+  public int peek() throws InvalidCharacterException {
+    return delegate.peek();
+  }
+
+  /** Forwards to the wrapped iterator. */
+  @Override
+  public int peek(int index) throws InvalidCharacterException {
+    return delegate.peek(index);
+  }
+
+  /** Forwards to the wrapped iterator. */
+  @Override
+  public char[] peekChars() throws InvalidCharacterException {
+    return delegate.peekChars();
+  }
+
+  /** Forwards to the wrapped iterator. */
+  @Override
+  public int position() {
+    return delegate.position();
+  }
+
+  /** Forwards to the wrapped iterator. */
+  @Override
+  public int remaining() {
+    return delegate.remaining();
+  }
+  
+  /** Forwards to the wrapped iterator. */
+  @Override
+  public void position(int position) {
+    delegate.position(position);
+  }
+
+}

Added: incubator/abdera/java/trunk/core/src/main/java/org/apache/abdera/util/io/InvalidCharacterException.java
URL: http://svn.apache.org/viewvc/incubator/abdera/java/trunk/core/src/main/java/org/apache/abdera/util/io/InvalidCharacterException.java?view=auto&rev=448818
==============================================================================
--- incubator/abdera/java/trunk/core/src/main/java/org/apache/abdera/util/io/InvalidCharacterException.java (added)
+++ incubator/abdera/java/trunk/core/src/main/java/org/apache/abdera/util/io/InvalidCharacterException.java Thu Sep 21 22:40:01 2006
@@ -0,0 +1,38 @@
+/*
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements.  The ASF licenses this file to You
+* under the Apache License, Version 2.0 (the "License"); you may not
+* use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.  For additional information regarding
+* copyright in this work, please see the NOTICE file in the top level
+* directory of this distribution.
+*/
+package org.apache.abdera.util.io;
+
+import java.io.IOException;
+
+public class InvalidCharacterException 
+  extends IOException {
+
+  private static final long serialVersionUID = -7150645484748059676L;
+  private int input;
+  
+  public InvalidCharacterException(int input) {
+    this.input = input;
+  }
+
+  @Override
+  public String getMessage() {
+    return "Invalid Character 0x" + Integer.toHexString(input);
+  }
+
+  
+}

Added: incubator/abdera/java/trunk/core/src/main/java/org/apache/abdera/util/io/PeekAheadInputStream.java
URL: http://svn.apache.org/viewvc/incubator/abdera/java/trunk/core/src/main/java/org/apache/abdera/util/io/PeekAheadInputStream.java?view=auto&rev=448818
==============================================================================
--- incubator/abdera/java/trunk/core/src/main/java/org/apache/abdera/util/io/PeekAheadInputStream.java (added)
+++ incubator/abdera/java/trunk/core/src/main/java/org/apache/abdera/util/io/PeekAheadInputStream.java Thu Sep 21 22:40:01 2006
@@ -0,0 +1,54 @@
+package org.apache.abdera.util.io;
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  The ASF licenses this file to You
+ * under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.  For additional information regarding
+ * copyright in this work, please see the NOTICE file in the top level
+ * directory of this distribution.
+ */
+
+import java.io.IOException;
+import java.io.InputStream;
+
+/**
+ * A version of RewindableInputStream that provides methods for peeking ahead 
+ * in the stream (equivalent to read() followed by an appropriate unread()).
+ */
+public class PeekAheadInputStream 
+  extends RewindableInputStream {
+  
+  public PeekAheadInputStream(InputStream in) {
+    super(in);
+  }
+  
+  public PeekAheadInputStream(InputStream in, int initialSize) {
+    super(in,initialSize);
+  }
+
+  /**
+   * Returns the next byte without consuming it.
+   *
+   * @return the next byte, or -1 at end of stream
+   */
+  public synchronized int peek() throws IOException {
+    int m = read();
+    // -1 is the end-of-stream marker, not data; pushing it back would
+    // inject a spurious 0xFF byte into the stream
+    if (m != -1) unread(m);
+    return m;
+  }
+  
+  /**
+   * Fills buf with upcoming bytes without consuming them.
+   *
+   * @return the number of bytes placed in buf, or -1 at end of stream
+   */
+  public synchronized int peek(byte[] buf) throws IOException {
+    return peek(buf, 0, buf.length);
+  }
+  
+  /**
+   * Copies up to len upcoming bytes into buf starting at off without
+   * consuming them.
+   *
+   * @return the number of bytes placed in buf, or -1 at end of stream
+   */
+  public synchronized int peek(byte[] buf, int off, int len) throws IOException {
+    int r = read(buf, off, len);
+    // only the r bytes that were actually read may be pushed back; a short
+    // read or end of stream leaves the rest of buf untouched
+    if (r > 0) unread(buf,off,r);
+    return r;
+  }
+
+}

Added: incubator/abdera/java/trunk/core/src/main/java/org/apache/abdera/util/io/ReadWriteByteChannel.java
URL: http://svn.apache.org/viewvc/incubator/abdera/java/trunk/core/src/main/java/org/apache/abdera/util/io/ReadWriteByteChannel.java?view=auto&rev=448818
==============================================================================
--- incubator/abdera/java/trunk/core/src/main/java/org/apache/abdera/util/io/ReadWriteByteChannel.java (added)
+++ incubator/abdera/java/trunk/core/src/main/java/org/apache/abdera/util/io/ReadWriteByteChannel.java Thu Sep 21 22:40:01 2006
@@ -0,0 +1,179 @@
+package org.apache.abdera.util.io;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.io.Reader;
+import java.io.Serializable;
+import java.io.Writer;
+import java.nio.ByteBuffer;
+import java.nio.channels.Channels;
+import java.nio.channels.ReadableByteChannel;
+import java.nio.channels.WritableByteChannel;
+
+/**
+ * Implements a buffer that provides a slightly more efficient way of writing,
+ * and then reading a stream of bytes.  
+ * 
+ * To use:
+ * 
+ *   ReadWriteByteChannel rw = new ReadWriteByteChannel();
+ *   OutputStream out = rw.getOutputStream();
+ *   out.write(bytes);
+ *   out.close();
+ * 
+ *   InputStream in = rw.getInputStream();
+ *   int i = -1;
+ *   while ((i = in.read()) != -1) {...}
+ * 
+ * By default, closing the OutputStream will automatically cause it to
+ * flip over to Read mode, locking the buffer from further writes and 
+ * setting the read position to 0.
+ * 
+ * Once the Buffer has been fully read, it must be reset, which sets it 
+ * back into write mode and moves the position pointer back to 0;
+ * 
+ */
+public class ReadWriteByteChannel 
+  implements ReadableByteChannel,
+             WritableByteChannel,
+             Cloneable, 
+             Serializable {
+
+  private static final long serialVersionUID = 5984202999779004084L;
+  private static final int INITIAL_CAPACITY = 32;
+  protected int position = 0;
+  protected int scale = INITIAL_CAPACITY;
+  protected boolean closed = false;
+  protected byte[] buffer = null;
+  protected boolean flipped = false;
+  protected boolean flipOnClose = true;
+  
+  public ReadWriteByteChannel() {
+    this(INITIAL_CAPACITY);
+  }
+  
+  public ReadWriteByteChannel(int capacity) {
+    this(capacity, true);
+  }
+  
+  public ReadWriteByteChannel(int capacity, boolean flipOnClose) {
+    grow(capacity);
+    this.scale = capacity;
+    this.flipOnClose = flipOnClose;
+  }
+  
+  public Object clone() throws CloneNotSupportedException {
+    return super.clone();
+  }
+  
+  public InputStream getInputStream() {
+    if (!flipped) notflipped();
+    return Channels.newInputStream(this);
+  }
+  
+  public OutputStream getOutputStream() {
+    if (flipped) alreadyflipped();
+    return Channels.newOutputStream(this);
+  }
+  
+  public Writer getWriter(String charset) {
+    if (flipped) alreadyflipped();
+    return Channels.newWriter(this, charset);
+  }
+  
+  public Reader getReader(String charset) {
+    if (!flipped) notflipped();
+    return Channels.newReader(this, charset);
+  }
+  
+  public byte[] getBuffer() {
+    if (!flipped) notflipped(); 
+    return buffer;
+  }
+  
+  public CodepointIterator getIterator() {
+    if (!flipped) notflipped();
+    return new ByteArrayCodepointIterator(buffer);
+  }
+  
+  public CodepointIterator getIterator(String charset) {
+    if (!flipped) notflipped();
+    return new ByteArrayCodepointIterator(buffer,charset);
+  }
+  
+  private void grow(int capacity)  {
+    if (buffer == null) {
+      buffer = new byte[capacity];
+      return;
+    } else {
+      byte[] buf =  new byte[buffer.length + capacity];
+      System.arraycopy(buffer, 0, buf, 0, buffer.length);
+      buffer = buf;
+    }
+  }
+
+  private void compact() {
+    if (buffer != null) {
+      byte[] buf = new byte[position];
+      System.arraycopy(buffer,0,buf,0,position);
+      buffer = buf;
+    }
+  }
+  
+  public synchronized int read(ByteBuffer dst) throws IOException {
+    if (!flipped) notflipped();
+    if (dst.hasRemaining() && position < buffer.length) {
+      int r = Math.min(dst.remaining(), buffer.length - position);
+      dst.put(buffer, position, r);
+      dst.flip();
+      position += r;
+      return r;
+    }
+    return -1;
+  }
+
+  public synchronized void flip() {
+    if (flipped) alreadyflipped();
+    compact();
+    position = 0;
+    flipped = true;
+  }
+  
+  public synchronized void rewind() {
+    position = 0;
+  }
+  
+  public synchronized void reset() {
+    position = 0;
+    compact();   // clear the buffer
+    grow(scale); // grow the buffer
+    flipped = false;
+  }
+  
+  public void close() throws IOException {
+    if (flipOnClose) flip();
+  }
+  
+  public boolean isOpen() {
+    return true;
+  }
+
+  public synchronized int write(ByteBuffer src) throws IOException {
+    if (flipped) alreadyflipped();
+    if (src.hasRemaining()) {
+      int r = Math.min(src.remaining(), buffer.length - position);
+      src.get(buffer, position, r);
+      position += r;
+      return r;
+    }
+    return -1;
+  }
+
+  private void alreadyflipped() {
+    throw new IllegalStateException("The buffer has already been flipped");
+  }
+  
+  private void notflipped() {
+    throw new IllegalStateException("The buffer has not yet been flipped");
+  }
+}

Added: incubator/abdera/java/trunk/core/src/main/java/org/apache/abdera/util/io/RestrictedCodepointIterator.java
URL: http://svn.apache.org/viewvc/incubator/abdera/java/trunk/core/src/main/java/org/apache/abdera/util/io/RestrictedCodepointIterator.java?view=auto&rev=448818
==============================================================================
--- incubator/abdera/java/trunk/core/src/main/java/org/apache/abdera/util/io/RestrictedCodepointIterator.java (added)
+++ incubator/abdera/java/trunk/core/src/main/java/org/apache/abdera/util/io/RestrictedCodepointIterator.java Thu Sep 21 22:40:01 2006
@@ -0,0 +1,123 @@
+/*
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements.  The ASF licenses this file to You
+* under the Apache License, Version 2.0 (the "License"); you may not
+* use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.  For additional information regarding
+* copyright in this work, please see the NOTICE file in the top level
+* directory of this distribution.
+*/
+package org.apache.abdera.util.io;
+
+import java.util.BitSet;
+
+import org.apache.abdera.util.ChainableBitSet;
+
+
+/**
+ * A CodepointIterator implementation that checks output against a BitSet.
+ * If the iterator is set to "scanning only", the iterator will return -1
+ * upon encountering a codepoint not in the set, otherwise the iterator 
+ * will throw an InvalidCharacterException
+ */
+public class RestrictedCodepointIterator 
+  extends FilterCodepointIterator {
+
+  private BitSet bitset;
+  private boolean scanningOnly = false;
+  private boolean notset = false;
+
+  protected RestrictedCodepointIterator(
+    CodepointIterator internal, 
+    BitSet bitset) {
+      this(internal,bitset,false);
+  }
+
+  protected RestrictedCodepointIterator(
+    CodepointIterator internal, 
+    BitSet bitset,
+    boolean scanningOnly) {
+      this(internal, bitset, scanningOnly, false);
+  }
+  
+  protected RestrictedCodepointIterator(
+      CodepointIterator internal, 
+      BitSet bitset,
+      boolean scanningOnly,
+      boolean notset) {
+      super(internal);
+      this.bitset = bitset;
+      this.scanningOnly = scanningOnly;
+      this.notset = notset;
+    }
+
+  public boolean hasNext() {
+    boolean b = super.hasNext();
+    if (scanningOnly) {
+      try {
+        int cp = peek(position());
+        if (b && cp != -1 && check(cp)) return false;
+      } catch (InvalidCharacterException e) { return false; }
+    } 
+    return b;
+  }
+  
+  @Override
+  public int next() throws InvalidCharacterException {
+    int cp = super.next();
+    if (cp != -1 && check(cp)) {
+      if (scanningOnly) {
+        position(position()-1);
+        return -1;
+      }
+      else throw new InvalidCharacterException(cp);
+    }
+    return cp;
+  }
+
+  private boolean check(int cp) {
+    return (!notset) ? !bitset.get(cp) : bitset.get(cp);
+  }
+  
+  @Override
+  public char[] nextChars() throws InvalidCharacterException {
+    char[] chars = super.nextChars();
+    if (chars != null && chars.length > 0) {
+      if (chars.length == 1 && check(chars[0])) {
+        if (scanningOnly) {
+          position(position()-1);
+          return null;
+        }
+        else throw new InvalidCharacterException(chars[0]);
+      } else if (chars.length == 2) {
+        int cp = CharUtils.toCodePoint(chars);
+        if (check(cp)) {
+          if (scanningOnly) {
+            position(position()-2);
+            return null; 
+          }
+          else throw new InvalidCharacterException(cp);
+        }
+      }
+    }
+    return chars;
+  }
+ 
+  public static void main(String... args) throws Exception {
+    
+    ChainableBitSet set = new ChainableBitSet().set2('a','b','c');
+    char[] c = {'a','b','c',CharUtils.getHighSurrogate(0x10000),CharUtils.getLowSurrogate(0x10000)};
+    
+    CodepointIterator ci = CodepointIterator.forCharArray(c);
+    RestrictedCodepointIterator rci = new RestrictedCodepointIterator(ci,set,false,true);
+    while(rci.hasNext()) System.out.println(rci.next());
+  }
+}

Added: incubator/abdera/java/trunk/core/src/main/java/org/apache/abdera/util/io/RewindableInputStream.java
URL: http://svn.apache.org/viewvc/incubator/abdera/java/trunk/core/src/main/java/org/apache/abdera/util/io/RewindableInputStream.java?view=auto&rev=448818
==============================================================================
--- incubator/abdera/java/trunk/core/src/main/java/org/apache/abdera/util/io/RewindableInputStream.java (added)
+++ incubator/abdera/java/trunk/core/src/main/java/org/apache/abdera/util/io/RewindableInputStream.java Thu Sep 21 22:40:01 2006
@@ -0,0 +1,107 @@
+/*
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements.  The ASF licenses this file to You
+* under the Apache License, Version 2.0 (the "License"); you may not
+* use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.  For additional information regarding
+* copyright in this work, please see the NOTICE file in the top level
+* directory of this distribution.
+*/
+package org.apache.abdera.util.io;
+
+import java.io.IOException;
+import java.io.InputStream;
+
+/**
+ * RewindableInputStream is a specialization of the PushbackInputStream
+ * that maintains an internal buffer of read bytes that a user can 
+ * rewind (unread) back into the stream without having to do their 
+ * own buffer management.  The rewind buffer grows dynamically.
+ * Bytes passed over with skip() are not recorded.
+ */
+public class RewindableInputStream 
+  extends DynamicPushbackInputStream {
+
+  private static final int INITIAL_CAPACITY = 32;
+  /** Record of the bytes handed out by the read methods. */
+  private byte[] buffer;
+  /** Number of valid bytes in buffer; the next byte is recorded here. */
+  private int position;
+  /** Minimum increment used when the record has to grow. */
+  private int scale = INITIAL_CAPACITY;
+  
+  public RewindableInputStream(InputStream in) {
+    this(in,INITIAL_CAPACITY);
+  }
+  
+  /**
+   * @param capacity initial size of the rewind buffer, also used as its
+   *        growth increment
+   */
+  public RewindableInputStream(InputStream in, int capacity) {
+    super(in);
+    this.buffer = new byte[capacity];
+    this.scale = capacity;
+  }
+
+  /** Makes sure the record can hold at least needed bytes. */
+  private void ensureCapacity(int needed)  {
+    if (needed <= buffer.length) return;
+    // grow by at least scale bytes (and at least one, so that a capacity
+    // of 0 still works) to limit the number of reallocations
+    int size = Math.max(needed, buffer.length + Math.max(scale, 1));
+    byte[] buf = new byte[size];
+    System.arraycopy(buffer, 0, buf, 0, position);
+    buffer = buf;
+  }
+  
+  /**
+   * Pushes every recorded byte back into the stream and empties the
+   * record.  Does nothing when no bytes are recorded.
+   */
+  public void rewind() throws IOException {
+    if (position == 0) return;
+    unread(buffer,0,position);
+    position = 0;
+    // unread() copied the bytes, so an enlarged record can be released
+    if (buffer.length > scale) buffer = new byte[scale];
+  }
+  
+  /**
+   * Pushes len recorded bytes starting at offset back into the stream and
+   * shortens the record by len bytes from its end.  Callers normally pass
+   * the tail of the record, as rewind(int) does.  Does nothing when no
+   * bytes are recorded.
+   *
+   * @throws ArrayIndexOutOfBoundsException if the range is not within
+   *         the recorded bytes
+   */
+  public void rewind(int offset, int len) throws IOException {
+    if (position == 0) return;
+    if (offset < 0 || offset > position) 
+      throw new ArrayIndexOutOfBoundsException(offset);
+    if (len < 0 || len > position - offset)
+      throw new ArrayIndexOutOfBoundsException(offset + len);
+    unread(buffer,offset,len);
+    position -= len;
+  }
+  
+  /**
+   * Pushes the last len recorded bytes back into the stream.  Does
+   * nothing when no bytes are recorded.
+   *
+   * @throws ArrayIndexOutOfBoundsException if len exceeds the number of
+   *         recorded bytes or is negative
+   */
+  public void rewind(int len) throws IOException {
+    if (position == 0) return;
+    // count from the recorded bytes, not from the allocated array length
+    rewind(position-len,len);
+  }
+  
+  /** Reads one byte and records it for a later rewind. */
+  public int read() throws IOException {
+    int i = super.read();
+    if (i != -1) {
+      ensureCapacity(position + 1);
+      buffer[position++] = (byte) i;
+    }
+    return i;
+  }
+
+  /** Reads up to len bytes into b and records them for a later rewind. */
+  public int read(byte[] b, int off, int len) throws IOException {
+    int r = super.read(b, off, len);
+    if (r > 0) {
+      ensureCapacity(position + r);
+      System.arraycopy(b, off, buffer, position, r);
+      position += r; // advance by everything recorded, not by one
+    }
+    return r;
+  }
+
+  /** Skips bytes without recording them. */
+  public long skip(long n) throws IOException {
+    return super.skip(n);
+  }
+  
+}

Added: incubator/abdera/java/trunk/core/src/main/java/org/apache/abdera/util/iri/AbstractScheme.java
URL: http://svn.apache.org/viewvc/incubator/abdera/java/trunk/core/src/main/java/org/apache/abdera/util/iri/AbstractScheme.java?view=auto&rev=448818
==============================================================================
--- incubator/abdera/java/trunk/core/src/main/java/org/apache/abdera/util/iri/AbstractScheme.java (added)
+++ incubator/abdera/java/trunk/core/src/main/java/org/apache/abdera/util/iri/AbstractScheme.java Thu Sep 21 22:40:01 2006
@@ -0,0 +1,56 @@
+/*
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements.  The ASF licenses this file to You
+* under the Apache License, Version 2.0 (the "License"); you may not
+* use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.  For additional information regarding
+* copyright in this work, please see the NOTICE file in the top level
+* directory of this distribution.
+*/
+package org.apache.abdera.util.iri;
+
+import org.apache.abdera.util.io.CodepointIterator;
+
+/**
+ * Base implementation for IRI scheme providers.  It stores the scheme
+ * name and supplies default equivalence and parsing behaviour.
+ */
+public abstract class AbstractScheme 
+  implements Scheme {
+
+  /** The name handed to the constructor. */
+  protected final String name;
+  
+  protected AbstractScheme(String name) {
+    this.name = name;
+  }
+  
+  /**
+   * @return the name this scheme was created with
+   */
+  public String getName() {
+    return name;
+  }
+
+  /**
+   * Default to use normalization-based comparison: both IRIs are
+   * normalized and their ASCII string forms are compared.
+   */
+  public boolean equivalent(IRI iri1, IRI iri2) {
+    final String second = iri2.normalize().toASCIIString();
+    final String first = iri1.normalize().toASCIIString();
+    final int order = first.compareTo(second);
+    return 0 == order;
+  }
+  
+  /**
+   * Default to use default parsing; this implementation always returns
+   * false and touches neither argument.
+   */
+  public boolean parse(
+    CodepointIterator reader, 
+    Builder builder) {
+      return false;
+  }
+
+}

Added: incubator/abdera/java/trunk/core/src/main/java/org/apache/abdera/util/iri/Builder.java
URL: http://svn.apache.org/viewvc/incubator/abdera/java/trunk/core/src/main/java/org/apache/abdera/util/iri/Builder.java?view=auto&rev=448818
==============================================================================
--- incubator/abdera/java/trunk/core/src/main/java/org/apache/abdera/util/iri/Builder.java (added)
+++ incubator/abdera/java/trunk/core/src/main/java/org/apache/abdera/util/iri/Builder.java Thu Sep 21 22:40:01 2006
@@ -0,0 +1,36 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  The ASF licenses this file to You
+ * under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.  For additional information regarding
+ * copyright in this work, please see the NOTICE file in the top level
+ * directory of this distribution.
+ */
+package org.apache.abdera.util.iri;
+
+/**
+ * Interface implemented by the IRI builder (used by custom scheme parsers
+ * to set the parsed IRI data).  Each callback reports one IRI component
+ * as a pair of ints; presumably s is the start offset and l the length
+ * of the component within the parsed input -- TODO confirm against the
+ * implementing builder.
+ */
+public interface Builder {
+
+  /** Reports the scheme component. */
+  void scheme(int s, int l);
+
+  /** Reports the authority component. */
+  void authority(int s, int l);
+
+  /** Reports the path component. */
+  void path(int s, int l);
+
+  /** Reports the query component. */
+  void query(int s, int l);
+
+  /** Reports the fragment component. */
+  void fragment(int s, int l);
+
+}
\ No newline at end of file

Added: incubator/abdera/java/trunk/core/src/main/java/org/apache/abdera/util/iri/Constants.java
URL: http://svn.apache.org/viewvc/incubator/abdera/java/trunk/core/src/main/java/org/apache/abdera/util/iri/Constants.java?view=auto&rev=448818
==============================================================================
--- incubator/abdera/java/trunk/core/src/main/java/org/apache/abdera/util/iri/Constants.java (added)
+++ incubator/abdera/java/trunk/core/src/main/java/org/apache/abdera/util/iri/Constants.java Thu Sep 21 22:40:01 2006
@@ -0,0 +1,131 @@
+/*
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements.  The ASF licenses this file to You
+* under the Apache License, Version 2.0 (the "License"); you may not
+* use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.  For additional information regarding
+* copyright in this work, please see the NOTICE file in the top level
+* directory of this distribution.
+*/
+package org.apache.abdera.util.iri;
+
+import java.util.BitSet;
+
+import org.apache.abdera.util.ChainableBitSet;
+
+
+/**
+ * Character-class tables used by the IRI support code. Every table is a
+ * {@link BitSet} assembled through {@link ChainableBitSet#set2} calls from
+ * single characters, strings of characters, code point ranges and other
+ * tables.
+ */
+public class Constants {
+
+  /**
+   * Builds a fresh set from the single character {@code c}.
+   *
+   * @param c the character to put in the set
+   * @return a new set created via {@code ChainableBitSet.set2(c)}
+   */
+  public final static BitSet get(char c) {
+    return (new ChainableBitSet()).set2(c);
+  }
+  
+  /** Upper-case hexadecimal digits, indexed by nibble value (0 to 15). */
+  public final static char[] hex = {
+    '0', '1', '2', '3', '4', '5', '6', '7', 
+    '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' };
+
+  // Single delimiters and the set of component separators.
+  public final static BitSet COLON      = new ChainableBitSet().set2(':');
+  public final static BitSet QUERYMARK  = new ChainableBitSet().set2('?');
+  public final static BitSet HASH       = new ChainableBitSet().set2('#');
+  public final static BitSet SLASH      = new ChainableBitSet().set2('/');
+  public final static BitSet SEPS       = new ChainableBitSet().set2("/?#");
+  
+  // Basic ASCII classes.
+  public final static BitSet DIGIT      = new ChainableBitSet().set2('0','9');
+  public final static BitSet ALPHA      = new ChainableBitSet().set2('A', 'Z').set2('a','z');
+  public final static BitSet ALPHANUM   = new ChainableBitSet().set2(DIGIT).set2(ALPHA);
+  public final static BitSet HEX        = new ChainableBitSet().set2(DIGIT).set2('A','F').set2('a','f');
+  public final static BitSet MARK       = new ChainableBitSet().set2("-_.!~*\'()");
+  public final static BitSet UNRESERVED = new ChainableBitSet().set2(ALPHANUM).set2(MARK);
+  public final static BitSet RESERVED   = new ChainableBitSet().set2(";/?:@&=+$,[]");
+  // NOTE(review): only the value 0 is added here; the intended meaning of
+  // this table is not visible from this file.
+  public final static BitSet ESCAPED    = new ChainableBitSet().set2(0);
+  public final static BitSet GENDELIMS  = new ChainableBitSet().set2(":/?#[]@");
+  // RFC 3986 sub-delims: "!" "$" "&" "'" "(" ")" "*" "+" "," ";" "=".
+  // The literal used to be "!$&\\'()*+,;=", whose doubled backslash put a
+  // backslash character into the set (and therefore into USERINFO).
+  public final static BitSet SUBDELIMS  = new ChainableBitSet().set2("!$&'()*+,;=");
+  public final static BitSet PCHAR      = new ChainableBitSet().set2(UNRESERVED).set2(ESCAPED).set2(":@&=+$,");
+  public final static BitSet PATH       = new ChainableBitSet().set2(PCHAR).set2(";/%");
+  public final static BitSet SCHEME     = new ChainableBitSet().set2(ALPHA).set2(DIGIT).set2("+-.");
+  // RFC 3986: userinfo = *( unreserved / pct-encoded / sub-delims / ":" )
+  // NOTE(review): ':' is not added below although the grammar allows it
+  // (IUSERINFO does add it); confirm whether that is intentional.
+  public final static BitSet USERINFO   = new ChainableBitSet().set2(UNRESERVED).set2(SUBDELIMS).set2('%').set2(HEX);
+  // NOTE(review): set2('/','?') has the same shape as the range calls used for
+  // DIGIT and ALPHA above. If it resolves to the range overload it covers
+  // every character from '/' to '?' (including '<' and '>') rather than just
+  // the two characters; compare IQUERY/IFRAGMENT, which pass a string. Left
+  // unchanged because callers may rely on the current contents.
+  public final static BitSet QUERY      = new ChainableBitSet().set2(PCHAR).set2('/','?');
+  public final static BitSet FRAGMENT   = new ChainableBitSet().set2(PCHAR).set2('/','?');
+  public final static BitSet PCTENC     = new ChainableBitSet().set2('%').set2(HEX);
+  
+  /** Bidirectional formatting characters (removed from IUNRESERVED below). */
+  public final static BitSet BIDI = new ChainableBitSet()
+  .set2('\u200E', // Left-to-right mark
+        '\u200F', // Right-to-left mark
+        '\u202A', // Left-to-right embedding
+        '\u202B', // Right-to-left embedding
+        '\u202D', // Left-to-right override
+        '\u202E', // Right-to-left override
+        '\u202C');// Pop directional formatting
+  
+  /** Code point ranges of the RFC 3987 "ucschar" production. */
+  public final static BitSet UCSCHAR    = new ChainableBitSet().set2('\u00A0', '\uD7FF')
+                                            .set2('\uF900','\uFDCF') 
+                                            .set2('\uFDF0','\uFFEF')
+                                            .set2(0x10000,0x1FFFD)
+                                            .set2(0x20000,0x2FFFD)
+                                            .set2(0x30000,0x3FFFD)
+                                            .set2(0x40000,0x4FFFD)
+                                            .set2(0x50000,0x5FFFD)
+                                            .set2(0x60000,0x6FFFD)
+                                            .set2(0x70000,0x7FFFD)
+                                            .set2(0x80000,0x8FFFD)
+                                            .set2(0x90000,0x9FFFD)
+                                            .set2(0xA0000,0xAFFFD)
+                                            .set2(0xB0000,0xBFFFD)
+                                            .set2(0xC0000,0xCFFFD)
+                                            .set2(0xD0000,0xDFFFD)
+                                            .set2(0xE1000,0xEFFFD);
+  
+  /** Code point ranges of the RFC 3987 "iprivate" production. */
+  public final static BitSet IPRIVATE   = new ChainableBitSet().set2('\uE000', '\uF8FF')
+                                            .set2(0xF0000,0xFFFFD)
+                                            .set2(0x100000,0x10FFFD);
+  
+  // IRI counterparts of the tables above: the same ASCII characters plus
+  // UCSCHAR, with the BIDI formatting characters switched off again.
+  public final static BitSet IUNRESERVED= new ChainableBitSet().set2(ALPHANUM)
+                                            .set2(MARK)
+                                            .set2(UCSCHAR)
+                                            .set2(BIDI,false);
+  
+  public final static BitSet IPCHAR     = new ChainableBitSet().set2(IUNRESERVED)
+                                            .set2(ESCAPED)
+                                            .set2(":@&=+$,");
+  
+  public final static BitSet IPATH       = new ChainableBitSet().set2(IPCHAR).set2(";/%");
+  
+  public final static BitSet IQUERY     = new ChainableBitSet().set2(IPCHAR)
+                                            .set2(IPRIVATE)
+                                            .set2(";/?%");
+  
+  public final static BitSet IFRAGMENT  = new ChainableBitSet().set2(IPCHAR).set2("/?%");
+  
+  public final static BitSet IREGNAME   = new ChainableBitSet().set2(IUNRESERVED)
+                                            .set2(ESCAPED)
+                                            .set2("$,;:@&=+");
+  
+  public final static BitSet IUSERINFO  = new ChainableBitSet().set2(IUNRESERVED)
+                                            .set2(ESCAPED)
+                                            .set2(";:&=+$,");
+  
+  public final static BitSet ISERVER    = new ChainableBitSet().set2(IUSERINFO)
+                                            .set2(IREGNAME)
+                                            .set2(ALPHANUM)
+                                            .set2(".:@[]%-")
+                                            .set2('?',false);
+  
+  /**
+   * The ASCII code points 0x00-0x7F other than letters, digits and the
+   * hyphen (0x2D). IDNA.toASCII verifies that labels contain none of them.
+   */
+  public final static BitSet STD3ASCIIRULES = new ChainableBitSet()
+                                                .set2(0x0000,0x002C)
+                                                .set2(0x002E,0x002F)
+                                                .set2(0x003A,0x0040)
+                                                .set2(0x005B,0x0060)
+                                                .set2(0x007B,0x007F);
+  
+
+}

Added: incubator/abdera/java/trunk/core/src/main/java/org/apache/abdera/util/iri/Escaping.java
URL: http://svn.apache.org/viewvc/incubator/abdera/java/trunk/core/src/main/java/org/apache/abdera/util/iri/Escaping.java?view=auto&rev=448818
==============================================================================
--- incubator/abdera/java/trunk/core/src/main/java/org/apache/abdera/util/iri/Escaping.java (added)
+++ incubator/abdera/java/trunk/core/src/main/java/org/apache/abdera/util/iri/Escaping.java Thu Sep 21 22:40:01 2006
@@ -0,0 +1,156 @@
+/*
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements.  The ASF licenses this file to You
+* under the Apache License, Version 2.0 (the "License"); you may not
+* use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.  For additional information regarding
+* copyright in this work, please see the NOTICE file in the top level
+* directory of this distribution.
+*/
+package org.apache.abdera.util.iri;
+
+import java.io.ByteArrayInputStream;
+import java.io.InputStreamReader;
+import java.io.UnsupportedEncodingException;
+import java.util.BitSet;
+
+import org.apache.abdera.util.io.CharUtils;
+
+
+/**
+ * Performs URL percent-encoding and percent-decoding.
+ */
+public final class Escaping {
+
+  private Escaping() {}
+  
+  /**
+   * Appends every byte to {@code sb} as a {@code %XX} triplet, using the
+   * digits of {@link Constants#hex}.
+   */
+  private static void encode(StringBuffer sb, byte... bytes) {
+    for (byte c : bytes) {
+      sb.append("%");
+      sb.append(Constants.hex[(c >> 4) & 0x0f]);
+      sb.append(Constants.hex[(c >> 0) & 0x0f]);
+    }
+  }
+  
+  /**
+   * Percent-encodes {@code s} using UTF-8 for the escaped bytes.
+   *
+   * @param s    the text to encode, may be {@code null}
+   * @param maps character tables passed to {@code CharUtils.isSet}; characters
+   *             for which it answers {@code false} are escaped
+   * @return the encoded text, or {@code null} when {@code s} is {@code null}
+   */
+  public static String encode(String s, BitSet... maps) {
+    if (s == null) return null;
+    try {
+      return encode(s,"utf-8",maps);
+    } catch (UnsupportedEncodingException e) {
+      return null; // shouldn't happen
+    }
+  }
+  
+  /**
+   * Percent-encodes {@code s}. Each character for which
+   * {@code CharUtils.isSet(c, maps)} is {@code false} is converted to bytes
+   * in {@code enc} and written as {@code %XX} triplets; all other characters
+   * are copied unchanged. A high surrogate is handled together with the
+   * character that follows it, so a supplementary character is escaped (or
+   * copied) as one unit.
+   *
+   * @param s    the text to encode, may be {@code null}
+   * @param enc  name of the charset used for the escaped bytes
+   * @param maps character tables consulted through {@code CharUtils.isSet}
+   * @return the encoded text, or {@code null} when {@code s} is {@code null}
+   * @throws UnsupportedEncodingException if {@code enc} is not supported
+   */
+  public static String encode(
+    String s, 
+    String enc, 
+    BitSet... maps) 
+      throws UnsupportedEncodingException {
+    if (s == null) return s;
+    StringBuffer sb = new StringBuffer();
+    char[] chars = s.toCharArray();
+    for (int n = 0; n < chars.length; n++) {
+      char c = chars[n];
+      boolean keep = CharUtils.isSet(c,maps);
+      // A high surrogate that is the very last char has no partner to consume;
+      // it falls through to the single-char handling instead of indexing past
+      // the end of the array.
+      if (CharUtils.isHighSurrogate(c) && n + 1 < chars.length) {
+        char next = chars[++n];
+        if (keep) {
+          sb.append(c);
+          sb.append(next);
+        } else {
+          encode(sb,new String(new char[] {c, next}).getBytes(enc));
+        }
+      } else if (keep) {
+        sb.append(c);
+      } else {
+        encode(sb,String.valueOf(c).getBytes(enc));
+      }
+    }
+    return sb.toString();
+  }
+  
+  /**
+   * Percent-decodes {@code e}: the text is converted to bytes in {@code enc},
+   * every {@code %XX} triplet is replaced by the byte it denotes, and the
+   * resulting bytes are decoded as {@code enc} again.
+   *
+   * @param e   the text to decode, may be {@code null}
+   * @param enc name of the charset of the escaped bytes
+   * @return the decoded text, {@code null} when {@code e} is {@code null},
+   *         or {@code e} itself if reading the decoded data fails
+   * @throws UnsupportedEncodingException if {@code enc} is not supported
+   */
+  public static String decode(String e, String enc) 
+    throws UnsupportedEncodingException {
+      if (e == null) return null;
+      // Use the same charset for both directions so that characters which
+      // were never escaped survive the round trip.
+      return readAll(new DecodingReader(e.getBytes(enc),enc),e);
+  }
+  
+  /**
+   * Percent-decodes {@code e} using UTF-8, the charset used by
+   * {@link #encode(String, BitSet...)}.
+   *
+   * @param e the text to decode, may be {@code null}
+   * @return the decoded text, or {@code null} when {@code e} is {@code null}
+   */
+  public static String decode(String e) {
+    if (e == null) return null;
+    try {
+      return decode(e,"utf-8");
+    } catch (UnsupportedEncodingException ex) {
+      return e; // shouldn't happen
+    }
+  }
+  
+  /**
+   * Reads {@code r} until end of stream and returns the characters read.
+   * Decoding never yields more chars than {@code e} has, so a buffer of
+   * {@code e.length()} chars is sufficient. Falls back to {@code e} if the
+   * reader fails. The reader wraps an in-memory array, so it is not closed.
+   */
+  private static String readAll(DecodingReader r, String e) {
+    char[] buf = new char[e.length()];
+    int total = 0;
+    try {
+      // A single read() may return fewer chars than are available.
+      while (total < buf.length) {
+        int l = r.read(buf,total,buf.length - total);
+        if (l == -1) break;
+        total += l;
+      }
+      return new String(buf,0,total);
+    } catch (java.io.IOException ex) {
+      return e; // best effort: hand back the input unchanged
+    }
+  }
+  
+  /**
+   * Byte stream over an array that replaces every {@code %XX} triplet by the
+   * single byte it denotes while reading.
+   */
+  public static class DecodingInputStream 
+    extends ByteArrayInputStream {
+  
+    DecodingInputStream(byte[] buf) {
+      super(buf);
+    }
+    
+    /**
+     * Returns the next decoded byte as a value in the range 0-255, or -1 at
+     * the end of the stream. A {@code '%'} that is not followed by two
+     * hexadecimal digits is returned as is and the bytes after it are read
+     * normally.
+     */
+    public synchronized int read() {
+      int c = super.read();
+      if (c != '%') return c;
+      int resume = pos;
+      int hi = hexValue(super.read());
+      int lo = hexValue(super.read());
+      if (hi < 0 || lo < 0) {
+        // Truncated or invalid escape: rewind and emit the '%' literally.
+        pos = resume;
+        return c;
+      }
+      // Always non-negative, so %FF (255) cannot be mistaken for end of stream.
+      return (hi << 4) | lo;
+    }
+    
+    /**
+     * Reads up to {@code len} decoded bytes into {@code b} starting at
+     * {@code off}.
+     *
+     * @return the number of bytes stored, or -1 if the stream was already at
+     *         its end and {@code len} is greater than zero
+     */
+    @Override
+    public synchronized int read(byte[] b, int off, int len) {
+      if (len == 0) return 0;
+      int n = 0;
+      // Check the remaining capacity before pulling a byte, so no byte is
+      // consumed and then dropped when the buffer is full.
+      while (n < len) {
+        int i = read();
+        if (i == -1) break;
+        b[off + n] = (byte)i;
+        n++;
+      }
+      return (n == 0) ? -1 : n;
+    }
+  }
+  
+  /**
+   * Reader that percent-decodes a byte array and then decodes the resulting
+   * bytes with the platform default charset or with the given one.
+   */
+  public static class DecodingReader 
+    extends InputStreamReader {
+      public DecodingReader(byte[] buf) {
+        super(new DecodingInputStream(buf));
+      }
+      public DecodingReader(
+        byte[] buf, 
+        String encoding) 
+          throws UnsupportedEncodingException {
+        super(new DecodingInputStream(buf),encoding);
+      }
+  }
+  
+  /**
+   * Returns the value (0-15) of the hexadecimal digit {@code c}, or -1 when
+   * {@code c} is not a hexadecimal digit (this includes -1, end of stream).
+   */
+  private static int hexValue(int c) {
+    if (c >= '0' && c <= '9') return c - '0';
+    if (c >= 'A' && c <= 'F') return c - 'A' + 10;
+    if (c >= 'a' && c <= 'f') return c - 'a' + 10;
+    return -1;
+  }
+  
+}

Added: incubator/abdera/java/trunk/core/src/main/java/org/apache/abdera/util/iri/HttpScheme.java
URL: http://svn.apache.org/viewvc/incubator/abdera/java/trunk/core/src/main/java/org/apache/abdera/util/iri/HttpScheme.java?view=auto&rev=448818
==============================================================================
--- incubator/abdera/java/trunk/core/src/main/java/org/apache/abdera/util/iri/HttpScheme.java (added)
+++ incubator/abdera/java/trunk/core/src/main/java/org/apache/abdera/util/iri/HttpScheme.java Thu Sep 21 22:40:01 2006
@@ -0,0 +1,62 @@
+/*
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements.  The ASF licenses this file to You
+* under the Apache License, Version 2.0 (the "License"); you may not
+* use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.  For additional information regarding
+* copyright in this work, please see the NOTICE file in the top level
+* directory of this distribution.
+*/
+package org.apache.abdera.util.iri;
+
+/**
+ * Scheme implementation registered under the name {@code "http"}, with
+ * default port 80 and a component-wise equivalence check.
+ */
+class HttpScheme extends AbstractScheme {
+
+  static final String NAME = "http";
+  static final int DEFAULT_PORT = 80;
+  
+  public HttpScheme() {
+    super(NAME);
+  }
+  
+  /** Lets subclasses register the same behaviour under another scheme name. */
+  protected HttpScheme(String name) {
+    super(name);
+  }
+
+  /** Port assumed by {@link #equivalent} when an IRI reports port -1. */
+  protected int getDefaultPort() {
+    return DEFAULT_PORT;
+  }
+
+  /** Null-safe string comparison: two nulls are equal. */
+  private boolean equal(String s1, String s2) {
+    return (s1 == null) ? (s2 == null) : s1.equals(s2);
+  }
+  
+  /** Returns the IRI's port, or the default port when it reports -1. */
+  private int effectivePort(IRI iri) {
+    return (iri.getPort() != -1) ? iri.getPort() : getDefaultPort();
+  }
+  
+  /**
+   * Two IRIs are equivalent if the superclass already says so, or if their
+   * schemes are equal and user info, ASCII host, effective port, ASCII path,
+   * query and fragment all match.
+   */
+  @Override
+  public boolean equivalent(IRI iri1, IRI iri2) {
+    if (super.equivalent(iri1, iri2)) {
+      return true;
+    }
+    if (!iri1.getScheme().equals(iri2.getScheme())) {
+      return false;
+    }
+    
+    int port1 = effectivePort(iri1);
+    int port2 = effectivePort(iri2);
+    
+    if (!equal(iri1.getUserInfo(),iri2.getUserInfo())) {
+      return false;
+    }
+    if (!equal(iri1.getASCIIHost(),iri2.getASCIIHost())) {
+      return false;
+    }
+    if (port1 != port2) {
+      return false;
+    }
+    if (!equal(iri1.getASCIIPath(),iri2.getASCIIPath())) {
+      return false;
+    }
+    if (!equal(iri1.getQuery(),iri2.getQuery())) {
+      return false;
+    }
+    return equal(iri1.getFragment(),iri2.getFragment());
+  }
+  
+}

Added: incubator/abdera/java/trunk/core/src/main/java/org/apache/abdera/util/iri/HttpsScheme.java
URL: http://svn.apache.org/viewvc/incubator/abdera/java/trunk/core/src/main/java/org/apache/abdera/util/iri/HttpsScheme.java?view=auto&rev=448818
==============================================================================
--- incubator/abdera/java/trunk/core/src/main/java/org/apache/abdera/util/iri/HttpsScheme.java (added)
+++ incubator/abdera/java/trunk/core/src/main/java/org/apache/abdera/util/iri/HttpsScheme.java Thu Sep 21 22:40:01 2006
@@ -0,0 +1,33 @@
+/*
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements.  The ASF licenses this file to You
+* under the Apache License, Version 2.0 (the "License"); you may not
+* use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.  For additional information regarding
+* copyright in this work, please see the NOTICE file in the top level
+* directory of this distribution.
+*/
+package org.apache.abdera.util.iri;
+
+/**
+ * Variant of {@link HttpScheme} registered under the name {@code "https"}
+ * with default port 443; everything else is inherited.
+ */
+class HttpsScheme extends HttpScheme {
+
+  static final String NAME = "https";
+  static final int DEFAULT_PORT = 443;
+  
+  public HttpsScheme() {
+    super(HttpsScheme.NAME);
+  }
+  
+  /** Returns the https default port instead of the inherited http one. */
+  protected int getDefaultPort() {
+    return DEFAULT_PORT;
+  }
+}

Added: incubator/abdera/java/trunk/core/src/main/java/org/apache/abdera/util/iri/IDNA.java
URL: http://svn.apache.org/viewvc/incubator/abdera/java/trunk/core/src/main/java/org/apache/abdera/util/iri/IDNA.java?view=auto&rev=448818
==============================================================================
--- incubator/abdera/java/trunk/core/src/main/java/org/apache/abdera/util/iri/IDNA.java (added)
+++ incubator/abdera/java/trunk/core/src/main/java/org/apache/abdera/util/iri/IDNA.java Thu Sep 21 22:40:01 2006
@@ -0,0 +1,146 @@
+/*
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements.  The ASF licenses this file to You
+* under the Apache License, Version 2.0 (the "License"); you may not
+* use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.  For additional information regarding
+* copyright in this work, please see the NOTICE file in the top level
+* directory of this distribution.
+*/
+package org.apache.abdera.util.iri;
+
+import java.io.IOException;
+import java.io.Serializable;
+import java.net.UnknownHostException;
+
+import org.apache.abdera.util.io.CharUtils;
+
+
+/**
+ * Provides an Internationalized Domain Name implementation. An instance
+ * stores the result of {@link #toUnicode(String)} for the name it was
+ * created with and can convert it back with {@link #toASCII()}.
+ */
+public final class IDNA 
+  implements Serializable, 
+             Cloneable {
+
+  private static final long serialVersionUID = -617056657751424334L;
+  private final String regname;
+  
+  /** Creates an instance from the host name reported by {@code addr}. */
+  public IDNA(java.net.InetAddress addr) {
+    this(addr.getHostName());
+  }
+  
+  /**
+   * Creates an instance for {@code regname}, which is converted with
+   * {@link #toUnicode(String)} before being stored.
+   */
+  public IDNA(String regname) {
+    this.regname = toUnicode(regname);
+  }
+  
+  public Object clone() throws CloneNotSupportedException {
+    return super.clone();
+  }
+  
+  /** Returns the stored name converted with {@link #toASCII(String)}. */
+  public String toASCII() {
+    return toASCII(regname);
+  }
+  
+  /** Returns the stored name converted with {@link #toUnicode(String)}. */
+  public String toUnicode() {
+    return toUnicode(regname);
+  }
+  
+  /** Looks up the address of the ASCII form of the stored name. */
+  public java.net.InetAddress getInetAddress() throws UnknownHostException {
+    return java.net.InetAddress.getByName(toASCII());
+  }
+  
+  @Override
+  public int hashCode() {
+    final int PRIME = 31;
+    int result = 1;
+    result = PRIME * result + ((regname == null) ? 0 : regname.hashCode());
+    return result;
+  }
+
+  @Override
+  public boolean equals(Object obj) {
+    if (this == obj)
+      return true;
+    if (obj == null)
+      return false;
+    if (getClass() != obj.getClass())
+      return false;
+    final IDNA other = (IDNA) obj;
+    if (regname == null) {
+      if (other.regname != null)
+        return false;
+    } else if (!regname.equals(other.regname))
+      return false;
+    return true;
+  }
+  
+  @Override
+  public String toString() {
+    return toUnicode();
+  }
+
+  /**
+   * Compares two names by their {@link #toUnicode(String)} forms. Two
+   * {@code null} names are equal; a {@code null} name never equals a
+   * non-null one.
+   */
+  public static boolean equals(String idn1, String idn2) {
+    String u1 = toUnicode(idn1);
+    String u2 = toUnicode(idn2);
+    // toUnicode(null) is null, so guard before dereferencing.
+    return (u1 == null) ? (u2 == null) : u1.equals(u2);
+  }
+
+  /**
+   * Converts a dot-separated name to its ASCII form, label by label: each
+   * label is passed through {@code Nameprep.prep}, verified against
+   * {@code Constants.STD3ASCIIRULES}, and, when it contains characters
+   * outside 0x00-0x7F, Punycode-encoded and prefixed with {@code "xn--"}.
+   *
+   * <p>The conversion is abandoned and {@code regname} is returned unchanged
+   * if any label is empty, starts or ends with a hyphen, is non-ASCII but
+   * already starts with {@code "xn--"}, or is longer than 63 characters after
+   * conversion.</p>
+   *
+   * @param regname the name to convert, may be {@code null}
+   * @return the converted name, {@code regname} itself if the conversion
+   *         fails, or {@code null} when {@code regname} is {@code null}
+   */
+  public static String toASCII(String regname) {
+    if (regname == null) return null;
+    try {    
+      String[] labels = regname.split("\\.");
+      StringBuffer buf = new StringBuffer();
+      for (String label : labels) {
+        label = Nameprep.prep(label);
+        char[] chars = label.toCharArray();
+        CharUtils.verifyNot(chars, Constants.STD3ASCIIRULES);
+        // The length test comes first: an empty label ("", "a..b", ".com")
+        // must be rejected rather than indexed.
+        if (chars.length == 0 ||
+            chars[0] == '-' || 
+            chars[chars.length-1] == '-')
+              throw new IOException("ToASCII violation");
+        if (!CharUtils.inRange(chars,(char)0x000,(char)0x007F)) {
+          if (label.startsWith("xn--"))
+            throw new IOException("ToASCII violation");
+          String pc = Punycode.encode(chars,null).insert(0, "xn--").toString();
+          chars = pc.toCharArray();
+        }
+        if (chars.length > 63)
+          throw new IOException("ToASCII violation");
+        if (buf.length() > 0) buf.append('.');
+        buf.append(chars);
+      }
+      return buf.toString();
+    } catch (IOException e) {
+      return regname;
+    }
+  }
+  
+  /**
+   * Converts a dot-separated name to its Unicode form, label by label:
+   * labels containing characters outside 0x00-0x7F are passed through
+   * {@code Nameprep.prep}, and labels starting with {@code "xn--"} are
+   * Punycode-decoded. The result is only accepted if converting it back with
+   * {@link #toASCII(String)} yields the input again (ignoring case);
+   * otherwise the input is returned unchanged.
+   *
+   * @param regname the name to convert, may be {@code null}
+   * @return the converted name, {@code regname} itself if the round trip
+   *         does not match, or {@code null} when {@code regname} is
+   *         {@code null}
+   */
+  public static String toUnicode(String regname) {
+    if (regname == null) return null;
+    String[] labels = regname.split("\\.");
+    StringBuffer buf = new StringBuffer();
+    for (String label : labels) {
+      char[] chars = label.toCharArray();
+      if (!CharUtils.inRange(chars,(char)0x000,(char)0x007F)) {
+        label = Nameprep.prep(label);
+        chars = label.toCharArray();
+      }
+      if (label.startsWith("xn--")) {
+        label = Punycode.decode(label.substring(4));
+        chars = label.toCharArray();
+      }
+      if (buf.length() > 0) buf.append('.');
+      buf.append(chars);
+    }
+    String unicode = buf.toString();
+    String check = toASCII(unicode);
+    return check.equalsIgnoreCase(regname) ? unicode : regname;
+  }
+  
+}