You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@directory.apache.org by el...@apache.org on 2006/12/26 09:37:25 UTC

svn commit: r490270 [3/4] - in /directory/sandbox/elecharny/trunks/shared/ldap/src/main: java/org/apache/directory/shared/ldap/schema/ java/org/apache/directory/shared/ldap/util/unicode/ resources/ resources/org/ resources/org/apache/ resources/org/apa...

Added: directory/sandbox/elecharny/trunks/shared/ldap/src/main/java/org/apache/directory/shared/ldap/util/unicode/ByteArrayCodepointIterator.java
URL: http://svn.apache.org/viewvc/directory/sandbox/elecharny/trunks/shared/ldap/src/main/java/org/apache/directory/shared/ldap/util/unicode/ByteArrayCodepointIterator.java?view=auto&rev=490270
==============================================================================
--- directory/sandbox/elecharny/trunks/shared/ldap/src/main/java/org/apache/directory/shared/ldap/util/unicode/ByteArrayCodepointIterator.java (added)
+++ directory/sandbox/elecharny/trunks/shared/ldap/src/main/java/org/apache/directory/shared/ldap/util/unicode/ByteArrayCodepointIterator.java Tue Dec 26 00:37:23 2006
@@ -0,0 +1,45 @@
+/*
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements.  The ASF licenses this file to You
+* under the Apache License, Version 2.0 (the "License"); you may not
+* use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.  For additional information regarding
+* copyright in this work, please see the NOTICE file in the top level
+* directory of this distribution.
+*/
+package org.apache.directory.shared.ldap.util.unicode;
+
+import java.nio.ByteBuffer;
+import java.nio.CharBuffer;
+import java.nio.charset.Charset;
+
+/**
+ * Codepoint iterator whose characters are obtained by decoding an array of
+ * bytes with a charset.
+ */
+public class ByteArrayCodepointIterator
+  extends CharArrayCodepointIterator {
+
+  /**
+   * Decodes the bytes using the platform default charset
+   * (Charset.defaultCharset()).
+   */
+  public ByteArrayCodepointIterator(byte[] bytes) {
+    this(bytes,Charset.defaultCharset());
+  }
+
+  /**
+   * Decodes the bytes using the charset looked up by name through
+   * Charset.forName.
+   */
+  public ByteArrayCodepointIterator(byte[] bytes, String charset) {
+    this(bytes,Charset.forName(charset));
+  }
+
+  /**
+   * Decodes the whole byte array with the given charset. Iteration covers
+   * the decoded characters between the resulting buffer's position and limit.
+   */
+  public ByteArrayCodepointIterator(byte[] bytes, Charset charset) {
+    final ByteBuffer encoded = ByteBuffer.wrap(bytes);
+    final CharBuffer decoded = charset.decode(encoded);
+    this.buffer = decoded.array();
+    this.position = decoded.position();
+    this.limit = decoded.limit();
+  }
+
+}

Added: directory/sandbox/elecharny/trunks/shared/ldap/src/main/java/org/apache/directory/shared/ldap/util/unicode/ChainableBitSet.java
URL: http://svn.apache.org/viewvc/directory/sandbox/elecharny/trunks/shared/ldap/src/main/java/org/apache/directory/shared/ldap/util/unicode/ChainableBitSet.java?view=auto&rev=490270
==============================================================================
--- directory/sandbox/elecharny/trunks/shared/ldap/src/main/java/org/apache/directory/shared/ldap/util/unicode/ChainableBitSet.java (added)
+++ directory/sandbox/elecharny/trunks/shared/ldap/src/main/java/org/apache/directory/shared/ldap/util/unicode/ChainableBitSet.java Tue Dec 26 00:37:23 2006
@@ -0,0 +1,119 @@
+/*
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements.  The ASF licenses this file to You
+* under the Apache License, Version 2.0 (the "License"); you may not
+* use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.  For additional information regarding
+* copyright in this work, please see the NOTICE file in the top level
+* directory of this distribution.
+*/
+package org.apache.directory.shared.ldap.util.unicode;
+
+import java.util.BitSet;
+
+/**
+ * Extension to java.util.BitSet that allows calls to be chained, e.g.
+ * bs.set2(1).set2(2).set2(3), making it easier to define a complex bit set in
+ * a single declaration. Every "2" method performs the corresponding BitSet
+ * operation and returns this instance.
+ */
+public class ChainableBitSet
+  extends BitSet {
+
+  private static final long serialVersionUID = -1105957441212997513L;
+
+  /** Chainable form of BitSet.and(set) */
+  public ChainableBitSet and2(BitSet set) {
+    and(set);
+    return this;
+  }
+
+  /** Chainable form of BitSet.andNot(set) */
+  public ChainableBitSet andNot2(BitSet set) {
+    andNot(set);
+    return this;
+  }
+
+  /**
+   * Misspelled alias of andNot2, kept so existing callers still compile.
+   * Clears every bit that is set in the given set.
+   */
+  public ChainableBitSet addNot2(BitSet set) {
+    return andNot2(set);
+  }
+
+  /** Chainable form of BitSet.clear(index) */
+  public ChainableBitSet clear2(int index) {
+    clear(index);
+    return this;
+  }
+
+  /** Clears each of the listed bits */
+  public ChainableBitSet clear2(int... indexes) {
+    for (int i : indexes) clear(i);
+    return this;
+  }
+
+  /**
+   * Chainable form of BitSet.clear(startIndex, endIndex); endIndex is
+   * exclusive, unlike the range form of set2.
+   */
+  public ChainableBitSet clear2(int startIndex, int endIndex) {
+    clear(startIndex, endIndex);
+    return this;
+  }
+
+  /** Chainable form of BitSet.flip(index) */
+  public ChainableBitSet flip2(int index) {
+    flip(index);
+    return this;
+  }
+
+  /** Flips each of the listed bits */
+  public ChainableBitSet flip2(int... indexes) {
+    for (int i : indexes) flip(i);
+    return this;
+  }
+
+  /**
+   * Chainable form of BitSet.flip(startIndex, endIndex); endIndex is
+   * exclusive, unlike the range form of set2.
+   */
+  public ChainableBitSet flip2(int startIndex, int endIndex) {
+    flip(startIndex,endIndex);
+    return this;
+  }
+
+  /** Chainable form of BitSet.or(set) */
+  public ChainableBitSet or2(BitSet set) {
+    or(set);
+    return this;
+  }
+
+  /** Chainable form of BitSet.xor(set) */
+  public ChainableBitSet xor2(BitSet set) {
+    xor(set);
+    return this;
+  }
+
+  /**
+   * Sets the bit of every Unicode codepoint contained in the string. A valid
+   * surrogate pair sets the single bit of the supplementary codepoint it
+   * encodes rather than the two bits of its UTF-16 halves; an unpaired
+   * surrogate sets its own bit.
+   */
+  public ChainableBitSet set2(String s) {
+    for (int i = 0; i < s.length(); ) {
+      int cp = s.codePointAt(i);
+      set(cp);
+      i += Character.charCount(cp);
+    }
+    return this;
+  }
+
+  /** Sets every bit that is set in the given set (same as or2) */
+  public ChainableBitSet set2(BitSet set) {
+    this.or(set);
+    return this;
+  }
+
+  /** Sets each of the listed bits */
+  public ChainableBitSet set2(int ... bits) {
+    for (int n : bits) set(n);
+    return this;
+  }
+
+  /**
+   * Sets the bits fromIndex through toIndex, BOTH INCLUSIVE. This differs
+   * from BitSet.set(int,int) and from the range forms of clear2 and flip2,
+   * whose end index is exclusive.
+   */
+  public ChainableBitSet set2(int fromIndex, int toIndex) {
+    super.set(fromIndex, toIndex+1);
+    return this;
+  }
+
+  /** Chainable form of BitSet.set(bitIndex) */
+  public ChainableBitSet set2(int bitIndex) {
+    super.set(bitIndex);
+    return this;
+  }
+
+  /** Chainable form of BitSet.set(bitIndex, value) */
+  public ChainableBitSet set2(int bitIndex, boolean value) {
+    super.set(bitIndex, value);
+    return this;
+  }
+
+  /**
+   * Sets (value == true) or clears (value == false) every bit that is set
+   * in the given set.
+   */
+  public ChainableBitSet set2(BitSet set, boolean value) {
+    if (value) return set2(set);
+    this.andNot(set);
+    return this;
+  }
+
+}

Added: directory/sandbox/elecharny/trunks/shared/ldap/src/main/java/org/apache/directory/shared/ldap/util/unicode/CharArrayCodepointIterator.java
URL: http://svn.apache.org/viewvc/directory/sandbox/elecharny/trunks/shared/ldap/src/main/java/org/apache/directory/shared/ldap/util/unicode/CharArrayCodepointIterator.java?view=auto&rev=490270
==============================================================================
--- directory/sandbox/elecharny/trunks/shared/ldap/src/main/java/org/apache/directory/shared/ldap/util/unicode/CharArrayCodepointIterator.java (added)
+++ directory/sandbox/elecharny/trunks/shared/ldap/src/main/java/org/apache/directory/shared/ldap/util/unicode/CharArrayCodepointIterator.java Tue Dec 26 00:37:23 2006
@@ -0,0 +1,50 @@
+/*
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements.  The ASF licenses this file to You
+* under the Apache License, Version 2.0 (the "License"); you may not
+* use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.  For additional information regarding
+* copyright in this work, please see the NOTICE file in the top level
+* directory of this distribution.
+*/
+package org.apache.directory.shared.ldap.util.unicode;
+
+/**
+ * Iterate over Unicode codepoints contained in a char array.
+ * position and limit are absolute indexes into the array: get() reads
+ * buffer[position] while position is below limit.
+ */
+public class CharArrayCodepointIterator
+  extends CodepointIterator {
+
+  protected char[] buffer;
+
+  /** For subclasses that fill in buffer, position and limit themselves */
+  protected CharArrayCodepointIterator() {}
+
+  /** Iterates over the whole array */
+  public CharArrayCodepointIterator(char[] buffer) {
+    this(buffer,0,buffer.length);
+  }
+
+  /**
+   * Iterates over the chars from index n up to, but not including, index e.
+   * e is clamped to the array length.
+   *
+   * The limit is an absolute index (it is compared directly with position),
+   * so it must not be reduced by n; doing so cut the window short whenever
+   * n was greater than zero.
+   * NOTE(review): this treats e as an end index rather than a length -
+   * confirm against callers that pass a non-zero n.
+   */
+  public CharArrayCodepointIterator(char[] buffer, int n, int e) {
+    this.buffer = buffer;
+    this.position = n;
+    this.limit = Math.min(buffer.length,e);
+  }
+
+  /**
+   * Returns the char at the current position and advances, or (char)-1
+   * once the limit has been reached.
+   */
+  protected char get() {
+    return (position < limit) ? buffer[position++] : (char)-1;
+  }
+
+  /**
+   * Returns the char at the given absolute index without moving the position.
+   *
+   * @throws ArrayIndexOutOfBoundsException if index is negative or not below
+   *         the limit
+   */
+  protected char get(int index) {
+    if (index < 0 || index >= limit)
+      throw new ArrayIndexOutOfBoundsException(index);
+    return buffer[index];
+  }
+
+}

Added: directory/sandbox/elecharny/trunks/shared/ldap/src/main/java/org/apache/directory/shared/ldap/util/unicode/CharBufferCodepointIterator.java
URL: http://svn.apache.org/viewvc/directory/sandbox/elecharny/trunks/shared/ldap/src/main/java/org/apache/directory/shared/ldap/util/unicode/CharBufferCodepointIterator.java?view=auto&rev=490270
==============================================================================
--- directory/sandbox/elecharny/trunks/shared/ldap/src/main/java/org/apache/directory/shared/ldap/util/unicode/CharBufferCodepointIterator.java (added)
+++ directory/sandbox/elecharny/trunks/shared/ldap/src/main/java/org/apache/directory/shared/ldap/util/unicode/CharBufferCodepointIterator.java Tue Dec 26 00:37:23 2006
@@ -0,0 +1,34 @@
+/*
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements.  The ASF licenses this file to You
+* under the Apache License, Version 2.0 (the "License"); you may not
+* use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.  For additional information regarding
+* copyright in this work, please see the NOTICE file in the top level
+* directory of this distribution.
+*/
+package org.apache.directory.shared.ldap.util.unicode;
+
+import java.nio.CharBuffer;
+
+/**
+ * Iterate over Unicode codepoints in a java.nio.CharBuffer. The iteration
+ * covers the chars between the buffer's position and limit at construction
+ * time; the CharBuffer's own position is not modified.
+ */
+public class CharBufferCodepointIterator
+  extends CharArrayCodepointIterator {
+
+  /**
+   * For an array-backed buffer the backing array is shared (not copied) and
+   * position/limit are translated by arrayOffset(), since a sliced or
+   * offset-wrapped buffer does not start at index 0 of its array.
+   * For any other buffer (read-only, or wrapping a CharSequence) array() is
+   * not available, so the remaining chars are copied into a private array.
+   */
+  public CharBufferCodepointIterator(CharBuffer cb) {
+    if (cb.hasArray()) {
+      int offset = cb.arrayOffset();
+      buffer = cb.array();
+      position = offset + cb.position();
+      limit = offset + cb.limit();
+    } else {
+      char[] copy = new char[cb.remaining()];
+      // read through a duplicate so the caller's position stays untouched
+      cb.duplicate().get(copy);
+      buffer = copy;
+      position = 0;
+      limit = copy.length;
+    }
+  }
+
+}

Added: directory/sandbox/elecharny/trunks/shared/ldap/src/main/java/org/apache/directory/shared/ldap/util/unicode/CharSequenceCodepointIterator.java
URL: http://svn.apache.org/viewvc/directory/sandbox/elecharny/trunks/shared/ldap/src/main/java/org/apache/directory/shared/ldap/util/unicode/CharSequenceCodepointIterator.java?view=auto&rev=490270
==============================================================================
--- directory/sandbox/elecharny/trunks/shared/ldap/src/main/java/org/apache/directory/shared/ldap/util/unicode/CharSequenceCodepointIterator.java (added)
+++ directory/sandbox/elecharny/trunks/shared/ldap/src/main/java/org/apache/directory/shared/ldap/util/unicode/CharSequenceCodepointIterator.java Tue Dec 26 00:37:23 2006
@@ -0,0 +1,47 @@
+/*
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements.  The ASF licenses this file to You
+* under the Apache License, Version 2.0 (the "License"); you may not
+* use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.  For additional information regarding
+* copyright in this work, please see the NOTICE file in the top level
+* directory of this distribution.
+*/
+package org.apache.directory.shared.ldap.util.unicode;
+
+/**
+ * Iterate over Unicode codepoints in a CharSequence (e.g. String, StringBuffer, etc).
+ * position and limit are absolute indexes into the sequence.
+ */
+public class CharSequenceCodepointIterator
+  extends CodepointIterator {
+
+  private CharSequence buffer;
+
+  /** Iterates over the whole sequence */
+  public CharSequenceCodepointIterator(CharSequence buffer) {
+    this(buffer,0,buffer.length());
+  }
+
+  /**
+   * Iterates over the chars from index n up to, but not including, index e.
+   * e is clamped to the sequence length.
+   *
+   * The limit is an absolute index (remaining() is limit - position), so it
+   * must not be reduced by n; doing so cut the window short whenever n was
+   * greater than zero.
+   * NOTE(review): this treats e as an end index rather than a length -
+   * confirm against callers that pass a non-zero n.
+   */
+  public CharSequenceCodepointIterator(CharSequence buffer, int n, int e) {
+    this.buffer = buffer;
+    this.position = n;
+    this.limit = Math.min(buffer.length(),e);
+  }
+
+  /**
+   * Returns the char at the current position and advances, or (char)-1
+   * once the limit has been reached (same contract as
+   * CharArrayCodepointIterator.get()).
+   */
+  protected char get() {
+    return (position < limit) ? buffer.charAt(position++) : (char)-1;
+  }
+
+  /**
+   * Returns the char at the given index without moving the position.
+   * Bounds are those of CharSequence.charAt.
+   */
+  protected char get(int index) {
+    return buffer.charAt(index);
+  }
+
+}
+

Added: directory/sandbox/elecharny/trunks/shared/ldap/src/main/java/org/apache/directory/shared/ldap/util/unicode/CharUtils.java
URL: http://svn.apache.org/viewvc/directory/sandbox/elecharny/trunks/shared/ldap/src/main/java/org/apache/directory/shared/ldap/util/unicode/CharUtils.java?view=auto&rev=490270
==============================================================================
--- directory/sandbox/elecharny/trunks/shared/ldap/src/main/java/org/apache/directory/shared/ldap/util/unicode/CharUtils.java (added)
+++ directory/sandbox/elecharny/trunks/shared/ldap/src/main/java/org/apache/directory/shared/ldap/util/unicode/CharUtils.java Tue Dec 26 00:37:23 2006
@@ -0,0 +1,326 @@
+/*
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements.  The ASF licenses this file to You
+* under the Apache License, Version 2.0 (the "License"); you may not
+* use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.  For additional information regarding
+* copyright in this work, please see the NOTICE file in the top level
+* directory of this distribution.
+*/
+package org.apache.directory.shared.ldap.util.unicode;
+
+import java.util.BitSet;
+
+/**
+ * General utilities for dealing with Unicode characters
+ */
+public final class CharUtils {
+
+  private CharUtils() {}
+
+  /**
+   * Returns true if d lies within the Unicode codepoint range 0x0 - 0x10FFFF
+   */
+  public static boolean isValidCodepoint(int d) {
+    return d >= 0x000000 && d <= 0x10ffff;
+  }
+
+  /**
+   * Drains a RestrictedCodepointIterator built as (ci,set,true,true) and
+   * returns the position at which it stopped. The meaning of the two flags
+   * is defined by RestrictedCodepointIterator.
+   *
+   * The position is read through position() - as scan(CodepointIterator,..)
+   * does - because a filtering iterator delegates to the iterator it wraps
+   * and its own position field is not the wrapped iterator's position.
+   */
+  public static int scanNot(CodepointIterator ci, BitSet set) throws InvalidCharacterException {
+    RestrictedCodepointIterator rci = new RestrictedCodepointIterator(ci,set,true,true);
+    while (rci.hasNext()) rci.next();
+    return rci.position();
+  }
+
+  /**
+   * Same as scanNot(CodepointIterator,BitSet), applied to a char array
+   */
+  public static int scanNot(char[] array, BitSet set) throws InvalidCharacterException {
+    return scanNot(CodepointIterator.forCharArray(array),set);
+  }
+
+  /**
+   * Drains a RestrictedCodepointIterator built as (ci,set,true) and returns
+   * the position at which it stopped.
+   */
+  public static int scan(CodepointIterator ci, BitSet set) throws InvalidCharacterException {
+    RestrictedCodepointIterator rci = new RestrictedCodepointIterator(ci,set,true);
+    while (rci.hasNext()) rci.next();
+    return rci.position();
+  }
+
+  /**
+   * Same as scan(CodepointIterator,BitSet), applied to a char array
+   */
+  public static int scan(char[] array, BitSet set) throws InvalidCharacterException {
+    return scan(CodepointIterator.forCharArray(array),set);
+  }
+
+  /**
+   * Same as scan(CodepointIterator,BitSet), applied to a String
+   */
+  public static int scan(String s, BitSet set) throws InvalidCharacterException {
+    return scan(CodepointIterator.forCharSequence(s),set);
+  }
+
+  /**
+   * Drains a RestrictedCodepointIterator built as (ci,set,false,true); any
+   * InvalidCharacterException it raises is propagated to the caller.
+   */
+  public static void verifyNot(CodepointIterator ci, BitSet set) throws InvalidCharacterException {
+    RestrictedCodepointIterator rci = new RestrictedCodepointIterator(ci,set,false,true);
+    while (rci.hasNext()) rci.next();
+  }
+
+  /**
+   * Same as verifyNot(CodepointIterator,BitSet), applied to a char array
+   */
+  public static void verifyNot(char[] array, BitSet set) throws InvalidCharacterException {
+    verifyNot(CodepointIterator.forCharArray(array),set);
+  }
+
+  /**
+   * Drains a RestrictedCodepointIterator built as (ci,set,false); any
+   * InvalidCharacterException it raises is propagated to the caller.
+   */
+  public static void verify(CodepointIterator ci, BitSet set) throws InvalidCharacterException {
+    RestrictedCodepointIterator rci = new RestrictedCodepointIterator(ci,set,false);
+    while (rci.hasNext()) rci.next();
+  }
+
+  /**
+   * Same as verify(CodepointIterator,BitSet), applied to a char array
+   */
+  public static void verify(char[] array, BitSet set) throws InvalidCharacterException {
+    verify(CodepointIterator.forCharArray(array),set);
+  }
+
+  /**
+   * Same as verify(CodepointIterator,BitSet), applied to a String.
+   * A null string is accepted and nothing is checked.
+   */
+  public static void verify(String s, BitSet set) throws InvalidCharacterException {
+    if (s == null) return;
+    verify(CodepointIterator.forCharSequence(s),set);
+  }
+
+  /**
+   * Returns true if every char lies within [low, high]
+   */
+  public static boolean inRange(char[] chars, char low, char high) {
+    for (int i = 0; i < chars.length; i++)
+      if (chars[i] < low || chars[i] > high) return false;
+    return true;
+  }
+
+  /**
+   * Returns true if every codepoint lies within [low, high]. A high
+   * surrogate directly followed by a low surrogate is combined into one
+   * supplementary codepoint; any other char is compared as is.
+   */
+  public static boolean inRange(char[] chars, int low, int high) {
+    for (int i = 0; i < chars.length; i++) {
+      char n = chars[i];
+      int c = n;
+      if (isHighSurrogate(n) &&
+          i + 1 < chars.length &&
+          isLowSurrogate(chars[i+1])) {
+        // ++i: pair the high surrogate with the FOLLOWING char and skip it
+        c = toCodePoint(n,chars[++i]);
+      }
+      if (c < low || c > high) return false;
+    }
+    return true;
+  }
+
+  /**
+   * Returns true if bit n is set in at least one of the given sets.
+   * Returns false for n == -1 (the end-of-input value of the iterators).
+   */
+  public static boolean isSet(int n, BitSet... sets) {
+    if (n == -1) return false;
+    for (BitSet s : sets)
+      if (s.get(n)) return true;
+    return false;
+  }
+
+  /**
+   * Appends the codepoint to the buffer, as a surrogate pair if it is
+   * supplementary and as a single char otherwise.
+   */
+  public static void append(StringBuffer buf, int c) {
+    if (isSupplementary(c)) {
+      buf.append(getHighSurrogate(c));
+      buf.append(getLowSurrogate(c));
+    } else buf.append((char)c);
+  }
+
+  /**
+   * Returns the high surrogate of a codepoint at or above 0x10000, or 0 for
+   * any smaller value.
+   */
+  public static char getHighSurrogate(int c) {
+    return (c >= 0x10000) ?
+       (char)((0xD800 - (0x10000 >> 10)) + (c >> 10)) : 0;
+  }
+
+  /**
+   * Returns the low surrogate of a codepoint at or above 0x10000, or the
+   * value itself cast to char for any smaller value.
+   */
+  public static char getLowSurrogate(int c) {
+    return (c >= 0x10000) ?
+        (char)(0xDC00 + (c & 0x3FF)) : (char)c;
+  }
+
+  /** Returns true if c is in the range U+D800 - U+DBFF */
+  public static boolean isHighSurrogate(char c) {
+    return c <= '\uDBFF' && c >= '\uD800';
+  }
+
+  /** Returns true if c is in the range U+DC00 - U+DFFF */
+  public static boolean isLowSurrogate(char c) {
+    return c <= '\uDFFF' && c >= '\uDC00';
+  }
+
+  /** Returns true if c is in the range 0x10000 - 0x10FFFF */
+  public static boolean isSupplementary(int c) {
+    return c <= 0x10ffff && c >= 0x010000;
+  }
+
+  /** Returns true if high is a high surrogate and low is a low surrogate */
+  public static boolean isSurrogatePair(char high, char low) {
+    return isHighSurrogate(high) && isLowSurrogate(low);
+  }
+
+  /**
+   * Combines chars[0] (high surrogate) and chars[1] (low surrogate) into a
+   * codepoint. The array must hold at least two chars.
+   */
+  public static int toCodePoint(char[] chars) {
+    return toCodePoint(chars[0],chars[1]);
+  }
+
+  /**
+   * Combines a surrogate pair into a supplementary codepoint. The arguments
+   * are not checked for being surrogates.
+   */
+  public static int toCodePoint(char high, char low) {
+    return ((high - '\uD800') << 10) + (low - '\uDC00') + 0x010000;
+  }
+
+  /**
+   * Returns the codepoint at index i. If i addresses either half of a
+   * surrogate pair the combined supplementary codepoint is returned; an
+   * unpaired surrogate is returned as is.
+   */
+  public static int charAt(String s, int i) {
+    return codepointAt(s,i);
+  }
+
+  /**
+   * Returns the codepoint at index i. If i addresses either half of a
+   * surrogate pair the combined supplementary codepoint is returned; an
+   * unpaired surrogate is returned as is.
+   */
+  public static int charAt(StringBuilder s, int i) {
+    return codepointAt(s,i);
+  }
+
+  /** Shared implementation of the two charAt methods */
+  private static int codepointAt(CharSequence s, int i) {
+    char c = s.charAt(i);
+    if (c < 0xD800 || c > 0xDFFF) return c;
+    if (isHighSurrogate(c)) {
+      // only look ahead when a following char exists
+      if (i + 1 < s.length()) {
+        char low = s.charAt(i+1);
+        if (isLowSurrogate(low)) return toCodePoint(c,low);
+      }
+    } else if (i >= 1) {
+      // c is a low surrogate: look back for its high half
+      char high = s.charAt(i-1);
+      if (isHighSurrogate(high)) return toCodePoint(high,c);
+    }
+    return c;
+  }
+
+  /**
+   * Inserts the codepoint at index i. If i falls between the two halves of
+   * a surrogate pair the insertion is moved in front of the pair so that
+   * the pair is not split.
+   */
+  public static void insert(StringBuffer s, int i, int c) {
+    if (i > 0 && i < s.length() &&
+        isLowSurrogate(s.charAt(i)) &&
+        isHighSurrogate(s.charAt(i-1))) {
+      i--;
+    }
+    s.insert(i, toString(c));
+  }
+
+  /**
+   * Replaces the codepoint at index i with c. If i addresses either half of
+   * a surrogate pair both chars of the pair are replaced.
+   */
+  public static void setChar(StringBuilder s, int i, int c) {
+    int l = 1;
+    char ch = s.charAt(i);
+    boolean high = isHighSurrogate(ch);
+    boolean low = isLowSurrogate(ch);
+    if (high || low) {
+      if (high && (i+1) < s.length() && isLowSurrogate(s.charAt(i+1))) l++;
+      else {
+        if (low && i > 0 && isHighSurrogate(s.charAt(i-1))) {
+          i--; l++;
+        }
+      }
+    }
+    s.replace(i, i+l, toString(c));
+  }
+
+  /** Returns the number of chars needed for the codepoint: 2 if supplementary, else 1 */
+  public static int size(int c) {
+    return (isSupplementary(c)) ? 2 : 1;
+  }
+
+  private static String supplementaryToString(int c) {
+    StringBuffer buf = new StringBuffer();
+    buf.append(getHighSurrogate(c));
+    buf.append(getLowSurrogate(c));
+    return buf.toString();
+  }
+
+  /**
+   * Returns the codepoint as a String: two chars if it is supplementary,
+   * otherwise the single char obtained by casting c.
+   */
+  public static String toString(int c) {
+    return (isSupplementary(c)) ?
+      supplementaryToString(c) :
+      String.valueOf((char)c);
+  }
+
+
+
+  private static final char LRE = 0x202A;
+  private static final char RLE = 0x202B;
+  private static final char LRO = 0x202D;
+  private static final char RLO = 0x202E;
+  private static final char LRM = 0x200E;
+  private static final char RLM = 0x200F;
+  private static final char PDF = 0x202C;
+
+  /**
+   * Removes at most one leading and one trailing bidi control from the
+   * string. A null string or a string of length 0 or 1 is returned unchanged.
+   */
+  public static String stripBidi(String s) {
+    if (s == null || s.length() <= 1) return s;
+    if (charIsBidiControl(s.charAt(0)))
+      s = s.substring(1);
+    if (charIsBidiControl(s.charAt(s.length()-1)))
+      s = s.substring(0,s.length()-1);
+    return s;
+  }
+
+  /**
+   * Returns true if the character is a bidi control
+   * (LRE, RLE, LRO, RLO, RLM, LRM or PDF)
+   */
+  public static boolean charIsBidiControl(char c) {
+    return c == LRE ||
+    c == RLE ||
+    c == LRO ||
+    c == RLO ||
+    c == RLM ||
+    c == LRM ||
+    c == PDF;
+  }
+
+  /**
+   * Puts c1 in front of and c2 behind the string unless they are already
+   * there. Strings of length 0 or 1 are returned unchanged.
+   */
+  private static String wrap(String s, char c1, char c2) {
+    StringBuffer buf = new StringBuffer(s);
+    if (buf.length() > 1) {
+      if (buf.charAt(0) != c1) buf.insert(0, c1);
+      if (buf.charAt(buf.length()-1) != c2) buf.append(c2);
+    }
+    return buf.toString();
+  }
+
+  /**
+   * Wrap the string with Bidi Right-to-Left embed
+   */
+  public static String bidiRLE(String s) {
+    return wrap(s,RLE,PDF);
+  }
+
+  /**
+   * Wrap the string with Bidi Right-to-Left override
+   */
+  public static String bidiRLO(String s) {
+    return wrap(s,RLO,PDF);
+  }
+
+  /**
+   * Wrap the string with Bidi Left-to-Right embed
+   */
+  public static String bidiLRE(String s) {
+    return wrap(s,LRE,PDF);
+  }
+
+  /**
+   * Wrap the string with Bidi Left-to-Right override
+   */
+  public static String bidiLRO(String s) {
+    return wrap(s,LRO,PDF);
+  }
+
+  /**
+   * Wrap the string with Bidi RLM marks
+   */
+  public static String bidiRLM(String s) {
+    return wrap(s,RLM,RLM);
+  }
+
+  /**
+   * Wrap the string with Bidi LRM marks
+   */
+  public static String bidiLRM(String s) {
+    return wrap(s,LRM,LRM);
+  }
+}

Added: directory/sandbox/elecharny/trunks/shared/ldap/src/main/java/org/apache/directory/shared/ldap/util/unicode/CodepointIterator.java
URL: http://svn.apache.org/viewvc/directory/sandbox/elecharny/trunks/shared/ldap/src/main/java/org/apache/directory/shared/ldap/util/unicode/CodepointIterator.java?view=auto&rev=490270
==============================================================================
--- directory/sandbox/elecharny/trunks/shared/ldap/src/main/java/org/apache/directory/shared/ldap/util/unicode/CodepointIterator.java (added)
+++ directory/sandbox/elecharny/trunks/shared/ldap/src/main/java/org/apache/directory/shared/ldap/util/unicode/CodepointIterator.java Tue Dec 26 00:37:23 2006
@@ -0,0 +1,167 @@
+/*
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements.  The ASF licenses this file to You
+* under the Apache License, Version 2.0 (the "License"); you may not
+* use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.  For additional information regarding
+* copyright in this work, please see the NOTICE file in the top level
+* directory of this distribution.
+*/
+package org.apache.directory.shared.ldap.util.unicode;
+
+import java.nio.CharBuffer;
+
+/**
+ * Iterate over Unicode codepoints. Subclasses supply the underlying chars
+ * through get() and get(int); this class combines surrogate pairs into
+ * supplementary codepoints.
+ */
+public abstract class CodepointIterator {
+
+  /** Creates an iterator over the whole char array */
+  public static CodepointIterator forCharArray(char[] array) {
+    return new CharArrayCodepointIterator(array);
+  }
+
+  /** Creates an iterator over the whole CharSequence */
+  public static CodepointIterator forCharSequence(CharSequence seq) {
+    return new CharSequenceCodepointIterator(seq);
+  }
+
+  /** Creates an iterator over the chars decoded from the byte array */
+  public static CodepointIterator forByteArray(byte[] array) {
+    return new ByteArrayCodepointIterator(array);
+  }
+
+  /** Creates an iterator over the CharBuffer */
+  public static CodepointIterator forCharBuffer(CharBuffer buffer) {
+    return new CharBufferCodepointIterator(buffer);
+  }
+
+  protected int position = -1;
+  protected int limit = -1;
+
+  /** Returns the char at the current position and advances the position */
+  protected abstract char get();
+
+  /** Returns the char at the given index without moving the position */
+  protected abstract char get(int index);
+
+  /** Returns true while remaining() is greater than zero */
+  public boolean hasNext() {
+    return remaining() > 0;
+  }
+
+  /**
+   * Returns the char just before the current position, or -1 if the
+   * position is zero or negative.
+   */
+  public int last() {
+    return (position() > 0) ? get(position() - 1) : -1;
+  }
+
+  /**
+   * Returns -1 if the position is negative, the position itself once it
+   * has reached the limit, and position - 1 otherwise.
+   */
+  public int lastPosition() {
+    int p = position();
+    return (p > -1) ?
+      (p >= limit()) ? p : p - 1 : -1;
+  }
+
+  /**
+   * Consumes the next codepoint and returns its chars: one char for a
+   * non-surrogate, {high, low} for a surrogate pair. Returns null when
+   * nothing remains.
+   *
+   * If the position is on the low half of a pair, the low surrogate is
+   * consumed and the whole pair {high, low} is returned. A low surrogate
+   * with no char in front of it is returned on its own.
+   *
+   * @throws InvalidCharacterException if a high surrogate is the last char
+   *         or is not followed by a low surrogate, or if a low surrogate is
+   *         preceded by a char that is not a high surrogate
+   */
+  public char[] nextChars() throws InvalidCharacterException {
+    if (!hasNext()) return null;
+    if (!isNextSurrogate()) return new char[] {get()};
+    char c1 = get();
+    if (CharUtils.isHighSurrogate(c1)) {
+      // a high surrogate with nothing after it is unpaired; do not call
+      // get() a second time, that would drop c1 and read past the limit
+      if (position() >= limit()) throw new InvalidCharacterException(c1);
+      char c2 = get();
+      if (CharUtils.isLowSurrogate(c2)) return new char[] {c1,c2};
+      throw new InvalidCharacterException(c2);
+    }
+    // c1 is a low surrogate; the char in front of it sits at position()-2
+    // because get() has already advanced the position past c1
+    if (position() > 1) {
+      char c2 = get(position()-2);
+      if (CharUtils.isHighSurrogate(c2)) return new char[] {c2,c1};
+      throw new InvalidCharacterException(c2);
+    }
+    return new char[] {c1};
+  }
+
+  /**
+   * Same as nextChars() but without moving the position
+   */
+  public char[] peekChars() throws InvalidCharacterException {
+    return peekChars(position());
+  }
+
+  /**
+   * Returns the chars of the codepoint at pos without moving the position,
+   * or null if pos is negative or not below the limit. Surrogates are
+   * handled as in nextChars().
+   */
+  private char[] peekChars(int pos) throws InvalidCharacterException {
+    if (pos < 0 || pos >= limit()) return null;
+    char c1 = get(pos);
+    if (CharUtils.isHighSurrogate(c1)) {
+      // the low half must exist below the limit before it can be read
+      if (pos + 1 >= limit()) throw new InvalidCharacterException(c1);
+      char c2 = get(pos+1);
+      if (CharUtils.isLowSurrogate(c2)) return new char[] {c1,c2};
+      throw new InvalidCharacterException(c2);
+    }
+    if (CharUtils.isLowSurrogate(c1) && pos > 0) {
+      char c2 = get(pos-1);
+      if (CharUtils.isHighSurrogate(c2)) return new char[] {c2,c1};
+      throw new InvalidCharacterException(c2);
+    }
+    return new char[] {c1};
+  }
+
+  /**
+   * Consumes and returns the next codepoint, or -1 when nothing remains
+   */
+  public int next() throws InvalidCharacterException {
+    return toCodepoint(nextChars());
+  }
+
+  /**
+   * Returns the next codepoint without consuming it, or -1 when nothing
+   * remains
+   */
+  public int peek() throws InvalidCharacterException {
+    return toCodepoint(peekChars());
+  }
+
+  /**
+   * Returns the codepoint at the given index without moving the position,
+   * or -1 if the index is negative or not below the limit
+   */
+  public int peek(int index) throws InvalidCharacterException {
+    return toCodepoint(peekChars(index));
+  }
+
+  /** Converts the result of nextChars/peekChars to a codepoint; null gives -1 */
+  private static int toCodepoint(char[] chars) {
+    if (chars == null) return -1;
+    return (chars.length == 1) ? chars[0] :
+      CharUtils.toCodePoint(chars[0], chars[1]);
+  }
+
+  /**
+   * Moves the position to n.
+   *
+   * @throws ArrayIndexOutOfBoundsException if n is negative or above the limit
+   */
+  public void position(int n) {
+    if (n < 0 || n > limit()) throw new ArrayIndexOutOfBoundsException(n);
+    position = n;
+  }
+
+  /** Returns the index of the next char to be read */
+  public int position() {
+    return position;
+  }
+
+  /** Returns the index at which iteration stops */
+  public int limit() {
+    return limit;
+  }
+
+  /** Returns the number of chars between the position and the limit */
+  public int remaining() {
+    return limit - position();
+  }
+
+  private boolean isNextSurrogate() {
+    if (!hasNext()) return false;
+    char c = get(position());
+    return CharUtils.isHighSurrogate(c) || CharUtils.isLowSurrogate(c);
+  }
+
+  /**
+   * Returns true if the char at index is a high surrogate.
+   *
+   * @throws ArrayIndexOutOfBoundsException if index is negative or not
+   *         below the limit
+   */
+  public boolean isHigh(int index) {
+    if (index < 0 || index >= limit()) throw new ArrayIndexOutOfBoundsException(index);
+    return CharUtils.isHighSurrogate(get(index));
+  }
+
+  /**
+   * Returns true if the char at index is a low surrogate.
+   *
+   * @throws ArrayIndexOutOfBoundsException if index is negative or not
+   *         below the limit
+   */
+  public boolean isLow(int index) {
+    if (index < 0 || index >= limit()) throw new ArrayIndexOutOfBoundsException(index);
+    return CharUtils.isLowSurrogate(get(index));
+  }
+
+}

Added: directory/sandbox/elecharny/trunks/shared/ldap/src/main/java/org/apache/directory/shared/ldap/util/unicode/FilterCodepointIterator.java
URL: http://svn.apache.org/viewvc/directory/sandbox/elecharny/trunks/shared/ldap/src/main/java/org/apache/directory/shared/ldap/util/unicode/FilterCodepointIterator.java?view=auto&rev=490270
==============================================================================
--- directory/sandbox/elecharny/trunks/shared/ldap/src/main/java/org/apache/directory/shared/ldap/util/unicode/FilterCodepointIterator.java (added)
+++ directory/sandbox/elecharny/trunks/shared/ldap/src/main/java/org/apache/directory/shared/ldap/util/unicode/FilterCodepointIterator.java Tue Dec 26 00:37:23 2006
@@ -0,0 +1,103 @@
+/*
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements.  The ASF licenses this file to You
+* under the Apache License, Version 2.0 (the "License"); you may not
+* use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.  For additional information regarding
+* copyright in this work, please see the NOTICE file in the top level
+* directory of this distribution.
+*/
+package org.apache.directory.shared.ldap.util.unicode;
+
+/**
+ * Base implementation of a CodepointIterator that filters the output of
+ * another CodepointIterator. Every method overridden here simply forwards
+ * to the wrapped iterator; subclasses override the ones they want to
+ * filter.
+ */
+public abstract class FilterCodepointIterator 
+  extends CodepointIterator {
+
+  /** The wrapped iterator all calls are forwarded to; never reassigned. */
+  private final CodepointIterator internal;
+
+  /**
+   * @param internal the iterator to wrap
+   */
+  protected FilterCodepointIterator(CodepointIterator internal) {
+    this.internal = internal;
+  }
+
+  /** Forwards to the wrapped iterator's get(). */
+  @Override
+  protected char get() {
+    return internal.get();
+  }
+
+  /** Forwards to the wrapped iterator's get(int). */
+  @Override
+  protected char get(int index) {
+    return internal.get(index);
+  }
+
+  /** Forwards to the wrapped iterator's hasNext(). */
+  @Override
+  public boolean hasNext() {
+    return internal.hasNext();
+  }
+
+  /** Forwards to the wrapped iterator's isHigh(int). */
+  @Override
+  public boolean isHigh(int index) {
+    return internal.isHigh(index);
+  }
+
+  /** Forwards to the wrapped iterator's isLow(int). */
+  @Override
+  public boolean isLow(int index) {
+    return internal.isLow(index);
+  }
+
+  /** Forwards to the wrapped iterator's limit(). */
+  @Override
+  public int limit() {
+    return internal.limit();
+  }
+
+  /** Forwards to the wrapped iterator's next(). */
+  @Override
+  public int next() throws InvalidCharacterException {
+    return internal.next();
+  }
+
+  /** Forwards to the wrapped iterator's nextChars(). */
+  @Override
+  public char[] nextChars() throws InvalidCharacterException {
+    return internal.nextChars();
+  }
+
+  /** Forwards to the wrapped iterator's peek(). */
+  @Override
+  public int peek() throws InvalidCharacterException {
+    return internal.peek();
+  }
+
+  /** Forwards to the wrapped iterator's peek(int). */
+  @Override
+  public int peek(int index) throws InvalidCharacterException {
+    return internal.peek(index);
+  }
+
+  /** Forwards to the wrapped iterator's peekChars(). */
+  @Override
+  public char[] peekChars() throws InvalidCharacterException {
+    return internal.peekChars();
+  }
+
+  /** Forwards to the wrapped iterator's position(). */
+  @Override
+  public int position() {
+    return internal.position();
+  }
+
+  /** Forwards to the wrapped iterator's remaining(). */
+  @Override
+  public int remaining() {
+    return internal.remaining();
+  }
+
+  /** Forwards to the wrapped iterator's position(int). */
+  @Override
+  public void position(int position) {
+    internal.position(position);
+  }
+
+}

Added: directory/sandbox/elecharny/trunks/shared/ldap/src/main/java/org/apache/directory/shared/ldap/util/unicode/InvalidCharacterException.java
URL: http://svn.apache.org/viewvc/directory/sandbox/elecharny/trunks/shared/ldap/src/main/java/org/apache/directory/shared/ldap/util/unicode/InvalidCharacterException.java?view=auto&rev=490270
==============================================================================
--- directory/sandbox/elecharny/trunks/shared/ldap/src/main/java/org/apache/directory/shared/ldap/util/unicode/InvalidCharacterException.java (added)
+++ directory/sandbox/elecharny/trunks/shared/ldap/src/main/java/org/apache/directory/shared/ldap/util/unicode/InvalidCharacterException.java Tue Dec 26 00:37:23 2006
@@ -0,0 +1,36 @@
+/*
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements.  The ASF licenses this file to You
+* under the Apache License, Version 2.0 (the "License"); you may not
+* use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.  For additional information regarding
+* copyright in this work, please see the NOTICE file in the top level
+* directory of this distribution.
+*/
+package org.apache.directory.shared.ldap.util.unicode;
+
+import java.io.IOException;
+
+/**
+ * Signals that an invalid character (codepoint) was encountered. The
+ * offending value is reported in hexadecimal by getMessage().
+ */
+public class InvalidCharacterException 
+  extends IOException {
+
+  private static final long serialVersionUID = -7150645484748059676L;
+
+  /** The offending value; set once at construction. */
+  private final int input;
+
+  /**
+   * @param input the invalid character value
+   */
+  public InvalidCharacterException(int input) {
+    this.input = input;
+  }
+
+  /**
+   * @return "Invalid Character 0x" followed by the lowercase hexadecimal
+   *   form of the offending value
+   */
+  @Override
+  public String getMessage() {
+    return "Invalid Character 0x" + Integer.toHexString(input);
+  }
+}
\ No newline at end of file

Added: directory/sandbox/elecharny/trunks/shared/ldap/src/main/java/org/apache/directory/shared/ldap/util/unicode/Normalizer.java
URL: http://svn.apache.org/viewvc/directory/sandbox/elecharny/trunks/shared/ldap/src/main/java/org/apache/directory/shared/ldap/util/unicode/Normalizer.java?view=auto&rev=490270
==============================================================================
--- directory/sandbox/elecharny/trunks/shared/ldap/src/main/java/org/apache/directory/shared/ldap/util/unicode/Normalizer.java (added)
+++ directory/sandbox/elecharny/trunks/shared/ldap/src/main/java/org/apache/directory/shared/ldap/util/unicode/Normalizer.java Tue Dec 26 00:37:23 2006
@@ -0,0 +1,175 @@
+/*
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements.  The ASF licenses this file to You
+* under the Apache License, Version 2.0 (the "License"); you may not
+* use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.  For additional information regarding
+* copyright in this work, please see the NOTICE file in the top level
+* directory of this distribution.
+*/
+package org.apache.directory.shared.ldap.util.unicode;
+
+import java.io.IOException;
+
+/**
+ * Performs Unicode Normalization (Form D,C,KD and KC)
+ */
+public final class Normalizer {
+
+  /**
+   * Flags that make up a normalization Form. Each constant carries an
+   * explicit bit so the encoding does not depend on declaration order.
+   */
+  public enum Mask {
+    NONE(0),
+    COMPATIBILITY(1),
+    COMPOSITION(2);
+
+    private final int bit;
+
+    Mask(int bit) {
+      this.bit = bit;
+    }
+  }
+
+  /**
+   * The four normalization forms, each described by a combination of Mask
+   * flags.
+   */
+  public enum Form { 
+    D, 
+    C(Mask.COMPOSITION), 
+    KD(Mask.COMPATIBILITY), 
+    KC(Mask.COMPATIBILITY,Mask.COMPOSITION);
+
+    private final int mask;
+
+    Form(Mask... masks) {
+      int bits = 0;
+      for (Mask m : masks) {
+        bits |= m.bit;
+      }
+      this.mask = bits;
+    }
+
+    /** @return true if this form carries the COMPATIBILITY flag */
+    public boolean isCompatibility() {
+      return (mask & Mask.COMPATIBILITY.bit) != 0;
+    }
+
+    /** @return true if this form does not carry the COMPATIBILITY flag */
+    public boolean isCanonical() {
+      return !isCompatibility();
+    }
+
+    /** @return true if this form carries the COMPOSITION flag */
+    public boolean isComposition() {
+      return (mask & Mask.COMPOSITION.bit) != 0;
+    }
+  }
+
+  private Normalizer() {}
+
+  /**
+   * Normalize the string using NFKC
+   */
+  public static StringBuilder normalize(String source) throws IOException {
+    return normalize(source, Form.KC);
+  }
+
+  /**
+   * Normalize the string using the specified Form
+   */
+  public static StringBuilder normalize(
+    String source, 
+    Form form) 
+      throws IOException {
+    return normalize(source, form, new StringBuilder());
+  }
+
+  /**
+   * Normalize the string into the given StringBuilder using the given Form
+   *
+   * @param source the text to normalize; an empty string leaves buf untouched
+   * @param form the normalization form to apply
+   * @param buf receives the normalized text and is also returned
+   * @return buf
+   * @throws IOException if the Unicode Character Database is not available,
+   *   or if iterating over the text fails
+   */
+  public static StringBuilder normalize(
+    String source, 
+    Form form, 
+    StringBuilder buf) 
+      throws IOException {
+      if (source.length() == 0) return buf;
+      UnicodeCharacterDatabase ucd = UnicodeCharacterDatabase.getInstance();
+      if (ucd == null) {
+        // Without the database nothing can be normalized; returning the
+        // untouched (possibly empty) buffer would silently drop the input.
+        throw new IOException("Unicode Character Database could not be loaded");
+      }
+      decompose(ucd, source, form, buf);
+      compose(ucd, form, buf);
+      return buf;
+  }
+
+  /**
+   * Decompose every codepoint of source and insert the resulting codepoints
+   * into buf at the position chosen by findInsertionPoint.
+   */
+  private static void decompose(
+    UnicodeCharacterDatabase ucd,
+    String source, 
+    Form form, 
+    StringBuilder buf) 
+      throws IOException {
+      StringBuffer internal = new StringBuffer();
+      CodepointIterator ci = CodepointIterator.forCharSequence(source);
+      boolean canonical = form.isCanonical();
+      while (ci.hasNext()) {
+        int c = ci.next();
+        // scratch buffer is reused for each source codepoint
+        internal.setLength(0);
+        ucd.decompose(c, canonical, internal);
+        CodepointIterator ii = CodepointIterator.forCharSequence(internal);
+        while(ii.hasNext()) {
+          int ch = ii.next();
+          int i = findInsertionPoint(ucd, buf, ch);
+          buf.insert(i,CharUtils.toString(ch));
+        }
+      }
+  }
+
+  /**
+   * Find the index in buf at which c has to be inserted. A codepoint of
+   * canonical class 0 goes to the end; otherwise the scan walks backwards
+   * over codepoints whose canonical class is greater than that of c.
+   */
+  private static int findInsertionPoint(
+    UnicodeCharacterDatabase ucd, 
+    StringBuilder buf, int c) {
+    int cc = ucd.getCanonicalClass(c);
+    int i = buf.length();
+    if (cc != 0) {
+      while (i > 0) {
+        int ch = CharUtils.charAt(buf, i-1);
+        if (ucd.getCanonicalClass(ch) <= cc) break;
+        // Step back over the codepoint just examined, not over the one
+        // being inserted: the two may differ in char count.
+        // NOTE(review): assumes CharUtils.size(int) is the number of chars
+        // the codepoint occupies - confirm against CharUtils.
+        i -= CharUtils.size(ch);
+      }
+    }
+    return i;
+  }
+
+  /**
+   * Compose the content of buf in place, when the form asks for
+   * composition. Pairs for which the database knows a composition are
+   * replaced by the composite; the buffer is truncated at the end.
+   */
+  private static void compose(
+    UnicodeCharacterDatabase ucd,
+    Form form, 
+    StringBuilder buf) 
+      throws IOException {
+    // nothing to do for decomposition-only forms; an empty buffer has no
+    // first codepoint to start from
+    if (!form.isComposition() || buf.length() == 0) return;
+    int pos = 0;                          // where the current starter lives
+    int lc = CharUtils.charAt(buf, pos);  // the current starter
+    int cpos = CharUtils.size(lc);        // write position
+    int lcc = ucd.getCanonicalClass(lc);  // class of the last kept codepoint
+    if (lcc != 0) lcc = 256;
+    int len = buf.length();
+    int c;
+    for (int dpos = cpos; dpos < buf.length(); dpos += CharUtils.size(c)) {
+      c = CharUtils.charAt(buf,dpos);
+      int cc = ucd.getCanonicalClass(c);
+      int composite = ucd.getPairComposition(lc, c);
+      // '\uFFFF' is what getPairComposition returns when there is no composite
+      if (composite != '\uFFFF' && (lcc < cc || lcc == 0)) {
+        CharUtils.setChar(buf, pos, composite);
+        lc = composite;
+      } else {
+        if (cc == 0) {
+          pos = cpos;
+          lc = c;
+        }
+        lcc = cc;
+        CharUtils.setChar(buf,cpos,c);
+        // setChar may have changed the buffer length; keep the read
+        // position aligned with the shifted content
+        if (buf.length() != len) {
+          dpos += buf.length() - len;
+          len = buf.length();
+        }
+        cpos += CharUtils.size(c);
+      }
+    }
+    buf.setLength(cpos);
+  }
+
+  /**
+   * Developer entry point: forwards a fixed output path to
+   * UnicodeCharacterDatabase.main.
+   * NOTE(review): the path below still points into an "abdera" source tree
+   * - confirm it is what this project wants.
+   */
+  public static void main(String... args) throws Exception {
+
+    UnicodeCharacterDatabase.main("src/org/apache/abdera/util/unicode/data/ucd.res");
+
+  }
+}
\ No newline at end of file

Added: directory/sandbox/elecharny/trunks/shared/ldap/src/main/java/org/apache/directory/shared/ldap/util/unicode/RestrictedCodepointIterator.java
URL: http://svn.apache.org/viewvc/directory/sandbox/elecharny/trunks/shared/ldap/src/main/java/org/apache/directory/shared/ldap/util/unicode/RestrictedCodepointIterator.java?view=auto&rev=490270
==============================================================================
--- directory/sandbox/elecharny/trunks/shared/ldap/src/main/java/org/apache/directory/shared/ldap/util/unicode/RestrictedCodepointIterator.java (added)
+++ directory/sandbox/elecharny/trunks/shared/ldap/src/main/java/org/apache/directory/shared/ldap/util/unicode/RestrictedCodepointIterator.java Tue Dec 26 00:37:23 2006
@@ -0,0 +1,120 @@
+/*
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements.  The ASF licenses this file to You
+* under the Apache License, Version 2.0 (the "License"); you may not
+* use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.  For additional information regarding
+* copyright in this work, please see the NOTICE file in the top level
+* directory of this distribution.
+*/
+package org.apache.directory.shared.ldap.util.unicode;
+
+import java.util.BitSet;
+
+/**
+ * A CodepointIterator implementation that checks output against a BitSet.
+ * If the iterator is set to "scanning only", the iterator will return -1
+ * upon encountering a codepoint not in the set, otherwise the iterator 
+ * will throw an InvalidCharacterException. When "notset" is true the test
+ * is inverted: codepoints that ARE in the set are the rejected ones.
+ */
+public class RestrictedCodepointIterator 
+  extends FilterCodepointIterator {
+
+  private final BitSet bitset;
+  private final boolean scanningOnly;
+  private final boolean notset;
+
+  /**
+   * Create a throwing (not scanning-only) iterator that accepts the
+   * codepoints contained in bitset.
+   */
+  protected RestrictedCodepointIterator(
+    CodepointIterator internal, 
+    BitSet bitset) {
+      this(internal,bitset,false);
+  }
+
+  /**
+   * Create an iterator that accepts the codepoints contained in bitset.
+   */
+  protected RestrictedCodepointIterator(
+    CodepointIterator internal, 
+    BitSet bitset,
+    boolean scanningOnly) {
+      this(internal, bitset, scanningOnly, false);
+  }
+
+  /**
+   * @param internal the iterator to filter
+   * @param bitset the set codepoints are checked against
+   * @param scanningOnly true to stop quietly at a rejected codepoint, false
+   *   to throw an InvalidCharacterException
+   * @param notset true to reject the members of bitset instead of the
+   *   non-members
+   */
+  protected RestrictedCodepointIterator(
+      CodepointIterator internal, 
+      BitSet bitset,
+      boolean scanningOnly,
+      boolean notset) {
+      super(internal);
+      this.bitset = bitset;
+      this.scanningOnly = scanningOnly;
+      this.notset = notset;
+    }
+
+  /**
+   * In scanning-only mode this also reports false when the next codepoint
+   * is rejected or cannot be read.
+   */
+  @Override
+  public boolean hasNext() {
+    // Do not peek when the wrapped iterator is exhausted: there is nothing
+    // at the current position to look at.
+    if (!super.hasNext()) return false;
+    if (scanningOnly) {
+      try {
+        int cp = peek(position());
+        if (cp != -1 && check(cp)) return false;
+      } catch (InvalidCharacterException e) { return false; }
+    } 
+    return true;
+  }
+
+  /**
+   * @return the next codepoint, or -1 in scanning-only mode when the next
+   *   codepoint is rejected (the position is then left unchanged)
+   * @throws InvalidCharacterException when a codepoint is rejected and the
+   *   iterator is not scanning-only
+   */
+  @Override
+  public int next() throws InvalidCharacterException {
+    // Remember where we started so a rejected codepoint can be un-read
+    // whatever number of chars it occupied.
+    int start = position();
+    int cp = super.next();
+    if (cp != -1 && check(cp)) {
+      if (!scanningOnly) throw new InvalidCharacterException(cp);
+      position(start);
+      return -1;
+    }
+    return cp;
+  }
+
+  /**
+   * @return true if the codepoint has to be rejected
+   */
+  private boolean check(int cp) {
+    // notset == false: reject non-members; notset == true: reject members
+    return bitset.get(cp) == notset;
+  }
+
+  /**
+   * @return the chars of the next codepoint, or null in scanning-only mode
+   *   when that codepoint is rejected (the position is then left unchanged)
+   * @throws InvalidCharacterException when a codepoint is rejected and the
+   *   iterator is not scanning-only
+   */
+  @Override
+  public char[] nextChars() throws InvalidCharacterException {
+    int start = position();
+    char[] chars = super.nextChars();
+    if (chars == null || (chars.length != 1 && chars.length != 2)) {
+      return chars;
+    }
+    int cp = (chars.length == 1) ? chars[0] : CharUtils.toCodePoint(chars);
+    if (check(cp)) {
+      if (!scanningOnly) throw new InvalidCharacterException(cp);
+      position(start);
+      return null;
+    }
+    return chars;
+  }
+
+  /** Small manual demo; prints the codepoints it iterates over. */
+  public static void main(String... args) throws Exception {
+
+    ChainableBitSet set = new ChainableBitSet().set2('a','b','c');
+    char[] c = {'a','b','c',CharUtils.getHighSurrogate(0x10000),CharUtils.getLowSurrogate(0x10000)};
+
+    CodepointIterator ci = CodepointIterator.forCharArray(c);
+    RestrictedCodepointIterator rci = new RestrictedCodepointIterator(ci,set,false,true);
+    while(rci.hasNext()) System.out.println(rci.next());
+  }
+}

Added: directory/sandbox/elecharny/trunks/shared/ldap/src/main/java/org/apache/directory/shared/ldap/util/unicode/UnicodeCharacterDatabase.java
URL: http://svn.apache.org/viewvc/directory/sandbox/elecharny/trunks/shared/ldap/src/main/java/org/apache/directory/shared/ldap/util/unicode/UnicodeCharacterDatabase.java?view=auto&rev=490270
==============================================================================
--- directory/sandbox/elecharny/trunks/shared/ldap/src/main/java/org/apache/directory/shared/ldap/util/unicode/UnicodeCharacterDatabase.java (added)
+++ directory/sandbox/elecharny/trunks/shared/ldap/src/main/java/org/apache/directory/shared/ldap/util/unicode/UnicodeCharacterDatabase.java Tue Dec 26 00:37:23 2006
@@ -0,0 +1,311 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  The ASF licenses this file to You
+ * under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.  For additional information regarding
+ * copyright in this work, please see the NOTICE file in the top level
+ * directory of this distribution.
+ */
+package org.apache.directory.shared.ldap.util.unicode;
+
+
+import java.io.BufferedReader;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.io.ObjectInputStream;
+import java.io.ObjectOutputStream;
+import java.io.Serializable;
+import java.util.BitSet;
+import java.util.HashMap;
+import java.util.zip.GZIPInputStream;
+import java.util.zip.GZIPOutputStream;
+
+
+/**
+ * An implementation of the Unicode Character Database modeled after the 
+ * sample normalization demo available at: 
+ * 
+ * http://www.unicode.org/unicode/reports/tr15/Normalizer.html
+ * 
+ * for now, this has been implemented and tested against Unicode 3.2.0.  We 
+ * need to test it against Unicode 4.0.
+ */
+final class UnicodeCharacterDatabase implements Serializable, Cloneable
+{
+
+    private static final long serialVersionUID = 1596950870716625345L;
+
+    /** Classpath location of the serialized, gzip compressed database. */
+    private static final String UCD = "org/apache/directory/shared/ldap/util/unicode/data/ucd.res";
+
+    /** Canonical combining class per codepoint; absent means 0. */
+    private final HashMap<Integer, Integer> cc = new HashMap<Integer, Integer>();
+    /** Decomposition mapping per codepoint. */
+    private final HashMap<Integer, String> decompose = new HashMap<Integer, String>();
+    /** Composite per packed pair, keyed by ( first << 16 ) | second. */
+    private final HashMap<Integer, Integer> compose = new HashMap<Integer, Integer>();
+    /** Codepoints whose decomposition is a compatibility ("<tag>") one. */
+    private final BitSet compatibility = new BitSet();
+    /** Codepoints listed in the composition exclusions file. */
+    private final BitSet excluded = new BitSet();
+
+    private static UnicodeCharacterDatabase ucd = null;
+
+
+    /**
+     * Return the shared database, loading it from the classpath on first use.
+     * 
+     * @return the database, or null if it could not be loaded
+     */
+    public synchronized static UnicodeCharacterDatabase getInstance()
+    {
+        if ( ucd == null )
+        {
+            try
+            {
+                ucd = load();
+            }
+            catch ( Exception ignored )
+            {
+                // Best effort on purpose: main() relies on a null result to
+                // know that the resource has to be (re)built from the sources.
+            }
+        }
+        return ucd;
+    }
+
+
+    UnicodeCharacterDatabase()
+    {
+    }
+
+
+    /**
+     * @return the canonical combining class recorded for c, or 0 if none is
+     */
+    public int getCanonicalClass( int c )
+    {
+        Integer value = cc.get( c );
+        return ( value != null ) ? value : 0;
+    }
+
+
+    /**
+     * @return true if both values lie in the range 0 .. 0x10FFFF
+     */
+    public boolean isComposite( int f, int s )
+    {
+        return !( f < 0 || f > 0x10FFFF || s < 0 || s > 0x10FFFF );
+    }
+
+
+    /**
+     * Look up the composite for the pair ( f, s ).
+     * 
+     * @return the composite, or '\uFFFF' if either value is outside 
+     * 0 .. 0x10FFFF or no composition is recorded for the pair
+     */
+    public char getPairComposition( int f, int s )
+    {
+        // both members of the pair have to be valid codepoints
+        if ( f < 0 || f > 0x10FFFF || s < 0 || s > 0x10FFFF )
+            return '\uFFFF';
+        // NOTE(review): the key packs both values into one int and the result
+        // is narrowed to char, so this lookup is only reliable for values 
+        // below 0x10000 - confirm supplementary pairs are not expected here.
+        Integer i = compose.get( ( f << 16 ) | s );
+        return ( i != null ) ? ( char ) i.intValue() : '\uFFFF';
+    }
+
+
+    /**
+     * Append the full (recursive) decomposition of c to buf.
+     * 
+     * @param c the codepoint to decompose
+     * @param canonical true to leave compatibility decompositions alone
+     * @param buf receives the result
+     */
+    public void decompose( int c, boolean canonical, StringBuffer buf )
+    {
+        String d = decompose.get( c );
+        if ( d != null && !( canonical && compatibility.get( c ) ) )
+        {
+            for ( int i = 0; i < d.length(); ++i )
+            {
+                decompose( d.charAt( i ), canonical, buf );
+            }
+        }
+        else
+            CharUtils.append( buf, c );
+    }
+
+
+    public Object clone() throws CloneNotSupportedException
+    {
+        return super.clone();
+    }
+
+
+    /**
+     * Read the serialized database from the classpath resource.
+     * 
+     * @throws IOException if the resource is missing or cannot be read
+     * @throws ClassNotFoundException if the resource cannot be deserialized
+     */
+    public static UnicodeCharacterDatabase load() throws IOException, ClassNotFoundException
+    {
+        ClassLoader cl = Thread.currentThread().getContextClassLoader();
+        InputStream in = cl.getResourceAsStream( UCD );
+        if ( in == null )
+        {
+            throw new IOException( "Resource not found on the classpath: " + UCD );
+        }
+        try
+        {
+            // 'in' always refers to the outermost stream built so far, so the
+            // finally block releases everything even on a partial failure
+            in = new GZIPInputStream( in );
+            ObjectInputStream ois = new ObjectInputStream( in );
+            in = ois;
+            return ( UnicodeCharacterDatabase ) ois.readObject();
+        }
+        finally
+        {
+            in.close();
+        }
+    }
+
+
+    /**
+     * Serialize the database, gzip compressed, into the given file.
+     */
+    private static void save( UnicodeCharacterDatabase ucd, String to ) throws IOException
+    {
+        FileOutputStream fos = new FileOutputStream( to );
+        try
+        {
+            GZIPOutputStream gzip = new GZIPOutputStream( fos );
+            ObjectOutputStream oos = new ObjectOutputStream( gzip );
+            oos.writeObject( ucd );
+            // closing the outermost stream flushes it and finishes the gzip data
+            oos.close();
+        }
+        finally
+        {
+            // releases the file even if writing failed half way
+            fos.close();
+        }
+    }
+
+    private static String base;
+    private static String version;
+
+
+    /**
+     * Load the Unicode Character Database from the source files and save as 
+     * a gzip compressed, serialized Java class.
+     * 
+     * @param args output file name, optional data path, optional Unicode 
+     * version (defaults to "3.2.0")
+     */
+    public static void main( String... args ) throws Exception
+    {
+        if ( args.length == 0 )
+        {
+            usage();
+        }
+        
+        base = ( args.length > 1 ) ? args[1] : UCD;
+        version = ( args.length > 2 ) ? args[2] : "3.2.0";
+        UnicodeCharacterDatabase ucd = UnicodeCharacterDatabase.getInstance();
+        
+        if ( ucd == null )
+        {
+            ucd = new UnicodeCharacterDatabase();
+            Loader.load( ucd );
+        }
+        
+        save( ucd, args[0] );
+    }
+
+
+    private static void usage()
+    {
+        System.out
+            .println( "Usage:\n  java -cp $CLASSPATH org.apache.directory.shared.ldap.util.unicode.UnicodeCharacterDatabase $filename [$datapath [$version]]" );
+        System.exit( 0 );
+    }
+
+    /**
+     * Builds a database from the Unicode text files found on the classpath.
+     */
+    private static class Loader
+    {
+
+        private static final String EXCLUSIONS = "CompositionExclusions";
+        private static final String UNICODEDATA = "UnicodeData";
+
+
+        /**
+         * @return base + "/" + target + "-" + version + ".txt"
+         */
+        static String filename( String target )
+        {
+            return base + ( !base.endsWith( "/" ) ? "/" : "" ) + target + "-" + version + ".txt";
+        }
+
+
+        static void load( UnicodeCharacterDatabase ucd ) throws IOException
+        {
+            // exclusions first: decomposition() consults them
+            exclusions( ucd );
+            decomposition( ucd );
+        }
+
+
+        /**
+         * @return s without anything from the first '#' on
+         */
+        static String stripcomments( String s )
+        {
+            int n = s.indexOf( '#' );
+            return ( n != -1 ) ? s.substring( 0, n ) : s;
+        }
+
+
+        /**
+         * Read the composition exclusions file: one hexadecimal codepoint 
+         * per line, '#' starts a comment.
+         */
+        static void exclusions( UnicodeCharacterDatabase ucd ) throws IOException
+        {
+            BufferedReader r = read( filename( EXCLUSIONS ) );
+            try
+            {
+                String line = null;
+                while ( ( line = r.readLine() ) != null )
+                {
+                    // trim before the emptiness test so that a line holding
+                    // only blanks in front of a comment is skipped as well
+                    line = stripcomments( line ).trim();
+                    if ( line.length() == 0 )
+                        continue;
+                    int v = Integer.parseInt( line, 16 );
+                    ucd.excluded.set( v );
+                }
+            }
+            finally
+            {
+                r.close();
+            }
+        }
+
+
+        /**
+         * Turn a blank separated list of hexadecimal values into a String, 
+         * skipping "<tag>" tokens.
+         */
+        static String dehex( String t )
+        {
+            String[] ts = t.split( " " );
+            StringBuffer buf = new StringBuffer();
+            for ( String token : ts )
+            {
+                // consecutive blanks produce empty tokens
+                if ( token.length() == 0 )
+                    continue;
+                if ( token.charAt( 0 ) != '<' )
+                {
+                    int n = Integer.parseInt( token.trim(), 16 );
+                    // NOTE(review): values above 0xFFFF are narrowed to a
+                    // single char here - confirm whether supplementary 
+                    // decompositions need to be preserved.
+                    buf.append( ( char ) n );
+                }
+            }
+            return buf.toString();
+        }
+
+
+        /**
+         * Read the UnicodeData file and record canonical classes, 
+         * decompositions, the compatibility flag and the pair compositions.
+         */
+        static void decomposition( UnicodeCharacterDatabase ucd ) throws IOException
+        {
+            BufferedReader r = read( filename( UNICODEDATA ) );
+            try
+            {
+                String line = null;
+                while ( ( line = r.readLine() ) != null )
+                {
+                    line = stripcomments( line );
+                    if ( line.length() == 0 )
+                        continue;
+                    // fields used: 0 = codepoint, 3 = canonical class, 
+                    // 5 = decomposition
+                    String[] tokens = line.split( ";" );
+                    int val = Integer.parseInt( tokens[0], 16 );
+                    int cc = Integer.parseInt( tokens[3] );
+                    ucd.cc.put( val, cc );
+                    String decomp = tokens[5];
+                    if ( decomp.length() != 0 )
+                    {
+                        if ( decomp.startsWith( "<" ) )
+                        {
+                            ucd.compatibility.set( val );
+                        }
+                        decomp = dehex( decomp );
+                        ucd.decompose.put( val, decomp );
+                        if ( !ucd.compatibility.get( val ) && !ucd.excluded.get( val ) )
+                        {
+                            // a single char decomposition is keyed with a 
+                            // first member of 0
+                            char f = ( decomp.length() > 1 ) ? decomp.charAt( 0 ) : '\u0000';
+                            char l = ( decomp.length() > 1 ) ? decomp.charAt( 1 ) : decomp.charAt( 0 );
+                            ucd.compose.put( ( f << 16 ) | l, val );
+                        }
+                    }
+                }
+                hanguls( ucd );
+            }
+            finally
+            {
+                r.close();
+            }
+        }
+
+
+        // Use the algorithm used in http://www.unicode.org/unicode/reports/tr15/NormalizerBuilder.java
+        static void hanguls( UnicodeCharacterDatabase ucd ) throws IOException
+        {
+            for ( int s = 0; s < 0x2BA4; ++s )
+            {
+                int t = s % 0x001C;
+                char f = ( t != 0 ) ? ( char ) ( 0xAC00 + s - t ) : ( char ) ( 0x1100 + s / 0x024C );
+                char e = ( t != 0 ) ? ( char ) ( 0x11A7 + t ) : ( char ) ( 0x1161 + ( s % 0x024C ) / 0x001C );
+                int pair = ( f << 16 ) | e;
+                int value = s + 0xAC00;
+                ucd.decompose.put( value, String.valueOf( f ) + e );
+                ucd.compose.put( pair, value );
+            }
+        }
+
+
+        /**
+         * Open a classpath resource for reading as UTF-8 text.
+         * 
+         * @throws IOException if the resource does not exist
+         */
+        static BufferedReader read( String f ) throws IOException
+        {
+            ClassLoader cl = Thread.currentThread().getContextClassLoader();
+            InputStream in = cl.getResourceAsStream( f );
+            if ( in == null )
+            {
+                throw new IOException( "Resource not found on the classpath: " + f );
+            }
+            // explicit charset: the platform default must not influence parsing
+            return new BufferedReader( new InputStreamReader( in, "UTF-8" ) );
+        }
+
+    }
+
+}

Added: directory/sandbox/elecharny/trunks/shared/ldap/src/main/resources/org/apache/directory/shared/ldap/util/unicode/data/ucd.res
URL: http://svn.apache.org/viewvc/directory/sandbox/elecharny/trunks/shared/ldap/src/main/resources/org/apache/directory/shared/ldap/util/unicode/data/ucd.res?view=auto&rev=490270
==============================================================================
Binary file - no diff available.

Propchange: directory/sandbox/elecharny/trunks/shared/ldap/src/main/resources/org/apache/directory/shared/ldap/util/unicode/data/ucd.res
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Added: directory/sandbox/elecharny/trunks/shared/ldap/src/main/unicode/ByteArrayCodepointIterator.java
URL: http://svn.apache.org/viewvc/directory/sandbox/elecharny/trunks/shared/ldap/src/main/unicode/ByteArrayCodepointIterator.java?view=auto&rev=490270
==============================================================================
--- directory/sandbox/elecharny/trunks/shared/ldap/src/main/unicode/ByteArrayCodepointIterator.java (added)
+++ directory/sandbox/elecharny/trunks/shared/ldap/src/main/unicode/ByteArrayCodepointIterator.java Tue Dec 26 00:37:23 2006
@@ -0,0 +1,45 @@
+/*
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements.  The ASF licenses this file to You
+* under the Apache License, Version 2.0 (the "License"); you may not
+* use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.  For additional information regarding
+* copyright in this work, please see the NOTICE file in the top level
+* directory of this distribution.
+*/
+package org.apache.directory.shared.ldap.util.unicode;
+
+import java.nio.ByteBuffer;
+import java.nio.CharBuffer;
+import java.nio.charset.Charset;
+
+/**
+ * Iterate over Unicode codepoints decoded from an array of bytes
+ */
+public class ByteArrayCodepointIterator 
+  extends CharArrayCodepointIterator {
+  
+  public ByteArrayCodepointIterator(byte[] bytes) {
+    this(bytes,Charset.defaultCharset());
+  }
+  
+  public ByteArrayCodepointIterator(byte[] bytes, String charset) {
+    this(bytes,Charset.forName(charset));
+  }
+  
+  public ByteArrayCodepointIterator(byte[] bytes, Charset charset) {
+    CharBuffer cb = charset.decode(ByteBuffer.wrap(bytes));
+    buffer = cb.array();
+    position = cb.position();
+    limit = cb.limit();
+  }
+
+}

Added: directory/sandbox/elecharny/trunks/shared/ldap/src/main/unicode/ChainableBitSet.java
URL: http://svn.apache.org/viewvc/directory/sandbox/elecharny/trunks/shared/ldap/src/main/unicode/ChainableBitSet.java?view=auto&rev=490270
==============================================================================
--- directory/sandbox/elecharny/trunks/shared/ldap/src/main/unicode/ChainableBitSet.java (added)
+++ directory/sandbox/elecharny/trunks/shared/ldap/src/main/unicode/ChainableBitSet.java Tue Dec 26 00:37:23 2006
@@ -0,0 +1,119 @@
+/*
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements.  The ASF licenses this file to You
+* under the Apache License, Version 2.0 (the "License"); you may not
+* use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.  For additional information regarding
+* copyright in this work, please see the NOTICE file in the top level
+* directory of this distribution.
+*/
+package org.apache.directory.shared.ldap.util.unicode;
+
+import java.util.BitSet;
+
+/**
+ * Extension to java.util.BitSet that allows calls to set to be chained, e.g.
+ * bs.set2(1).set2(2).set2(3), making it easier to define a complex bit set in 
+ * a single declaration.
+ *
+ * Beware of the set2 overloads: a call with exactly two int arguments, such
+ * as set2(1,5), selects the range variant and sets bits 1 through 5, whereas
+ * three or more int arguments set the individual bits.
+ */
+public class ChainableBitSet 
+  extends BitSet {
+
+  private static final long serialVersionUID = -1105957441212997513L;
+
+  /** Chainable form of {@link BitSet#and(BitSet)}. */
+  public ChainableBitSet and2(BitSet set) {
+    and(set);
+    return this;
+  }
+
+  /** Chainable form of {@link BitSet#andNot(BitSet)}. */
+  public ChainableBitSet andNot2(BitSet set) {
+    andNot(set);
+    return this;
+  }
+
+  /**
+   * Chainable form of {@link BitSet#andNot(BitSet)}.
+   *
+   * @deprecated misspelled name kept for existing callers; use
+   *   {@link #andNot2(BitSet)}
+   */
+  @Deprecated
+  public ChainableBitSet addNot2(BitSet set) {
+    return andNot2(set);
+  }
+
+  /** Chainable form of {@link BitSet#clear(int)}. */
+  public ChainableBitSet clear2(int index) {
+    clear(index);
+    return this;
+  }
+
+  /** Clear each of the given bits. */
+  public ChainableBitSet clear2(int... indexes) {
+    for (int i : indexes) clear(i);
+    return this;
+  }
+
+  /**
+   * Chainable form of {@link BitSet#clear(int, int)}; endIndex is exclusive.
+   */
+  public ChainableBitSet clear2(int startIndex, int endIndex) {
+    clear(startIndex, endIndex);
+    return this;
+  }
+
+  /** Chainable form of {@link BitSet#flip(int)}. */
+  public ChainableBitSet flip2(int index) {
+    flip(index);
+    return this;
+  }
+
+  /** Flip each of the given bits. */
+  public ChainableBitSet flip2(int... indexes) {
+    for (int i : indexes) flip(i);
+    return this;
+  }
+
+  /**
+   * Chainable form of {@link BitSet#flip(int, int)}; endIndex is exclusive.
+   */
+  public ChainableBitSet flip2(int startIndex, int endIndex) {
+    flip(startIndex,endIndex);
+    return this;
+  }
+
+  /** Chainable form of {@link BitSet#or(BitSet)}. */
+  public ChainableBitSet or2(BitSet set) {
+    or(set);
+    return this;
+  }
+
+  /** Chainable form of {@link BitSet#xor(BitSet)}. */
+  public ChainableBitSet xor2(BitSet set) {
+    xor(set);
+    return this;
+  }
+
+  /** Set the bit of every char of the string. */
+  public ChainableBitSet set2(String s) {
+    char[] chars = s.toCharArray();
+    for (char c : chars) set(c);
+    return this;
+  }
+
+  /** Set every bit that is set in the given set. */
+  public ChainableBitSet set2(BitSet set) {
+    this.or(set);
+    return this;
+  }
+
+  /** Set each of the given bits. */
+  public ChainableBitSet set2(int ... bits) {
+    for (int n : bits) set(n);
+    return this;
+  }
+
+  /**
+   * Set the bits fromIndex through toIndex. Unlike
+   * {@link BitSet#set(int, int)} and unlike the two argument clear2 and
+   * flip2, toIndex is inclusive here.
+   */
+  public ChainableBitSet set2(int fromIndex, int toIndex) {
+    super.set(fromIndex, toIndex+1);
+    return this;
+  }
+
+  /** Chainable form of {@link BitSet#set(int)}. */
+  public ChainableBitSet set2(int bitIndex) {
+    super.set(bitIndex);
+    return this;
+  }
+
+  /** Chainable form of {@link BitSet#set(int, boolean)}. */
+  public ChainableBitSet set2(int bitIndex, boolean value) {
+    super.set(bitIndex, value);
+    return this;
+  }
+
+  /**
+   * Set (value == true) or clear (value == false) every bit that is set in
+   * the given set.
+   */
+  public ChainableBitSet set2(BitSet set, boolean value) {
+    if (value) return set2(set);
+    else this.andNot(set);
+    return this;
+  }
+
+}

Added: directory/sandbox/elecharny/trunks/shared/ldap/src/main/unicode/CharArrayCodepointIterator.java
URL: http://svn.apache.org/viewvc/directory/sandbox/elecharny/trunks/shared/ldap/src/main/unicode/CharArrayCodepointIterator.java?view=auto&rev=490270
==============================================================================
--- directory/sandbox/elecharny/trunks/shared/ldap/src/main/unicode/CharArrayCodepointIterator.java (added)
+++ directory/sandbox/elecharny/trunks/shared/ldap/src/main/unicode/CharArrayCodepointIterator.java Tue Dec 26 00:37:23 2006
@@ -0,0 +1,50 @@
+/*
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements.  The ASF licenses this file to You
+* under the Apache License, Version 2.0 (the "License"); you may not
+* use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.  For additional information regarding
+* copyright in this work, please see the NOTICE file in the top level
+* directory of this distribution.
+*/
+package org.apache.directory.shared.ldap.util.unicode;
+
+/**
+ * Iterate over Unicode codepoints contained in a char array
+ */
+public class CharArrayCodepointIterator 
+  extends CodepointIterator {
+
+  protected char[] buffer;
+  
+  protected CharArrayCodepointIterator() {}
+  
+  public CharArrayCodepointIterator(char[] buffer) {
+    this(buffer,0,buffer.length);
+  }
+  
+  public CharArrayCodepointIterator(char[] buffer, int n, int e) {
+    this.buffer = buffer;
+    this.position = n;
+    this.limit = Math.min(buffer.length-n,e);
+  }
+  
+  protected char get() {
+    return (position < limit) ? buffer[position++] : (char)-1;
+  }
+  
+  protected char get(int index) {
+    if (index < 0 || index >= limit) 
+      throw new ArrayIndexOutOfBoundsException(index);
+    return buffer[index];
+  }
+  
+}

Added: directory/sandbox/elecharny/trunks/shared/ldap/src/main/unicode/CharBufferCodepointIterator.java
URL: http://svn.apache.org/viewvc/directory/sandbox/elecharny/trunks/shared/ldap/src/main/unicode/CharBufferCodepointIterator.java?view=auto&rev=490270
==============================================================================
--- directory/sandbox/elecharny/trunks/shared/ldap/src/main/unicode/CharBufferCodepointIterator.java (added)
+++ directory/sandbox/elecharny/trunks/shared/ldap/src/main/unicode/CharBufferCodepointIterator.java Tue Dec 26 00:37:23 2006
@@ -0,0 +1,34 @@
+/*
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements.  The ASF licenses this file to You
+* under the Apache License, Version 2.0 (the "License"); you may not
+* use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.  For additional information regarding
+* copyright in this work, please see the NOTICE file in the top level
+* directory of this distribution.
+*/
+package org.apache.directory.shared.ldap.util.unicode;
+
+import java.nio.CharBuffer;
+
+/**
+ * Iterate over Unicode codepoints in a java.nio.CharBuffer
+ */
+public class CharBufferCodepointIterator 
+  extends CharArrayCodepointIterator {
+
+  public CharBufferCodepointIterator(CharBuffer cb) {
+    buffer = cb.array();
+    position = cb.position();
+    limit = cb.limit();
+  }
+  
+}

Added: directory/sandbox/elecharny/trunks/shared/ldap/src/main/unicode/CharSequenceCodepointIterator.java
URL: http://svn.apache.org/viewvc/directory/sandbox/elecharny/trunks/shared/ldap/src/main/unicode/CharSequenceCodepointIterator.java?view=auto&rev=490270
==============================================================================
--- directory/sandbox/elecharny/trunks/shared/ldap/src/main/unicode/CharSequenceCodepointIterator.java (added)
+++ directory/sandbox/elecharny/trunks/shared/ldap/src/main/unicode/CharSequenceCodepointIterator.java Tue Dec 26 00:37:23 2006
@@ -0,0 +1,47 @@
+/*
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements.  The ASF licenses this file to You
+* under the Apache License, Version 2.0 (the "License"); you may not
+* use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.  For additional information regarding
+* copyright in this work, please see the NOTICE file in the top level
+* directory of this distribution.
+*/
+package org.apache.directory.shared.ldap.util.unicode;
+
+/**
+ * Iterate over Unicode codepoints in a CharSequence (e.g. String, StringBuffer, etc)
+ */
+public class CharSequenceCodepointIterator 
+  extends CodepointIterator {
+
+  private CharSequence buffer;
+  
+  public CharSequenceCodepointIterator(CharSequence buffer) {
+    this(buffer,0,buffer.length());
+  }
+  
+  public CharSequenceCodepointIterator(CharSequence buffer, int n, int e) {
+    this.buffer = buffer;
+    this.position = n;
+    this.limit = Math.min(buffer.length()-n,e);
+  }
+  
+  protected char get() {
+    return buffer.charAt(position++);
+  }
+
+  protected char get(int index) {
+    return buffer.charAt(index);
+  }
+  
+}
+

Added: directory/sandbox/elecharny/trunks/shared/ldap/src/main/unicode/CharUtils.java
URL: http://svn.apache.org/viewvc/directory/sandbox/elecharny/trunks/shared/ldap/src/main/unicode/CharUtils.java?view=auto&rev=490270
==============================================================================
--- directory/sandbox/elecharny/trunks/shared/ldap/src/main/unicode/CharUtils.java (added)
+++ directory/sandbox/elecharny/trunks/shared/ldap/src/main/unicode/CharUtils.java Tue Dec 26 00:37:23 2006
@@ -0,0 +1,326 @@
+/*
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements.  The ASF licenses this file to You
+* under the Apache License, Version 2.0 (the "License"); you may not
+* use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.  For additional information regarding
+* copyright in this work, please see the NOTICE file in the top level
+* directory of this distribution.
+*/
+package org.apache.directory.shared.ldap.util.unicode;
+
+import java.util.BitSet;
+
+/**
+ * General utilities for dealing with Unicode characters
+ */
+public final class CharUtils {
+
+  private CharUtils() {}
+ 
+  public static boolean isValidCodepoint(int d) {
+    return d >= 0x000000 && d <= 0x10ffff;
+  }
+  
+  public static int scanNot(CodepointIterator ci, BitSet set) throws InvalidCharacterException {
+    RestrictedCodepointIterator rci = new RestrictedCodepointIterator(ci,set,true,true);
+    while (rci.hasNext()) rci.next();
+    return rci.position;
+  }
+  
+  public static int scanNot(char[] array, BitSet set) throws InvalidCharacterException {
+    CodepointIterator ci = CodepointIterator.forCharArray(array);
+    RestrictedCodepointIterator rci = new RestrictedCodepointIterator(ci,set,true,true);
+    while (rci.hasNext()) rci.next();
+    return rci.position;
+  }
+  
+  public static int scan(CodepointIterator ci, BitSet set) throws InvalidCharacterException {
+    RestrictedCodepointIterator rci = new RestrictedCodepointIterator(ci,set,true);
+    while (rci.hasNext()) rci.next();
+    return rci.position();
+  }
+  
+  public static int scan(char[] array, BitSet set) throws InvalidCharacterException {
+    CodepointIterator ci = CodepointIterator.forCharArray(array);
+    RestrictedCodepointIterator rci = new RestrictedCodepointIterator(ci,set,true);
+    while (rci.hasNext()) rci.next();
+    return rci.position();
+  }
+  
+  public static int scan(String s, BitSet set) throws InvalidCharacterException {
+    CodepointIterator ci = CodepointIterator.forCharSequence(s);
+    RestrictedCodepointIterator rci = new RestrictedCodepointIterator(ci,set,true);
+    while (rci.hasNext()) rci.next();
+    return rci.position;
+  }
+  
+  public static void verifyNot(CodepointIterator ci, BitSet set) throws InvalidCharacterException {
+    RestrictedCodepointIterator rci = new RestrictedCodepointIterator(ci,set,false,true);
+    while (rci.hasNext()) rci.next();
+  }
+  
+  public static void verifyNot(char[] array, BitSet set) throws InvalidCharacterException {
+    CodepointIterator ci = CodepointIterator.forCharArray(array);
+    RestrictedCodepointIterator rci = new RestrictedCodepointIterator(ci,set,false,true);
+    while (rci.hasNext()) rci.next();
+  }
+  
+  public static void verify(CodepointIterator ci, BitSet set) throws InvalidCharacterException {
+    RestrictedCodepointIterator rci = new RestrictedCodepointIterator(ci,set,false);
+    while (rci.hasNext()) rci.next();
+  }
+  
+  public static void verify(char[] array, BitSet set) throws InvalidCharacterException {
+    CodepointIterator ci = CodepointIterator.forCharArray(array);
+    RestrictedCodepointIterator rci = new RestrictedCodepointIterator(ci,set,false);
+    while (rci.hasNext()) rci.next();
+  }
+  
+  public static void verify(String s, BitSet set) throws InvalidCharacterException {
+    if (s == null) return;
+    CodepointIterator ci = CodepointIterator.forCharSequence(s);
+    RestrictedCodepointIterator rci = new RestrictedCodepointIterator(ci,set,false);
+    while (rci.hasNext()) rci.next();
+  }
+  
+  public static boolean inRange(char[] chars, char low, char high) {
+    for (int i = 0; i < chars.length; i++)
+      if (chars[i] < low || chars[i] > high) return false;
+    return true;
+  }
+
+  public static boolean inRange(char[] chars, int low, int high) {
+    for (int i = 0; i < chars.length; i++) {
+      char n = chars[i];
+      int c = (isHighSurrogate(n) && 
+               i + 1 < chars.length && 
+               isLowSurrogate(chars[i+1])) ? toCodePoint(n,chars[i++]) : n;
+      if (c < low || c > high) return false;
+    }
+    return true;
+  }
+  
+  public static boolean isSet(int n, BitSet... sets) {
+    if (n == -1) return false;
+    BitSet set = new BitSet();
+    for (BitSet s : sets) set.or(s);
+    return set.get(n);
+  }
+  
+  public static void append(StringBuffer buf, int c) {
+    if (isSupplementary(c)) {
+      buf.append(getHighSurrogate(c));
+      buf.append(getLowSurrogate(c));
+    } else buf.append((char)c);
+  }
+  
+  public static char getHighSurrogate(int c) {
+    return (c >= 0x10000) ?
+       (char)((0xD800 - (0x10000 >> 10)) + (c >> 10)) : 0;
+  }
+
+  public static char getLowSurrogate(int c) {    
+    return (c >= 0x10000) ?
+        (char)(0xDC00 + (c & 0x3FF)) : (char)c;
+  }
+  
+  public static boolean isHighSurrogate(char c) {
+    return c <= '\uDBFF' && c >= '\uD800';
+  }
+
+  public static boolean isLowSurrogate(char c) {
+    return c <= '\uDFFF' && c >= '\uDC00';
+  }
+  
+  public static boolean isSupplementary(int c) {
+    return c <= 0x10ffff && c >= 0x010000;
+  }
+  
+  public static boolean isSurrogatePair(char high, char low) {
+    return isHighSurrogate(high) && isLowSurrogate(low);
+  }
+  
+  public static int toCodePoint(char[] chars) {
+    return toCodePoint(chars[0],chars[1]);
+  }
+  
+  public static int toCodePoint(char high, char low) {
+    return ((high - '\uD800') << 10) + (low - '\uDC00') + 0x010000;    
+  }
+
+  public static int charAt(String s, int i) {
+    char c = s.charAt(i);
+    if (c < 0xD800 || c > 0xDFFF) return c;
+    if (isHighSurrogate(c)) {
+      if (s.length() != i) {
+        char low = s.charAt(i+1);
+        if (isLowSurrogate(low)) return toCodePoint(c,low);
+      }
+    } else if (isLowSurrogate(c)) {
+      if (i >= 1) {
+        char high = s.charAt(i-1);
+        if (isHighSurrogate(high)) return toCodePoint(high,c);
+      }
+    }
+    return c;
+  }
+  
+  public static int charAt(StringBuilder s, int i) {
+    char c = s.charAt(i);
+    if (c < 0xD800 || c > 0xDFFF) return c;
+    if (isHighSurrogate(c)) {
+      if (s.length() != i) {
+        char low = s.charAt(i+1);
+        if (isLowSurrogate(low)) return toCodePoint(c,low);
+      }
+    } else if (isLowSurrogate(c)) {
+      if (i >= 1) {
+        char high = s.charAt(i-1);
+        if (isHighSurrogate(high)) return toCodePoint(high,c);
+      }
+    }
+    return c;
+  }
+  
+  public static void insert(StringBuffer s, int i, int c) {
+    if (i > 0 && i < s.length()) {
+      char ch = s.charAt(i);
+      boolean low = isLowSurrogate(ch);
+      if (low) {
+        if (low && isHighSurrogate(s.charAt(i-1))) {
+          i--;
+        }
+      }
+    }
+    s.insert(i, toString(c));
+  }
+  
+  public static void setChar(StringBuilder s, int i, int c) {
+    int l = 1;
+    char ch = s.charAt(i);
+    boolean high = isHighSurrogate(ch);
+    boolean low = isLowSurrogate(ch);
+    if (high || low) {
+      if (high && (i+1) < s.length() && isLowSurrogate(s.charAt(i+1))) l++;
+      else {
+        if (low && i > 0 && isHighSurrogate(s.charAt(i-1))) {
+          i--; l++;
+        }
+      }
+    }
+    s.replace(i, i+l, toString(c));
+  }
+  
+  public static int size(int c) {
+    return (isSupplementary(c)) ? 2 : 1;
+  }
+  
+  private static String supplementaryToString(int c) {
+    StringBuffer buf = new StringBuffer();
+    buf.append((char)getHighSurrogate(c));
+    buf.append((char)getLowSurrogate(c));
+    return buf.toString();
+  }
+  
+  public static String toString(int c) {
+    return (isSupplementary(c)) ? 
+      supplementaryToString(c) : 
+      String.valueOf((char)c);
+  }
+  
+  
+
+  private static final char LRE = 0x202A; 
+  private static final char RLE = 0x202B; 
+  private static final char LRO = 0x202D; 
+  private static final char RLO = 0x202E; 
+  private static final char LRM = 0x200E; 
+  private static final char RLM = 0x200F;
+  private static final char PDF = 0x202C;
+  
+  /**
+   * Removes leading and trailing bidi controls from the string
+   */
+  public static String stripBidi(String s) {
+    if (s == null || s.length() <= 1) return s;
+    if (charIsBidiControl(s.charAt(0)))
+      s = s.substring(1);
+    if (charIsBidiControl(s.charAt(s.length()-1)))
+      s = s.substring(0,s.length()-1);
+    return s;
+  }
+  
+  /**
+   * Returns true if the character is a bidi control 
+   */
+  public static boolean charIsBidiControl(char c) {
+    return c == 0x202A ||
+    c == LRE ||
+    c == RLE ||
+    c == LRO ||
+    c == RLO ||
+    c == RLM ||
+    c == LRM || 
+    c == PDF;
+  }
+  
+  private static String wrap(String s, char c1, char c2) {
+    StringBuffer buf = new StringBuffer(s);
+    if (buf.length() > 1) {
+      if (buf.charAt(0) != c1) buf.insert(0, c1);
+      if (buf.charAt(buf.length()-1) != c2) buf.append(c2);
+    }
+    return buf.toString();
+  }
+  
+  /**
+   * Wrap the string with Bidi Right-to-Left embed
+   */
+  public static String bidiRLE(String s) {
+    return wrap(s,RLE,PDF);
+  }
+  
+  /**
+   * Wrap the string with Bidi Right-to-Left override 
+   */
+  public static String bidiRLO(String s) {
+    return wrap(s,RLO,PDF);
+  }
+  
+  /**
+   * Wrap the string with Bidi Left-to-Right embed
+   */
+  public static String bidiLRE(String s) {
+    return wrap(s,LRE,PDF);
+  }
+  
+  /**
+   * Wrap the string with Bidi Left-to-Right override
+   */
+  public static String bidiLRO(String s) {
+    return wrap(s,LRO,PDF);
+  }
+  
+  /**
+   * Wrap the string with Bidi RML marks
+   */
+  public static String bidiRLM(String s) {
+    return wrap(s,RLM,RLM);
+  }
+  
+  /**
+   * Wrap the string with Bidi LRM marks
+   */
+  public static String bidiLRM(String s) {
+    return wrap(s,LRM,LRM);
+  }
+}

Added: directory/sandbox/elecharny/trunks/shared/ldap/src/main/unicode/CodepointIterator.java
URL: http://svn.apache.org/viewvc/directory/sandbox/elecharny/trunks/shared/ldap/src/main/unicode/CodepointIterator.java?view=auto&rev=490270
==============================================================================
--- directory/sandbox/elecharny/trunks/shared/ldap/src/main/unicode/CodepointIterator.java (added)
+++ directory/sandbox/elecharny/trunks/shared/ldap/src/main/unicode/CodepointIterator.java Tue Dec 26 00:37:23 2006
@@ -0,0 +1,167 @@
+/*
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements.  The ASF licenses this file to You
+* under the Apache License, Version 2.0 (the "License"); you may not
+* use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.  For additional information regarding
+* copyright in this work, please see the NOTICE file in the top level
+* directory of this distribution.
+*/
+package org.apache.directory.shared.ldap.util.unicode;
+
+import java.nio.CharBuffer;
+
+/**
+ * Iterate over Unicode codepoints 
+ */
+public abstract class CodepointIterator {
+
+  /** Creates an iterator over the whole of the given char array. */
+  public static CodepointIterator forCharArray(char[] array) {
+    return new CharArrayCodepointIterator(array);
+  }
+
+  /** Creates an iterator over the whole of the given char sequence. */
+  public static CodepointIterator forCharSequence(CharSequence seq) {
+    return new CharSequenceCodepointIterator(seq);
+  }
+
+  /** Creates an iterator over the given byte array. */
+  public static CodepointIterator forByteArray(byte[] array) {
+    return new ByteArrayCodepointIterator(array);
+  }
+
+  /** Creates an iterator over the given char buffer. */
+  public static CodepointIterator forCharBuffer(CharBuffer buffer) {
+    return new CharBufferCodepointIterator(buffer);
+  }
+
+  /** Index of the next code unit to read; -1 until a subclass sets it. */
+  protected int position = -1;
+  /** Index one past the last readable code unit; -1 until a subclass sets it. */
+  protected int limit = -1;
+
+  /** Returns the code unit at the current position and advances by one. */
+  protected abstract char get();
+
+  /** Returns the code unit at the given index without moving the position. */
+  protected abstract char get(int index);
+
+  /** Returns true while at least one code unit remains. */
+  public boolean hasNext() {
+    return remaining() > 0;
+  }
+
+  /**
+   * Returns the code unit just before the current position, or -1 when the
+   * position is at (or before) the start.
+   */
+  public int last() {
+    return (position() > 0) ? get(position() - 1) : -1;
+  }
+
+  /**
+   * Returns -1 for a negative position, the position itself once it has
+   * reached the limit, and position - 1 otherwise.
+   */
+  public int lastPosition() {
+    int p = position();
+    return (p > -1) ? 
+      (p >= limit()) ? p : p - 1 : -1;
+  }
+
+  /**
+   * Reads the next codepoint and returns its code units: two chars in
+   * high/low order for a surrogate pair, one char otherwise, or null when
+   * nothing remains.
+   *
+   * A low surrogate at the current position is joined with the high
+   * surrogate preceding it, if there is one. A surrogate that has no
+   * neighbour to pair with (a high surrogate as the very last unit, a low
+   * surrogate as the very first) is returned as a single char.
+   *
+   * @throws InvalidCharacterException if a high surrogate is followed by a
+   *         unit that is not a low surrogate, or a low surrogate is preceded
+   *         by a unit that is not a high surrogate
+   */
+  public char[] nextChars() throws InvalidCharacterException {
+    if (!hasNext()) return null;
+    char c1 = get();
+    if (CharUtils.isHighSurrogate(c1) && position() < limit()) {
+      char c2 = get();
+      if (CharUtils.isLowSurrogate(c2)) {
+        return new char[] {c1,c2};
+      }
+      throw new InvalidCharacterException(c2);
+    }
+    // get() has already moved past c1, so the unit in front of c1 sits at
+    // position()-2; it only exists when position() is at least 2
+    if (CharUtils.isLowSurrogate(c1) && position() > 1) {
+      char c2 = get(position()-2);
+      if (CharUtils.isHighSurrogate(c2)) {
+        return new char[] {c2,c1};
+      }
+      throw new InvalidCharacterException(c2);
+    }
+    // c1 was consumed above; it must not be read a second time
+    return new char[] {c1};
+  }
+
+  /**
+   * Same as {@link #nextChars()} but looks at the current position without
+   * moving it.
+   */
+  public char[] peekChars() throws InvalidCharacterException {
+    return peekChars(position());
+  }
+
+  /**
+   * Returns the code units of the codepoint found at pos without moving the
+   * position, or null when pos lies outside [0, limit). Pairing rules and
+   * exceptions are those of {@link #nextChars()}.
+   */
+  private char[] peekChars(int pos) throws InvalidCharacterException {
+    if (pos < 0 || pos >= limit()) return null;
+    char c1 = get(pos);
+    // only look ahead when pos+1 is still inside the readable range
+    if (CharUtils.isHighSurrogate(c1) && pos + 1 < limit()) {
+      char c2 = get(pos+1);
+      if (CharUtils.isLowSurrogate(c2)) {
+        return new char[] {c1,c2};
+      } else {
+        throw new InvalidCharacterException(c2);
+      }
+    } else if (CharUtils.isLowSurrogate(c1) && pos > 0) {
+      char c2 = get(pos-1);
+      if (CharUtils.isHighSurrogate(c2)) {
+        return new char[] {c2,c1};
+      } else {
+        throw new InvalidCharacterException(c2);
+      }
+    } else  return new char[] {c1}; 
+  }
+
+  /**
+   * Converts the result of nextChars/peekChars into a codepoint: -1 for
+   * null, the char itself for one unit, the combined value for a pair.
+   */
+  private static int toCodepoint(char[] chars) {
+    return (chars == null) ? -1 :
+      (chars.length == 1) ? chars[0] :
+      CharUtils.toCodePoint(chars[0], chars[1]);
+  }
+
+  /**
+   * Reads the next codepoint and advances past it; returns -1 when nothing
+   * remains.
+   */
+  public int next() throws InvalidCharacterException {
+    return toCodepoint(nextChars());
+  }
+
+  /**
+   * Returns the codepoint at the current position without advancing, or -1
+   * when nothing remains.
+   */
+  public int peek() throws InvalidCharacterException {
+    return toCodepoint(peekChars());
+  }
+
+  /**
+   * Returns the codepoint at the given index without moving the position,
+   * or -1 when the index lies outside [0, limit).
+   */
+  public int peek(int index) throws InvalidCharacterException {
+    return toCodepoint(peekChars(index));
+  }
+
+  /**
+   * Moves the position to n, which may be any value from 0 up to and
+   * including the limit.
+   *
+   * @throws ArrayIndexOutOfBoundsException if n is outside that range
+   */
+  public void position(int n) {
+    if (n < 0 || n > limit()) throw new ArrayIndexOutOfBoundsException(n);
+    position = n;
+  }
+
+  /** Returns the index of the next code unit to read. */
+  public int position() {
+    return position;
+  }
+
+  /** Returns the index one past the last readable code unit. */
+  public int limit() {
+    return limit;
+  }
+
+  /** Returns the number of code units between the position and the limit. */
+  public int remaining() {
+    return limit - position();
+  }
+
+  /**
+   * Returns true if the code unit at index is a high surrogate.
+   *
+   * @throws ArrayIndexOutOfBoundsException if index is outside [0, limit)
+   */
+  public boolean isHigh(int index) {
+    if (index < 0 || index >= limit()) throw new ArrayIndexOutOfBoundsException(index);
+    return CharUtils.isHighSurrogate(get(index));
+  }
+
+  /**
+   * Returns true if the code unit at index is a low surrogate.
+   *
+   * @throws ArrayIndexOutOfBoundsException if index is outside [0, limit)
+   */
+  public boolean isLow(int index) {
+    if (index < 0 || index >= limit()) throw new ArrayIndexOutOfBoundsException(index);
+    return CharUtils.isLowSurrogate(get(index));
+  }
+
+}

Added: directory/sandbox/elecharny/trunks/shared/ldap/src/main/unicode/FilterCodepointIterator.java
URL: http://svn.apache.org/viewvc/directory/sandbox/elecharny/trunks/shared/ldap/src/main/unicode/FilterCodepointIterator.java?view=auto&rev=490270
==============================================================================
--- directory/sandbox/elecharny/trunks/shared/ldap/src/main/unicode/FilterCodepointIterator.java (added)
+++ directory/sandbox/elecharny/trunks/shared/ldap/src/main/unicode/FilterCodepointIterator.java Tue Dec 26 00:37:23 2006
@@ -0,0 +1,103 @@
+/*
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements.  The ASF licenses this file to You
+* under the Apache License, Version 2.0 (the "License"); you may not
+* use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.  For additional information regarding
+* copyright in this work, please see the NOTICE file in the top level
+* directory of this distribution.
+*/
+package org.apache.directory.shared.ldap.util.unicode;
+
+/**
+ * Base implementation of a CodepointIterator that filters the output of
+ * another CodepointIterator. Every method below forwards to the wrapped
+ * iterator; the position and limit fields inherited from
+ * CodepointIterator are never assigned by this class, so state must be
+ * read through position() and limit() rather than through the fields.
+ */
+public abstract class FilterCodepointIterator 
+  extends CodepointIterator {
+
+  // the iterator all calls are forwarded to
+  private CodepointIterator internal;
+  
+  /**
+   * @param internal the iterator to wrap; stored as is
+   */
+  protected FilterCodepointIterator(CodepointIterator internal) {
+    this.internal = internal;
+  }
+  
+  /** Forwards to the wrapped iterator's get(). */
+  @Override
+  protected char get() {
+    return internal.get();
+  }
+
+  /** Forwards to the wrapped iterator's get(index). */
+  @Override
+  protected char get(int index) {
+    return internal.get(index);
+  }
+
+  /** Forwards to the wrapped iterator's hasNext(). */
+  @Override
+  public boolean hasNext() {
+    return internal.hasNext();
+  }
+
+  /** Forwards to the wrapped iterator's isHigh(index). */
+  @Override
+  public boolean isHigh(int index) {
+    return internal.isHigh(index);
+  }
+
+  /** Forwards to the wrapped iterator's isLow(index). */
+  @Override
+  public boolean isLow(int index) {
+    return internal.isLow(index);
+  }
+
+  /** Forwards to the wrapped iterator's limit(). */
+  @Override
+  public int limit() {
+    return internal.limit();
+  }
+
+  /** Forwards to the wrapped iterator's next(). */
+  @Override
+  public int next() throws InvalidCharacterException {
+    return internal.next();
+  }
+
+  /** Forwards to the wrapped iterator's nextChars(). */
+  @Override
+  public char[] nextChars() throws InvalidCharacterException {
+    return internal.nextChars();
+  }
+
+  /** Forwards to the wrapped iterator's peek(). */
+  @Override
+  public int peek() throws InvalidCharacterException {
+    return internal.peek();
+  }
+
+  /** Forwards to the wrapped iterator's peek(index). */
+  @Override
+  public int peek(int index) throws InvalidCharacterException {
+    return internal.peek(index);
+  }
+
+  /** Forwards to the wrapped iterator's peekChars(). */
+  @Override
+  public char[] peekChars() throws InvalidCharacterException {
+    return internal.peekChars();
+  }
+
+  /** Forwards to the wrapped iterator's position(). */
+  @Override
+  public int position() {
+    return internal.position();
+  }
+
+  /** Forwards to the wrapped iterator's remaining(). */
+  @Override
+  public int remaining() {
+    return internal.remaining();
+  }
+  
+  /** Forwards to the wrapped iterator's position(int). */
+  @Override
+  public void position(int position) {
+    internal.position(position);
+  }
+
+}