You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@directory.apache.org by el...@apache.org on 2006/12/26 09:37:25 UTC
svn commit: r490270 [3/4] - in
/directory/sandbox/elecharny/trunks/shared/ldap/src/main:
java/org/apache/directory/shared/ldap/schema/
java/org/apache/directory/shared/ldap/util/unicode/ resources/
resources/org/ resources/org/apache/ resources/org/apa...
Added: directory/sandbox/elecharny/trunks/shared/ldap/src/main/java/org/apache/directory/shared/ldap/util/unicode/ByteArrayCodepointIterator.java
URL: http://svn.apache.org/viewvc/directory/sandbox/elecharny/trunks/shared/ldap/src/main/java/org/apache/directory/shared/ldap/util/unicode/ByteArrayCodepointIterator.java?view=auto&rev=490270
==============================================================================
--- directory/sandbox/elecharny/trunks/shared/ldap/src/main/java/org/apache/directory/shared/ldap/util/unicode/ByteArrayCodepointIterator.java (added)
+++ directory/sandbox/elecharny/trunks/shared/ldap/src/main/java/org/apache/directory/shared/ldap/util/unicode/ByteArrayCodepointIterator.java Tue Dec 26 00:37:23 2006
@@ -0,0 +1,45 @@
+/*
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements. The ASF licenses this file to You
+* under the Apache License, Version 2.0 (the "License"); you may not
+* use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License. For additional information regarding
+* copyright in this work, please see the NOTICE file in the top level
+* directory of this distribution.
+*/
+package org.apache.directory.shared.ldap.util.unicode;
+
+import java.nio.ByteBuffer;
+import java.nio.CharBuffer;
+import java.nio.charset.Charset;
+
+/**
+ * A {@link CharArrayCodepointIterator} whose chars are produced by decoding
+ * an array of bytes with a charset.
+ */
+public class ByteArrayCodepointIterator
+  extends CharArrayCodepointIterator {
+
+  /**
+   * Decodes the bytes with the platform default charset.
+   *
+   * @param bytes the encoded text
+   */
+  public ByteArrayCodepointIterator(byte[] bytes) {
+    this(bytes, Charset.defaultCharset());
+  }
+
+  /**
+   * Decodes the bytes with the charset registered under the given name.
+   *
+   * @param bytes the encoded text
+   * @param charset a charset name accepted by {@link Charset#forName(String)}
+   */
+  public ByteArrayCodepointIterator(byte[] bytes, String charset) {
+    this(bytes, Charset.forName(charset));
+  }
+
+  /**
+   * Decodes the bytes with the given charset and iterates over the
+   * resulting chars.
+   *
+   * @param bytes the encoded text
+   * @param charset the charset used for decoding
+   */
+  public ByteArrayCodepointIterator(byte[] bytes, Charset charset) {
+    ByteBuffer encoded = ByteBuffer.wrap(bytes);
+    CharBuffer decoded = charset.decode(encoded);
+    // Adopt the decoder's backing array and its position/limit window.
+    this.buffer = decoded.array();
+    this.position = decoded.position();
+    this.limit = decoded.limit();
+  }
+
+}
Added: directory/sandbox/elecharny/trunks/shared/ldap/src/main/java/org/apache/directory/shared/ldap/util/unicode/ChainableBitSet.java
URL: http://svn.apache.org/viewvc/directory/sandbox/elecharny/trunks/shared/ldap/src/main/java/org/apache/directory/shared/ldap/util/unicode/ChainableBitSet.java?view=auto&rev=490270
==============================================================================
--- directory/sandbox/elecharny/trunks/shared/ldap/src/main/java/org/apache/directory/shared/ldap/util/unicode/ChainableBitSet.java (added)
+++ directory/sandbox/elecharny/trunks/shared/ldap/src/main/java/org/apache/directory/shared/ldap/util/unicode/ChainableBitSet.java Tue Dec 26 00:37:23 2006
@@ -0,0 +1,119 @@
+/*
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements. The ASF licenses this file to You
+* under the Apache License, Version 2.0 (the "License"); you may not
+* use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License. For additional information regarding
+* copyright in this work, please see the NOTICE file in the top level
+* directory of this distribution.
+*/
+package org.apache.directory.shared.ldap.util.unicode;
+
+import java.util.BitSet;
+
+/**
+ * Extension to java.util.BitSet whose mutators return <code>this</code> so
+ * that calls can be chained, e.g.
+ * <code>new ChainableBitSet().set2(1).set2(2).set2(3)</code>, making it
+ * easier to define a complex bit set in a single declaration.
+ * <p>
+ * Only the <code>*2</code> methods are chainable; the inherited BitSet
+ * mutators still return void.
+ * <p>
+ * Note that {@link #set2(int, int)} treats its upper bound as INCLUSIVE,
+ * whereas {@link #clear2(int, int)} and {@link #flip2(int, int)} pass their
+ * bounds straight to BitSet, where the upper bound is exclusive.
+ */
+public class ChainableBitSet
+  extends BitSet {
+
+  private static final long serialVersionUID = -1105957441212997513L;
+
+  /** Chainable form of {@link BitSet#and(BitSet)}. */
+  public ChainableBitSet and2(BitSet set) {
+    and(set);
+    return this;
+  }
+
+  /** Chainable form of {@link BitSet#andNot(BitSet)}. */
+  public ChainableBitSet andNot2(BitSet set) {
+    andNot(set);
+    return this;
+  }
+
+  /**
+   * Misspelled alias of {@link #andNot2(BitSet)}, kept so existing callers
+   * continue to compile.
+   *
+   * @deprecated use {@link #andNot2(BitSet)}
+   */
+  @Deprecated
+  public ChainableBitSet addNot2(BitSet set) {
+    return andNot2(set);
+  }
+
+  /** Chainable form of {@link BitSet#clear(int)}. */
+  public ChainableBitSet clear2(int index) {
+    clear(index);
+    return this;
+  }
+
+  /** Clears each of the listed bits. */
+  public ChainableBitSet clear2(int... indexes) {
+    for (int i : indexes) {
+      clear(i);
+    }
+    return this;
+  }
+
+  /**
+   * Chainable form of {@link BitSet#clear(int, int)}; endIndex is exclusive.
+   */
+  public ChainableBitSet clear2(int startIndex, int endIndex) {
+    clear(startIndex, endIndex);
+    return this;
+  }
+
+  /** Chainable form of {@link BitSet#flip(int)}. */
+  public ChainableBitSet flip2(int index) {
+    flip(index);
+    return this;
+  }
+
+  /** Flips each of the listed bits. */
+  public ChainableBitSet flip2(int... indexes) {
+    for (int i : indexes) {
+      flip(i);
+    }
+    return this;
+  }
+
+  /**
+   * Chainable form of {@link BitSet#flip(int, int)}; endIndex is exclusive.
+   */
+  public ChainableBitSet flip2(int startIndex, int endIndex) {
+    flip(startIndex, endIndex);
+    return this;
+  }
+
+  /** Chainable form of {@link BitSet#or(BitSet)}. */
+  public ChainableBitSet or2(BitSet set) {
+    or(set);
+    return this;
+  }
+
+  /** Chainable form of {@link BitSet#xor(BitSet)}. */
+  public ChainableBitSet xor2(BitSet set) {
+    xor(set);
+    return this;
+  }
+
+  /**
+   * Sets the bit for every char (UTF-16 code unit) of the string.
+   */
+  public ChainableBitSet set2(String s) {
+    for (char c : s.toCharArray()) {
+      set(c);
+    }
+    return this;
+  }
+
+  /** Sets every bit that is set in the given set (a union). */
+  public ChainableBitSet set2(BitSet set) {
+    this.or(set);
+    return this;
+  }
+
+  /** Sets each of the listed bits. */
+  public ChainableBitSet set2(int ... bits) {
+    for (int n : bits) {
+      set(n);
+    }
+    return this;
+  }
+
+  /**
+   * Sets the bits fromIndex through toIndex, BOTH INCLUSIVE (the upper
+   * bound is incremented before being handed to BitSet).
+   */
+  public ChainableBitSet set2(int fromIndex, int toIndex) {
+    super.set(fromIndex, toIndex+1);
+    return this;
+  }
+
+  /** Chainable form of {@link BitSet#set(int)}. */
+  public ChainableBitSet set2(int bitIndex) {
+    super.set(bitIndex);
+    return this;
+  }
+
+  /** Chainable form of {@link BitSet#set(int, boolean)}. */
+  public ChainableBitSet set2(int bitIndex, boolean value) {
+    super.set(bitIndex, value);
+    return this;
+  }
+
+  /**
+   * Sets (value == true) or clears (value == false) every bit that is set
+   * in the given set.
+   */
+  public ChainableBitSet set2(BitSet set, boolean value) {
+    if (value) {
+      return set2(set);
+    }
+    this.andNot(set);
+    return this;
+  }
+
+}
Added: directory/sandbox/elecharny/trunks/shared/ldap/src/main/java/org/apache/directory/shared/ldap/util/unicode/CharArrayCodepointIterator.java
URL: http://svn.apache.org/viewvc/directory/sandbox/elecharny/trunks/shared/ldap/src/main/java/org/apache/directory/shared/ldap/util/unicode/CharArrayCodepointIterator.java?view=auto&rev=490270
==============================================================================
--- directory/sandbox/elecharny/trunks/shared/ldap/src/main/java/org/apache/directory/shared/ldap/util/unicode/CharArrayCodepointIterator.java (added)
+++ directory/sandbox/elecharny/trunks/shared/ldap/src/main/java/org/apache/directory/shared/ldap/util/unicode/CharArrayCodepointIterator.java Tue Dec 26 00:37:23 2006
@@ -0,0 +1,50 @@
+/*
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements. The ASF licenses this file to You
+* under the Apache License, Version 2.0 (the "License"); you may not
+* use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License. For additional information regarding
+* copyright in this work, please see the NOTICE file in the top level
+* directory of this distribution.
+*/
+package org.apache.directory.shared.ldap.util.unicode;
+
+/**
+ * Iterate over Unicode codepoints contained in a char array
+ */
+public class CharArrayCodepointIterator
+  extends CodepointIterator {
+
+  /** The chars being iterated; position and limit are indexes into it. */
+  protected char[] buffer;
+
+  /**
+   * For subclasses that assign buffer, position and limit themselves.
+   */
+  protected CharArrayCodepointIterator() {}
+
+  /**
+   * Iterates over the whole array.
+   *
+   * @param buffer the chars to iterate over
+   */
+  public CharArrayCodepointIterator(char[] buffer) {
+    this(buffer,0,buffer.length);
+  }
+
+  /**
+   * Iterates over a window of the array.
+   *
+   * @param buffer the chars to iterate over
+   * @param n index of the first char to return
+   * @param e index one past the last char to return; values beyond the end
+   *          of the array are clamped to buffer.length
+   */
+  public CharArrayCodepointIterator(char[] buffer, int n, int e) {
+    this.buffer = buffer;
+    this.position = n;
+    // limit is an absolute index (get() compares it with position), so it
+    // must be clamped against the array length, not against length - n;
+    // the latter cut the window short whenever n > 0.
+    this.limit = Math.min(buffer.length, e);
+  }
+
+  /**
+   * Returns the char at the current position and advances, or (char)-1
+   * once the limit has been reached.
+   */
+  protected char get() {
+    return (position < limit) ? buffer[position++] : (char)-1;
+  }
+
+  /**
+   * Returns the char at the given index without moving the position.
+   *
+   * @throws ArrayIndexOutOfBoundsException if index is negative or not
+   *         below the limit
+   */
+  protected char get(int index) {
+    if (index < 0 || index >= limit) {
+      throw new ArrayIndexOutOfBoundsException(index);
+    }
+    return buffer[index];
+  }
+
+}
Added: directory/sandbox/elecharny/trunks/shared/ldap/src/main/java/org/apache/directory/shared/ldap/util/unicode/CharBufferCodepointIterator.java
URL: http://svn.apache.org/viewvc/directory/sandbox/elecharny/trunks/shared/ldap/src/main/java/org/apache/directory/shared/ldap/util/unicode/CharBufferCodepointIterator.java?view=auto&rev=490270
==============================================================================
--- directory/sandbox/elecharny/trunks/shared/ldap/src/main/java/org/apache/directory/shared/ldap/util/unicode/CharBufferCodepointIterator.java (added)
+++ directory/sandbox/elecharny/trunks/shared/ldap/src/main/java/org/apache/directory/shared/ldap/util/unicode/CharBufferCodepointIterator.java Tue Dec 26 00:37:23 2006
@@ -0,0 +1,34 @@
+/*
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements. The ASF licenses this file to You
+* under the Apache License, Version 2.0 (the "License"); you may not
+* use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License. For additional information regarding
+* copyright in this work, please see the NOTICE file in the top level
+* directory of this distribution.
+*/
+package org.apache.directory.shared.ldap.util.unicode;
+
+import java.nio.CharBuffer;
+
+/**
+ * Iterate over Unicode codepoints in a java.nio.CharBuffer
+ */
+public class CharBufferCodepointIterator
+ extends CharArrayCodepointIterator {
+
+ public CharBufferCodepointIterator(CharBuffer cb) {
+ buffer = cb.array();
+ position = cb.position();
+ limit = cb.limit();
+ }
+
+}
Added: directory/sandbox/elecharny/trunks/shared/ldap/src/main/java/org/apache/directory/shared/ldap/util/unicode/CharSequenceCodepointIterator.java
URL: http://svn.apache.org/viewvc/directory/sandbox/elecharny/trunks/shared/ldap/src/main/java/org/apache/directory/shared/ldap/util/unicode/CharSequenceCodepointIterator.java?view=auto&rev=490270
==============================================================================
--- directory/sandbox/elecharny/trunks/shared/ldap/src/main/java/org/apache/directory/shared/ldap/util/unicode/CharSequenceCodepointIterator.java (added)
+++ directory/sandbox/elecharny/trunks/shared/ldap/src/main/java/org/apache/directory/shared/ldap/util/unicode/CharSequenceCodepointIterator.java Tue Dec 26 00:37:23 2006
@@ -0,0 +1,47 @@
+/*
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements. The ASF licenses this file to You
+* under the Apache License, Version 2.0 (the "License"); you may not
+* use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License. For additional information regarding
+* copyright in this work, please see the NOTICE file in the top level
+* directory of this distribution.
+*/
+package org.apache.directory.shared.ldap.util.unicode;
+
+/**
+ * Iterate over Unicode codepoints in a CharSequence (e.g. String, StringBuffer, etc)
+ */
+public class CharSequenceCodepointIterator
+  extends CodepointIterator {
+
+  /** The sequence being iterated; position and limit are indexes into it. */
+  private final CharSequence buffer;
+
+  /**
+   * Iterates over the whole sequence.
+   *
+   * @param buffer the chars to iterate over
+   */
+  public CharSequenceCodepointIterator(CharSequence buffer) {
+    this(buffer,0,buffer.length());
+  }
+
+  /**
+   * Iterates over a window of the sequence.
+   *
+   * @param buffer the chars to iterate over
+   * @param n index of the first char to return
+   * @param e index one past the last char to return; values beyond the end
+   *          of the sequence are clamped to its length
+   */
+  public CharSequenceCodepointIterator(CharSequence buffer, int n, int e) {
+    this.buffer = buffer;
+    this.position = n;
+    // limit is an absolute index, so clamp it against the sequence length
+    // rather than against length - n, which cut the window short whenever
+    // n > 0.
+    this.limit = Math.min(buffer.length(),e);
+  }
+
+  /**
+   * Returns the char at the current position and advances. No limit check
+   * is made here; charAt rejects indexes outside the sequence.
+   */
+  protected char get() {
+    return buffer.charAt(position++);
+  }
+
+  /**
+   * Returns the char at the given index without moving the position.
+   */
+  protected char get(int index) {
+    return buffer.charAt(index);
+  }
+
+}
+
Added: directory/sandbox/elecharny/trunks/shared/ldap/src/main/java/org/apache/directory/shared/ldap/util/unicode/CharUtils.java
URL: http://svn.apache.org/viewvc/directory/sandbox/elecharny/trunks/shared/ldap/src/main/java/org/apache/directory/shared/ldap/util/unicode/CharUtils.java?view=auto&rev=490270
==============================================================================
--- directory/sandbox/elecharny/trunks/shared/ldap/src/main/java/org/apache/directory/shared/ldap/util/unicode/CharUtils.java (added)
+++ directory/sandbox/elecharny/trunks/shared/ldap/src/main/java/org/apache/directory/shared/ldap/util/unicode/CharUtils.java Tue Dec 26 00:37:23 2006
@@ -0,0 +1,326 @@
+/*
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements. The ASF licenses this file to You
+* under the Apache License, Version 2.0 (the "License"); you may not
+* use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License. For additional information regarding
+* copyright in this work, please see the NOTICE file in the top level
+* directory of this distribution.
+*/
+package org.apache.directory.shared.ldap.util.unicode;
+
+import java.util.BitSet;
+
+/**
+ * General utilities for dealing with Unicode characters
+ */
+public final class CharUtils {
+
+ private CharUtils() {}
+
+ public static boolean isValidCodepoint(int d) {
+ return d >= 0x000000 && d <= 0x10ffff;
+ }
+
+ public static int scanNot(CodepointIterator ci, BitSet set) throws InvalidCharacterException {
+ RestrictedCodepointIterator rci = new RestrictedCodepointIterator(ci,set,true,true);
+ while (rci.hasNext()) rci.next();
+ return rci.position;
+ }
+
+ public static int scanNot(char[] array, BitSet set) throws InvalidCharacterException {
+ CodepointIterator ci = CodepointIterator.forCharArray(array);
+ RestrictedCodepointIterator rci = new RestrictedCodepointIterator(ci,set,true,true);
+ while (rci.hasNext()) rci.next();
+ return rci.position;
+ }
+
+ public static int scan(CodepointIterator ci, BitSet set) throws InvalidCharacterException {
+ RestrictedCodepointIterator rci = new RestrictedCodepointIterator(ci,set,true);
+ while (rci.hasNext()) rci.next();
+ return rci.position();
+ }
+
+ public static int scan(char[] array, BitSet set) throws InvalidCharacterException {
+ CodepointIterator ci = CodepointIterator.forCharArray(array);
+ RestrictedCodepointIterator rci = new RestrictedCodepointIterator(ci,set,true);
+ while (rci.hasNext()) rci.next();
+ return rci.position();
+ }
+
+ public static int scan(String s, BitSet set) throws InvalidCharacterException {
+ CodepointIterator ci = CodepointIterator.forCharSequence(s);
+ RestrictedCodepointIterator rci = new RestrictedCodepointIterator(ci,set,true);
+ while (rci.hasNext()) rci.next();
+ return rci.position;
+ }
+
+ public static void verifyNot(CodepointIterator ci, BitSet set) throws InvalidCharacterException {
+ RestrictedCodepointIterator rci = new RestrictedCodepointIterator(ci,set,false,true);
+ while (rci.hasNext()) rci.next();
+ }
+
+ public static void verifyNot(char[] array, BitSet set) throws InvalidCharacterException {
+ CodepointIterator ci = CodepointIterator.forCharArray(array);
+ RestrictedCodepointIterator rci = new RestrictedCodepointIterator(ci,set,false,true);
+ while (rci.hasNext()) rci.next();
+ }
+
+ public static void verify(CodepointIterator ci, BitSet set) throws InvalidCharacterException {
+ RestrictedCodepointIterator rci = new RestrictedCodepointIterator(ci,set,false);
+ while (rci.hasNext()) rci.next();
+ }
+
+ public static void verify(char[] array, BitSet set) throws InvalidCharacterException {
+ CodepointIterator ci = CodepointIterator.forCharArray(array);
+ RestrictedCodepointIterator rci = new RestrictedCodepointIterator(ci,set,false);
+ while (rci.hasNext()) rci.next();
+ }
+
+ public static void verify(String s, BitSet set) throws InvalidCharacterException {
+ if (s == null) return;
+ CodepointIterator ci = CodepointIterator.forCharSequence(s);
+ RestrictedCodepointIterator rci = new RestrictedCodepointIterator(ci,set,false);
+ while (rci.hasNext()) rci.next();
+ }
+
+ public static boolean inRange(char[] chars, char low, char high) {
+ for (int i = 0; i < chars.length; i++)
+ if (chars[i] < low || chars[i] > high) return false;
+ return true;
+ }
+
+ public static boolean inRange(char[] chars, int low, int high) {
+ for (int i = 0; i < chars.length; i++) {
+ char n = chars[i];
+ int c = (isHighSurrogate(n) &&
+ i + 1 < chars.length &&
+ isLowSurrogate(chars[i+1])) ? toCodePoint(n,chars[i++]) : n;
+ if (c < low || c > high) return false;
+ }
+ return true;
+ }
+
+ public static boolean isSet(int n, BitSet... sets) {
+ if (n == -1) return false;
+ BitSet set = new BitSet();
+ for (BitSet s : sets) set.or(s);
+ return set.get(n);
+ }
+
+ public static void append(StringBuffer buf, int c) {
+ if (isSupplementary(c)) {
+ buf.append(getHighSurrogate(c));
+ buf.append(getLowSurrogate(c));
+ } else buf.append((char)c);
+ }
+
+ public static char getHighSurrogate(int c) {
+ return (c >= 0x10000) ?
+ (char)((0xD800 - (0x10000 >> 10)) + (c >> 10)) : 0;
+ }
+
+ public static char getLowSurrogate(int c) {
+ return (c >= 0x10000) ?
+ (char)(0xDC00 + (c & 0x3FF)) : (char)c;
+ }
+
+ public static boolean isHighSurrogate(char c) {
+ return c <= '\uDBFF' && c >= '\uD800';
+ }
+
+ public static boolean isLowSurrogate(char c) {
+ return c <= '\uDFFF' && c >= '\uDC00';
+ }
+
+ public static boolean isSupplementary(int c) {
+ return c <= 0x10ffff && c >= 0x010000;
+ }
+
+ public static boolean isSurrogatePair(char high, char low) {
+ return isHighSurrogate(high) && isLowSurrogate(low);
+ }
+
+ public static int toCodePoint(char[] chars) {
+ return toCodePoint(chars[0],chars[1]);
+ }
+
+ public static int toCodePoint(char high, char low) {
+ return ((high - '\uD800') << 10) + (low - '\uDC00') + 0x010000;
+ }
+
+ public static int charAt(String s, int i) {
+ char c = s.charAt(i);
+ if (c < 0xD800 || c > 0xDFFF) return c;
+ if (isHighSurrogate(c)) {
+ if (s.length() != i) {
+ char low = s.charAt(i+1);
+ if (isLowSurrogate(low)) return toCodePoint(c,low);
+ }
+ } else if (isLowSurrogate(c)) {
+ if (i >= 1) {
+ char high = s.charAt(i-1);
+ if (isHighSurrogate(high)) return toCodePoint(high,c);
+ }
+ }
+ return c;
+ }
+
+ public static int charAt(StringBuilder s, int i) {
+ char c = s.charAt(i);
+ if (c < 0xD800 || c > 0xDFFF) return c;
+ if (isHighSurrogate(c)) {
+ if (s.length() != i) {
+ char low = s.charAt(i+1);
+ if (isLowSurrogate(low)) return toCodePoint(c,low);
+ }
+ } else if (isLowSurrogate(c)) {
+ if (i >= 1) {
+ char high = s.charAt(i-1);
+ if (isHighSurrogate(high)) return toCodePoint(high,c);
+ }
+ }
+ return c;
+ }
+
+ public static void insert(StringBuffer s, int i, int c) {
+ if (i > 0 && i < s.length()) {
+ char ch = s.charAt(i);
+ boolean low = isLowSurrogate(ch);
+ if (low) {
+ if (low && isHighSurrogate(s.charAt(i-1))) {
+ i--;
+ }
+ }
+ }
+ s.insert(i, toString(c));
+ }
+
+ public static void setChar(StringBuilder s, int i, int c) {
+ int l = 1;
+ char ch = s.charAt(i);
+ boolean high = isHighSurrogate(ch);
+ boolean low = isLowSurrogate(ch);
+ if (high || low) {
+ if (high && (i+1) < s.length() && isLowSurrogate(s.charAt(i+1))) l++;
+ else {
+ if (low && i > 0 && isHighSurrogate(s.charAt(i-1))) {
+ i--; l++;
+ }
+ }
+ }
+ s.replace(i, i+l, toString(c));
+ }
+
+ public static int size(int c) {
+ return (isSupplementary(c)) ? 2 : 1;
+ }
+
+ private static String supplementaryToString(int c) {
+ StringBuffer buf = new StringBuffer();
+ buf.append((char)getHighSurrogate(c));
+ buf.append((char)getLowSurrogate(c));
+ return buf.toString();
+ }
+
+ public static String toString(int c) {
+ return (isSupplementary(c)) ?
+ supplementaryToString(c) :
+ String.valueOf((char)c);
+ }
+
+
+
+ private static final char LRE = 0x202A;
+ private static final char RLE = 0x202B;
+ private static final char LRO = 0x202D;
+ private static final char RLO = 0x202E;
+ private static final char LRM = 0x200E;
+ private static final char RLM = 0x200F;
+ private static final char PDF = 0x202C;
+
+ /**
+ * Removes leading and trailing bidi controls from the string
+ */
+ public static String stripBidi(String s) {
+ if (s == null || s.length() <= 1) return s;
+ if (charIsBidiControl(s.charAt(0)))
+ s = s.substring(1);
+ if (charIsBidiControl(s.charAt(s.length()-1)))
+ s = s.substring(0,s.length()-1);
+ return s;
+ }
+
+ /**
+ * Returns true if the character is a bidi control
+ */
+ public static boolean charIsBidiControl(char c) {
+ return c == 0x202A ||
+ c == LRE ||
+ c == RLE ||
+ c == LRO ||
+ c == RLO ||
+ c == RLM ||
+ c == LRM ||
+ c == PDF;
+ }
+
+ private static String wrap(String s, char c1, char c2) {
+ StringBuffer buf = new StringBuffer(s);
+ if (buf.length() > 1) {
+ if (buf.charAt(0) != c1) buf.insert(0, c1);
+ if (buf.charAt(buf.length()-1) != c2) buf.append(c2);
+ }
+ return buf.toString();
+ }
+
+ /**
+ * Wrap the string with Bidi Right-to-Left embed
+ */
+ public static String bidiRLE(String s) {
+ return wrap(s,RLE,PDF);
+ }
+
+ /**
+ * Wrap the string with Bidi Right-to-Left override
+ */
+ public static String bidiRLO(String s) {
+ return wrap(s,RLO,PDF);
+ }
+
+ /**
+ * Wrap the string with Bidi Left-to-Right embed
+ */
+ public static String bidiLRE(String s) {
+ return wrap(s,LRE,PDF);
+ }
+
+ /**
+ * Wrap the string with Bidi Left-to-Right override
+ */
+ public static String bidiLRO(String s) {
+ return wrap(s,LRO,PDF);
+ }
+
+ /**
+ * Wrap the string with Bidi RML marks
+ */
+ public static String bidiRLM(String s) {
+ return wrap(s,RLM,RLM);
+ }
+
+ /**
+ * Wrap the string with Bidi LRM marks
+ */
+ public static String bidiLRM(String s) {
+ return wrap(s,LRM,LRM);
+ }
+}
Added: directory/sandbox/elecharny/trunks/shared/ldap/src/main/java/org/apache/directory/shared/ldap/util/unicode/CodepointIterator.java
URL: http://svn.apache.org/viewvc/directory/sandbox/elecharny/trunks/shared/ldap/src/main/java/org/apache/directory/shared/ldap/util/unicode/CodepointIterator.java?view=auto&rev=490270
==============================================================================
--- directory/sandbox/elecharny/trunks/shared/ldap/src/main/java/org/apache/directory/shared/ldap/util/unicode/CodepointIterator.java (added)
+++ directory/sandbox/elecharny/trunks/shared/ldap/src/main/java/org/apache/directory/shared/ldap/util/unicode/CodepointIterator.java Tue Dec 26 00:37:23 2006
@@ -0,0 +1,167 @@
+/*
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements. The ASF licenses this file to You
+* under the Apache License, Version 2.0 (the "License"); you may not
+* use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License. For additional information regarding
+* copyright in this work, please see the NOTICE file in the top level
+* directory of this distribution.
+*/
+package org.apache.directory.shared.ldap.util.unicode;
+
+import java.nio.CharBuffer;
+
+/**
+ * Iterate over Unicode codepoints. Subclasses supply the underlying chars
+ * through {@link #get()} and {@link #get(int)}; this class combines
+ * surrogate pairs into single codepoints.
+ */
+public abstract class CodepointIterator {
+
+  /** Creates an iterator over all chars of the array. */
+  public static CodepointIterator forCharArray(char[] array) {
+    return new CharArrayCodepointIterator(array);
+  }
+
+  /** Creates an iterator over all chars of the sequence. */
+  public static CodepointIterator forCharSequence(CharSequence seq) {
+    return new CharSequenceCodepointIterator(seq);
+  }
+
+  /** Creates an iterator over the chars decoded from the byte array. */
+  public static CodepointIterator forByteArray(byte[] array) {
+    return new ByteArrayCodepointIterator(array);
+  }
+
+  /** Creates an iterator over the chars of the buffer. */
+  public static CodepointIterator forCharBuffer(CharBuffer buffer) {
+    return new CharBufferCodepointIterator(buffer);
+  }
+
+  /** Index of the next char to be read; -1 until a subclass sets it. */
+  protected int position = -1;
+  /** Index one past the last readable char; -1 until a subclass sets it. */
+  protected int limit = -1;
+
+  /** Returns the char at the current position and advances by one. */
+  protected abstract char get();
+
+  /** Returns the char at the given index without moving the position. */
+  protected abstract char get(int index);
+
+  /** Returns true while chars remain before the limit. */
+  public boolean hasNext() {
+    return remaining() > 0;
+  }
+
+  /**
+   * Returns the char immediately before the current position, or -1 when
+   * the position is at (or before) index 0.
+   */
+  public int last() {
+    return (position() > 0) ? get(position() - 1) : -1;
+  }
+
+  /**
+   * Returns -1 for a negative position, the position itself once it has
+   * reached the limit, and position - 1 otherwise.
+   */
+  public int lastPosition() {
+    int p = position();
+    if (p <= -1) return -1;
+    return (p >= limit()) ? p : p - 1;
+  }
+
+  /**
+   * Reads the next codepoint and returns its chars: a surrogate pair as
+   * {high, low}, anything else as a single char. Returns null when nothing
+   * remains.
+   * <p>
+   * An unpaired surrogate sitting at the very end (high) or very start
+   * (low) of the data is returned as a single char.
+   *
+   * @throws InvalidCharacterException if a high surrogate is followed by a
+   *         char that is not a low surrogate, or a low surrogate is
+   *         preceded by a char that is not a high surrogate
+   */
+  public char[] nextChars() throws InvalidCharacterException {
+    if (!hasNext()) {
+      return null;
+    }
+    if (!isNextSurrogate()) {
+      return new char[] {get()};
+    }
+    char c1 = get();
+    if (CharUtils.isHighSurrogate(c1) && position() < limit()) {
+      char c2 = get();
+      if (CharUtils.isLowSurrogate(c2)) {
+        return new char[] {c1,c2};
+      }
+      throw new InvalidCharacterException(c2);
+    } else if (CharUtils.isLowSurrogate(c1) && position() > 1) {
+      // position() has already moved past c1, so the preceding char sits
+      // at position() - 2; the guard keeps that index from going negative.
+      char c2 = get(position()-2);
+      if (CharUtils.isHighSurrogate(c2)) {
+        // high half first, matching peekChars and toCodePoint(high, low)
+        return new char[] {c2,c1};
+      }
+      // the unpaired low surrogate is the offending char, not its neighbour
+      throw new InvalidCharacterException(c1);
+    }
+    // Unpaired surrogate at a boundary: c1 has already been consumed, so
+    // hand it back rather than reading (and thereby skipping) another char.
+    return new char[] {c1};
+  }
+
+  /**
+   * Like {@link #nextChars()} but without moving the position.
+   */
+  public char[] peekChars() throws InvalidCharacterException {
+    return peekChars(position());
+  }
+
+  /**
+   * Returns the chars of the codepoint that the char at pos belongs to, or
+   * null if pos lies outside [0, limit).
+   */
+  private char[] peekChars(int pos) throws InvalidCharacterException {
+    if (pos < 0 || pos >= limit()) return null;
+    char c1 = get(pos);
+    if (CharUtils.isHighSurrogate(c1) && pos + 1 < limit()) {
+      // only look ahead when the next index is still below the limit
+      char c2 = get(pos+1);
+      if (CharUtils.isLowSurrogate(c2)) {
+        return new char[] {c1,c2};
+      }
+      throw new InvalidCharacterException(c2);
+    } else if (CharUtils.isLowSurrogate(c1) && pos > 0) {
+      // pos > 0 (not > 1): a pair may occupy indexes 0 and 1
+      char c2 = get(pos-1);
+      if (CharUtils.isHighSurrogate(c2)) {
+        return new char[] {c2,c1};
+      }
+      throw new InvalidCharacterException(c1);
+    }
+    return new char[] {c1};
+  }
+
+  /** Converts the result of nextChars/peekChars into a codepoint or -1. */
+  private static int toCodepoint(char[] chars) {
+    if (chars == null) return -1;
+    return (chars.length == 1) ? chars[0] :
+      CharUtils.toCodePoint(chars[0], chars[1]);
+  }
+
+  /**
+   * Reads the next codepoint, or returns -1 when nothing remains.
+   */
+  public int next() throws InvalidCharacterException {
+    return toCodepoint(nextChars());
+  }
+
+  /**
+   * Returns the next codepoint without moving the position, or -1 when
+   * nothing remains.
+   */
+  public int peek() throws InvalidCharacterException {
+    return toCodepoint(peekChars());
+  }
+
+  /**
+   * Returns the codepoint that the char at index belongs to, or -1 if the
+   * index lies outside [0, limit).
+   */
+  public int peek(int index) throws InvalidCharacterException {
+    return toCodepoint(peekChars(index));
+  }
+
+  /**
+   * Moves the position.
+   *
+   * @throws ArrayIndexOutOfBoundsException if n is negative or beyond the
+   *         limit (n == limit is allowed and means "at end")
+   */
+  public void position(int n) {
+    if (n < 0 || n > limit()) throw new ArrayIndexOutOfBoundsException(n);
+    position = n;
+  }
+
+  /** Returns the index of the next char to be read. */
+  public int position() {
+    return position;
+  }
+
+  /** Returns the index one past the last readable char. */
+  public int limit() {
+    return limit;
+  }
+
+  /** Returns the number of chars between the position and the limit. */
+  public int remaining() {
+    return limit - position();
+  }
+
+  /** Returns true if the char at the current position is a surrogate. */
+  private boolean isNextSurrogate() {
+    if (!hasNext()) return false;
+    char c = get(position());
+    return CharUtils.isHighSurrogate(c) || CharUtils.isLowSurrogate(c);
+  }
+
+  /**
+   * Returns true if the char at index is a high surrogate.
+   *
+   * @throws ArrayIndexOutOfBoundsException if index lies outside [0, limit)
+   */
+  public boolean isHigh(int index) {
+    if (index < 0 || index >= limit()) throw new ArrayIndexOutOfBoundsException(index);
+    return CharUtils.isHighSurrogate(get(index));
+  }
+
+  /**
+   * Returns true if the char at index is a low surrogate.
+   *
+   * @throws ArrayIndexOutOfBoundsException if index lies outside [0, limit)
+   */
+  public boolean isLow(int index) {
+    if (index < 0 || index >= limit()) throw new ArrayIndexOutOfBoundsException(index);
+    return CharUtils.isLowSurrogate(get(index));
+  }
+
+}
Added: directory/sandbox/elecharny/trunks/shared/ldap/src/main/java/org/apache/directory/shared/ldap/util/unicode/FilterCodepointIterator.java
URL: http://svn.apache.org/viewvc/directory/sandbox/elecharny/trunks/shared/ldap/src/main/java/org/apache/directory/shared/ldap/util/unicode/FilterCodepointIterator.java?view=auto&rev=490270
==============================================================================
--- directory/sandbox/elecharny/trunks/shared/ldap/src/main/java/org/apache/directory/shared/ldap/util/unicode/FilterCodepointIterator.java (added)
+++ directory/sandbox/elecharny/trunks/shared/ldap/src/main/java/org/apache/directory/shared/ldap/util/unicode/FilterCodepointIterator.java Tue Dec 26 00:37:23 2006
@@ -0,0 +1,103 @@
+/*
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements. The ASF licenses this file to You
+* under the Apache License, Version 2.0 (the "License"); you may not
+* use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License. For additional information regarding
+* copyright in this work, please see the NOTICE file in the top level
+* directory of this distribution.
+*/
+package org.apache.directory.shared.ldap.util.unicode;
+
+/**
+ * Base implementation of a CodepointIterator that filters the output of
+ * another CodepointIterator. Every operation is forwarded to the wrapped
+ * iterator; subclasses override the ones they want to filter.
+ * <p>
+ * The position and limit fields inherited from CodepointIterator are not
+ * maintained by this class; use {@link #position()} and {@link #limit()},
+ * which report the wrapped iterator's state.
+ */
+public abstract class FilterCodepointIterator
+  extends CodepointIterator {
+
+  /** The iterator all calls are forwarded to. */
+  private final CodepointIterator delegate;
+
+  /**
+   * @param internal the iterator whose output is to be filtered
+   */
+  protected FilterCodepointIterator(CodepointIterator internal) {
+    this.delegate = internal;
+  }
+
+  // ---- raw char access ----
+
+  @Override
+  protected char get() {
+    return delegate.get();
+  }
+
+  @Override
+  protected char get(int index) {
+    return delegate.get(index);
+  }
+
+  // ---- cursor state ----
+
+  @Override
+  public int position() {
+    return delegate.position();
+  }
+
+  @Override
+  public void position(int position) {
+    delegate.position(position);
+  }
+
+  @Override
+  public int limit() {
+    return delegate.limit();
+  }
+
+  @Override
+  public int remaining() {
+    return delegate.remaining();
+  }
+
+  @Override
+  public boolean hasNext() {
+    return delegate.hasNext();
+  }
+
+  // ---- reading ----
+
+  @Override
+  public int next() throws InvalidCharacterException {
+    return delegate.next();
+  }
+
+  @Override
+  public char[] nextChars() throws InvalidCharacterException {
+    return delegate.nextChars();
+  }
+
+  @Override
+  public int peek() throws InvalidCharacterException {
+    return delegate.peek();
+  }
+
+  @Override
+  public int peek(int index) throws InvalidCharacterException {
+    return delegate.peek(index);
+  }
+
+  @Override
+  public char[] peekChars() throws InvalidCharacterException {
+    return delegate.peekChars();
+  }
+
+  // ---- surrogate tests ----
+
+  @Override
+  public boolean isHigh(int index) {
+    return delegate.isHigh(index);
+  }
+
+  @Override
+  public boolean isLow(int index) {
+    return delegate.isLow(index);
+  }
+
+}
Added: directory/sandbox/elecharny/trunks/shared/ldap/src/main/java/org/apache/directory/shared/ldap/util/unicode/InvalidCharacterException.java
URL: http://svn.apache.org/viewvc/directory/sandbox/elecharny/trunks/shared/ldap/src/main/java/org/apache/directory/shared/ldap/util/unicode/InvalidCharacterException.java?view=auto&rev=490270
==============================================================================
--- directory/sandbox/elecharny/trunks/shared/ldap/src/main/java/org/apache/directory/shared/ldap/util/unicode/InvalidCharacterException.java (added)
+++ directory/sandbox/elecharny/trunks/shared/ldap/src/main/java/org/apache/directory/shared/ldap/util/unicode/InvalidCharacterException.java Tue Dec 26 00:37:23 2006
@@ -0,0 +1,36 @@
+/*
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements. The ASF licenses this file to You
+* under the Apache License, Version 2.0 (the "License"); you may not
+* use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License. For additional information regarding
+* copyright in this work, please see the NOTICE file in the top level
+* directory of this distribution.
+*/
+package org.apache.directory.shared.ldap.util.unicode;
+
+import java.io.IOException;
+
+/**
+ * An {@link IOException} that carries an invalid character value and reports
+ * it in hexadecimal through {@link #getMessage()}.
+ */
+public class InvalidCharacterException
+    extends IOException {
+
+    private static final long serialVersionUID = -7150645484748059676L;
+
+    /** The offending value, exactly as handed to the constructor. */
+    private int input;
+
+    /**
+     * @param input the value to report as invalid
+     */
+    public InvalidCharacterException(int input) {
+        super();
+        this.input = input;
+    }
+
+    /**
+     * @return the text "Invalid Character 0x" followed by the lower-case
+     *         hexadecimal rendering of the stored value
+     */
+    @Override
+    public String getMessage() {
+        final StringBuilder text = new StringBuilder("Invalid Character 0x");
+        text.append(Integer.toHexString(input));
+        return text.toString();
+    }
+}
\ No newline at end of file
Added: directory/sandbox/elecharny/trunks/shared/ldap/src/main/java/org/apache/directory/shared/ldap/util/unicode/Normalizer.java
URL: http://svn.apache.org/viewvc/directory/sandbox/elecharny/trunks/shared/ldap/src/main/java/org/apache/directory/shared/ldap/util/unicode/Normalizer.java?view=auto&rev=490270
==============================================================================
--- directory/sandbox/elecharny/trunks/shared/ldap/src/main/java/org/apache/directory/shared/ldap/util/unicode/Normalizer.java (added)
+++ directory/sandbox/elecharny/trunks/shared/ldap/src/main/java/org/apache/directory/shared/ldap/util/unicode/Normalizer.java Tue Dec 26 00:37:23 2006
@@ -0,0 +1,175 @@
+/*
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements. The ASF licenses this file to You
+* under the Apache License, Version 2.0 (the "License"); you may not
+* use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License. For additional information regarding
+* copyright in this work, please see the NOTICE file in the top level
+* directory of this distribution.
+*/
+package org.apache.directory.shared.ldap.util.unicode;
+
+import java.io.IOException;
+
+/**
+ * Performs Unicode normalization (forms D, C, KD and KC) using the tables
+ * held by {@link UnicodeCharacterDatabase}.
+ */
+public final class Normalizer {
+
+    /**
+     * Features a normalization form can combine. Every constant carries an
+     * explicit bit so the flags do not depend on declaration order.
+     */
+    public enum Mask {
+        NONE(0),
+        COMPATIBILITY(1),
+        COMPOSITION(2);
+
+        private final int bit;
+
+        Mask(int bit) {
+            this.bit = bit;
+        }
+    }
+
+    /**
+     * The four normalization forms, each described by the {@link Mask}
+     * features it enables.
+     */
+    public enum Form {
+        D,
+        C(Mask.COMPOSITION),
+        KD(Mask.COMPATIBILITY),
+        KC(Mask.COMPATIBILITY, Mask.COMPOSITION);
+
+        private final int mask;
+
+        Form(Mask... masks) {
+            int bits = 0;
+            for (Mask m : masks) {
+                bits |= m.bit;
+            }
+            this.mask = bits;
+        }
+
+        /** @return true for the forms built with {@link Mask#COMPATIBILITY} (KD, KC) */
+        public boolean isCompatibility() {
+            return (mask & Mask.COMPATIBILITY.bit) != 0;
+        }
+
+        /** @return the negation of {@link #isCompatibility()} (true for D, C) */
+        public boolean isCanonical() {
+            return !isCompatibility();
+        }
+
+        /** @return true for the forms built with {@link Mask#COMPOSITION} (C, KC) */
+        public boolean isComposition() {
+            return (mask & Mask.COMPOSITION.bit) != 0;
+        }
+    }
+
+    private Normalizer() {}
+
+    /**
+     * Normalizes the string using form KC.
+     *
+     * @param source the text to normalize
+     * @return a new builder holding the normalized text
+     * @throws IOException see {@link #normalize(String, Form, StringBuilder)}
+     */
+    public static StringBuilder normalize(String source) throws IOException {
+        return normalize(source, Form.KC);
+    }
+
+    /**
+     * Normalizes the string using the specified form.
+     *
+     * @param source the text to normalize
+     * @param form the normalization form to apply
+     * @return a new builder holding the normalized text
+     * @throws IOException see {@link #normalize(String, Form, StringBuilder)}
+     */
+    public static StringBuilder normalize(
+        String source,
+        Form form)
+            throws IOException {
+        return normalize(source, form, new StringBuilder());
+    }
+
+    /**
+     * Normalizes the string into the given StringBuilder using the given form.
+     * An empty source leaves the builder untouched.
+     *
+     * @param source the text to normalize
+     * @param form the normalization form to apply
+     * @param buf the builder that receives the result
+     * @return {@code buf}
+     * @throws IOException if the character database is unavailable (previously
+     *         this case silently produced no output at all), or if iterating
+     *         the input fails
+     */
+    public static StringBuilder normalize(
+        String source,
+        Form form,
+        StringBuilder buf)
+            throws IOException {
+        if (source.length() == 0) {
+            return buf;
+        }
+        UnicodeCharacterDatabase ucd = UnicodeCharacterDatabase.getInstance();
+        if (ucd == null) {
+            // Returning the untouched builder here would hand the caller an
+            // empty string in place of their text.
+            throw new IOException("Unicode character database is not available");
+        }
+        decompose(ucd, source, form, buf);
+        compose(ucd, form, buf);
+        return buf;
+    }
+
+    /**
+     * Decomposes every code point of {@code source} and inserts the resulting
+     * code points into {@code buf} at the index chosen by
+     * {@link #findInsertionPoint}.
+     */
+    private static void decompose(
+        UnicodeCharacterDatabase ucd,
+        String source,
+        Form form,
+        StringBuilder buf)
+            throws IOException {
+        // The database API appends into a StringBuffer, hence the scratch type.
+        StringBuffer internal = new StringBuffer();
+        CodepointIterator ci = CodepointIterator.forCharSequence(source);
+        boolean canonical = form.isCanonical();
+        while (ci.hasNext()) {
+            int c = ci.next();
+            internal.setLength(0);
+            ucd.decompose(c, canonical, internal);
+            CodepointIterator ii = CodepointIterator.forCharSequence(internal);
+            while (ii.hasNext()) {
+                int ch = ii.next();
+                int i = findInsertionPoint(ucd, buf, ch);
+                buf.insert(i, CharUtils.toString(ch));
+            }
+        }
+    }
+
+    /**
+     * Finds where {@code c} belongs in {@code buf}. A code point of canonical
+     * class 0 goes at the end. Any other code point moves left past every
+     * trailing code point whose canonical class is greater than its own.
+     */
+    private static int findInsertionPoint(
+        UnicodeCharacterDatabase ucd,
+        StringBuilder buf, int c) {
+        int cc = ucd.getCanonicalClass(c);
+        int i = buf.length();
+        if (cc != 0) {
+            while (i > 0) {
+                // presumably CharUtils.charAt yields the whole code point
+                // that ends at i-1 -- TODO confirm
+                int ch = CharUtils.charAt(buf, i - 1);
+                if (ucd.getCanonicalClass(ch) <= cc) {
+                    break;
+                }
+                // Step over the code point just examined. The width of the
+                // code point being inserted is irrelevant here.
+                i -= CharUtils.size(ch);
+            }
+        }
+        return i;
+    }
+
+    /**
+     * Recomposes {@code buf} in place when the form asks for composition:
+     * {@code pos} is the index of the code point currently being composed
+     * onto, {@code cpos} the write index and {@code dpos} the read index.
+     */
+    private static void compose(
+        UnicodeCharacterDatabase ucd,
+        Form form,
+        StringBuilder buf)
+            throws IOException {
+        // Nothing to do for D/KD, and index 0 cannot be read from an empty builder.
+        if (!form.isComposition() || buf.length() == 0) return;
+        int pos = 0;
+        int lc = CharUtils.charAt(buf, pos);
+        int cpos = CharUtils.size(lc);
+        int lcc = ucd.getCanonicalClass(lc);
+        // presumably 256 lies above every real combining class, so nothing
+        // composes onto a leading code point of non-zero class -- TODO confirm
+        if (lcc != 0) lcc = 256;
+        int len = buf.length();
+        int c;
+        for (int dpos = cpos; dpos < buf.length(); dpos += CharUtils.size(c)) {
+            c = CharUtils.charAt(buf, dpos);
+            int cc = ucd.getCanonicalClass(c);
+            int composite = ucd.getPairComposition(lc, c);
+            if (composite != '\uFFFF' && (lcc < cc || lcc == 0)) {
+                CharUtils.setChar(buf, pos, composite);
+                lc = composite;
+            } else {
+                if (cc == 0) {
+                    pos = cpos;
+                    lc = c;
+                }
+                lcc = cc;
+                CharUtils.setChar(buf, cpos, c);
+                // setChar may change the builder's length; keep the read
+                // index aligned with the shifted content.
+                if (buf.length() != len) {
+                    dpos += buf.length() - len;
+                    len = buf.length();
+                }
+                cpos += CharUtils.size(c);
+            }
+        }
+        buf.setLength(cpos);
+    }
+
+    /**
+     * Runs {@code UnicodeCharacterDatabase.main} with a fixed output path.
+     * NOTE(review): the path still points at the Abdera source tree this code
+     * was copied from -- confirm the intended location.
+     */
+    public static void main(String... args) throws Exception {
+
+        UnicodeCharacterDatabase.main("src/org/apache/abdera/util/unicode/data/ucd.res");
+
+    }
+}
\ No newline at end of file
Added: directory/sandbox/elecharny/trunks/shared/ldap/src/main/java/org/apache/directory/shared/ldap/util/unicode/RestrictedCodepointIterator.java
URL: http://svn.apache.org/viewvc/directory/sandbox/elecharny/trunks/shared/ldap/src/main/java/org/apache/directory/shared/ldap/util/unicode/RestrictedCodepointIterator.java?view=auto&rev=490270
==============================================================================
--- directory/sandbox/elecharny/trunks/shared/ldap/src/main/java/org/apache/directory/shared/ldap/util/unicode/RestrictedCodepointIterator.java (added)
+++ directory/sandbox/elecharny/trunks/shared/ldap/src/main/java/org/apache/directory/shared/ldap/util/unicode/RestrictedCodepointIterator.java Tue Dec 26 00:37:23 2006
@@ -0,0 +1,120 @@
+/*
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements. The ASF licenses this file to You
+* under the Apache License, Version 2.0 (the "License"); you may not
+* use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License. For additional information regarding
+* copyright in this work, please see the NOTICE file in the top level
+* directory of this distribution.
+*/
+package org.apache.directory.shared.ldap.util.unicode;
+
+import java.util.BitSet;
+
+/**
+ * A CodepointIterator implementation that checks output against a BitSet.
+ * By default a code point is rejected when its bit is NOT set; with
+ * {@code notset} the test is inverted and a code point is rejected when its
+ * bit IS set. If the iterator is set to "scanning only", it returns -1
+ * (or null from {@link #nextChars()}) upon encountering a rejected code point
+ * and moves its position back so that the rejected code point is not consumed;
+ * otherwise it throws an InvalidCharacterException.
+ */
+public class RestrictedCodepointIterator
+    extends FilterCodepointIterator {
+
+    private BitSet bitset;
+    private boolean scanningOnly = false;
+    private boolean notset = false;
+
+    /** Rejects code points whose bit is clear, by throwing. */
+    protected RestrictedCodepointIterator(
+        CodepointIterator internal,
+        BitSet bitset) {
+        this(internal, bitset, false);
+    }
+
+    /** Rejects code points whose bit is clear; {@code scanningOnly} selects stop-instead-of-throw. */
+    protected RestrictedCodepointIterator(
+        CodepointIterator internal,
+        BitSet bitset,
+        boolean scanningOnly) {
+        this(internal, bitset, scanningOnly, false);
+    }
+
+    /**
+     * @param internal the iterator to filter
+     * @param bitset the set consulted for every code point
+     * @param scanningOnly true to stop quietly at a rejected code point, false to throw
+     * @param notset true to reject code points whose bit is set, false to
+     *        reject those whose bit is clear
+     */
+    protected RestrictedCodepointIterator(
+        CodepointIterator internal,
+        BitSet bitset,
+        boolean scanningOnly,
+        boolean notset) {
+        super(internal);
+        this.bitset = bitset;
+        this.scanningOnly = scanningOnly;
+        this.notset = notset;
+    }
+
+    /**
+     * In scanning mode this also answers false when the upcoming code point
+     * would be rejected or cannot be read.
+     */
+    @Override
+    public boolean hasNext() {
+        boolean b = super.hasNext();
+        // Only look ahead when there is something to look at: peeking on an
+        // exhausted iterator has nothing to inspect and could only fail.
+        if (b && scanningOnly) {
+            try {
+                int cp = peek(position());
+                if (cp != -1 && check(cp)) return false;
+            } catch (InvalidCharacterException e) {
+                return false;
+            }
+        }
+        return b;
+    }
+
+    /**
+     * @return the next code point, or -1 in scanning mode when that code
+     *         point is rejected
+     * @throws InvalidCharacterException for a rejected code point when not scanning
+     */
+    @Override
+    public int next() throws InvalidCharacterException {
+        int cp = super.next();
+        // Rewind by the full width of the code point, one or two chars,
+        // matching what nextChars() does for a surrogate pair.
+        if (cp != -1 && rejected(cp, Character.charCount(cp))) {
+            return -1;
+        }
+        return cp;
+    }
+
+    /** @return true when {@code cp} must be rejected according to the bit set and {@code notset} */
+    private boolean check(int cp) {
+        return (!notset) ? !bitset.get(cp) : bitset.get(cp);
+    }
+
+    /**
+     * Applies the rejection policy to a code point that has just been read.
+     *
+     * @param cp the code point that was read
+     * @param width how many positions reading it consumed
+     * @return false if the code point is acceptable; true if it was rejected
+     *         in scanning mode, in which case the position has been rewound
+     * @throws InvalidCharacterException if rejected and not in scanning mode
+     */
+    private boolean rejected(int cp, int width) throws InvalidCharacterException {
+        if (!check(cp)) {
+            return false;
+        }
+        if (!scanningOnly) {
+            throw new InvalidCharacterException(cp);
+        }
+        position(position() - width);
+        return true;
+    }
+
+    /**
+     * @return the chars of the next code point, or null in scanning mode when
+     *         that code point is rejected
+     * @throws InvalidCharacterException for a rejected code point when not scanning
+     */
+    @Override
+    public char[] nextChars() throws InvalidCharacterException {
+        char[] chars = super.nextChars();
+        if (chars == null) {
+            return null;
+        }
+        if (chars.length == 1) {
+            if (rejected(chars[0], 1)) return null;
+        } else if (chars.length == 2) {
+            if (rejected(CharUtils.toCodePoint(chars), 2)) return null;
+        }
+        return chars;
+    }
+
+    /** Small manual demonstration; prints the code points that pass the filter. */
+    public static void main(String... args) throws Exception {
+
+        ChainableBitSet set = new ChainableBitSet().set2('a','b','c');
+        char[] c = {'a','b','c',CharUtils.getHighSurrogate(0x10000),CharUtils.getLowSurrogate(0x10000)};
+
+        CodepointIterator ci = CodepointIterator.forCharArray(c);
+        RestrictedCodepointIterator rci = new RestrictedCodepointIterator(ci,set,false,true);
+        while(rci.hasNext()) System.out.println(rci.next());
+    }
+}
Added: directory/sandbox/elecharny/trunks/shared/ldap/src/main/java/org/apache/directory/shared/ldap/util/unicode/UnicodeCharacterDatabase.java
URL: http://svn.apache.org/viewvc/directory/sandbox/elecharny/trunks/shared/ldap/src/main/java/org/apache/directory/shared/ldap/util/unicode/UnicodeCharacterDatabase.java?view=auto&rev=490270
==============================================================================
--- directory/sandbox/elecharny/trunks/shared/ldap/src/main/java/org/apache/directory/shared/ldap/util/unicode/UnicodeCharacterDatabase.java (added)
+++ directory/sandbox/elecharny/trunks/shared/ldap/src/main/java/org/apache/directory/shared/ldap/util/unicode/UnicodeCharacterDatabase.java Tue Dec 26 00:37:23 2006
@@ -0,0 +1,311 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. The ASF licenses this file to You
+ * under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License. For additional information regarding
+ * copyright in this work, please see the NOTICE file in the top level
+ * directory of this distribution.
+ */
+package org.apache.directory.shared.ldap.util.unicode;
+
+
+import java.io.BufferedReader;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.io.ObjectInputStream;
+import java.io.ObjectOutputStream;
+import java.io.Serializable;
+import java.util.BitSet;
+import java.util.HashMap;
+import java.util.zip.GZIPInputStream;
+import java.util.zip.GZIPOutputStream;
+
+
+/**
+ * An implementation of the Unicode Character Database modeled after the
+ * sample normalization demo available at:
+ *
+ * http://www.unicode.org/unicode/reports/tr15/Normalizer.html
+ *
+ * For now, this has been implemented and tested against Unicode 3.2.0. We
+ * need to test it against Unicode 4.0.
+ *
+ * The tables are normally read from a gzip-compressed, serialized instance
+ * stored as a classpath resource; {@link #main(String...)} rebuilds that
+ * resource from the Unicode text files.
+ */
+final class UnicodeCharacterDatabase implements Serializable, Cloneable
+{
+
+    private static final long serialVersionUID = 1596950870716625345L;
+
+    private static final String UCD = "org/apache/directory/shared/ldap/util/unicode/data/ucd.res";
+
+    // code point -> canonical combining class
+    private final HashMap<Integer, Integer> cc = new HashMap<Integer, Integer>();
+    // code point -> decomposition mapping
+    private final HashMap<Integer, String> decompose = new HashMap<Integer, String>();
+    // ( first << 16 ) | second -> composite code point
+    private final HashMap<Integer, Integer> compose = new HashMap<Integer, Integer>();
+    // code points whose decomposition is tagged (starts with '<') in UnicodeData
+    private final BitSet compatibility = new BitSet();
+    // code points listed in the composition exclusions file
+    private final BitSet excluded = new BitSet();
+
+    private static UnicodeCharacterDatabase ucd = null;
+
+
+    /**
+     * Returns the shared instance, loading it on first use.
+     *
+     * @return the database, or null when it could not be loaded
+     */
+    public synchronized static UnicodeCharacterDatabase getInstance()
+    {
+        if ( ucd == null )
+        {
+            try
+            {
+                ucd = load();
+            }
+            catch ( Exception ignored )
+            {
+                // Deliberately best-effort: callers test for null, and main()
+                // relies on null to rebuild the resource from the text files.
+            }
+        }
+        return ucd;
+    }
+
+
+    UnicodeCharacterDatabase()
+    {
+    }
+
+
+    /**
+     * @param c a code point
+     * @return the recorded canonical combining class, or 0 when none is recorded
+     */
+    public int getCanonicalClass( int c )
+    {
+        Integer known = cc.get( c );
+        return ( known != null ) ? known.intValue() : 0;
+    }
+
+
+    /**
+     * NOTE(review): despite its name this only checks that both arguments lie
+     * in 0..0x10FFFF; it never consults the composition table.
+     */
+    public boolean isComposite( int f, int s )
+    {
+        return !( f < 0 || f > 0x10FFFF || s < 0 || s > 0x10FFFF );
+    }
+
+
+    /**
+     * Looks up the composite of a pair of code points.
+     *
+     * @param f the first code point of the pair
+     * @param s the second code point of the pair
+     * @return the composite, or '\uFFFF' when the pair has none
+     */
+    public char getPairComposition( int f, int s )
+    {
+        // Table keys pack two 16-bit values as ( f << 16 ) | s. A value above
+        // 0xFFFF would overflow into the other half and alias an unrelated
+        // pair, so such inputs can never have an entry of their own.
+        if ( f < 0 || f > 0xFFFF || s < 0 || s > 0xFFFF )
+            return '\uFFFF';
+        Integer i = compose.get( ( f << 16 ) | s );
+        // A composite that does not fit in a char cannot be returned intact.
+        if ( i == null || i.intValue() > 0xFFFF )
+            return '\uFFFF';
+        return ( char ) i.intValue();
+    }
+
+
+    /**
+     * Appends the full (recursive) decomposition of {@code c} to {@code buf}.
+     * The code point itself is appended when it has no mapping, or when
+     * {@code canonical} is true and its mapping is a compatibility one.
+     *
+     * @param c the code point to decompose
+     * @param canonical true to apply canonical mappings only
+     * @param buf receives the result
+     */
+    public void decompose( int c, boolean canonical, StringBuffer buf )
+    {
+        String d = decompose.get( c );
+        if ( d != null && !( canonical && compatibility.get( c ) ) )
+        {
+            // Walk by code point so a surrogate pair in a mapping is
+            // decomposed as one code point.
+            for ( int i = 0; i < d.length(); )
+            {
+                int cp = d.codePointAt( i );
+                decompose( cp, canonical, buf );
+                i += Character.charCount( cp );
+            }
+        }
+        else
+        {
+            CharUtils.append( buf, c );
+        }
+    }
+
+
+    public Object clone() throws CloneNotSupportedException
+    {
+        return super.clone();
+    }
+
+
+    /**
+     * Reads the serialized database from the classpath resource.
+     *
+     * @return the deserialized database
+     * @throws IOException if the resource is missing or cannot be read
+     * @throws ClassNotFoundException if deserialization cannot resolve a class
+     */
+    public static UnicodeCharacterDatabase load() throws IOException, ClassNotFoundException
+    {
+        ClassLoader cl = Thread.currentThread().getContextClassLoader();
+        if ( cl == null )
+        {
+            cl = UnicodeCharacterDatabase.class.getClassLoader();
+        }
+        InputStream is = ( cl != null ) ? cl.getResourceAsStream( UCD ) : ClassLoader.getSystemResourceAsStream( UCD );
+        if ( is == null )
+        {
+            throw new IOException( "Resource not found on the classpath: " + UCD );
+        }
+        InputStream in = is;
+        try
+        {
+            in = new GZIPInputStream( is );
+            ObjectInputStream ois = new ObjectInputStream( in );
+            return ( UnicodeCharacterDatabase ) ois.readObject();
+        }
+        finally
+        {
+            // Closing the outermost stream opened so far also closes the resource.
+            in.close();
+        }
+    }
+
+
+    /** Writes {@code ucd} to the file {@code to} as a gzip-compressed serialized object. */
+    private static void save( UnicodeCharacterDatabase ucd, String to ) throws IOException
+    {
+        FileOutputStream fos = new FileOutputStream( to );
+        try
+        {
+            GZIPOutputStream gzip = new GZIPOutputStream( fos );
+            ObjectOutputStream oos = new ObjectOutputStream( gzip );
+            oos.writeObject( ucd );
+            // Flushes the object stream and finishes the gzip stream.
+            oos.close();
+        }
+        finally
+        {
+            // Releases the file even when writing failed part-way.
+            fos.close();
+        }
+    }
+
+    private static String base;
+    private static String version;
+
+
+    /**
+     * Load the Unicode Character Database from the source files and save as
+     * a gzip compressed, serialized Java class.
+     *
+     * Arguments: output file name, then optionally the classpath location of
+     * the Unicode text files and the Unicode version (default "3.2.0").
+     */
+    public static void main( String... args ) throws Exception
+    {
+        if ( args.length == 0 )
+        {
+            usage();
+        }
+
+        base = ( args.length > 1 ) ? args[1] : UCD;
+        version = ( args.length > 2 ) ? args[2] : "3.2.0";
+        UnicodeCharacterDatabase ucd = UnicodeCharacterDatabase.getInstance();
+
+        if ( ucd == null )
+        {
+            ucd = new UnicodeCharacterDatabase();
+            Loader.load( ucd );
+        }
+
+        save( ucd, args[0] );
+    }
+
+
+    private static void usage()
+    {
+        System.out
+            .println( "Usage:\n java -cp $CLASSPATH org.apache.directory.shared.ldap.util.unicode.UnicodeCharacterDatabase $filename $datapath" );
+        System.exit( 0 );
+    }
+
+    /** Builds the tables from the Unicode text files found on the classpath. */
+    private static class Loader
+    {
+
+        private static final String EXCLUSIONS = "CompositionExclusions";
+        private static final String UNICODEDATA = "UnicodeData";
+
+
+        /** @return {@code base}/{@code target}-{@code version}.txt */
+        static String filename( String target )
+        {
+            return base + ( !base.endsWith( "/" ) ? "/" : "" ) + target + "-" + version + ".txt";
+        }
+
+
+        static void load( UnicodeCharacterDatabase ucd ) throws IOException
+        {
+            // Exclusions first: decomposition() consults them when it
+            // registers composition pairs.
+            exclusions( ucd );
+            decomposition( ucd );
+        }
+
+
+        /** @return {@code s} up to, but not including, the first '#' */
+        static String stripcomments( String s )
+        {
+            int n = s.indexOf( '#' );
+            return ( n != -1 ) ? s.substring( 0, n ) : s;
+        }
+
+
+        /** Reads the exclusions file: one hexadecimal code point per non-comment line. */
+        static void exclusions( UnicodeCharacterDatabase ucd ) throws IOException
+        {
+            BufferedReader r = read( filename( EXCLUSIONS ) );
+            try
+            {
+                String line = null;
+                while ( ( line = r.readLine() ) != null )
+                {
+                    // Trim before the emptiness test so a whitespace-only
+                    // remainder does not reach parseInt.
+                    line = stripcomments( line ).trim();
+                    if ( line.length() == 0 )
+                        continue;
+                    int v = Integer.parseInt( line, 16 );
+                    ucd.excluded.set( v );
+                }
+            }
+            finally
+            {
+                r.close();
+            }
+        }
+
+
+        /**
+         * Converts a space-separated list of hexadecimal code points into a
+         * string, skipping tokens that start with '<'.
+         */
+        static String dehex( String t )
+        {
+            String[] ts = t.split( " " );
+            StringBuffer buf = new StringBuffer();
+            for ( String token : ts )
+            {
+                token = token.trim();
+                // Consecutive separators yield empty tokens.
+                if ( token.length() == 0 || token.charAt( 0 ) == '<' )
+                    continue;
+                int n = Integer.parseInt( token, 16 );
+                // toChars keeps code points above 0xFFFF intact; a plain
+                // char cast would silently truncate them.
+                buf.append( Character.toChars( n ) );
+            }
+            return buf.toString();
+        }
+
+
+        /**
+         * Reads the UnicodeData file: field 0 is the code point, field 3 the
+         * canonical combining class and field 5 the decomposition mapping.
+         */
+        static void decomposition( UnicodeCharacterDatabase ucd ) throws IOException
+        {
+            BufferedReader r = read( filename( UNICODEDATA ) );
+            try
+            {
+                String line = null;
+                while ( ( line = r.readLine() ) != null )
+                {
+                    line = stripcomments( line );
+                    if ( line.trim().length() == 0 )
+                        continue;
+                    String[] tokens = line.split( ";" );
+                    int val = Integer.parseInt( tokens[0], 16 );
+                    int cc = Integer.parseInt( tokens[3] );
+                    ucd.cc.put( val, cc );
+                    String decomp = tokens[5];
+                    if ( decomp.length() != 0 )
+                    {
+                        if ( decomp.startsWith( "<" ) )
+                        {
+                            ucd.compatibility.set( val );
+                        }
+                        decomp = dehex( decomp );
+                        ucd.decompose.put( val, decomp );
+                        // The pair key holds two 16-bit values, so only
+                        // mappings made purely of BMP characters can be
+                        // registered for recomposition.
+                        boolean bmpOnly = decomp.length() != 0
+                            && decomp.codePointCount( 0, decomp.length() ) == decomp.length();
+                        if ( bmpOnly && !ucd.compatibility.get( val ) && !ucd.excluded.get( val ) )
+                        {
+                            char f = ( decomp.length() > 1 ) ? decomp.charAt( 0 ) : '\u0000';
+                            char l = ( decomp.length() > 1 ) ? decomp.charAt( 1 ) : decomp.charAt( 0 );
+                            ucd.compose.put( ( f << 16 ) | l, val );
+                        }
+                    }
+                }
+            }
+            finally
+            {
+                r.close();
+            }
+            hanguls( ucd );
+        }
+
+
+        // Use the algorithm used in http://www.unicode.org/unicode/reports/tr15/NormalizerBuilder.java
+        // The loop covers 0x2BA4 (11172) values starting at 0xAC00; 0x001C is 28
+        // and 0x024C is 588. When t is 0 the pair is ( 0x1100 + s / 588,
+        // 0x1161 + ( s % 588 ) / 28 ); otherwise it is ( 0xAC00 + s - t, 0x11A7 + t ).
+        static void hanguls( UnicodeCharacterDatabase ucd ) throws IOException
+        {
+            for ( int s = 0; s < 0x2BA4; ++s )
+            {
+                int t = s % 0x001C;
+                char f = ( t != 0 ) ? ( char ) ( 0xAC00 + s - t ) : ( char ) ( 0x1100 + s / 0x024C );
+                char e = ( t != 0 ) ? ( char ) ( 0x11A7 + t ) : ( char ) ( 0x1161 + ( s % 0x024C ) / 0x001C );
+                int pair = ( f << 16 ) | e;
+                int value = s + 0xAC00;
+                ucd.decompose.put( value, String.valueOf( f ) + e );
+                ucd.compose.put( pair, value );
+            }
+        }
+
+
+        /**
+         * Opens a classpath resource as a reader.
+         *
+         * @throws IOException if the resource does not exist
+         */
+        static BufferedReader read( String f ) throws IOException
+        {
+            ClassLoader cl = Thread.currentThread().getContextClassLoader();
+            if ( cl == null )
+            {
+                cl = UnicodeCharacterDatabase.class.getClassLoader();
+            }
+            InputStream in = ( cl != null ) ? cl.getResourceAsStream( f ) : ClassLoader.getSystemResourceAsStream( f );
+            if ( in == null )
+            {
+                throw new IOException( "Unicode data file not found on the classpath: " + f );
+            }
+            // Name the encoding rather than rely on the platform default.
+            return new BufferedReader( new InputStreamReader( in, "UTF-8" ) );
+        }
+
+    }
+
+}
Added: directory/sandbox/elecharny/trunks/shared/ldap/src/main/resources/org/apache/directory/shared/ldap/util/unicode/data/ucd.res
URL: http://svn.apache.org/viewvc/directory/sandbox/elecharny/trunks/shared/ldap/src/main/resources/org/apache/directory/shared/ldap/util/unicode/data/ucd.res?view=auto&rev=490270
==============================================================================
Binary file - no diff available.
Propchange: directory/sandbox/elecharny/trunks/shared/ldap/src/main/resources/org/apache/directory/shared/ldap/util/unicode/data/ucd.res
------------------------------------------------------------------------------
svn:mime-type = application/octet-stream
Added: directory/sandbox/elecharny/trunks/shared/ldap/src/main/unicode/ByteArrayCodepointIterator.java
URL: http://svn.apache.org/viewvc/directory/sandbox/elecharny/trunks/shared/ldap/src/main/unicode/ByteArrayCodepointIterator.java?view=auto&rev=490270
==============================================================================
--- directory/sandbox/elecharny/trunks/shared/ldap/src/main/unicode/ByteArrayCodepointIterator.java (added)
+++ directory/sandbox/elecharny/trunks/shared/ldap/src/main/unicode/ByteArrayCodepointIterator.java Tue Dec 26 00:37:23 2006
@@ -0,0 +1,45 @@
+/*
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements. The ASF licenses this file to You
+* under the Apache License, Version 2.0 (the "License"); you may not
+* use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License. For additional information regarding
+* copyright in this work, please see the NOTICE file in the top level
+* directory of this distribution.
+*/
+package org.apache.directory.shared.ldap.util.unicode;
+
+import java.nio.ByteBuffer;
+import java.nio.CharBuffer;
+import java.nio.charset.Charset;
+
+/**
+ * A code point iterator over the text obtained by decoding a byte array
+ * with a charset.
+ */
+public class ByteArrayCodepointIterator
+    extends CharArrayCodepointIterator {
+
+    /** Decodes {@code bytes} with {@link Charset#defaultCharset()}. */
+    public ByteArrayCodepointIterator(byte[] bytes) {
+        this(bytes, Charset.defaultCharset());
+    }
+
+    /** Decodes {@code bytes} with the charset that {@link Charset#forName(String)} returns for the name. */
+    public ByteArrayCodepointIterator(byte[] bytes, String charset) {
+        this(bytes, Charset.forName(charset));
+    }
+
+    /**
+     * Decodes {@code bytes} with {@code charset} and iterates over the
+     * decoded buffer's backing array between its position and its limit.
+     */
+    public ByteArrayCodepointIterator(byte[] bytes, Charset charset) {
+        final ByteBuffer encoded = ByteBuffer.wrap(bytes);
+        final CharBuffer decoded = charset.decode(encoded);
+        this.buffer = decoded.array();
+        this.position = decoded.position();
+        this.limit = decoded.limit();
+    }
+
+}
Added: directory/sandbox/elecharny/trunks/shared/ldap/src/main/unicode/ChainableBitSet.java
URL: http://svn.apache.org/viewvc/directory/sandbox/elecharny/trunks/shared/ldap/src/main/unicode/ChainableBitSet.java?view=auto&rev=490270
==============================================================================
--- directory/sandbox/elecharny/trunks/shared/ldap/src/main/unicode/ChainableBitSet.java (added)
+++ directory/sandbox/elecharny/trunks/shared/ldap/src/main/unicode/ChainableBitSet.java Tue Dec 26 00:37:23 2006
@@ -0,0 +1,119 @@
+/*
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements. The ASF licenses this file to You
+* under the Apache License, Version 2.0 (the "License"); you may not
+* use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License. For additional information regarding
+* copyright in this work, please see the NOTICE file in the top level
+* directory of this distribution.
+*/
+package org.apache.directory.shared.ldap.util.unicode;
+
+import java.util.BitSet;
+
+/**
+ * Extension to java.util.BitSet that allows calls to be chained, e.g.
+ * bs.set2(1).set2(2).set2(3), making it easier to define a complex bit set in
+ * a single declaration. Every "2" method performs the corresponding BitSet
+ * operation and returns this set.
+ */
+public class ChainableBitSet
+    extends BitSet {
+
+    private static final long serialVersionUID = -1105957441212997513L;
+
+    /** Chainable {@link BitSet#and(BitSet)}. */
+    public ChainableBitSet and2(BitSet set) {
+        and(set);
+        return this;
+    }
+
+    /** Chainable {@link BitSet#andNot(BitSet)}. */
+    public ChainableBitSet andNot2(BitSet set) {
+        andNot(set);
+        return this;
+    }
+
+    /**
+     * Same as {@link #andNot2(BitSet)}. The name is a historical misspelling
+     * kept so that existing callers continue to work.
+     */
+    public ChainableBitSet addNot2(BitSet set) {
+        return andNot2(set);
+    }
+
+    /** Chainable {@link BitSet#clear(int)}. */
+    public ChainableBitSet clear2(int index) {
+        clear(index);
+        return this;
+    }
+
+    /** Clears each of the listed bits. */
+    public ChainableBitSet clear2(int... indexes) {
+        for (int i : indexes) {
+            clear(i);
+        }
+        return this;
+    }
+
+    /** Chainable {@link BitSet#clear(int, int)}; {@code endIndex} is exclusive. */
+    public ChainableBitSet clear2(int startIndex, int endIndex) {
+        clear(startIndex, endIndex);
+        return this;
+    }
+
+    /** Chainable {@link BitSet#flip(int)}. */
+    public ChainableBitSet flip2(int index) {
+        flip(index);
+        return this;
+    }
+
+    /** Flips each of the listed bits. */
+    public ChainableBitSet flip2(int... indexes) {
+        for (int i : indexes) {
+            flip(i);
+        }
+        return this;
+    }
+
+    /** Chainable {@link BitSet#flip(int, int)}; {@code endIndex} is exclusive. */
+    public ChainableBitSet flip2(int startIndex, int endIndex) {
+        flip(startIndex, endIndex);
+        return this;
+    }
+
+    /** Chainable {@link BitSet#or(BitSet)}. */
+    public ChainableBitSet or2(BitSet set) {
+        or(set);
+        return this;
+    }
+
+    /** Chainable {@link BitSet#xor(BitSet)}. */
+    public ChainableBitSet xor2(BitSet set) {
+        xor(set);
+        return this;
+    }
+
+    /**
+     * Sets the bit of every code point in the string. A surrogate pair sets
+     * the single bit of the supplementary code point it encodes rather than
+     * the bits of its two halves.
+     */
+    public ChainableBitSet set2(String s) {
+        for (int i = 0; i < s.length(); ) {
+            int cp = s.codePointAt(i);
+            set(cp);
+            i += Character.charCount(cp);
+        }
+        return this;
+    }
+
+    /** Sets every bit that is set in {@code set}. */
+    public ChainableBitSet set2(BitSet set) {
+        this.or(set);
+        return this;
+    }
+
+    /**
+     * Sets each of the listed bits. Note that a call with exactly two int
+     * arguments binds to {@link #set2(int, int)} and sets a range instead.
+     */
+    public ChainableBitSet set2(int ... bits) {
+        for (int n : bits) {
+            set(n);
+        }
+        return this;
+    }
+
+    /**
+     * Sets the bits from {@code fromIndex} to {@code toIndex}, both INCLUSIVE,
+     * unlike {@link #clear2(int, int)} and {@link #flip2(int, int)}, whose end
+     * index is exclusive.
+     */
+    public ChainableBitSet set2(int fromIndex, int toIndex) {
+        super.set(fromIndex, toIndex+1);
+        return this;
+    }
+
+    /** Chainable {@link BitSet#set(int)}. */
+    public ChainableBitSet set2(int bitIndex) {
+        super.set(bitIndex);
+        return this;
+    }
+
+    /** Chainable {@link BitSet#set(int, boolean)}. */
+    public ChainableBitSet set2(int bitIndex, boolean value) {
+        super.set(bitIndex, value);
+        return this;
+    }
+
+    /** Sets (value true) or clears (value false) every bit that is set in {@code set}. */
+    public ChainableBitSet set2(BitSet set, boolean value) {
+        if (value) {
+            return set2(set);
+        }
+        this.andNot(set);
+        return this;
+    }
+
+}
Added: directory/sandbox/elecharny/trunks/shared/ldap/src/main/unicode/CharArrayCodepointIterator.java
URL: http://svn.apache.org/viewvc/directory/sandbox/elecharny/trunks/shared/ldap/src/main/unicode/CharArrayCodepointIterator.java?view=auto&rev=490270
==============================================================================
--- directory/sandbox/elecharny/trunks/shared/ldap/src/main/unicode/CharArrayCodepointIterator.java (added)
+++ directory/sandbox/elecharny/trunks/shared/ldap/src/main/unicode/CharArrayCodepointIterator.java Tue Dec 26 00:37:23 2006
@@ -0,0 +1,50 @@
+/*
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements. The ASF licenses this file to You
+* under the Apache License, Version 2.0 (the "License"); you may not
+* use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License. For additional information regarding
+* copyright in this work, please see the NOTICE file in the top level
+* directory of this distribution.
+*/
+package org.apache.directory.shared.ldap.util.unicode;
+
+/**
+ * A code point iterator whose characters come from a char array.
+ */
+public class CharArrayCodepointIterator
+    extends CodepointIterator {
+
+    /** The characters being iterated. */
+    protected char[] buffer;
+
+    /** For subclasses that fill in buffer, position and limit themselves. */
+    protected CharArrayCodepointIterator() {}
+
+    /** Iterates over the whole array. */
+    public CharArrayCodepointIterator(char[] buffer) {
+        this(buffer, 0, buffer.length);
+    }
+
+    /**
+     * Starts at index {@code n}; the limit becomes the smaller of
+     * {@code buffer.length - n} and {@code e}.
+     * NOTE(review): the limit is compared against absolute indexes in
+     * {@link #get()} and {@link #get(int)}, so for n > 0 this looks like it
+     * cuts the range short -- confirm the intended meaning of {@code e}.
+     */
+    public CharArrayCodepointIterator(char[] buffer, int n, int e) {
+        this.buffer = buffer;
+        this.position = n;
+        final int available = buffer.length - n;
+        this.limit = (available <= e) ? available : e;
+    }
+
+    /**
+     * @return the char at the current position, advancing the position, or
+     *         {@code (char)-1} once the position has reached the limit
+     */
+    protected char get() {
+        if (position >= limit) {
+            return (char)-1;
+        }
+        final int at = position++;
+        return buffer[at];
+    }
+
+    /**
+     * @return the char at {@code index}
+     * @throws ArrayIndexOutOfBoundsException if the index is negative or not below the limit
+     */
+    protected char get(int index) {
+        final boolean inRange = index >= 0 && index < limit;
+        if (!inRange) {
+            throw new ArrayIndexOutOfBoundsException(index);
+        }
+        return buffer[index];
+    }
+
+}
Added: directory/sandbox/elecharny/trunks/shared/ldap/src/main/unicode/CharBufferCodepointIterator.java
URL: http://svn.apache.org/viewvc/directory/sandbox/elecharny/trunks/shared/ldap/src/main/unicode/CharBufferCodepointIterator.java?view=auto&rev=490270
==============================================================================
--- directory/sandbox/elecharny/trunks/shared/ldap/src/main/unicode/CharBufferCodepointIterator.java (added)
+++ directory/sandbox/elecharny/trunks/shared/ldap/src/main/unicode/CharBufferCodepointIterator.java Tue Dec 26 00:37:23 2006
@@ -0,0 +1,34 @@
+/*
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements. The ASF licenses this file to You
+* under the Apache License, Version 2.0 (the "License"); you may not
+* use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License. For additional information regarding
+* copyright in this work, please see the NOTICE file in the top level
+* directory of this distribution.
+*/
+package org.apache.directory.shared.ldap.util.unicode;
+
+import java.nio.CharBuffer;
+
+/**
+ * Iterate over Unicode codepoints in a java.nio.CharBuffer, from the
+ * buffer's position up to its limit. The buffer's own position is not moved.
+ */
+public class CharBufferCodepointIterator
+    extends CharArrayCodepointIterator {
+
+    /**
+     * @param cb the buffer to iterate; an accessible backing array is used
+     *        directly, while the contents of any other buffer (read-only,
+     *        direct, or wrapping a CharSequence) are copied
+     */
+    public CharBufferCodepointIterator(CharBuffer cb) {
+        if (cb.hasArray()) {
+            // Buffer indexes are relative to arrayOffset(), which is non-zero
+            // for slices, so translate them into array indexes.
+            final int offset = cb.arrayOffset();
+            buffer = cb.array();
+            position = offset + cb.position();
+            limit = offset + cb.limit();
+        } else {
+            // array() would throw here; read the remaining chars through a
+            // duplicate so the caller's buffer stays untouched.
+            final CharBuffer view = cb.duplicate();
+            final char[] copy = new char[view.remaining()];
+            view.get(copy);
+            buffer = copy;
+            position = 0;
+            limit = copy.length;
+        }
+    }
+
+}
+}
Added: directory/sandbox/elecharny/trunks/shared/ldap/src/main/unicode/CharSequenceCodepointIterator.java
URL: http://svn.apache.org/viewvc/directory/sandbox/elecharny/trunks/shared/ldap/src/main/unicode/CharSequenceCodepointIterator.java?view=auto&rev=490270
==============================================================================
--- directory/sandbox/elecharny/trunks/shared/ldap/src/main/unicode/CharSequenceCodepointIterator.java (added)
+++ directory/sandbox/elecharny/trunks/shared/ldap/src/main/unicode/CharSequenceCodepointIterator.java Tue Dec 26 00:37:23 2006
@@ -0,0 +1,47 @@
+/*
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements. The ASF licenses this file to You
+* under the Apache License, Version 2.0 (the "License"); you may not
+* use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License. For additional information regarding
+* copyright in this work, please see the NOTICE file in the top level
+* directory of this distribution.
+*/
+package org.apache.directory.shared.ldap.util.unicode;
+
+/**
+ * A code point iterator whose characters come from a CharSequence
+ * (e.g. String, StringBuffer, etc).
+ */
+public class CharSequenceCodepointIterator
+    extends CodepointIterator {
+
+    /** The characters being iterated. */
+    private CharSequence buffer;
+
+    /** Iterates over the whole sequence. */
+    public CharSequenceCodepointIterator(CharSequence buffer) {
+        this(buffer, 0, buffer.length());
+    }
+
+    /**
+     * Starts at index {@code n}; the limit becomes the smaller of
+     * {@code buffer.length() - n} and {@code e}.
+     */
+    public CharSequenceCodepointIterator(CharSequence buffer, int n, int e) {
+        this.buffer = buffer;
+        this.position = n;
+        final int available = buffer.length() - n;
+        this.limit = (available <= e) ? available : e;
+    }
+
+    /**
+     * @return the char at the current position, advancing the position; the
+     *         limit is not consulted here
+     */
+    protected char get() {
+        final int at = position++;
+        return buffer.charAt(at);
+    }
+
+    /** @return the char at {@code index} of the sequence */
+    protected char get(int index) {
+        final char unit = buffer.charAt(index);
+        return unit;
+    }
+
+}
+
Added: directory/sandbox/elecharny/trunks/shared/ldap/src/main/unicode/CharUtils.java
URL: http://svn.apache.org/viewvc/directory/sandbox/elecharny/trunks/shared/ldap/src/main/unicode/CharUtils.java?view=auto&rev=490270
==============================================================================
--- directory/sandbox/elecharny/trunks/shared/ldap/src/main/unicode/CharUtils.java (added)
+++ directory/sandbox/elecharny/trunks/shared/ldap/src/main/unicode/CharUtils.java Tue Dec 26 00:37:23 2006
@@ -0,0 +1,326 @@
+/*
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements. The ASF licenses this file to You
+* under the Apache License, Version 2.0 (the "License"); you may not
+* use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License. For additional information regarding
+* copyright in this work, please see the NOTICE file in the top level
+* directory of this distribution.
+*/
+package org.apache.directory.shared.ldap.util.unicode;
+
+import java.util.BitSet;
+
+/**
+ * General utilities for dealing with Unicode characters
+ */
+public final class CharUtils {
+
+ private CharUtils() {}
+
+ public static boolean isValidCodepoint(int d) {
+ return d >= 0x000000 && d <= 0x10ffff;
+ }
+
+ public static int scanNot(CodepointIterator ci, BitSet set) throws InvalidCharacterException {
+ RestrictedCodepointIterator rci = new RestrictedCodepointIterator(ci,set,true,true);
+ while (rci.hasNext()) rci.next();
+ return rci.position;
+ }
+
+ public static int scanNot(char[] array, BitSet set) throws InvalidCharacterException {
+ CodepointIterator ci = CodepointIterator.forCharArray(array);
+ RestrictedCodepointIterator rci = new RestrictedCodepointIterator(ci,set,true,true);
+ while (rci.hasNext()) rci.next();
+ return rci.position;
+ }
+
+ public static int scan(CodepointIterator ci, BitSet set) throws InvalidCharacterException {
+ RestrictedCodepointIterator rci = new RestrictedCodepointIterator(ci,set,true);
+ while (rci.hasNext()) rci.next();
+ return rci.position();
+ }
+
+ public static int scan(char[] array, BitSet set) throws InvalidCharacterException {
+ CodepointIterator ci = CodepointIterator.forCharArray(array);
+ RestrictedCodepointIterator rci = new RestrictedCodepointIterator(ci,set,true);
+ while (rci.hasNext()) rci.next();
+ return rci.position();
+ }
+
+ public static int scan(String s, BitSet set) throws InvalidCharacterException {
+ CodepointIterator ci = CodepointIterator.forCharSequence(s);
+ RestrictedCodepointIterator rci = new RestrictedCodepointIterator(ci,set,true);
+ while (rci.hasNext()) rci.next();
+ return rci.position;
+ }
+
+ public static void verifyNot(CodepointIterator ci, BitSet set) throws InvalidCharacterException {
+ RestrictedCodepointIterator rci = new RestrictedCodepointIterator(ci,set,false,true);
+ while (rci.hasNext()) rci.next();
+ }
+
+ public static void verifyNot(char[] array, BitSet set) throws InvalidCharacterException {
+ CodepointIterator ci = CodepointIterator.forCharArray(array);
+ RestrictedCodepointIterator rci = new RestrictedCodepointIterator(ci,set,false,true);
+ while (rci.hasNext()) rci.next();
+ }
+
+ public static void verify(CodepointIterator ci, BitSet set) throws InvalidCharacterException {
+ RestrictedCodepointIterator rci = new RestrictedCodepointIterator(ci,set,false);
+ while (rci.hasNext()) rci.next();
+ }
+
+ public static void verify(char[] array, BitSet set) throws InvalidCharacterException {
+ CodepointIterator ci = CodepointIterator.forCharArray(array);
+ RestrictedCodepointIterator rci = new RestrictedCodepointIterator(ci,set,false);
+ while (rci.hasNext()) rci.next();
+ }
+
+ public static void verify(String s, BitSet set) throws InvalidCharacterException {
+ if (s == null) return;
+ CodepointIterator ci = CodepointIterator.forCharSequence(s);
+ RestrictedCodepointIterator rci = new RestrictedCodepointIterator(ci,set,false);
+ while (rci.hasNext()) rci.next();
+ }
+
+ public static boolean inRange(char[] chars, char low, char high) {
+ for (int i = 0; i < chars.length; i++)
+ if (chars[i] < low || chars[i] > high) return false;
+ return true;
+ }
+
+ public static boolean inRange(char[] chars, int low, int high) {
+ for (int i = 0; i < chars.length; i++) {
+ char n = chars[i];
+ int c = (isHighSurrogate(n) &&
+ i + 1 < chars.length &&
+ isLowSurrogate(chars[i+1])) ? toCodePoint(n,chars[i++]) : n;
+ if (c < low || c > high) return false;
+ }
+ return true;
+ }
+
+ public static boolean isSet(int n, BitSet... sets) {
+ if (n == -1) return false;
+ BitSet set = new BitSet();
+ for (BitSet s : sets) set.or(s);
+ return set.get(n);
+ }
+
+ public static void append(StringBuffer buf, int c) {
+ if (isSupplementary(c)) {
+ buf.append(getHighSurrogate(c));
+ buf.append(getLowSurrogate(c));
+ } else buf.append((char)c);
+ }
+
+ public static char getHighSurrogate(int c) {
+ return (c >= 0x10000) ?
+ (char)((0xD800 - (0x10000 >> 10)) + (c >> 10)) : 0;
+ }
+
+ public static char getLowSurrogate(int c) {
+ return (c >= 0x10000) ?
+ (char)(0xDC00 + (c & 0x3FF)) : (char)c;
+ }
+
+ public static boolean isHighSurrogate(char c) {
+ return c <= '\uDBFF' && c >= '\uD800';
+ }
+
+ public static boolean isLowSurrogate(char c) {
+ return c <= '\uDFFF' && c >= '\uDC00';
+ }
+
+ public static boolean isSupplementary(int c) {
+ return c <= 0x10ffff && c >= 0x010000;
+ }
+
+ public static boolean isSurrogatePair(char high, char low) {
+ return isHighSurrogate(high) && isLowSurrogate(low);
+ }
+
+ public static int toCodePoint(char[] chars) {
+ return toCodePoint(chars[0],chars[1]);
+ }
+
+ public static int toCodePoint(char high, char low) {
+ return ((high - '\uD800') << 10) + (low - '\uDC00') + 0x010000;
+ }
+
+ public static int charAt(String s, int i) {
+ char c = s.charAt(i);
+ if (c < 0xD800 || c > 0xDFFF) return c;
+ if (isHighSurrogate(c)) {
+ if (s.length() != i) {
+ char low = s.charAt(i+1);
+ if (isLowSurrogate(low)) return toCodePoint(c,low);
+ }
+ } else if (isLowSurrogate(c)) {
+ if (i >= 1) {
+ char high = s.charAt(i-1);
+ if (isHighSurrogate(high)) return toCodePoint(high,c);
+ }
+ }
+ return c;
+ }
+
+ public static int charAt(StringBuilder s, int i) {
+ char c = s.charAt(i);
+ if (c < 0xD800 || c > 0xDFFF) return c;
+ if (isHighSurrogate(c)) {
+ if (s.length() != i) {
+ char low = s.charAt(i+1);
+ if (isLowSurrogate(low)) return toCodePoint(c,low);
+ }
+ } else if (isLowSurrogate(c)) {
+ if (i >= 1) {
+ char high = s.charAt(i-1);
+ if (isHighSurrogate(high)) return toCodePoint(high,c);
+ }
+ }
+ return c;
+ }
+
+ public static void insert(StringBuffer s, int i, int c) {
+ if (i > 0 && i < s.length()) {
+ char ch = s.charAt(i);
+ boolean low = isLowSurrogate(ch);
+ if (low) {
+ if (low && isHighSurrogate(s.charAt(i-1))) {
+ i--;
+ }
+ }
+ }
+ s.insert(i, toString(c));
+ }
+
+ public static void setChar(StringBuilder s, int i, int c) {
+ int l = 1;
+ char ch = s.charAt(i);
+ boolean high = isHighSurrogate(ch);
+ boolean low = isLowSurrogate(ch);
+ if (high || low) {
+ if (high && (i+1) < s.length() && isLowSurrogate(s.charAt(i+1))) l++;
+ else {
+ if (low && i > 0 && isHighSurrogate(s.charAt(i-1))) {
+ i--; l++;
+ }
+ }
+ }
+ s.replace(i, i+l, toString(c));
+ }
+
+ public static int size(int c) {
+ return (isSupplementary(c)) ? 2 : 1;
+ }
+
+ private static String supplementaryToString(int c) {
+ StringBuffer buf = new StringBuffer();
+ buf.append((char)getHighSurrogate(c));
+ buf.append((char)getLowSurrogate(c));
+ return buf.toString();
+ }
+
+ public static String toString(int c) {
+ return (isSupplementary(c)) ?
+ supplementaryToString(c) :
+ String.valueOf((char)c);
+ }
+
+
+
+ private static final char LRE = 0x202A;
+ private static final char RLE = 0x202B;
+ private static final char LRO = 0x202D;
+ private static final char RLO = 0x202E;
+ private static final char LRM = 0x200E;
+ private static final char RLM = 0x200F;
+ private static final char PDF = 0x202C;
+
+ /**
+ * Removes leading and trailing bidi controls from the string
+ */
+ public static String stripBidi(String s) {
+ if (s == null || s.length() <= 1) return s;
+ if (charIsBidiControl(s.charAt(0)))
+ s = s.substring(1);
+ if (charIsBidiControl(s.charAt(s.length()-1)))
+ s = s.substring(0,s.length()-1);
+ return s;
+ }
+
+ /**
+ * Returns true if the character is a bidi control
+ */
+ public static boolean charIsBidiControl(char c) {
+ return c == 0x202A ||
+ c == LRE ||
+ c == RLE ||
+ c == LRO ||
+ c == RLO ||
+ c == RLM ||
+ c == LRM ||
+ c == PDF;
+ }
+
+ private static String wrap(String s, char c1, char c2) {
+ StringBuffer buf = new StringBuffer(s);
+ if (buf.length() > 1) {
+ if (buf.charAt(0) != c1) buf.insert(0, c1);
+ if (buf.charAt(buf.length()-1) != c2) buf.append(c2);
+ }
+ return buf.toString();
+ }
+
+ /**
+ * Wrap the string with Bidi Right-to-Left embed
+ */
+ public static String bidiRLE(String s) {
+ return wrap(s,RLE,PDF);
+ }
+
+ /**
+ * Wrap the string with Bidi Right-to-Left override
+ */
+ public static String bidiRLO(String s) {
+ return wrap(s,RLO,PDF);
+ }
+
+ /**
+ * Wrap the string with Bidi Left-to-Right embed
+ */
+ public static String bidiLRE(String s) {
+ return wrap(s,LRE,PDF);
+ }
+
+ /**
+ * Wrap the string with Bidi Left-to-Right override
+ */
+ public static String bidiLRO(String s) {
+ return wrap(s,LRO,PDF);
+ }
+
+ /**
+ * Wrap the string with Bidi RML marks
+ */
+ public static String bidiRLM(String s) {
+ return wrap(s,RLM,RLM);
+ }
+
+ /**
+ * Wrap the string with Bidi LRM marks
+ */
+ public static String bidiLRM(String s) {
+ return wrap(s,LRM,LRM);
+ }
+}
Added: directory/sandbox/elecharny/trunks/shared/ldap/src/main/unicode/CodepointIterator.java
URL: http://svn.apache.org/viewvc/directory/sandbox/elecharny/trunks/shared/ldap/src/main/unicode/CodepointIterator.java?view=auto&rev=490270
==============================================================================
--- directory/sandbox/elecharny/trunks/shared/ldap/src/main/unicode/CodepointIterator.java (added)
+++ directory/sandbox/elecharny/trunks/shared/ldap/src/main/unicode/CodepointIterator.java Tue Dec 26 00:37:23 2006
@@ -0,0 +1,167 @@
+/*
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements. The ASF licenses this file to You
+* under the Apache License, Version 2.0 (the "License"); you may not
+* use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License. For additional information regarding
+* copyright in this work, please see the NOTICE file in the top level
+* directory of this distribution.
+*/
+package org.apache.directory.shared.ldap.util.unicode;
+
+import java.nio.CharBuffer;
+
+/**
+ * Iterate over Unicode codepoints.
+ *
+ * Subclasses supply the underlying chars through {@link #get()} and
+ * {@link #get(int)}; this class combines UTF-16 surrogate pairs into
+ * codepoints. Positions and the limit are expressed in chars.
+ */
+public abstract class CodepointIterator {
+
+  /** Creates an iterator over the given char array. */
+  public static CodepointIterator forCharArray(char[] array) {
+    return new CharArrayCodepointIterator(array);
+  }
+
+  /** Creates an iterator over the given char sequence. */
+  public static CodepointIterator forCharSequence(CharSequence seq) {
+    return new CharSequenceCodepointIterator(seq);
+  }
+
+  /** Creates an iterator over the given byte array. */
+  public static CodepointIterator forByteArray(byte[] array) {
+    return new ByteArrayCodepointIterator(array);
+  }
+
+  /** Creates an iterator over the given char buffer. */
+  public static CodepointIterator forCharBuffer(CharBuffer buffer) {
+    return new CharBufferCodepointIterator(buffer);
+  }
+
+  /** Index of the next char to read; -1 until a subclass sets it. */
+  protected int position = -1;
+
+  /** Index one past the last readable char; -1 until a subclass sets it. */
+  protected int limit = -1;
+
+  /** Returns the char at the current position and advances by one char. */
+  protected abstract char get();
+
+  /** Returns the char at the given index without moving the position. */
+  protected abstract char get(int index);
+
+  /**
+   * Returns true if there are chars left to read.
+   */
+  public boolean hasNext() {
+    return remaining() > 0;
+  }
+
+  /**
+   * Returns the char just before the current position (a single char, not
+   * a combined codepoint), or -1 if the position is at or before the start.
+   */
+  public int last() {
+    return (position() > 0) ? get(position() - 1) : -1;
+  }
+
+  /**
+   * Returns -1 if the position is negative, the position itself if it has
+   * reached the limit, and otherwise the position minus one.
+   */
+  public int lastPosition() {
+    int p = position();
+    if (p <= -1) {
+      return -1;
+    }
+    return (p >= limit()) ? p : p - 1;
+  }
+
+  /**
+   * Reads the next codepoint and returns its chars: one char, or a
+   * {high, low} surrogate pair. Returns null if nothing is left.
+   *
+   * A surrogate that has no neighbour to pair with (a high surrogate that is
+   * the last char, or a low surrogate that is the first char) is returned as
+   * a single char.
+   *
+   * @throws InvalidCharacterException if a surrogate is adjacent to a char
+   *   that does not complete the pair
+   */
+  public char[] nextChars() throws InvalidCharacterException {
+    if (!hasNext()) {
+      return null;
+    }
+    // c1 is consumed exactly once; every path below must return it rather
+    // than calling get() for it a second time.
+    char c1 = get();
+    if (CharUtils.isHighSurrogate(c1)) {
+      if (position() < limit()) {
+        char c2 = get();
+        if (CharUtils.isLowSurrogate(c2)) {
+          return new char[] {c1,c2};
+        }
+        throw new InvalidCharacterException(c2);
+      }
+    } else if (CharUtils.isLowSurrogate(c1)) {
+      // position() already points past c1, so the preceding char sits at
+      // position()-2 and only exists when position() is at least 2
+      if (position() > 1) {
+        char c2 = get(position()-2);
+        if (CharUtils.isHighSurrogate(c2)) {
+          // high half first, as next() and peekChars() expect
+          return new char[] {c2,c1};
+        }
+        throw new InvalidCharacterException(c2);
+      }
+    }
+    return new char[] {c1};
+  }
+
+  /**
+   * Like {@link #nextChars()} but does not move the position.
+   */
+  public char[] peekChars() throws InvalidCharacterException {
+    return peekChars(position());
+  }
+
+  /**
+   * Returns the chars of the codepoint at pos without moving the position,
+   * or null if pos is outside [0, limit).
+   */
+  private char[] peekChars(int pos) throws InvalidCharacterException {
+    if (pos < 0 || pos >= limit()) {
+      return null;
+    }
+    char c1 = get(pos);
+    if (CharUtils.isHighSurrogate(c1) && pos + 1 < limit()) {
+      char c2 = get(pos+1);
+      if (CharUtils.isLowSurrogate(c2)) {
+        return new char[] {c1,c2};
+      }
+      throw new InvalidCharacterException(c2);
+    } else if (CharUtils.isLowSurrogate(c1) && pos > 0) {
+      char c2 = get(pos-1);
+      if (CharUtils.isHighSurrogate(c2)) {
+        return new char[] {c2,c1};
+      }
+      throw new InvalidCharacterException(c2);
+    }
+    return new char[] {c1};
+  }
+
+  /**
+   * Converts the result of nextChars()/peekChars() into a codepoint,
+   * mapping null to -1.
+   */
+  private static int toCodepoint(char[] chars) {
+    if (chars == null) {
+      return -1;
+    }
+    return (chars.length == 1) ? chars[0] :
+      CharUtils.toCodePoint(chars[0], chars[1]);
+  }
+
+  /**
+   * Reads and returns the next codepoint, or -1 if nothing is left.
+   */
+  public int next() throws InvalidCharacterException {
+    return toCodepoint(nextChars());
+  }
+
+  /**
+   * Returns the next codepoint without moving the position, or -1 if
+   * nothing is left.
+   */
+  public int peek() throws InvalidCharacterException {
+    return toCodepoint(peekChars());
+  }
+
+  /**
+   * Returns the codepoint at the given char index without moving the
+   * position, or -1 if the index is outside [0, limit).
+   */
+  public int peek(int index) throws InvalidCharacterException {
+    return toCodepoint(peekChars(index));
+  }
+
+  /**
+   * Moves the position to n. n may equal the limit (end of input).
+   *
+   * @throws ArrayIndexOutOfBoundsException if n is negative or beyond the limit
+   */
+  public void position(int n) {
+    if (n < 0 || n > limit()) {
+      throw new ArrayIndexOutOfBoundsException(n);
+    }
+    position = n;
+  }
+
+  /** Returns the index of the next char to read. */
+  public int position() {
+    return position;
+  }
+
+  /** Returns the index one past the last readable char. */
+  public int limit() {
+    return limit;
+  }
+
+  /** Returns the number of chars between the position and the limit. */
+  public int remaining() {
+    return limit - position();
+  }
+
+  /**
+   * Returns true if the char at index is a high surrogate.
+   *
+   * @throws ArrayIndexOutOfBoundsException if index is outside [0, limit)
+   */
+  public boolean isHigh(int index) {
+    // index == limit() is already one past the last readable char
+    if (index < 0 || index >= limit()) {
+      throw new ArrayIndexOutOfBoundsException(index);
+    }
+    return CharUtils.isHighSurrogate(get(index));
+  }
+
+  /**
+   * Returns true if the char at index is a low surrogate.
+   *
+   * @throws ArrayIndexOutOfBoundsException if index is outside [0, limit)
+   */
+  public boolean isLow(int index) {
+    // index == limit() is already one past the last readable char
+    if (index < 0 || index >= limit()) {
+      throw new ArrayIndexOutOfBoundsException(index);
+    }
+    return CharUtils.isLowSurrogate(get(index));
+  }
+
+}
Added: directory/sandbox/elecharny/trunks/shared/ldap/src/main/unicode/FilterCodepointIterator.java
URL: http://svn.apache.org/viewvc/directory/sandbox/elecharny/trunks/shared/ldap/src/main/unicode/FilterCodepointIterator.java?view=auto&rev=490270
==============================================================================
--- directory/sandbox/elecharny/trunks/shared/ldap/src/main/unicode/FilterCodepointIterator.java (added)
+++ directory/sandbox/elecharny/trunks/shared/ldap/src/main/unicode/FilterCodepointIterator.java Tue Dec 26 00:37:23 2006
@@ -0,0 +1,103 @@
+/*
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements. The ASF licenses this file to You
+* under the Apache License, Version 2.0 (the "License"); you may not
+* use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License. For additional information regarding
+* copyright in this work, please see the NOTICE file in the top level
+* directory of this distribution.
+*/
+package org.apache.directory.shared.ldap.util.unicode;
+
+/**
+ * Base class for a CodepointIterator that wraps another CodepointIterator.
+ * Every operation overridden here is forwarded unchanged to the wrapped
+ * iterator; subclasses override the ones they want to filter.
+ */
+public abstract class FilterCodepointIterator
+  extends CodepointIterator {
+
+  /** The wrapped iterator that does the actual work. */
+  private final CodepointIterator delegate;
+
+  /**
+   * @param internal the iterator to wrap
+   */
+  protected FilterCodepointIterator(CodepointIterator internal) {
+    delegate = internal;
+  }
+
+  // ---- raw char access ----
+
+  @Override
+  protected char get() {
+    return delegate.get();
+  }
+
+  @Override
+  protected char get(int index) {
+    return delegate.get(index);
+  }
+
+  // ---- position bookkeeping ----
+
+  @Override
+  public int position() {
+    return delegate.position();
+  }
+
+  @Override
+  public void position(int position) {
+    delegate.position(position);
+  }
+
+  @Override
+  public int limit() {
+    return delegate.limit();
+  }
+
+  @Override
+  public int remaining() {
+    return delegate.remaining();
+  }
+
+  @Override
+  public boolean hasNext() {
+    return delegate.hasNext();
+  }
+
+  // ---- reading ----
+
+  @Override
+  public int next() throws InvalidCharacterException {
+    return delegate.next();
+  }
+
+  @Override
+  public char[] nextChars() throws InvalidCharacterException {
+    return delegate.nextChars();
+  }
+
+  // ---- peeking ----
+
+  @Override
+  public int peek() throws InvalidCharacterException {
+    return delegate.peek();
+  }
+
+  @Override
+  public int peek(int index) throws InvalidCharacterException {
+    return delegate.peek(index);
+  }
+
+  @Override
+  public char[] peekChars() throws InvalidCharacterException {
+    return delegate.peekChars();
+  }
+
+  // ---- surrogate tests ----
+
+  @Override
+  public boolean isHigh(int index) {
+    return delegate.isHigh(index);
+  }
+
+  @Override
+  public boolean isLow(int index) {
+    return delegate.isLow(index);
+  }
+
+}