You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@abdera.apache.org by jm...@apache.org on 2008/01/01 05:59:47 UTC
svn commit: r607801 [1/5] - in /incubator/abdera/java/trunk:
client/src/main/java/org/apache/abdera/protocol/client/
core/src/main/java/org/apache/abdera/util/
dependencies/i18n/src/main/java/org/apache/abdera/i18n/io/
dependencies/i18n/src/main/java/o...
Author: jmsnell
Date: Mon Dec 31 20:59:44 2007
New Revision: 607801
URL: http://svn.apache.org/viewvc?rev=607801&view=rev
Log:
Significant refactoring and improvements to the i18n Unicode, text-handling and I/O code.
Most of the text-manipulation and Unicode-related code is now bundled together in the
org.apache.abdera.i18n.text package. The Unicode and Nameprep implementations have been
improved using inversion sets where appropriate. This will yield a performance increase
and a decrease in memory consumption for certain operations. Also, the Nameprep implementation
previously loaded and parsed a file to initialize the algorithm; this step has been removed. Many other
improvements have been made, most of which are internal. I'll be doing some additional refactoring
here and there.
Added:
incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/text/
incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/text/CharUtils.java
incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/text/Codepoint.java
incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/text/CodepointIterator.java
incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/text/DelegatingCodepointIterator.java
incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/text/Filter.java
incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/text/InvalidCharacterException.java
incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/text/Nameprep.java
incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/text/Normalizer.java
incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/text/Punycode.java
incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/text/Sanitizer.java
incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/text/UrlEncoding.java
incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/text/data/
incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/text/data/CompositionExclusions.txt
incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/text/data/Generator.java
incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/text/data/UnicodeCharacterDatabase.java
incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/text/data/UnicodeData.txt
incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/text/io/
incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/text/io/CharsetSniffingInputStream.java
incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/text/io/DynamicPushbackInputStream.java
incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/text/io/FilteredCharReader.java
incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/text/io/InputStreamDataSource.java
incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/text/io/PeekAheadInputStream.java
incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/text/io/PipeChannel.java
incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/text/io/RewindableInputStream.java
incubator/abdera/java/trunk/examples/src/main/java/org/apache/abdera/examples/unicode/
incubator/abdera/java/trunk/examples/src/main/java/org/apache/abdera/examples/unicode/NormalizationExample.java
Removed:
incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/io/
incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/iri/Escaping.java
incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/iri/Nameprep.java
incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/iri/NameprepCodepointIterator.java
incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/iri/Punycode.java
incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/unicode/
Modified:
incubator/abdera/java/trunk/client/src/main/java/org/apache/abdera/protocol/client/RequestOptions.java
incubator/abdera/java/trunk/core/src/main/java/org/apache/abdera/util/XmlRestrictedCharReader.java
incubator/abdera/java/trunk/core/src/main/java/org/apache/abdera/util/XmlUtil.java
incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/iri/HttpScheme.java
incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/iri/IDNA.java
incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/iri/IRI.java
incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/lang/Lang.java
incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/templates/Operation.java
incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/templates/Template.java
incubator/abdera/java/trunk/dependencies/i18n/src/test/java/org/apache/abdera/i18n/test/iri/TestIRI.java
incubator/abdera/java/trunk/dependencies/i18n/src/test/java/org/apache/abdera/i18n/test/iri/TestNFKC.java
incubator/abdera/java/trunk/dependencies/i18n/src/test/java/org/apache/abdera/i18n/test/iri/TestNameprep.java
incubator/abdera/java/trunk/dependencies/i18n/src/test/java/org/apache/abdera/i18n/test/iri/TestPunycode.java
incubator/abdera/java/trunk/extensions/converters/src/main/java/org/apache/abdera/converter/impl/ContentConverter.java
incubator/abdera/java/trunk/extensions/json/src/main/java/org/apache/abdera/ext/json/JSONUtil.java
incubator/abdera/java/trunk/extensions/main/src/main/java/org/apache/abdera/ext/bidi/BidiHelper.java
incubator/abdera/java/trunk/parser/src/main/java/org/apache/abdera/parser/stax/FOMEntry.java
incubator/abdera/java/trunk/parser/src/main/java/org/apache/abdera/parser/stax/util/FOMSniffingInputStream.java
incubator/abdera/java/trunk/parser/src/main/java/org/apache/abdera/parser/stax/util/FOMXmlVersionInputStream.java
incubator/abdera/java/trunk/parser/src/test/java/org/apache/abdera/test/parser/EncodingTest.java
incubator/abdera/java/trunk/parser/src/test/java/org/apache/abdera/test/parser/stax/FeedParserTest.java
incubator/abdera/java/trunk/protocol/src/main/java/org/apache/abdera/protocol/util/AbstractMessage.java
incubator/abdera/java/trunk/protocol/src/main/java/org/apache/abdera/protocol/util/EncodingUtil.java
incubator/abdera/java/trunk/server/src/main/java/org/apache/abdera/protocol/server/ResponseContext.java
incubator/abdera/java/trunk/server/src/main/java/org/apache/abdera/protocol/server/impl/AbstractCollectionProvider.java
incubator/abdera/java/trunk/server/src/main/java/org/apache/abdera/protocol/server/impl/AbstractResponseContext.java
incubator/abdera/java/trunk/server/src/main/java/org/apache/abdera/protocol/server/impl/AbstractServiceProvider.java
incubator/abdera/java/trunk/server/src/main/java/org/apache/abdera/protocol/server/servlet/AbstractFilter.java
Modified: incubator/abdera/java/trunk/client/src/main/java/org/apache/abdera/protocol/client/RequestOptions.java
URL: http://svn.apache.org/viewvc/incubator/abdera/java/trunk/client/src/main/java/org/apache/abdera/protocol/client/RequestOptions.java?rev=607801&r1=607800&r2=607801&view=diff
==============================================================================
--- incubator/abdera/java/trunk/client/src/main/java/org/apache/abdera/protocol/client/RequestOptions.java (original)
+++ incubator/abdera/java/trunk/client/src/main/java/org/apache/abdera/protocol/client/RequestOptions.java Mon Dec 31 20:59:44 2007
@@ -26,8 +26,8 @@
import javax.activation.MimeType;
-import org.apache.abdera.i18n.io.CharUtils.Profile;
-import org.apache.abdera.i18n.iri.Escaping;
+import org.apache.abdera.i18n.text.UrlEncoding;
+import org.apache.abdera.i18n.text.CharUtils.Profile;
import org.apache.abdera.protocol.Request;
import org.apache.abdera.protocol.util.AbstractRequest;
import org.apache.abdera.protocol.util.CacheControlUtil;
@@ -425,7 +425,7 @@
slug.indexOf((char)13) > -1)
throw new IllegalArgumentException(
Messages.get("SLUG.BAD.CHARACTERS"));
- return setHeader("Slug", Escaping.encode(slug,Profile.ASCIISANSCRLF));
+ return setHeader("Slug", UrlEncoding.encode(slug,Profile.ASCIISANSCRLF.filter()));
}
/**
Modified: incubator/abdera/java/trunk/core/src/main/java/org/apache/abdera/util/XmlRestrictedCharReader.java
URL: http://svn.apache.org/viewvc/incubator/abdera/java/trunk/core/src/main/java/org/apache/abdera/util/XmlRestrictedCharReader.java?rev=607801&r1=607800&r2=607801&view=diff
==============================================================================
--- incubator/abdera/java/trunk/core/src/main/java/org/apache/abdera/util/XmlRestrictedCharReader.java (original)
+++ incubator/abdera/java/trunk/core/src/main/java/org/apache/abdera/util/XmlRestrictedCharReader.java Mon Dec 31 20:59:44 2007
@@ -17,13 +17,12 @@
*/
package org.apache.abdera.util;
-import java.io.FilterReader;
-import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.io.UnsupportedEncodingException;
+import org.apache.abdera.i18n.text.io.FilteredCharReader;
import org.apache.abdera.util.XmlUtil.XMLVersion;
/**
@@ -35,15 +34,12 @@
* is a valid XML character itself.
*/
public class XmlRestrictedCharReader
- extends FilterReader {
+ extends FilteredCharReader {
/**
* The XMLVersion determines which set of restrictions to apply depending
* on the XML version being parsed
*/
- private final XMLVersion version;
- private final char replacement;
-
public XmlRestrictedCharReader(InputStream in) {
this(new InputStreamReader(in));
}
@@ -120,37 +116,7 @@
Reader in,
XMLVersion version,
char replacement) {
- super(in);
- this.version = version;
- this.replacement = replacement;
- if (replacement != 0 &&
- ((!Character.isValidCodePoint(replacement)) ||
- XmlUtil.restricted(version,replacement)))
- throw new IllegalArgumentException();
- }
-
- @Override
- public int read() throws IOException {
- int c = -1;
- if (replacement == 0) {
- while(((c = super.read()) != -1 && XmlUtil.restricted(version,c))) {}
- } else {
- c = super.read();
- if (c != -1 && XmlUtil.restricted(version,c)) c = replacement;
- }
- return c;
- }
-
- @Override
- public int read(char[] cbuf, int off, int len) throws IOException {
- int n = off;
- for (; n < Math.min(len,cbuf.length-off); n++) {
- int r = read();
- if (r != -1) cbuf[n] = (char)r;
- else break;
- }
- n -= off;
- return n <= 0 ? -1 : n;
+ super(in,version.filter(),replacement);
}
}
Modified: incubator/abdera/java/trunk/core/src/main/java/org/apache/abdera/util/XmlUtil.java
URL: http://svn.apache.org/viewvc/incubator/abdera/java/trunk/core/src/main/java/org/apache/abdera/util/XmlUtil.java?rev=607801&r1=607800&r2=607801&view=diff
==============================================================================
--- incubator/abdera/java/trunk/core/src/main/java/org/apache/abdera/util/XmlUtil.java (original)
+++ incubator/abdera/java/trunk/core/src/main/java/org/apache/abdera/util/XmlUtil.java Mon Dec 31 20:59:44 2007
@@ -18,10 +18,31 @@
package org.apache.abdera.util;
import org.apache.abdera.i18n.ChainableBitSet;
+import org.apache.abdera.i18n.text.Filter;
public class XmlUtil {
- public enum XMLVersion { XML10, XML11 };
+ /**
+  * XML versions understood by this utility. Each constant carries a
+  * {@link Filter} built from an XmlFilter bound to that constant.
+  */
+ public enum XMLVersion {
+ XML10,
+ XML11;
+ // Created once per constant, in the constructor below.
+ private final Filter filter;
+ XMLVersion() {
+ this.filter = new XmlFilter(this);
+ }
+ /** Returns the filter that was created for this version. */
+ public Filter filter() {
+ return filter;
+ }
+ };
+
+ /**
+  * Filter that accepts a code point when restricted(version, c) reports
+  * false for the XML version this instance is bound to.
+  */
+ private static class XmlFilter implements Filter {
+   private final XMLVersion version;
+   XmlFilter(XMLVersion version) {
+     this.version = version;
+   }
+   public boolean accept(int c) {
+     if (restricted(version, c)) return false;
+     return true;
+   }
+ }
private static final ChainableBitSet restrictedchar10 =
new ChainableBitSet().set2(0, 8)
Modified: incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/iri/HttpScheme.java
URL: http://svn.apache.org/viewvc/incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/iri/HttpScheme.java?rev=607801&r1=607800&r2=607801&view=diff
==============================================================================
--- incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/iri/HttpScheme.java (original)
+++ incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/iri/HttpScheme.java Mon Dec 31 20:59:44 2007
@@ -17,8 +17,8 @@
*/
package org.apache.abdera.i18n.iri;
-import org.apache.abdera.i18n.io.CharUtils.Profile;
-
+import org.apache.abdera.i18n.text.UrlEncoding;
+import org.apache.abdera.i18n.text.CharUtils.Profile;
class HttpScheme extends AbstractScheme {
@@ -54,8 +54,8 @@
host,
port,
IRI.normalize(iri.getPath()),
- Escaping.encode(Escaping.decode(iri.getQuery()),Profile.IQUERY),
- Escaping.encode(Escaping.decode(iri.getFragment()),Profile.IFRAGMENT)
+ UrlEncoding.encode(UrlEncoding.decode(iri.getQuery()),Profile.IQUERY.filter()),
+ UrlEncoding.encode(UrlEncoding.decode(iri.getFragment()),Profile.IFRAGMENT.filter())
);
}
Modified: incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/iri/IDNA.java
URL: http://svn.apache.org/viewvc/incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/iri/IDNA.java?rev=607801&r1=607800&r2=607801&view=diff
==============================================================================
--- incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/iri/IDNA.java (original)
+++ incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/iri/IDNA.java Mon Dec 31 20:59:44 2007
@@ -21,8 +21,10 @@
import java.io.Serializable;
import java.net.UnknownHostException;
-import org.apache.abdera.i18n.io.CharUtils;
-import org.apache.abdera.i18n.io.CharUtils.Profile;
+import org.apache.abdera.i18n.text.CharUtils;
+import org.apache.abdera.i18n.text.Nameprep;
+import org.apache.abdera.i18n.text.Punycode;
+import org.apache.abdera.i18n.text.CharUtils.Profile;
/**
Modified: incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/iri/IRI.java
URL: http://svn.apache.org/viewvc/incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/iri/IRI.java?rev=607801&r1=607800&r2=607801&view=diff
==============================================================================
--- incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/iri/IRI.java (original)
+++ incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/iri/IRI.java Mon Dec 31 20:59:44 2007
@@ -24,11 +24,13 @@
import java.util.regex.Matcher;
import java.util.regex.Pattern;
-import org.apache.abdera.i18n.io.CharUtils;
-import org.apache.abdera.i18n.io.InvalidCharacterException;
-import org.apache.abdera.i18n.io.CharUtils.Profile;
-import org.apache.abdera.i18n.unicode.Normalizer;
-import org.apache.abdera.i18n.unicode.UnicodeCharacterDatabase;
+import org.apache.abdera.i18n.text.CharUtils;
+import org.apache.abdera.i18n.text.InvalidCharacterException;
+import org.apache.abdera.i18n.text.Nameprep;
+import org.apache.abdera.i18n.text.Normalizer;
+import org.apache.abdera.i18n.text.UrlEncoding;
+import org.apache.abdera.i18n.text.CharUtils.Profile;
+import org.apache.abdera.i18n.text.data.UnicodeCharacterDatabase;
public final class IRI
implements Serializable,
@@ -139,10 +141,10 @@
private void init() {
a_host = IDNA.toASCII(host);
- a_fragment = Escaping.encode(fragment,Profile.FRAGMENT);
- a_path = Escaping.encode(path, Profile.PATH);
- a_query = Escaping.encode(query,Profile.QUERY, Profile.PATH);
- a_userinfo = Escaping.encode(userinfo,Profile.USERINFO);
+ a_fragment = UrlEncoding.encode(fragment,Profile.FRAGMENT.filter());
+ a_path = UrlEncoding.encode(path, Profile.PATH.filter());
+ a_query = UrlEncoding.encode(query,Profile.QUERY.filter(), Profile.PATH.filter());
+ a_userinfo = UrlEncoding.encode(userinfo,Profile.USERINFO.filter());
a_authority = buildASCIIAuthority();
}
@@ -285,9 +287,9 @@
buildAuthority(buf,aui,ah,port);
return buf.toString();
} else {
- return Escaping.encode(
+ return UrlEncoding.encode(
authority,
- Profile.AUTHORITY);
+ Profile.AUTHORITY.filter());
}
}
@@ -485,8 +487,8 @@
iri.getHost(),
iri.getPort(),
normalize(iri.getPath()),
- Escaping.encode(Escaping.decode(iri.getQuery()),Profile.IQUERY),
- Escaping.encode(Escaping.decode(iri.getFragment()),Profile.IFRAGMENT)
+ UrlEncoding.encode(UrlEncoding.decode(iri.getQuery()),Profile.IQUERY.filter()),
+ UrlEncoding.encode(UrlEncoding.decode(iri.getFragment()),Profile.IFRAGMENT.filter())
);
}
@@ -512,10 +514,10 @@
if (segments[n] != null) {
if (buf.length() > 1) buf.append('/');
buf.append(
- Escaping.encode(
- Escaping.decode(
+ UrlEncoding.encode(
+ UrlEncoding.decode(
segments[n]),
- Profile.IPATHNODELIMS));
+ Profile.IPATHNODELIMS.filter()));
}
}
if (path.endsWith("/") || path.endsWith("/."))
@@ -552,8 +554,8 @@
buf.append(':');
}
buf.append(getSchemeSpecificPart());
- return Escaping.encode(buf.toString(),
- Profile.SCHEMESPECIFICPART);
+ return UrlEncoding.encode(buf.toString(),
+ Profile.SCHEMESPECIFICPART.filter());
}
public String toASCIIString() {
@@ -568,7 +570,7 @@
}
public String toBIDIString() {
- return CharUtils.bidiLRE(toString());
+ return CharUtils.wrapBidi(toString(),CharUtils.LRE);
}
public java.net.URI toURI()
Modified: incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/lang/Lang.java
URL: http://svn.apache.org/viewvc/incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/lang/Lang.java?rev=607801&r1=607800&r2=607801&view=diff
==============================================================================
--- incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/lang/Lang.java (original)
+++ incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/lang/Lang.java Mon Dec 31 20:59:44 2007
@@ -21,9 +21,9 @@
import java.util.Iterator;
import java.util.Locale;
-import org.apache.abdera.i18n.io.CharUtils;
-import org.apache.abdera.i18n.io.InvalidCharacterException;
-import org.apache.abdera.i18n.io.CharUtils.Profile;
+import org.apache.abdera.i18n.text.CharUtils;
+import org.apache.abdera.i18n.text.InvalidCharacterException;
+import org.apache.abdera.i18n.text.CharUtils.Profile;
/**
Modified: incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/templates/Operation.java
URL: http://svn.apache.org/viewvc/incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/templates/Operation.java?rev=607801&r1=607800&r2=607801&view=diff
==============================================================================
--- incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/templates/Operation.java (original)
+++ incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/templates/Operation.java Mon Dec 31 20:59:44 2007
@@ -25,9 +25,9 @@
import java.util.List;
import java.util.Map;
-import org.apache.abdera.i18n.io.CharUtils;
-import org.apache.abdera.i18n.iri.Escaping;
-import org.apache.abdera.i18n.unicode.Normalizer;
+import org.apache.abdera.i18n.text.CharUtils;
+import org.apache.abdera.i18n.text.Normalizer;
+import org.apache.abdera.i18n.text.UrlEncoding;
@SuppressWarnings("unchecked")
public abstract class Operation
@@ -144,21 +144,17 @@
if (val == null) return null;
if (val.getClass().isArray()) {
if (val instanceof byte[]) {
- return Escaping.encode((byte[])val);
+ return UrlEncoding.encode((byte[])val);
} else if (val instanceof char[]) {
- try {
- String chars = new String((char[])val);
- return Escaping.encode(
- !context.isNormalizing() ? chars :
- Normalizer.normalize(
- chars,
- Normalizer.Form.C).toString(),
- context.isIri() ?
- CharUtils.Profile.IUNRESERVED :
- CharUtils.Profile.UNRESERVED);
- } catch (IOException e) {
- throw new RuntimeException(e);
- }
+ String chars = new String((char[])val);
+ return UrlEncoding.encode(
+ !context.isNormalizing() ? chars :
+ Normalizer.normalize(
+ chars,
+ Normalizer.Form.C).toString(),
+ context.isIri() ?
+ CharUtils.Profile.IUNRESERVED.filter() :
+ CharUtils.Profile.UNRESERVED.filter());
} else if (val instanceof short[]) {
StringBuilder buf = new StringBuilder();
short[] array = (short[]) val;
@@ -206,18 +202,18 @@
return toString(((Template)val).getPattern(),context);
} else if (val instanceof InputStream) {
try {
- return Escaping.encode((InputStream)val);
+ return UrlEncoding.encode((InputStream)val);
} catch (IOException e) {
throw new RuntimeException(e);
}
} else if (val instanceof Readable) {
try {
- return Escaping.encode(
+ return UrlEncoding.encode(
(Readable)val,
"UTF-8",
context.isIri() ?
- CharUtils.Profile.IUNRESERVED :
- CharUtils.Profile.UNRESERVED);
+ CharUtils.Profile.IUNRESERVED.filter() :
+ CharUtils.Profile.UNRESERVED.filter());
} catch (IOException e) {
throw new RuntimeException(e);
}
@@ -227,7 +223,7 @@
context.isIri(),
context.isNormalizing());
} else if (val instanceof Byte) {
- return Escaping.encode(((Byte)val).byteValue());
+ return UrlEncoding.encode(((Byte)val).byteValue());
} else if (val instanceof Iterable) {
StringBuilder buf = new StringBuilder();
Iterable i = (Iterable) val;
@@ -308,18 +304,14 @@
CharSequence val,
boolean isiri,
boolean normalizing) {
- try {
- return Escaping.encode(
- !normalizing ? val :
- Normalizer.normalize(
- val,
- Normalizer.Form.C).toString(),
- isiri ?
- CharUtils.Profile.IUNRESERVED :
- CharUtils.Profile.UNRESERVED);
- } catch (IOException e) {
- throw new RuntimeException(e);
- }
+ return UrlEncoding.encode(
+ !normalizing ? val :
+ Normalizer.normalize(
+ val,
+ Normalizer.Form.C).toString(),
+ isiri ?
+ CharUtils.Profile.IUNRESERVED.filter() :
+ CharUtils.Profile.UNRESERVED.filter());
}
private static final class DefaultOperation extends Operation {
Modified: incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/templates/Template.java
URL: http://svn.apache.org/viewvc/incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/templates/Template.java?rev=607801&r1=607800&r2=607801&view=diff
==============================================================================
--- incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/templates/Template.java (original)
+++ incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/templates/Template.java Mon Dec 31 20:59:44 2007
@@ -27,7 +27,7 @@
import java.util.regex.Matcher;
import java.util.regex.Pattern;
-import org.apache.abdera.i18n.io.CharUtils;
+import org.apache.abdera.i18n.text.CharUtils;
/**
* Used to evaluate a URI Template.
@@ -82,7 +82,7 @@
token,
forDisplay(token));
}
- return CharUtils.bidiLRO(pattern);
+ return CharUtils.wrapBidi(pattern, CharUtils.LRO);
}
private static String forDisplay(String token) {
@@ -91,7 +91,7 @@
buf.append('{');
if (splits.length == 1) {
String[] pair = splits[0].split("\\s*=\\s*");
- buf.append(CharUtils.bidiLRE(pair[0]));
+ buf.append(CharUtils.wrapBidi(pair[0], CharUtils.LRE));
if (pair.length > 1) {
buf.append('=');
buf.append(pair[1]);
@@ -106,7 +106,7 @@
for (String var : vars) {
if (i++ > 0) buf.append(",");
String[] pair = var.split("\\s*=\\s*");
- buf.append(CharUtils.bidiLRE(pair[0]));
+ buf.append(CharUtils.wrapBidi(pair[0], CharUtils.LRE));
if (pair.length > 1) {
buf.append('=');
buf.append(pair[1]);
@@ -243,7 +243,7 @@
StringBuilder buf = new StringBuilder();
buf.append("V:" + getPatternForDisplay());
buf.append('\n');
- buf.append("L:" + CharUtils.bidiLRO(getPattern()));
+ buf.append("L:" + CharUtils.wrapBidi(getPattern(),CharUtils.LRO));
buf.append('\n');
return buf.toString();
}
Added: incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/text/CharUtils.java
URL: http://svn.apache.org/viewvc/incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/text/CharUtils.java?rev=607801&view=auto
==============================================================================
--- incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/text/CharUtils.java (added)
+++ incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/text/CharUtils.java Mon Dec 31 20:59:44 2007
@@ -0,0 +1,817 @@
+/*
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements. The ASF licenses this file to You
+* under the Apache License, Version 2.0 (the "License"); you may not
+* use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License. For additional information regarding
+* copyright in this work, please see the NOTICE file in the top level
+* directory of this distribution.
+*/
+package org.apache.abdera.i18n.text;
+
+import java.io.IOException;
+
+
+/**
+ * General utilities for dealing with Unicode characters
+ */
+public final class CharUtils {
+
+ private CharUtils() {}
+
+ /**
+  * Tells whether the value lies between 0x000000 and 0x10ffff inclusive.
+  */
+ public static boolean isValid(int c) {
+   if (c < 0x000000) return false;
+   return c <= 0x10ffff;
+ }
+
+ /** Codepoint overload: checks the value returned by getValue(). */
+ public static boolean isValid(Codepoint c) {
+   final int value = c.getValue();
+   return isValid(value);
+ }
+
+ public static boolean inRange(char[] chars, char low, char high) {
+ for (int i = 0; i < chars.length; i++)
+ if (chars[i] < low || chars[i] > high) return false;
+ return true;
+ }
+
+ /**
+  * Returns true when every code point encoded in the array lies in
+  * [low, high] (inclusive). A high surrogate immediately followed by a low
+  * surrogate is combined into one supplementary code point; any other char
+  * is tested on its own. An empty array yields true.
+  */
+ public static boolean inRange(char[] chars, int low, int high) {
+   for (int i = 0; i < chars.length; i++) {
+     char n = chars[i];
+     Codepoint cp = (isHighSurrogate(n) &&
+                     i + 1 < chars.length &&
+                     isLowSurrogate(chars[i+1])) ?
+       // ++i (not i++): pair n with the FOLLOWING char and step past it.
+       // The post-increment form passed n itself as the low surrogate, so
+       // toSupplementary rejected every valid pair.
+       toSupplementary(n,chars[++i]) :
+       new Codepoint(n);
+     int c = cp.getValue();
+     if (c < low || c > high) return false;
+   }
+   return true;
+ }
+
+ /** Returns true when low <= codepoint <= high. */
+ public static boolean inRange(int codepoint, int low, int high) {
+   return !(codepoint < low || codepoint > high);
+ }
+
+ /** Codepoint overload: appends the value returned by getValue(). */
+ public static void append(Appendable buf, Codepoint c) {
+   final int value = c.getValue();
+   append(buf, value);
+ }
+
+ /**
+  * Appends the code point to buf: supplementary values are written as a
+  * high/low surrogate pair, everything else as a single (char) cast.
+  * An IOException from buf is rethrown wrapped in a RuntimeException.
+  */
+ public static void append(Appendable buf, int c) {
+   try {
+     if (!isSupplementary(c)) {
+       buf.append((char)c);
+       return;
+     }
+     buf.append(getHighSurrogate(c));
+     buf.append(getLowSurrogate(c));
+   } catch (IOException e) {
+     throw new RuntimeException(e);
+   }
+ }
+
+ /**
+  * Returns the high (leading) surrogate for c, or 0 when c is below 0x10000.
+  */
+ public static char getHighSurrogate(int c) {
+   if (c < 0x10000) return 0;
+   final int base = 0xD800 - (0x10000 >> 10);
+   return (char)(base + (c >> 10));
+ }
+
+ /**
+  * Returns the low (trailing) surrogate for c, or c itself cast to char
+  * when c is below 0x10000.
+  */
+ public static char getLowSurrogate(int c) {
+   if (c < 0x10000) return (char)c;
+   final int tenBits = c & 0x3FF;
+   return (char)(0xDC00 + tenBits);
+ }
+
+ /** True for chars in U+D800..U+DBFF. */
+ public static boolean isHighSurrogate(char c) {
+   return !(c < '\uD800' || c > '\uDBFF');
+ }
+
+ /** True for chars in U+DC00..U+DFFF. */
+ public static boolean isLowSurrogate(char c) {
+   return !(c < '\uDC00' || c > '\uDFFF');
+ }
+
+ /** True for values in 0x010000..0x10ffff inclusive. */
+ public static boolean isSupplementary(int c) {
+   if (c < 0x010000) return false;
+   return c <= 0x10ffff;
+ }
+
+ /** True when high is a high surrogate and low is a low surrogate. */
+ public static boolean isSurrogatePair(char high, char low) {
+   if (!isHighSurrogate(high)) return false;
+   return isLowSurrogate(low);
+ }
+
+ /**
+  * Combines a surrogate pair into one supplementary Codepoint.
+  *
+  * @throws IllegalArgumentException if high is not a high surrogate or
+  *         low is not a low surrogate
+  */
+ public static Codepoint toSupplementary(char high, char low) {
+   if (!isHighSurrogate(high))
+     throw new IllegalArgumentException("Invalid High Surrogate");
+   if (!isLowSurrogate(low))
+     throw new IllegalArgumentException("Invalid Low Surrogate");
+   final int lead = (high - '\uD800') << 10;
+   final int trail = low - '\uDC00';
+   return new Codepoint(lead + trail + 0x010000);
+ }
+
+ /**
+  * Returns the code point at char index i of s.
+  * <ul>
+  * <li>A non-surrogate char is returned as is.</li>
+  * <li>A high surrogate followed by a low surrogate, or a low surrogate
+  *     preceded by a high surrogate, yields the combined supplementary
+  *     code point.</li>
+  * <li>An unpaired surrogate is wrapped on its own.</li>
+  * </ul>
+  * NOTE(review): how Codepoint treats an unpaired surrogate value is not
+  * visible from this file - confirm against Codepoint.
+  *
+  * @throws IndexOutOfBoundsException if i is not a valid index into s
+  */
+ public static Codepoint codepointAt(String s, int i) {
+   char c = s.charAt(i);
+   if (c < 0xD800 || c > 0xDFFF) return new Codepoint(c);
+   if (isHighSurrogate(c)) {
+     // Only look ahead when a next char exists. The former test
+     // (s.length() != i) was always true for a valid index, so a high
+     // surrogate in the last position made charAt(i+1) throw.
+     if (i + 1 < s.length()) {
+       char low = s.charAt(i+1);
+       if (isLowSurrogate(low)) return toSupplementary(c,low);
+     }
+   } else if (isLowSurrogate(c)) {
+     if (i >= 1) {
+       char high = s.charAt(i-1);
+       if (isHighSurrogate(high)) return toSupplementary(high,c);
+     }
+   }
+   return new Codepoint(c);
+ }
+
+ /**
+  * Returns the code point at char index i of s.
+  * <ul>
+  * <li>A non-surrogate char is returned as is.</li>
+  * <li>A high surrogate followed by a low surrogate, or a low surrogate
+  *     preceded by a high surrogate, yields the combined supplementary
+  *     code point.</li>
+  * <li>An unpaired surrogate is wrapped on its own.</li>
+  * </ul>
+  * NOTE(review): how Codepoint treats an unpaired surrogate value is not
+  * visible from this file - confirm against Codepoint.
+  *
+  * @throws IndexOutOfBoundsException if i is not a valid index into s
+  */
+ public static Codepoint codepointAt(CharSequence s, int i) {
+   char c = s.charAt(i);
+   if (c < 0xD800 || c > 0xDFFF) return new Codepoint(c);
+   if (isHighSurrogate(c)) {
+     // Only look ahead when a next char exists. The former test
+     // (s.length() != i) was always true for a valid index, so a high
+     // surrogate in the last position made charAt(i+1) throw.
+     if (i + 1 < s.length()) {
+       char low = s.charAt(i+1);
+       if (isLowSurrogate(low)) return toSupplementary(c,low);
+     }
+   } else if (isLowSurrogate(c)) {
+     if (i >= 1) {
+       char high = s.charAt(i-1);
+       if (isHighSurrogate(high)) return toSupplementary(high,c);
+     }
+   }
+   return new Codepoint(c);
+ }
+
+ /** Codepoint overload: inserts the value returned by getValue(). */
+ public static void insert(CharSequence s, int i, Codepoint c) {
+   final int value = c.getValue();
+   insert(s, i, value);
+ }
+
+ public static void insert(CharSequence s, int i, int c) {
+ if (!(s instanceof StringBuilder) &&
+ !(s instanceof StringBuffer)) {
+ insert(new StringBuilder(s),i,c);
+ } else {
+ if (i > 0 && i < s.length()) {
+ char ch = s.charAt(i);
+ boolean low = isLowSurrogate(ch);
+ if (low) {
+ if (low && isHighSurrogate(s.charAt(i-1))) {
+ i--;
+ }
+ }
+ }
+ if (s instanceof StringBuffer)
+ ((StringBuffer)s).insert(i, toString(c));
+ else if (s instanceof StringBuilder)
+ ((StringBuilder)s).insert(i, toString(c));
+ }
+ }
+
+ /** Codepoint overload: stores the value returned by getValue(). */
+ public static void setChar(CharSequence s, int i, Codepoint c) {
+   final int value = c.getValue();
+   setChar(s, i, value);
+ }
+
+ public static void setChar(CharSequence s, int i, int c) {
+ if (!(s instanceof StringBuilder) &&
+ !(s instanceof StringBuffer)) {
+ setChar(new StringBuilder(s),i,c);
+ } else {
+ int l = 1;
+ char ch = s.charAt(i);
+ boolean high = isHighSurrogate(ch);
+ boolean low = isLowSurrogate(ch);
+ if (high || low) {
+ if (high && (i+1) < s.length() && isLowSurrogate(s.charAt(i+1))) l++;
+ else {
+ if (low && i > 0 && isHighSurrogate(s.charAt(i-1))) {
+ i--; l++;
+ }
+ }
+ }
+ if (s instanceof StringBuffer)
+ ((StringBuffer)s).replace(i, i+l, toString(c));
+ else if (s instanceof StringBuilder)
+ ((StringBuilder)s).replace(i, i+l, toString(c));
+ }
+ }
+
+ /** Returns c.getCharCount(). */
+ public static int length(Codepoint c) {
+   final int chars = c.getCharCount();
+   return chars;
+ }
+
+ /** Returns getCharCount() of a Codepoint constructed from c. */
+ public static int length(int c) {
+   final Codepoint cp = new Codepoint(c);
+   return cp.getCharCount();
+ }
+
+ /** Counts the items a CodepointIterator yields over the sequence. */
+ public static int length(CharSequence c) {
+   final CodepointIterator it = CodepointIterator.forCharSequence(c);
+   return length(it);
+ }
+
+ /** Counts the items a CodepointIterator yields over the array. */
+ public static int length(char[] c) {
+   final CodepointIterator it = CodepointIterator.forCharArray(c);
+   return length(it);
+ }
+
+ /** Drains the iterator and returns how many times next() was called. */
+ private static int length(CodepointIterator ci) {
+   int count = 0;
+   for (; ci.hasNext(); count++) {
+     ci.next();
+   }
+   return count;
+ }
+
+ private static String supplementaryToString(int c) {
+ StringBuilder buf = new StringBuilder();
+ buf.append((char)getHighSurrogate(c));
+ buf.append((char)getLowSurrogate(c));
+ return buf.toString();
+ }
+
+ /**
+  * Returns c as a String: a surrogate pair for supplementary values,
+  * otherwise the single char obtained by casting.
+  */
+ public static String toString(int c) {
+   if (isSupplementary(c)) {
+     return supplementaryToString(c);
+   }
+   return String.valueOf((char)c);
+ }
+
+ // Unicode bidirectional formatting controls
+ public static final char LRE = 0x202A; // Left-to-right embedding
+ public static final char RLE = 0x202B; // Right-to-left embedding
+ public static final char LRO = 0x202D; // Left-to-right override
+ public static final char RLO = 0x202E; // Right-to-left override
+ public static final char LRM = 0x200E; // Left-to-right mark
+ public static final char RLM = 0x200F; // Right-to-left mark
+ public static final char PDF = 0x202C; // Pop directional formatting
+
+ /**
+  * Removes at most one leading and one trailing bidi control from the
+  * string. Null, empty and single-char strings are returned unchanged.
+  */
+ public static String stripBidi(String s) {
+   if (s == null || s.length() <= 1) {
+     return s;
+   }
+   final int len = s.length();
+   // len >= 2 here, so the first and last char are distinct positions
+   final int from = isBidi(s.charAt(0)) ? 1 : 0;
+   final int to = isBidi(s.charAt(len - 1)) ? len - 1 : len;
+   return s.substring(from, to);
+ }
+
+ /**
+  * Removes every bidi control (LRE, RLE, LRO, RLO, LRM, RLM, PDF) from
+  * the string, wherever it occurs.
+  */
+ public static String stripBidiInternal(String s) {
+   final String bidiControls = "[\u202A\u202B\u202D\u202E\u200E\u200F\u202C]";
+   return s.replaceAll(bidiControls, "");
+ }
+
+ /**
+  * Surrounds s with c1 / c2 unless it already starts with c1 or ends
+  * with c2 respectively. Strings shorter than two chars are returned
+  * without any wrapping.
+  */
+ private static String wrap(String s, char c1, char c2) {
+   final int n = s.length();
+   if (n <= 1) {
+     return new StringBuilder(s).toString();
+   }
+   final boolean needPrefix = s.charAt(0) != c1;
+   final boolean needSuffix = s.charAt(n - 1) != c2;
+   final StringBuilder out = new StringBuilder(n + 2);
+   if (needPrefix) out.append(c1);
+   out.append(s);
+   if (needSuffix) out.append(c2);
+   return out.toString();
+ }
+
+ /**
+  * Wrap the string with the specified bidi control. Embeddings and
+  * overrides (RLE, RLO, LRE, LRO) are closed with PDF; the marks (RLM,
+  * LRM) are placed on both sides. Any other char returns s unchanged.
+  */
+ public static String wrapBidi(String s, char c) {
+   if (c == RLE || c == RLO || c == LRE || c == LRO) {
+     return wrap(s, c, PDF);
+   }
+   if (c == RLM || c == LRM) {
+     return wrap(s, c, c);
+   }
+   return s;
+ }
+
+ /** True if the codepoint lies in the range '0'..'9'. */
+ public static boolean isDigit(Codepoint codepoint) {
+   final int value = codepoint.getValue();
+   return isDigit(value);
+ }
+
+ /** True if the codepoint lies in the range '0'..'9'. */
+ public static boolean isDigit(int codepoint) {
+   final boolean digit = CharUtils.inRange(codepoint, '0', '9');
+   return digit;
+ }
+
+ /** True if the codepoint lies in 'A'..'Z' or 'a'..'z'. */
+ public static boolean isAlpha(Codepoint codepoint) {
+   final int value = codepoint.getValue();
+   return isAlpha(value);
+ }
+
+ /** True if the codepoint lies in 'A'..'Z' or 'a'..'z'. */
+ public static boolean isAlpha(int codepoint) {
+   if (CharUtils.inRange(codepoint, 'A', 'Z')) {
+     return true;
+   }
+   return CharUtils.inRange(codepoint, 'a', 'z');
+ }
+
+ /** True if the codepoint satisfies isDigit or isAlpha. */
+ public static boolean isAlphaDigit(Codepoint codepoint) {
+   final int value = codepoint.getValue();
+   return isAlphaDigit(value);
+ }
+
+ /** True if the codepoint satisfies isDigit or isAlpha. */
+ public static boolean isAlphaDigit(int codepoint) {
+   if (isDigit(codepoint)) {
+     return true;
+   }
+   return isAlpha(codepoint);
+ }
+
+ public static boolean isBidi(Codepoint codepoint) {
+ return isBidi(codepoint.getValue());
+ }
+
+ public static boolean isBidi(int codepoint) {
+ return codepoint == LRM || // Left-to-right mark
+ codepoint == RLM || // Right-to-left mark
+ codepoint == LRE || // Left-to-right embedding
+ codepoint == RLE || // Right-to-left embedding
+ codepoint == LRO || // Left-to-right override
+ codepoint == RLO || // Right-to-left override
+ codepoint == PDF; // Pop directional formatting
+ }
+
+ /**
+  * Finds the index of the last entry of the ascending array that is
+  * less than or equal to value. Returns -1 when value is smaller than
+  * the first entry, when the array is empty, and also when value is
+  * greater than or equal to the last entry.
+  */
+ public static int get_index(int[] set, int value) {
+   int lo = 0;
+   int hi = set.length;
+   // Bisect until the candidate window holds at most 8 entries
+   while (hi - lo > 8) {
+     final int mid = (hi + lo) >> 1;
+     if (set[mid] <= value) {
+       lo = mid;
+     } else {
+       hi = mid;
+     }
+   }
+   // Finish with a linear scan of the small window
+   int pos = lo;
+   while (pos < hi && value >= set[pos]) {
+     pos++;
+   }
+   return (pos == hi) ? -1 : pos - 1;
+ }
+
+ /**
+  * Treats the specified int array as an Inversion Set and returns
+  * true if the value is located within the set, i.e. when get_index
+  * yields an even index. This will only work correctly if the values
+  * in the int array are monotonically increasing. A get_index result
+  * of -1 is odd and therefore reports "not contained".
+  */
+ public static boolean invset_contains(
+   int[] set,
+   int value) {
+   final int index = get_index(set, value);
+   final boolean evenIndex = (index & 1) == 0;
+   return evenIndex;
+ }
+
+
+ /**
+  * Named character-class profiles. Each constant wraps a Filter whose
+  * accept(codepoint) returns true when the codepoint does NOT belong to
+  * the character class the constant is named after (NONE is the
+  * exception: its filter returns true for every codepoint).
+  * NOTE(review): callers appear to treat an "accepted" codepoint as the
+  * offending one - confirm against the restricted iterator that
+  * consumes filter().
+  */
+ public static enum Profile {
+ NONE(
+ new Filter() {
+ public boolean accept(int codepoint) {
+ return true;
+ }
+ }
+ ),
+ ALPHA(
+ new Filter() {
+ public boolean accept(int codepoint) {
+ return !isAlpha(codepoint);
+ }
+ }
+ ),
+ ALPHANUM(
+ new Filter() {
+ public boolean accept(int codepoint) {
+ return !isAlphaDigit(codepoint);
+ }
+ }
+ ),
+ FRAGMENT(
+ new Filter() {
+ public boolean accept(int codepoint) {
+ return !isFragment(codepoint);
+ }
+ }
+ ),
+ IFRAGMENT(
+ new Filter() {
+ public boolean accept(int codepoint) {
+ return !is_ifragment(codepoint);
+ }
+ }
+ ),
+ PATH(
+ new Filter() {
+ public boolean accept(int codepoint) {
+ return !isPath(codepoint);
+ }
+ }
+ ),
+ IPATH(
+ new Filter() {
+ public boolean accept(int codepoint) {
+ return !is_ipath(codepoint);
+ }
+ }
+ ),
+ IUSERINFO(
+ new Filter() {
+ public boolean accept(int codepoint) {
+ return !is_iuserinfo(codepoint);
+ }
+ }
+ ),
+ USERINFO(
+ new Filter() {
+ public boolean accept(int codepoint) {
+ return !isUserInfo(codepoint);
+ }
+ }
+ ),
+ QUERY(
+ new Filter() {
+ public boolean accept(int codepoint) {
+ return !isQuery(codepoint);
+ }
+ }
+ ),
+ IQUERY(
+ new Filter() {
+ public boolean accept(int codepoint) {
+ return !is_iquery(codepoint);
+ }
+ }
+ ),
+ SCHEME(
+ new Filter() {
+ public boolean accept(int codepoint) {
+ return !isScheme(codepoint);
+ }
+ }
+ ),
+ PATHNODELIMS(
+ new Filter() {
+ public boolean accept(int codepoint) {
+ return !isPathNoDelims(codepoint);
+ }
+ }
+ ),
+ IPATHNODELIMS(
+ new Filter() {
+ public boolean accept(int codepoint) {
+ return !is_ipathnodelims(codepoint);
+ }
+ }
+ ),
+ IREGNAME(
+ new Filter() {
+ public boolean accept(int codepoint) {
+ return !is_iregname(codepoint);
+ }
+ }
+ ),
+ IPRIVATE(
+ new Filter() {
+ public boolean accept(int codepoint) {
+ return !is_iprivate(codepoint);
+ }
+ }
+ ),
+ RESERVED(
+ new Filter() {
+ public boolean accept(int codepoint) {
+ return !isReserved(codepoint);
+ }
+ }
+ ),
+ IUNRESERVED(
+ new Filter() {
+ public boolean accept(int codepoint) {
+ return !is_iunreserved(codepoint);
+ }
+ }
+ ),
+ UNRESERVED(
+ new Filter() {
+ public boolean accept(int codepoint) {
+ return !isUnreserved(codepoint);
+ }
+ }
+ ),
+ // True only when the codepoint is in none of: iunreserved, reserved,
+ // iprivate, pct-encoded - and is not '#'
+ SCHEMESPECIFICPART(
+ new Filter() {
+ public boolean accept(int codepoint) {
+ return !is_iunreserved(codepoint) &&
+ !isReserved(codepoint) &&
+ !is_iprivate(codepoint) &&
+ !isPctEnc(codepoint) &&
+ codepoint != '#';
+ }
+ }
+ ),
+ // True only when the codepoint is in none of: regname, userinfo, gen-delims
+ AUTHORITY(
+ new Filter() {
+ public boolean accept(int codepoint) {
+ return !is_regname(codepoint) &&
+ !isUserInfo(codepoint) &&
+ !isGenDelim(codepoint);
+ }
+ }
+ ),
+ // True for anything outside the ranges 1-9 and 14-127
+ // (assumes inRange bounds are inclusive - TODO confirm)
+ ASCIISANSCRLF(
+ new Filter() {
+ public boolean accept(int codepoint) {
+ return !CharUtils.inRange(codepoint,1,9) &&
+ !CharUtils.inRange(codepoint,14,127);
+ }
+ }
+ ),
+ PCT(
+ new Filter() {
+ public boolean accept(int codepoint) {
+ return !CharUtils.isPctEnc(codepoint);
+ }
+ }
+ ),
+ // True for anything outside the five listed ASCII ranges
+ // (0x00-0x2C, 0x2E-0x2F, 0x3A-0x40, 0x5B-0x60, 0x7B-0x7F)
+ STD3ASCIIRULES(
+ new Filter() {
+ public boolean accept(int codepoint) {
+ return !CharUtils.inRange(codepoint,0x0000,0x002C) &&
+ !CharUtils.inRange(codepoint,0x002E,0x002F) &&
+ !CharUtils.inRange(codepoint,0x003A,0x0040) &&
+ !CharUtils.inRange(codepoint,0x005B,0x0060) &&
+ !CharUtils.inRange(codepoint,0x007B,0x007F);
+ }
+ }
+ )
+ ;
+ // The filter supplied by the enum constant
+ private final Filter filter;
+ Profile(Filter filter) {
+ this.filter = filter;
+ }
+ /** Returns the Filter backing this profile. */
+ public Filter filter() {
+ return filter;
+ }
+ /** Forwards to filter.accept(codepoint). */
+ public boolean check(int codepoint) {
+ return filter.accept(codepoint);
+ }
+ }
+
+ /**
+  * True if the codepoint is '%', satisfies isDigit, or lies in the
+  * range 'A'..'F' or 'a'..'f'.
+  */
+ public static boolean isPctEnc(int codepoint) {
+   if (codepoint == '%') return true;
+   if (isDigit(codepoint)) return true;
+   if (CharUtils.inRange(codepoint,'A','F')) return true;
+   return CharUtils.inRange(codepoint,'a','f');
+ }
+
+ public static boolean isMark(int codepoint) {
+ return codepoint == '-' ||
+ codepoint == '_' ||
+ codepoint == '.' ||
+ codepoint == '!' ||
+ codepoint == '~' ||
+ codepoint == '*' ||
+ codepoint == '\\' ||
+ codepoint == '\'' ||
+ codepoint == '(' ||
+ codepoint == ')';
+ }
+
+ public static boolean isUnreserved(int codepoint) {
+ return isAlphaDigit(codepoint) ||
+ codepoint == '-' ||
+ codepoint == '.' ||
+ codepoint == '_' ||
+ codepoint == '~';
+ }
+
+ public static boolean isReserved(int codepoint) {
+ return codepoint == '$' ||
+ codepoint == '&' ||
+ codepoint == '+' ||
+ codepoint == ',' ||
+ codepoint == '/' ||
+ codepoint == ':' ||
+ codepoint == ';' ||
+ codepoint == '=' ||
+ codepoint == '?' ||
+ codepoint == '@' ||
+ codepoint == '[' ||
+ codepoint == ']';
+ }
+
+ public static boolean isGenDelim(int codepoint) {
+ return codepoint == '#' ||
+ codepoint == '/' ||
+ codepoint == ':' ||
+ codepoint == '?' ||
+ codepoint == '@' ||
+ codepoint == '[' ||
+ codepoint == ']';
+ }
+
+ public static boolean isSubDelim(int codepoint) {
+ return codepoint == '!' ||
+ codepoint == '$' ||
+ codepoint == '&' ||
+ codepoint == '\'' ||
+ codepoint == '(' ||
+ codepoint == ')' ||
+ codepoint == '*' ||
+ codepoint == '+' ||
+ codepoint == ',' ||
+ codepoint == ';' ||
+ codepoint == '=' ||
+ codepoint == '\\';
+ }
+
+ public static boolean isPchar(int codepoint) {
+ return isUnreserved(codepoint) ||
+ codepoint == ':' ||
+ codepoint == '@' ||
+ codepoint == '&' ||
+ codepoint == '=' ||
+ codepoint == '+' ||
+ codepoint == '$' ||
+ codepoint == ',';
+ }
+
+ public static boolean isPath(int codepoint) {
+ return isPchar(codepoint) ||
+ codepoint == ';' ||
+ codepoint == '/' ||
+ codepoint == '%' ||
+ codepoint == ',';
+ }
+
+ /** True if the codepoint satisfies isPath but not isGenDelim. */
+ public static boolean isPathNoDelims(int codepoint) {
+   if (!isPath(codepoint)) {
+     return false;
+   }
+   return !isGenDelim(codepoint);
+ }
+
+ public static boolean isScheme(int codepoint) {
+ return isAlphaDigit(codepoint) ||
+ codepoint == '+' ||
+ codepoint == '-' ||
+ codepoint == '.';
+ }
+
+
+ /**
+  * True if the codepoint satisfies isUnreserved, isSubDelim or isPctEnc.
+  */
+ public static boolean isUserInfo(int codepoint) {
+   if (isUnreserved(codepoint)) return true;
+   if (isSubDelim(codepoint)) return true;
+   return isPctEnc(codepoint);
+ }
+
+ public static boolean isQuery(int codepoint) {
+ return isPchar(codepoint) ||
+ codepoint == ';' ||
+ codepoint == '/' ||
+ codepoint == '?' ||
+ codepoint == '%';
+ }
+
+ public static boolean isFragment(int codepoint) {
+ return isPchar(codepoint) ||
+ codepoint == '/' ||
+ codepoint == '?' ||
+ codepoint == '%';
+ }
+
+ /**
+  * True if the codepoint lies in one of the non-ASCII ranges
+  * U+00A0-D7FF, U+F900-FDCF, U+FDF0-FFEF, xx0000-xxFFFD for each of
+  * the planes 1 through 13, or U+E1000-EFFFD.
+  */
+ public static boolean is_ucschar(int codepoint) {
+   if (CharUtils.inRange(codepoint,'\u00A0', '\uD7FF')) return true;
+   if (CharUtils.inRange(codepoint,'\uF900','\uFDCF')) return true;
+   if (CharUtils.inRange(codepoint,'\uFDF0','\uFFEF')) return true;
+   // Planes 1..13: everything except the last two codepoints of the plane
+   for (int plane = 0x1; plane <= 0xD; plane++) {
+     final int first = plane << 16;
+     if (CharUtils.inRange(codepoint, first, first + 0xFFFD)) return true;
+   }
+   return CharUtils.inRange(codepoint,0xE1000,0xEFFFD);
+ }
+
+ /**
+  * True if the codepoint lies in U+E000-F8FF, U+F0000-FFFFD or
+  * U+100000-10FFFD.
+  */
+ public static boolean is_iprivate(int codepoint) {
+   if (CharUtils.inRange(codepoint,'\uE000', '\uF8FF')) return true;
+   if (CharUtils.inRange(codepoint, 0xF0000,0xFFFFD)) return true;
+   return CharUtils.inRange(codepoint, 0x100000,0x10FFFD);
+ }
+
+ /**
+  * True if the codepoint satisfies isAlphaDigit, isMark or is_ucschar.
+  */
+ public static boolean is_iunreserved(int codepoint) {
+   if (isAlphaDigit(codepoint)) return true;
+   if (isMark(codepoint)) return true;
+   return is_ucschar(codepoint);
+ }
+
+ public static boolean is_ipchar(int codepoint) {
+ return is_iunreserved(codepoint) ||
+ codepoint == ':' ||
+ codepoint == '@' ||
+ codepoint == '&' ||
+ codepoint == '=' ||
+ codepoint == '+' ||
+ codepoint == '$';
+ }
+
+ public static boolean is_ipath(int codepoint) {
+ return is_ipchar(codepoint) ||
+ codepoint == ';' ||
+ codepoint == '/' ||
+ codepoint == '%' ||
+ codepoint == ',';
+ }
+
+ /** True if the codepoint satisfies is_ipath but not isGenDelim. */
+ public static boolean is_ipathnodelims(int codepoint) {
+   if (!is_ipath(codepoint)) {
+     return false;
+   }
+   return !isGenDelim(codepoint);
+ }
+
+ public static boolean is_iquery(int codepoint) {
+ return is_ipchar(codepoint) ||
+ is_iprivate(codepoint) ||
+ codepoint == ';' ||
+ codepoint == '/' ||
+ codepoint == '?' ||
+ codepoint == '%';
+ }
+
+ public static boolean is_ifragment(int codepoint) {
+ return is_ipchar(codepoint) ||
+ is_iprivate(codepoint) ||
+ codepoint == '/' ||
+ codepoint == '?' ||
+ codepoint == '%';
+ }
+
+ public static boolean is_iregname(int codepoint) {
+ return is_iunreserved(codepoint) ||
+ codepoint == '!' ||
+ codepoint == '$' ||
+ codepoint == '&' ||
+ codepoint == '\'' ||
+ codepoint == '(' ||
+ codepoint == ')' ||
+ codepoint == '*' ||
+ codepoint == '+' ||
+ codepoint == ',' ||
+ codepoint == ';' ||
+ codepoint == '=' ||
+ codepoint == '"';
+ }
+
+ public static boolean is_regname(int codepoint) {
+ return isUnreserved(codepoint) ||
+ codepoint == '!' ||
+ codepoint == '$' ||
+ codepoint == '&' ||
+ codepoint == '\'' ||
+ codepoint == '(' ||
+ codepoint == ')' ||
+ codepoint == '*' ||
+ codepoint == '+' ||
+ codepoint == ',' ||
+ codepoint == ';' ||
+ codepoint == '=' ||
+ codepoint == '"';
+ }
+
+ public static boolean is_iuserinfo(int codepoint) {
+ return is_iunreserved(codepoint) ||
+ codepoint == ';' ||
+ codepoint == ':' ||
+ codepoint == '&' ||
+ codepoint == '=' ||
+ codepoint == '+' ||
+ codepoint == '$' ||
+ codepoint == ',';
+ }
+
+ public static boolean is_iserver(int codepoint) {
+ return is_iuserinfo(codepoint) ||
+ is_iregname(codepoint) ||
+ isAlphaDigit(codepoint) ||
+ codepoint == '.' ||
+ codepoint == ':' ||
+ codepoint == '@' ||
+ codepoint == '[' ||
+ codepoint == ']' ||
+ codepoint == '%' ||
+ codepoint == '-';
+ }
+
+ /**
+  * Walks the whole iterator through a restricted view built from the
+  * profile's filter; the restricted view is what raises
+  * InvalidCharacterException for an offending codepoint.
+  */
+ public static void verify(CodepointIterator ci, Profile profile) throws InvalidCharacterException {
+   final Filter filter = profile.filter();
+   final CodepointIterator restricted = CodepointIterator.restrict(ci, filter);
+   for (; restricted.hasNext(); ) {
+     restricted.next();
+   }
+ }
+
+ /** Verifies the chars against the profile; a null array is ignored. */
+ public static void verify(char[] s, Profile profile) throws InvalidCharacterException {
+   if (s != null) {
+     verify(CodepointIterator.forCharArray(s),profile);
+   }
+ }
+
+ /** Verifies the string against the profile; a null string is ignored. */
+ public static void verify(String s, Profile profile) throws InvalidCharacterException {
+   if (s != null) {
+     verify(CodepointIterator.forCharSequence(s),profile);
+   }
+ }
+
+ /**
+  * Inverse of verify: walks the iterator through a restricted view
+  * created with the profile's filter and the invert flag set (scanning
+  * disabled), so the restricted view raises InvalidCharacterException
+  * under the opposite condition.
+  */
+ public static void verifyNot(CodepointIterator ci, Profile profile) throws InvalidCharacterException {
+   CodepointIterator rci = ci.restrict(profile.filter(),false,true);
+   while (rci.hasNext()) rci.next();
+ }
+
+ /**
+  * Inverse of verify for a char array. A null array is ignored, the
+  * same way verify(char[], Profile) ignores it.
+  */
+ public static void verifyNot(char[] array, Profile profile) throws InvalidCharacterException {
+   if (array == null) return;
+   // Delegate so both overloads share a single implementation
+   verifyNot(CodepointIterator.forCharArray(array), profile);
+ }
+
+}
+
Added: incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/text/Codepoint.java
URL: http://svn.apache.org/viewvc/incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/text/Codepoint.java?rev=607801&view=auto
==============================================================================
--- incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/text/Codepoint.java (added)
+++ incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/text/Codepoint.java Mon Dec 31 20:59:44 2007
@@ -0,0 +1,169 @@
+/*
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements. The ASF licenses this file to You
+* under the Apache License, Version 2.0 (the "License"); you may not
+* use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License. For additional information regarding
+* copyright in this work, please see the NOTICE file in the top level
+* directory of this distribution.
+*/
+package org.apache.abdera.i18n.text;
+
+import java.io.Serializable;
+import java.io.UnsupportedEncodingException;
+
+ /**
+  * Immutable value object wrapping a single Unicode codepoint.
+  */
+ public class Codepoint
+   implements Serializable,
+              Cloneable,
+              Comparable<Codepoint>{
+
+   private static final long serialVersionUID = 140337939131905483L;
+
+   private static final String DEFAULT_ENCODING = "UTF-8";
+   private final int value;
+
+   /**
+    * Decodes the bytes as UTF-8; the result must be one char or one
+    * surrogate pair.
+    */
+   public Codepoint(byte[] bytes) {
+     try {
+       this.value = valueFromCharSequence(new String(bytes,DEFAULT_ENCODING));
+     } catch (UnsupportedEncodingException e) {
+       throw new RuntimeException(e);
+     }
+   }
+
+   /**
+    * Decodes the bytes with the named encoding; the result must be one
+    * char or one surrogate pair.
+    */
+   public Codepoint(
+     byte[] bytes,
+     String encoding)
+       throws UnsupportedEncodingException {
+     this.value = valueFromCharSequence(new String(bytes,encoding));
+   }
+
+   /**
+    * Creates a codepoint from a sequence of exactly one or two chars.
+    */
+   public Codepoint(CharSequence value) {
+     this(valueFromCharSequence(value));
+   }
+
+   /**
+    * Converts a one- or two-char sequence to a codepoint value.
+    * @throws IllegalArgumentException if the sequence is empty or
+    *         holds more than two chars
+    */
+   private static int valueFromCharSequence(CharSequence s) {
+     final int n = s.length();
+     if (n == 0) {
+       // Previously this fell into the two-char branch and failed
+       // with a StringIndexOutOfBoundsException
+       throw new IllegalArgumentException("Empty char sequence");
+     }
+     if (n == 1) {
+       return (int)s.charAt(0);
+     }
+     if (n > 2) {
+       throw new IllegalArgumentException("Too many chars");
+     }
+     char high = s.charAt(0);
+     char low = s.charAt(1);
+     return CharUtils.toSupplementary(high, low).getValue();
+   }
+
+   public Codepoint(char value) {
+     this((int)value);
+   }
+
+   public Codepoint(char high, char low) {
+     this(CharUtils.toSupplementary(high, low).getValue());
+   }
+
+   public Codepoint(Codepoint codepoint) {
+     this(codepoint.value);
+   }
+
+   /**
+    * @throws IllegalArgumentException if the value is negative
+    */
+   public Codepoint(int value) {
+     if (value < 0)
+       throw new IllegalArgumentException(
+         "Invalid Codepoint");
+     this.value = value;
+   }
+
+   public int getValue() {
+     return value;
+   }
+
+   public boolean isSupplementary() {
+     return CharUtils.isSupplementary(value);
+   }
+
+   /**
+    * True if this codepoint is itself a low surrogate code unit. The
+    * range guard matters: casting a value above 0xFFFF to char keeps
+    * only the low 16 bits, so e.g. 0x1DC00 would otherwise be reported
+    * as a surrogate.
+    */
+   public boolean isLowSurrogate() {
+     return value <= Character.MAX_VALUE &&
+            CharUtils.isLowSurrogate((char)value);
+   }
+
+   /**
+    * True if this codepoint is itself a high surrogate code unit. Values
+    * above 0xFFFF are never surrogates (see isLowSurrogate).
+    */
+   public boolean isHighSurrogate() {
+     return value <= Character.MAX_VALUE &&
+            CharUtils.isHighSurrogate((char)value);
+   }
+
+   public int compareTo(Codepoint o) {
+     return value < o.value ? -1 :
+            value == o.value ? 0 : 1;
+   }
+
+   @Override
+   public String toString() {
+     return CharUtils.toString(value);
+   }
+
+   public char[] toChars() {
+     return toString().toCharArray();
+   }
+
+   /**
+    * Number of chars in the string form: two for a supplementary
+    * codepoint, one otherwise. Computed directly instead of building a
+    * String and a char[] just to measure them.
+    */
+   public int getCharCount() {
+     return isSupplementary() ? 2 : 1;
+   }
+
+   /** Encodes the codepoint as UTF-8. */
+   public byte[] toBytes() {
+     try {
+       return toBytes(DEFAULT_ENCODING);
+     } catch (UnsupportedEncodingException e) {
+       throw new RuntimeException(e);
+     }
+   }
+
+   /** Encodes the codepoint with the named encoding. */
+   public byte[] toBytes(
+     String encoding)
+       throws UnsupportedEncodingException {
+     return toString().getBytes(encoding);
+   }
+
+   @Override
+   public int hashCode() {
+     final int prime = 31;
+     int result = 1;
+     result = prime * result + value;
+     return result;
+   }
+
+   @Override
+   public boolean equals(Object obj) {
+     if (this == obj) return true;
+     if (obj == null) return false;
+     if (getClass() != obj.getClass()) return false;
+     final Codepoint other = (Codepoint) obj;
+     return value == other.value;
+   }
+
+   /**
+    * Returns the Unicode plane number of this codepoint (value / 0x10000).
+    * Plane 0 (0000-FFFF): Basic Multilingual Plane (BMP). This is the plane containing most of the character assignments so far. A primary objective for the BMP is to support the unification of prior character sets as well as characters for writing systems in current use.
+    * Plane 1 (10000-1FFFF): Supplementary Multilingual Plane (SMP).
+    * Plane 2 (20000-2FFFF): Supplementary Ideographic Plane (SIP)
+    * Planes 3 to 13 (30000-DFFFF) are unassigned
+    * Plane 14 (E0000-EFFFF): Supplementary Special-purpose Plane (SSP)
+    * Plane 15 (F0000-FFFFF) reserved for the Private Use Area (PUA)
+    * Plane 16 (100000-10FFFF), reserved for the Private Use Area (PUA)
+    **/
+   public int getPlane() {
+     return value / (0xFFFF + 1);
+   }
+
+   @Override
+   public Codepoint clone() {
+     try {
+       return (Codepoint) super.clone();
+     } catch (CloneNotSupportedException e) {
+       return new Codepoint(value);
+     }
+   }
+
+ }
Added: incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/text/CodepointIterator.java
URL: http://svn.apache.org/viewvc/incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/text/CodepointIterator.java?rev=607801&view=auto
==============================================================================
--- incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/text/CodepointIterator.java (added)
+++ incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/text/CodepointIterator.java Mon Dec 31 20:59:44 2007
@@ -0,0 +1,515 @@
+/*
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements. The ASF licenses this file to You
+* under the Apache License, Version 2.0 (the "License"); you may not
+* use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License. For additional information regarding
+* copyright in this work, please see the NOTICE file in the top level
+* directory of this distribution.
+*/
+package org.apache.abdera.i18n.text;
+
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.Reader;
+import java.nio.ByteBuffer;
+import java.nio.CharBuffer;
+import java.nio.channels.Channels;
+import java.nio.channels.ReadableByteChannel;
+import java.nio.channels.WritableByteChannel;
+import java.nio.charset.Charset;
+import java.util.Iterator;
+
+
+/**
+ * Provides an iterator over Unicode Codepoints
+ */
+public abstract class CodepointIterator
+ implements Iterator<Codepoint> {
+
+ /**
+ * Get a CodepointIterator for the specified char array
+ */
+ public static CodepointIterator forCharArray(char[] array) {
+ return new CharArrayCodepointIterator(array);
+ }
+
+ /**
+ * Get a CodepointIterator for the specified CharSequence
+ */
+ public static CodepointIterator forCharSequence(CharSequence seq) {
+ return new CharSequenceCodepointIterator(seq);
+ }
+
+ /**
+ * Get a CodepointIterator for the specified byte array, using the default charset
+ */
+ public static CodepointIterator forByteArray(byte[] array) {
+ return new ByteArrayCodepointIterator(array);
+ }
+
+ /**
+ * Get a CodepointIterator for the specified byte array, using the specified charset
+ */
+ public static CodepointIterator forByteArray(byte[] array, String charset) {
+ return new ByteArrayCodepointIterator(array,charset);
+ }
+
+ /**
+ * Get a CodepointIterator for the specified CharBuffer
+ */
+ public static CodepointIterator forCharBuffer(CharBuffer buffer) {
+ return new CharBufferCodepointIterator(buffer);
+ }
+
+ /**
+ * Get a CodepointIterator for the specified ReadableByteChannel
+ */
+ public static CodepointIterator forReadableByteChannel(ReadableByteChannel channel) {
+ return new ReadableByteChannelCodepointIterator(channel);
+ }
+
+ /**
+ * Get a CodepointIterator for the specified ReadableByteChannel
+ */
+ public static CodepointIterator forReadableByteChannel(ReadableByteChannel channel, String charset) {
+ return new ReadableByteChannelCodepointIterator(channel,charset);
+ }
+
+ /**
+ * Get a CodepointIterator for the specified InputStream
+ */
+ public static CodepointIterator forInputStream(InputStream in) {
+ return new ReadableByteChannelCodepointIterator(Channels.newChannel(in));
+ }
+
+ /**
+ * Get a CodepointIterator for the specified InputStream using the specified charset
+ */
+ public static CodepointIterator forInputStream(InputStream in, String charset) {
+ return new ReadableByteChannelCodepointIterator(Channels.newChannel(in),charset);
+ }
+
+ /**
+ * Get a CodepointIterator for the specified Reader
+ */
+ public static CodepointIterator forReader(Reader in) {
+ return new ReaderCodepointIterator(in);
+ }
+
+ public static CodepointIterator restrict(CodepointIterator ci, Filter filter) {
+ return new RestrictedCodepointIterator(ci,filter,false);
+ }
+
+ public static CodepointIterator restrict(CodepointIterator ci, Filter filter, boolean scanning) {
+ return new RestrictedCodepointIterator(ci,filter,scanning);
+ }
+
+ public static CodepointIterator restrict(CodepointIterator ci, Filter filter, boolean scanning, boolean invert) {
+ return new RestrictedCodepointIterator(ci,filter,scanning,invert);
+ }
+
+ protected int position = -1;
+ protected int limit = -1;
+
+ public CodepointIterator restrict(Filter filter) {
+ return restrict(this, filter);
+ }
+
+ public CodepointIterator restrict(Filter filter, boolean scanning) {
+ return restrict(this, filter, scanning);
+ }
+
+ public CodepointIterator restrict(Filter filter, boolean scanning, boolean invert) {
+ return restrict(this, filter, scanning, invert);
+ }
+
+ /**
+ * Get the next char
+ */
+ protected abstract char get();
+
+ /**
+ * Get the specified char
+ */
+ protected abstract char get(int index);
+
+ /**
+ * True if there are codepoints remaining
+ */
+ public boolean hasNext() {
+ return remaining() > 0;
+ }
+
+ /**
+ * Return the final index position
+ */
+ public int lastPosition() {
+ int p = position();
+ return (p > -1) ?
+ (p >= limit()) ? p : p - 1 : -1;
+ }
+
+ /**
+ * Return the next chars. If the codepoint is not supplemental,
+ * the char array will have a single member. If the codepoint is
+ * supplemental, the char array will have two members, representing
+ * the high and low surrogate chars
+ */
+ public char[] nextChars() throws InvalidCharacterException {
+ if (hasNext()) {
+ if (isNextSurrogate()) {
+ char c1 = get();
+ if (CharUtils.isHighSurrogate(c1) && position() < limit()) {
+ char c2 = get();
+ if (CharUtils.isLowSurrogate(c2)) {
+ return new char[] {c1,c2};
+ } else {
+ throw new InvalidCharacterException(c2);
+ }
+ } else if (CharUtils.isLowSurrogate(c1) && position() > 0) {
+ char c2 = get(position()-2);
+ if (CharUtils.isHighSurrogate(c2)) {
+ return new char[] {c1,c2};
+ } else {
+ throw new InvalidCharacterException(c2);
+ }
+ }
+ }
+ return new char[] {get()};
+ }
+ return null;
+ }
+
+ /**
+ * Peek the next chars in the iterator. If the codepoint is not supplemental,
+ * the char array will have a single member. If the codepoint is
+ * supplemental, the char array will have two members, representing
+ * the high and low surrogate chars
+ */
+ public char[] peekChars() throws InvalidCharacterException {
+ return peekChars(position());
+ }
+
+ /**
+ * Peek the specified chars in the iterator. If the codepoint is not supplemental,
+ * the char array will have a single member. If the codepoint is
+ * supplemental, the char array will have two members, representing
+ * the high and low surrogate chars
+ */
+ private char[] peekChars(int pos) throws InvalidCharacterException {
+ if (pos < 0 || pos >= limit()) return null;
+ char c1 = get(pos);
+ if (CharUtils.isHighSurrogate(c1) && pos < limit()) {
+ char c2 = get(pos+1);
+ if (CharUtils.isLowSurrogate(c2)) {
+ return new char[] {c1,c2};
+ } else {
+ throw new InvalidCharacterException(c2);
+ }
+ } else if (CharUtils.isLowSurrogate(c1) && pos > 1) {
+ char c2 = get(pos-1);
+ if (CharUtils.isHighSurrogate(c2)) {
+ return new char[] {c2,c1};
+ } else {
+ throw new InvalidCharacterException(c2);
+ }
+ } else return new char[] {c1};
+ }
+
+ /**
+ * Return the next codepoint
+ */
+ public Codepoint next() throws InvalidCharacterException {
+ return toCodepoint(nextChars());
+ }
+
+ /**
+ * Peek the next codepoint
+ */
+ public Codepoint peek() throws InvalidCharacterException {
+ return toCodepoint(peekChars());
+ }
+
+ /**
+ * Peek the specified codepoint
+ */
+ public Codepoint peek(int index) throws InvalidCharacterException {
+ return toCodepoint(peekChars(index));
+ }
+
+ private Codepoint toCodepoint(char[] chars) {
+ return (chars == null) ? null :
+ (chars.length == 1) ? new Codepoint(chars[0]) :
+ CharUtils.toSupplementary(chars[0], chars[1]);
+ }
+
+ /**
+ * Set the iterator position
+ */
+ public void position(int n) {
+ if (n < 0 || n > limit()) throw new ArrayIndexOutOfBoundsException(n);
+ position = n;
+ }
+
+ /**
+ * Get the iterator position
+ */
+ public int position() {
+ return position;
+ }
+
+ /**
+ * Return the iterator limit
+ */
+ public int limit() {
+ return limit;
+ }
+
+ /**
+ * Return the remaining iterator size
+ */
+ public int remaining() {
+ return limit - position();
+ }
+
+ private boolean isNextSurrogate() {
+ if (!hasNext()) return false;
+ char c = get(position());
+ return CharUtils.isHighSurrogate(c) || CharUtils.isLowSurrogate(c);
+ }
+
+ /**
+ * Returns true if the char at the specified index is a high surrogate
+ */
+ public boolean isHigh(int index) {
+ if (index < 0 || index > limit()) throw new ArrayIndexOutOfBoundsException(index);
+ return CharUtils.isHighSurrogate(get(index));
+ }
+
+ /**
+ * Returns true if the char at the specified index is a low surrogate
+ */
+ public boolean isLow(int index) {
+ if (index < 0 || index > limit()) throw new ArrayIndexOutOfBoundsException(index);
+ return CharUtils.isLowSurrogate(get(index));
+ }
+
+ public void remove() {
+ throw new UnsupportedOperationException();
+ }
+
+
+ static class ByteArrayCodepointIterator
+ extends CharArrayCodepointIterator {
+ public ByteArrayCodepointIterator(byte[] bytes) {
+ this(bytes,Charset.defaultCharset());
+ }
+ public ByteArrayCodepointIterator(byte[] bytes, String charset) {
+ this(bytes,Charset.forName(charset));
+ }
+ public ByteArrayCodepointIterator(byte[] bytes, Charset charset) {
+ CharBuffer cb = charset.decode(ByteBuffer.wrap(bytes));
+ buffer = cb.array();
+ position = cb.position();
+ limit = cb.limit();
+ }
+ }
+
+ static class CharArrayCodepointIterator
+ extends CodepointIterator {
+ protected char[] buffer;
+ protected CharArrayCodepointIterator() {}
+ public CharArrayCodepointIterator(char[] buffer) {
+ this(buffer,0,buffer.length);
+ }
+ public CharArrayCodepointIterator(char[] buffer, int n, int e) {
+ this.buffer = buffer;
+ this.position = n;
+ this.limit = Math.min(buffer.length-n,e);
+ }
+ protected char get() {
+ return (position < limit) ? buffer[position++] : (char)-1;
+ }
+ protected char get(int index) {
+ if (index < 0 || index >= limit)
+ throw new ArrayIndexOutOfBoundsException(index);
+ return buffer[index];
+ }
+ }
+
+ static class CharBufferCodepointIterator
+ extends CharArrayCodepointIterator {
+ public CharBufferCodepointIterator(CharBuffer cb) {
+ buffer = cb.array();
+ position = cb.position();
+ limit = cb.limit();
+ }
+ }
+
+ static class CharSequenceCodepointIterator
+ extends CodepointIterator {
+ private CharSequence buffer;
+ public CharSequenceCodepointIterator(CharSequence buffer) {
+ this(buffer,0,buffer.length());
+ }
+ public CharSequenceCodepointIterator(CharSequence buffer, int n, int e) {
+ this.buffer = buffer;
+ this.position = n;
+ this.limit = Math.min(buffer.length()-n,e);
+ }
+ protected char get() {
+ return buffer.charAt(position++);
+ }
+ protected char get(int index) {
+ return buffer.charAt(index);
+ }
+ }
+
+ static class ReadableByteChannelCodepointIterator
+ extends CharArrayCodepointIterator {
+ public ReadableByteChannelCodepointIterator(
+ ReadableByteChannel channel) {
+ this(channel,Charset.defaultCharset());
+ }
+ public ReadableByteChannelCodepointIterator(
+ ReadableByteChannel channel,
+ String charset) {
+ this(channel,Charset.forName(charset));
+ }
+ public ReadableByteChannelCodepointIterator(
+ ReadableByteChannel channel,
+ Charset charset) {
+ try {
+ ByteBuffer buf = ByteBuffer.allocate(1024);
+ ByteArrayOutputStream out = new ByteArrayOutputStream();
+ WritableByteChannel outc = Channels.newChannel(out);
+ while(channel.read(buf) > 0) {
+ buf.flip();
+ outc.write(buf);
+ }
+ CharBuffer cb = charset.decode(ByteBuffer.wrap(out.toByteArray()));
+ buffer = cb.array();
+ position = cb.position();
+ limit = cb.limit();
+ } catch (Exception e) {}
+ }
+ }
+
+ static class ReaderCodepointIterator
+ extends CharArrayCodepointIterator {
+ public ReaderCodepointIterator(Reader reader) {
+ try {
+ StringBuilder sb = new StringBuilder();
+ char[] buf = new char[1024];
+ int n = -1;
+ while((n = reader.read(buf)) > -1) {
+ sb.append(buf,0,n);
+ }
+ buffer = new char[sb.length()];
+ sb.getChars(0, sb.length(), buffer, 0);
+ position = 0;
+ limit = buffer.length;
+ } catch (IOException e) {
+ throw new RuntimeException(e);
+ }
+ }
+ }
+
+
+ public static class RestrictedCodepointIterator
+ extends DelegatingCodepointIterator {
+
+ private final Filter filter;
+ private final boolean scanningOnly;
+ private final boolean notset;
+
+ protected RestrictedCodepointIterator(
+ CodepointIterator internal,
+ Filter filter) {
+ this(internal,filter,false);
+ }
+
+ protected RestrictedCodepointIterator(
+ CodepointIterator internal,
+ Filter filter,
+ boolean scanningOnly) {
+ this(internal, filter, scanningOnly, false);
+ }
+
+ protected RestrictedCodepointIterator(
+ CodepointIterator internal,
+ Filter filter,
+ boolean scanningOnly,
+ boolean notset) {
+ super(internal);
+ this.filter = filter;
+ this.scanningOnly = scanningOnly;
+ this.notset = notset;
+ }
+
+ public boolean hasNext() {
+ boolean b = super.hasNext();
+ if (scanningOnly) {
+ try {
+ int cp = peek(position()).getValue();
+ if (b && cp != -1 && check(cp)) return false;
+ } catch (InvalidCharacterException e) { return false; }
+ }
+ return b;
+ }
+
+ @Override
+ public Codepoint next() throws InvalidCharacterException {
+ int cp = super.next().getValue();
+ if (cp != -1 && check(cp)) {
+ if (scanningOnly) {
+ position(position()-1);
+ return null;
+ }
+ else throw new InvalidCharacterException(cp);
+ }
+ return new Codepoint(cp);
+ }
+
+ private boolean check(int cp) {
+ boolean answer = !filter.accept(cp);
+ return (!notset) ? !answer : answer;
+ }
+
+ @Override
+ public char[] nextChars() throws InvalidCharacterException {
+ char[] chars = super.nextChars();
+ if (chars != null && chars.length > 0) {
+ if (chars.length == 1 && check(chars[0])) {
+ if (scanningOnly) {
+ position(position()-1);
+ return null;
+ }
+ else throw new InvalidCharacterException(chars[0]);
+ } else if (chars.length == 2) {
+ int cp = CharUtils.toSupplementary(chars[0],chars[1]).getValue();
+ if (check(cp)) {
+ if (scanningOnly) {
+ position(position()-2);
+ return null;
+ }
+ else throw new InvalidCharacterException(cp);
+ }
+ }
+ }
+ return chars;
+ }
+
+ }
+
+}
Added: incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/text/DelegatingCodepointIterator.java
URL: http://svn.apache.org/viewvc/incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/text/DelegatingCodepointIterator.java?rev=607801&view=auto
==============================================================================
--- incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/text/DelegatingCodepointIterator.java (added)
+++ incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/text/DelegatingCodepointIterator.java Mon Dec 31 20:59:44 2007
@@ -0,0 +1,104 @@
+/*
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements. The ASF licenses this file to You
+* under the Apache License, Version 2.0 (the "License"); you may not
+* use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License. For additional information regarding
+* copyright in this work, please see the NOTICE file in the top level
+* directory of this distribution.
+*/
+package org.apache.abdera.i18n.text;
+
+/**
+ * Base class for a CodepointIterator that wraps another
+ * CodepointIterator. Every operation defined here is forwarded unchanged
+ * to the wrapped iterator; subclasses override the operations whose
+ * output they want to filter.
+ */
+public abstract class DelegatingCodepointIterator
+  extends CodepointIterator {
+
+  /** The wrapped iterator that all calls are forwarded to. */
+  private final CodepointIterator internal;
+
+  /**
+   * @param internal the iterator to forward every call to
+   */
+  protected DelegatingCodepointIterator(
+    CodepointIterator internal) {
+      this.internal = internal;
+  }
+
+  /** Forwards to the wrapped iterator's {@code get()}. */
+  @Override
+  protected char get() {
+    return internal.get();
+  }
+
+  /** Forwards to the wrapped iterator's {@code get(int)}. */
+  @Override
+  protected char get(int index) {
+    return internal.get(index);
+  }
+
+  /** Forwards to the wrapped iterator's {@code hasNext()}. */
+  @Override
+  public boolean hasNext() {
+    return internal.hasNext();
+  }
+
+  /** Forwards to the wrapped iterator's {@code isHigh(int)}. */
+  @Override
+  public boolean isHigh(int index) {
+    return internal.isHigh(index);
+  }
+
+  /** Forwards to the wrapped iterator's {@code isLow(int)}. */
+  @Override
+  public boolean isLow(int index) {
+    return internal.isLow(index);
+  }
+
+  /** Forwards to the wrapped iterator's {@code limit()}. */
+  @Override
+  public int limit() {
+    return internal.limit();
+  }
+
+  /** Forwards to the wrapped iterator's {@code next()}. */
+  @Override
+  public Codepoint next() {
+    return internal.next();
+  }
+
+  /** Forwards to the wrapped iterator's {@code nextChars()}. */
+  @Override
+  public char[] nextChars(){
+    return internal.nextChars();
+  }
+
+  /** Forwards to the wrapped iterator's {@code peek()}. */
+  @Override
+  public Codepoint peek() {
+    return internal.peek();
+  }
+
+  /** Forwards to the wrapped iterator's {@code peek(int)}. */
+  @Override
+  public Codepoint peek(int index) {
+    return internal.peek(index);
+  }
+
+  /** Forwards to the wrapped iterator's {@code peekChars()}. */
+  @Override
+  public char[] peekChars() {
+    return internal.peekChars();
+  }
+
+  /** Forwards to the wrapped iterator's {@code position()}. */
+  @Override
+  public int position() {
+    return internal.position();
+  }
+
+  /** Forwards to the wrapped iterator's {@code remaining()}. */
+  @Override
+  public int remaining() {
+    return internal.remaining();
+  }
+
+  /** Forwards to the wrapped iterator's {@code position(int)}. */
+  @Override
+  public void position(int position) {
+    internal.position(position);
+  }
+
+}
Added: incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/text/Filter.java
URL: http://svn.apache.org/viewvc/incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/text/Filter.java?rev=607801&view=auto
==============================================================================
--- incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/text/Filter.java (added)
+++ incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/text/Filter.java Mon Dec 31 20:59:44 2007
@@ -0,0 +1,29 @@
+/*
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements. The ASF licenses this file to You
+* under the Apache License, Version 2.0 (the "License"); you may not
+* use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License. For additional information regarding
+* copyright in this work, please see the NOTICE file in the top level
+* directory of this distribution.
+*/
+package org.apache.abdera.i18n.text;
+
+/**
+ * A predicate over a single int value.
+ */
+public interface Filter {
+
+  /**
+   * A filter whose {@code accept} returns {@code true} for every input.
+   * (Interface fields are implicitly public, static and final.)
+   */
+  Filter NONOPFILTER = new Filter() {
+    public boolean accept(int c) {
+      return true;
+    }
+  };
+
+  /**
+   * @param c the value to test
+   * @return {@code true} if the value is accepted by this filter
+   */
+  boolean accept(int c);
+
+}
Added: incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/text/InvalidCharacterException.java
URL: http://svn.apache.org/viewvc/incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/text/InvalidCharacterException.java?rev=607801&view=auto
==============================================================================
--- incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/text/InvalidCharacterException.java (added)
+++ incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/text/InvalidCharacterException.java Mon Dec 31 20:59:44 2007
@@ -0,0 +1,37 @@
+/*
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements. The ASF licenses this file to You
+* under the Apache License, Version 2.0 (the "License"); you may not
+* use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License. For additional information regarding
+* copyright in this work, please see the NOTICE file in the top level
+* directory of this distribution.
+*/
+package org.apache.abdera.i18n.text;
+
+
+public class InvalidCharacterException
+ extends RuntimeException {
+
+ private static final long serialVersionUID = -7150645484748059676L;
+ private int input;
+
+ public InvalidCharacterException(int input) {
+ this.input = input;
+ }
+
+ @Override
+ public String getMessage() {
+ return "Invalid Character 0x" + Integer.toHexString(input) + "(" + (char)input + ")";
+ }
+
+
+}