You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@abdera.apache.org by jm...@apache.org on 2008/01/01 05:59:47 UTC
svn commit: r607801 [2/5] - in /incubator/abdera/java/trunk:
client/src/main/java/org/apache/abdera/protocol/client/
core/src/main/java/org/apache/abdera/util/
dependencies/i18n/src/main/java/org/apache/abdera/i18n/io/
dependencies/i18n/src/main/java/o...
Added: incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/text/Nameprep.java
URL: http://svn.apache.org/viewvc/incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/text/Nameprep.java?rev=607801&view=auto
==============================================================================
--- incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/text/Nameprep.java (added)
+++ incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/text/Nameprep.java Mon Dec 31 20:59:44 2007
@@ -0,0 +1,725 @@
+/*
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements. The ASF licenses this file to You
+* under the Apache License, Version 2.0 (the "License"); you may not
+* use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License. For additional information regarding
+* copyright in this work, please see the NOTICE file in the top level
+* directory of this distribution.
+*/
+package org.apache.abdera.i18n.text;
+
+
+//import java.util.Arrays;
+
+/**
+ * Implements Nameprep (RFC 3491), the stringprep profile for internationalized domain names
+ */
+public class Nameprep {
+
+  /**
+   * Applies Nameprep processing to a string.
+   *
+   * <p>The input is read code point by code point through a
+   * {@code NameprepCodepointIterator}; the code points it yields are
+   * collected and the result is normalized with
+   * {@link Normalizer.Form#KC}.</p>
+   *
+   * @param s the string to prepare
+   * @return the prepared, NFKC-normalized string
+   * @throws RuntimeException wrapping any failure raised during processing
+   */
+  public static String prep(String s) {
+    try {
+      StringBuilder buf = new StringBuilder();
+      CodepointIterator source = CodepointIterator.forCharSequence(s);
+      NameprepCodepointIterator mapped = new NameprepCodepointIterator(source);
+      while (mapped.hasNext()) {
+        int cp = mapped.next().getValue();
+        if (cp != -1) {
+          // The iterator yields full code points (the B.2 mapping data holds
+          // values above U+FFFF), so append them as code points; a (char)
+          // cast would truncate supplementary characters to 16 bits.
+          buf.appendCodePoint(cp);
+        }
+      }
+      return Normalizer.normalize(buf.toString(), Normalizer.Form.KC);
+    } catch (Throwable e) {
+      // Deliberately unchanged for callers: every failure surfaces as a
+      // RuntimeException whose cause is the original problem.
+      throw new RuntimeException(e);
+    }
+  }
+ /**
+ * Code point iterator used by {@link Nameprep#prep(String)}. For each code
+ * point read from the wrapped iterator it updates the L / RandAL flags,
+ * skips code points matching {@link Nameprep#isB1(int)}, rejects code
+ * points matching {@link Nameprep#isProhibited(int)} and substitutes the
+ * sequence returned by {@link Nameprep#B2(int)}, if any.
+ */
+ private static class NameprepCodepointIterator
+ extends DelegatingCodepointIterator {
+
+ private int[] rep = null; // multi-element B2 replacement still being returned, or null
+ private int reppos = 0; // index in rep of the element most recently returned
+ private boolean haslcat = false; // a code point satisfying Nameprep.isLCat has been read
+ private boolean hasrandalcat = false; // a code point satisfying Nameprep.isRandAL has been read
+ private boolean firstisrandalcat = false; // a RandAL code point was read while position() == 1
+ /**
+ * Reports whether more output is available: either a replacement sequence
+ * is still being returned or the superclass reports further input.
+ */
+ @Override
+ public boolean hasNext() {
+ return rep != null || super.hasNext();
+ }
+ /**
+ * Creates an iterator that reads its input from {@code internal}.
+ *
+ * @param internal the iterator handed to the superclass constructor
+ */
+ protected NameprepCodepointIterator(
+ CodepointIterator internal) {
+ super(internal);
+ }
+ /**
+ * Returns the next processed code point.
+ *
+ * <p>While a multi-element replacement is pending, its remaining elements
+ * are returned one per call. Otherwise one code point is read through the
+ * superclass, the L / RandAL flags are updated, code points matching
+ * {@link Nameprep#isB1(int)} are skipped, a code point matching
+ * {@link Nameprep#isProhibited(int)} is rejected, and a mapping from
+ * {@link Nameprep#B2(int)} is substituted when one exists.</p>
+ *
+ * @return the processed value wrapped in a new {@code Codepoint}
+ * @throws InvalidCharacterException if a prohibited code point is read
+ * @throws RuntimeException with message "Bidi Exception" when both an L and
+ * a RandAL code point have been read, or when the end-of-output check fails
+ */
+ @Override
+ public Codepoint next() {
+ int r = -1; // -1 is treated throughout as "no code point"
+ if (this.rep == null) { // no replacement pending: read fresh input
+ r = super.next().getValue();
+ if (r != -1) {
+ if (Nameprep.isLCat(r)) haslcat = true;
+ if (Nameprep.isRandAL(r)) {
+ hasrandalcat = true;
+ if (position() == 1) firstisrandalcat = true; // presumably "first code point read" - confirm position() semantics
+ }
+ if (haslcat && hasrandalcat) // L and RandAL code points may not be mixed
+ throw new RuntimeException("Bidi Exception");
+ while(r != -1 && Nameprep.isB1(r)) { // skip code points listed in B1
+ r = super.next().getValue(); // NOTE(review): no hasNext() guard here and no L/RandAL update for this code point - confirm intended
+ }
+ if (r != -1) {
+ if (Nameprep.isProhibited(r))
+ throw new InvalidCharacterException(r);
+ int[] rep = Nameprep.B2(r); // local shadows the field; null means no mapping
+ if (rep != null) {
+ if (rep.length > 1) { // keep the sequence so later calls return its tail
+ this.rep = rep;
+ reppos = 0;
+ }
+ r = rep[0];
+ }
+ }
+ }
+ } else { // replacement pending: return its next element
+ r = rep[++reppos];
+ if (reppos+1 >= rep.length) rep = null; // that was the last element
+ }
+ if ((r == -1 || !hasNext()) && // nothing further will be returned after this call
+ hasrandalcat && // the end check only applies once a RandAL code point was seen
+ (!firstisrandalcat ||
+ !Nameprep.isRandAL((r ==-1)?peek(position()).getValue():r))) { // presumably peek(position()) is the last code point read - confirm
+ throw new RuntimeException("Bidi Exception");
+ }
+ return new Codepoint(r);
+ }
+ /**
+ * Returns the result of the superclass implementation unchanged.
+ */
+ @Override
+ public char[] nextChars() {
+ return super.nextChars();
+ }
+
+ }
+ /**
+ * Code point boundaries consulted by {@link #isB1(int)} through
+ * {@code CharUtils.invset_contains}. Presumably consecutive values form
+ * [start, end) ranges for RFC 3454 table B.1 ("commonly mapped to
+ * nothing") - confirm against CharUtils.
+ * NOTE(review): under that reading U+FE0F is not covered, and the ranges
+ * below U+00AD do not appear in table B.1 - confirm where they come from.
+ */
+ private static final int[] B1 = {
+ 0x0080, 0x0082,
+ 0x0086, 0x0087,
+ 0x0088, 0x0089,
+ 0x008B, 0x008C,
+ 0x008F, 0x0090,
+ 0x00A0, 0x00A1,
+ 0x00AD, 0x00AE,
+ 0x034F, 0x0350,
+ 0x1806, 0x1807,
+ 0x180B, 0x180E,
+ 0x200B, 0x200E,
+ 0x2060, 0x2061,
+ 0xFE00, 0xFE0F,
+ 0xFEFF, 0xFF00
+ };
+ /**
+ * Code point boundaries consulted by {@link #isProhibited(int)} through
+ * {@code CharUtils.invset_contains}. Presumably consecutive values form
+ * [start, end) ranges of prohibited output (RFC 3454 section C tables) -
+ * confirm against CharUtils. Code points whose low 16 bits are 0xFFFE or
+ * 0xFFFF are tested separately in {@link #isProhibited(int)}.
+ */
+ private static final int[] PROHIBITED = {
+ 0x0080, 0x00A1,
+ 0x0340, 0x0342,
+ 0x06DD, 0x06DE,
+ 0x070F, 0x0810,
+ 0x1680, 0x1681,
+ 0x180E, 0x180F,
+ 0x2000, 0x2010,
+ 0x2028, 0x202A,
+ 0x202A, 0x2030,
+ 0x205F, 0x2060,
+ 0x2060, 0x2064,
+ 0x206A, 0x2070,
+ 0x2FF0, 0x2FFC,
+ 0x3000, 0x3001,
+ 0xD800, 0xF900,
+ 0xFDD0, 0xFDF0,
+ 0xFEFF, 0xFF00,
+ 0xFFF9, 0xFFFE,
+ 0x1D173, 0x1D17B,
+ 0xE0001, 0xE0002,
+ 0xE0020, 0xE0080,
+ 0xF0000, 0xFFFFE,
+ 0x100000, 0x10FFFE
+ };
+ /**
+ * Code point boundaries consulted by {@link #isRandAL(int)} through
+ * {@code CharUtils.invset_contains}. Presumably consecutive values form
+ * [start, end) ranges of characters with bidirectional property "R" or
+ * "AL" (RFC 3454 table D.1) - confirm against CharUtils.
+ */
+ private static final int[] RandAL = {
+ 0x05BE,0x05BF,
+ 0x05C0,0x05C1,
+ 0x05C3,0x05C4,
+ 0x05D0,0x05EB,
+ 0x05F0,0x05F5,
+ 0x061B,0x061C,
+ 0x061F,0x0620,
+ 0x0621,0x063B,
+ 0x0640,0x064B,
+ 0x066D,0x0670,
+ 0x0671,0x06D6,
+ 0x06DD,0x06DE,
+ 0x06E5,0x06E7,
+ 0x06FA,0x06FF,
+ 0x0700,0x070E,
+ 0x0710,0x0711,
+ 0x0712,0x072D,
+ 0x0780,0x07A6,
+ 0x07B1,0x07B2,
+ 0x200F,0x2010,
+ 0xFB1D,0xFB1E,
+ 0xFB1F,0xFB29,
+ 0xFB2A,0xFB37,
+ 0xFB38,0xFB3D,
+ 0xFB3E,0xFB3F,
+ 0xFB40,0xFB42,
+ 0xFB43,0xFB45,
+ 0xFB46,0xFBB2,
+ 0xFBD3,0xFD3E,
+ 0xFD50,0xFD90,
+ 0xFD92,0xFDC8,
+ 0xFDF0,0xFDFD,
+ 0xFE70,0xFE75,
+ 0xFE76,0xFEFD
+ };
+
+ private static final int[] notLCat = {
+ 0x0, 0x41,
+ 0x5b, 0x61,
+ 0x7b, 0xaa,
+ 0xab, 0xb5,
+ 0xb6, 0xba,
+ 0xbb, 0xc0,
+ 0xd7, 0xd8,
+ 0xf7, 0xf8,
+ 0x221, 0x222,
+ 0x234, 0x250,
+ 0x2ae, 0x2b0,
+ 0x2b9, 0x2bb,
+ 0x2c2, 0x2d0,
+ 0x2d2, 0x2e0,
+ 0x2e5, 0x2ee,
+ 0x2ef, 0x37a,
+ 0x37b, 0x386,
+ 0x387, 0x388,
+ 0x38b, 0x38c,
+ 0x38d, 0x38e,
+ 0x3a2, 0x3a3,
+ 0x3cf, 0x3d0,
+ 0x3f6, 0x400,
+ 0x483, 0x48a,
+ 0x4cf, 0x4d0,
+ 0x4f6, 0x4f8,
+ 0x4fa, 0x500,
+ 0x510, 0x531,
+ 0x557, 0x559,
+ 0x560, 0x561,
+ 0x588, 0x589,
+ 0x58a, 0x903,
+ 0x904, 0x905,
+ 0x93a, 0x93d,
+ 0x941, 0x949,
+ 0x94d, 0x950,
+ 0x951, 0x958,
+ 0x962, 0x964,
+ 0x971, 0x982,
+ 0x984, 0x985,
+ 0x98d, 0x98f,
+ 0x991, 0x993,
+ 0x9a9, 0x9aa,
+ 0x9b1, 0x9b2,
+ 0x9b3, 0x9b6,
+ 0x9ba, 0x9be,
+ 0x9c1, 0x9c7,
+ 0x9c9, 0x9cb,
+ 0x9cd, 0x9d7,
+ 0x9d8, 0x9dc,
+ 0x9de, 0x9df,
+ 0x9e2, 0x9e6,
+ 0x9f2, 0x9f4,
+ 0x9fb, 0xa05,
+ 0xa0b, 0xa0f,
+ 0xa11, 0xa13,
+ 0xa29, 0xa2a,
+ 0xa31, 0xa32,
+ 0xa34, 0xa35,
+ 0xa37, 0xa38,
+ 0xa3a, 0xa3e,
+ 0xa41, 0xa59,
+ 0xa5d, 0xa5e,
+ 0xa5f, 0xa66,
+ 0xa70, 0xa72,
+ 0xa75, 0xa83,
+ 0xa84, 0xa85,
+ 0xa8c, 0xa8d,
+ 0xa8e, 0xa8f,
+ 0xa92, 0xa93,
+ 0xaa9, 0xaaa,
+ 0xab1, 0xab2,
+ 0xab4, 0xab5,
+ 0xaba, 0xabd,
+ 0xac1, 0xac9,
+ 0xaca, 0xacb,
+ 0xacd, 0xad0,
+ 0xad1, 0xae0,
+ 0xae1, 0xae6,
+ 0xaf0, 0xb02,
+ 0xb04, 0xb05,
+ 0xb0d, 0xb0f,
+ 0xb11, 0xb13,
+ 0xb29, 0xb2a,
+ 0xb31, 0xb32,
+ 0xb34, 0xb36,
+ 0xb3a, 0xb3d,
+ 0xb3f, 0xb40,
+ 0xb41, 0xb47,
+ 0xb49, 0xb4b,
+ 0xb4d, 0xb57,
+ 0xb58, 0xb5c,
+ 0xb5e, 0xb5f,
+ 0xb62, 0xb66,
+ 0xb71, 0xb83,
+ 0xb84, 0xb85,
+ 0xb8b, 0xb8e,
+ 0xb91, 0xb92,
+ 0xb96, 0xb99,
+ 0xb9b, 0xb9c,
+ 0xb9d, 0xb9e,
+ 0xba0, 0xba3,
+ 0xba5, 0xba8,
+ 0xbab, 0xbae,
+ 0xbb6, 0xbb7,
+ 0xbba, 0xbbe,
+ 0xbc0, 0xbc1,
+ 0xbc3, 0xbc6,
+ 0xbc9, 0xbca,
+ 0xbcd, 0xbd7,
+ 0xbd8, 0xbe7,
+ 0xbf3, 0xc01,
+ 0xc04, 0xc05,
+ 0xc0d, 0xc0e,
+ 0xc11, 0xc12,
+ 0xc29, 0xc2a,
+ 0xc34, 0xc35,
+ 0xc3a, 0xc41,
+ 0xc45, 0xc60,
+ 0xc62, 0xc66,
+ 0xc70, 0xc82,
+ 0xc84, 0xc85,
+ 0xc8d, 0xc8e,
+ 0xc91, 0xc92,
+ 0xca9, 0xcaa,
+ 0xcb4, 0xcb5,
+ 0xcba, 0xcbe,
+ 0xcbf, 0xcc0,
+ 0xcc5, 0xcc7,
+ 0xcc9, 0xcca,
+ 0xccc, 0xcd5,
+ 0xcd7, 0xcde,
+ 0xcdf, 0xce0,
+ 0xce2, 0xce6,
+ 0xcf0, 0xd02,
+ 0xd04, 0xd05,
+ 0xd0d, 0xd0e,
+ 0xd11, 0xd12,
+ 0xd29, 0xd2a,
+ 0xd3a, 0xd3e,
+ 0xd41, 0xd46,
+ 0xd49, 0xd4a,
+ 0xd4d, 0xd57,
+ 0xd58, 0xd60,
+ 0xd62, 0xd66,
+ 0xd70, 0xd82,
+ 0xd84, 0xd85,
+ 0xd97, 0xd9a,
+ 0xdb2, 0xdb3,
+ 0xdbc, 0xdbd,
+ 0xdbe, 0xdc0,
+ 0xdc7, 0xdcf,
+ 0xdd2, 0xdd8,
+ 0xde0, 0xdf2,
+ 0xdf5, 0xe01,
+ 0xe31, 0xe32,
+ 0xe34, 0xe40,
+ 0xe47, 0xe4f,
+ 0xe5c, 0xe81,
+ 0xe83, 0xe84,
+ 0xe85, 0xe87,
+ 0xe89, 0xe8a,
+ 0xe8b, 0xe8d,
+ 0xe8e, 0xe94,
+ 0xe98, 0xe99,
+ 0xea0, 0xea1,
+ 0xea4, 0xea5,
+ 0xea6, 0xea7,
+ 0xea8, 0xeaa,
+ 0xeac, 0xead,
+ 0xeb1, 0xeb2,
+ 0xeb4, 0xebd,
+ 0xebe, 0xec0,
+ 0xec5, 0xec6,
+ 0xec7, 0xed0,
+ 0xeda, 0xedc,
+ 0xede, 0xf00,
+ 0xf18, 0xf1a,
+ 0xf35, 0xf36,
+ 0xf37, 0xf38,
+ 0xf39, 0xf3e,
+ 0xf48, 0xf49,
+ 0xf6b, 0xf7f,
+ 0xf80, 0xf85,
+ 0xf86, 0xf88,
+ 0xf8c, 0xfbe,
+ 0xfc6, 0xfc7,
+ 0xfcd, 0xfcf,
+ 0xfd0, 0x1000,
+ 0x1022, 0x1023,
+ 0x1028, 0x1029,
+ 0x102b, 0x102c,
+ 0x102d, 0x1031,
+ 0x1032, 0x1038,
+ 0x1039, 0x1040,
+ 0x1058, 0x10a0,
+ 0x10c6, 0x10d0,
+ 0x10f9, 0x10fb,
+ 0x10fc, 0x1100,
+ 0x115a, 0x115f,
+ 0x11a3, 0x11a8,
+ 0x11fa, 0x1200,
+ 0x1207, 0x1208,
+ 0x1247, 0x1248,
+ 0x1249, 0x124a,
+ 0x124e, 0x1250,
+ 0x1257, 0x1258,
+ 0x1259, 0x125a,
+ 0x125e, 0x1260,
+ 0x1287, 0x1288,
+ 0x1289, 0x128a,
+ 0x128e, 0x1290,
+ 0x12af, 0x12b0,
+ 0x12b1, 0x12b2,
+ 0x12b6, 0x12b8,
+ 0x12bf, 0x12c0,
+ 0x12c1, 0x12c2,
+ 0x12c6, 0x12c8,
+ 0x12cf, 0x12d0,
+ 0x12d7, 0x12d8,
+ 0x12ef, 0x12f0,
+ 0x130f, 0x1310,
+ 0x1311, 0x1312,
+ 0x1316, 0x1318,
+ 0x131f, 0x1320,
+ 0x1347, 0x1348,
+ 0x135b, 0x1361,
+ 0x137d, 0x13a0,
+ 0x13f5, 0x1401,
+ 0x1677, 0x1681,
+ 0x169b, 0x16a0,
+ 0x16f1, 0x1700,
+ 0x170d, 0x170e,
+ 0x1712, 0x1720,
+ 0x1732, 0x1735,
+ 0x1737, 0x1740,
+ 0x1752, 0x1760,
+ 0x176d, 0x176e,
+ 0x1771, 0x1780,
+ 0x17b7, 0x17be,
+ 0x17c6, 0x17c7,
+ 0x17c9, 0x17d4,
+ 0x17db, 0x17dc,
+ 0x17dd, 0x17e0,
+ 0x17ea, 0x1810,
+ 0x181a, 0x1820,
+ 0x1878, 0x1880,
+ 0x18a9, 0x1e00,
+ 0x1e9c, 0x1ea0,
+ 0x1efa, 0x1f00,
+ 0x1f16, 0x1f18,
+ 0x1f1e, 0x1f20,
+ 0x1f46, 0x1f48,
+ 0x1f4e, 0x1f50,
+ 0x1f58, 0x1f59,
+ 0x1f5a, 0x1f5b,
+ 0x1f5c, 0x1f5d,
+ 0x1f5e, 0x1f5f,
+ 0x1f7e, 0x1f80,
+ 0x1fb5, 0x1fb6,
+ 0x1fbd, 0x1fbe,
+ 0x1fbf, 0x1fc2,
+ 0x1fc5, 0x1fc6,
+ 0x1fcd, 0x1fd0,
+ 0x1fd4, 0x1fd6,
+ 0x1fdc, 0x1fe0,
+ 0x1fed, 0x1ff2,
+ 0x1ff5, 0x1ff6,
+ 0x1ffd, 0x200e,
+ 0x200f, 0x2071,
+ 0x2072, 0x207f,
+ 0x2080, 0x2102,
+ 0x2103, 0x2107,
+ 0x2108, 0x210a,
+ 0x2114, 0x2115,
+ 0x2116, 0x2119,
+ 0x211e, 0x2124,
+ 0x2125, 0x2126,
+ 0x2127, 0x2128,
+ 0x2129, 0x212a,
+ 0x212e, 0x212f,
+ 0x2132, 0x2133,
+ 0x213a, 0x213d,
+ 0x2140, 0x2145,
+ 0x214a, 0x2160,
+ 0x2184, 0x2336,
+ 0x237b, 0x2395,
+ 0x2396, 0x249c,
+ 0x24ea, 0x3005,
+ 0x3008, 0x3021,
+ 0x302a, 0x3031,
+ 0x3036, 0x3038,
+ 0x303d, 0x3041,
+ 0x3097, 0x309d,
+ 0x30a0, 0x30a1,
+ 0x30fb, 0x30fc,
+ 0x3100, 0x3105,
+ 0x312d, 0x3131,
+ 0x318f, 0x3190,
+ 0x31b8, 0x31f0,
+ 0x321d, 0x3220,
+ 0x3244, 0x3260,
+ 0x327c, 0x327f,
+ 0x32b1, 0x32c0,
+ 0x32cc, 0x32d0,
+ 0x32ff, 0x3300,
+ 0x3377, 0x337b,
+ 0x33de, 0x33e0,
+ 0x33ff, 0x3400,
+ 0x4db6, 0x4e00,
+ 0x9fa6, 0xa000,
+ 0xa48d, 0xac00,
+ 0xd7a4, 0xd800,
+ 0xfa2e, 0xfa30,
+ 0xfa6b, 0xfb00,
+ 0xfb07, 0xfb13,
+ 0xfb18, 0xff21,
+ 0xff3b, 0xff41,
+ 0xff5b, 0xff66,
+ 0xffbf, 0xffc2,
+ 0xffc8, 0xffca,
+ 0xffd0, 0xffd2,
+ 0xffd8, 0xffda,
+ 0xffdd, 0x10300,
+ 0x1031f, 0x10320,
+ 0x10324, 0x10330,
+ 0x1034b, 0x10400,
+ 0x10426, 0x10428,
+ 0x1044e, 0x1d000,
+ 0x1d0f6, 0x1d100,
+ 0x1d127, 0x1d12a,
+ 0x1d167, 0x1d16a,
+ 0x1d173, 0x1d183,
+ 0x1d185, 0x1d18c,
+ 0x1d1aa, 0x1d1ae,
+ 0x1d1de, 0x1d400,
+ 0x1d455, 0x1d456,
+ 0x1d49d, 0x1d49e,
+ 0x1d4a0, 0x1d4a2,
+ 0x1d4a3, 0x1d4a5,
+ 0x1d4a7, 0x1d4a9,
+ 0x1d4ad, 0x1d4ae,
+ 0x1d4ba, 0x1d4bb,
+ 0x1d4bc, 0x1d4bd,
+ 0x1d4c1, 0x1d4c2,
+ 0x1d4c4, 0x1d4c5,
+ 0x1d506, 0x1d507,
+ 0x1d50b, 0x1d50d,
+ 0x1d515, 0x1d516,
+ 0x1d51d, 0x1d51e,
+ 0x1d53a, 0x1d53b,
+ 0x1d53f, 0x1d540,
+ 0x1d545, 0x1d546,
+ 0x1d547, 0x1d54a,
+ 0x1d551, 0x1d552,
+ 0x1d6a4, 0x1d6a8,
+ 0x1d7ca, 0x20000,
+ 0x2a6d7, 0x2f800,
+ 0x2fa1e, 0xf0000,
+ 0xffffe, 0x100000,
+ 0x10fffe
+ };
+
+ public static final int[] b2index = {
+ 65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,
+ 85,86,87,88,89,90,181,192,193,194,195,196,197,198,199,200,201,202,203,204,
+ 205,206,207,208,209,210,211,212,213,214,216,217,218,219,220,221,222,223,256,258,
+ 260,262,264,266,268,270,272,274,276,278,280,282,284,286,288,290,292,294,296,298,
+ 300,302,304,306,308,310,313,315,317,319,321,323,325,327,329,330,332,334,336,338,
+ 340,342,344,346,348,350,352,354,356,358,360,362,364,366,368,370,372,374,376,377,
+ 379,381,383,385,386,388,390,391,393,394,395,398,399,400,401,403,404,406,407,408,
+ 412,413,415,416,418,420,422,423,425,428,430,431,433,434,435,437,439,440,444,452,
+ 453,455,456,458,459,461,463,465,467,469,471,473,475,478,480,482,484,486,488,490,
+ 492,494,496,497,498,500,502,503,504,506,508,510,512,514,516,518,520,522,524,526,
+ 528,530,532,534,536,538,540,542,544,546,548,550,552,554,556,558,560,562,837,890,
+ 902,904,905,906,908,910,911,912,913,914,915,916,917,918,919,920,921,922,923,924,
+ 925,926,927,928,929,931,932,933,934,935,936,937,938,939,944,962,976,977,978,979,
+ 980,981,982,984,986,988,990,992,994,996,998,1000,1002,1004,1006,1008,1009,1010,1012,1013,
+ 1024,1025,1026,1027,1028,1029,1030,1031,1032,1033,1034,1035,1036,1037,1038,1039,1040,1041,1042,1043,
+ 1044,1045,1046,1047,1048,1049,1050,1051,1052,1053,1054,1055,1056,1057,1058,1059,1060,1061,1062,1063,
+ 1064,1065,1066,1067,1068,1069,1070,1071,1120,1122,1124,1126,1128,1130,1132,1134,1136,1138,1140,1142,
+ 1144,1146,1148,1150,1152,1162,1164,1166,1168,1170,1172,1174,1176,1178,1180,1182,1184,1186,1188,1190,
+ 1192,1194,1196,1198,1200,1202,1204,1206,1208,1210,1212,1214,1217,1219,1221,1223,1225,1227,1229,1232,
+ 1234,1236,1238,1240,1242,1244,1246,1248,1250,1252,1254,1256,1258,1260,1262,1264,1266,1268,1272,1280,
+ 1282,1284,1286,1288,1290,1292,1294,1329,1330,1331,1332,1333,1334,1335,1336,1337,1338,1339,1340,1341,
+ 1342,1343,1344,1345,1346,1347,1348,1349,1350,1351,1352,1353,1354,1355,1356,1357,1358,1359,1360,1361,
+ 1362,1363,1364,1365,1366,1415,7680,7682,7684,7686,7688,7690,7692,7694,7696,7698,7700,7702,7704,7706,
+ 7708,7710,7712,7714,7716,7718,7720,7722,7724,7726,7728,7730,7732,7734,7736,7738,7740,7742,7744,7746,
+ 7748,7750,7752,7754,7756,7758,7760,7762,7764,7766,7768,7770,7772,7774,7776,7778,7780,7782,7784,7786,
+ 7788,7790,7792,7794,7796,7798,7800,7802,7804,7806,7808,7810,7812,7814,7816,7818,7820,7822,7824,7826,
+ 7828,7830,7831,7832,7833,7834,7835,7840,7842,7844,7846,7848,7850,7852,7854,7856,7858,7860,7862,7864,
+ 7866,7868,7870,7872,7874,7876,7878,7880,7882,7884,7886,7888,7890,7892,7894,7896,7898,7900,7902,7904,
+ 7906,7908,7910,7912,7914,7916,7918,7920,7922,7924,7926,7928,7944,7945,7946,7947,7948,7949,7950,7951,
+ 7960,7961,7962,7963,7964,7965,7976,7977,7978,7979,7980,7981,7982,7983,7992,7993,7994,7995,7996,7997,
+ 7998,7999,8008,8009,8010,8011,8012,8013,8016,8018,8020,8022,8025,8027,8029,8031,8040,8041,8042,8043,
+ 8044,8045,8046,8047,8064,8065,8066,8067,8068,8069,8070,8071,8072,8073,8074,8075,8076,8077,8078,8079,
+ 8080,8081,8082,8083,8084,8085,8086,8087,8088,8089,8090,8091,8092,8093,8094,8095,8096,8097,8098,8099,
+ 8100,8101,8102,8103,8104,8105,8106,8107,8108,8109,8110,8111,8114,8115,8116,8118,8119,8120,8121,8122,
+ 8123,8124,8126,8130,8131,8132,8134,8135,8136,8137,8138,8139,8140,8146,8147,8150,8151,8152,8153,8154,
+ 8155,8162,8163,8164,8166,8167,8168,8169,8170,8171,8172,8178,8179,8180,8182,8183,8184,8185,8186,8187,
+ 8188,8360,8450,8451,8455,8457,8459,8460,8461,8464,8465,8466,8469,8470,8473,8474,8475,8476,8477,8480,
+ 8481,8482,8484,8486,8488,8490,8491,8492,8493,8496,8497,8499,8510,8511,8517,8544,8545,8546,8547,8548,
+ 8549,8550,8551,8552,8553,8554,8555,8556,8557,8558,8559,9398,9399,9400,9401,9402,9403,9404,9405,9406,
+ 9407,9408,9409,9410,9411,9412,9413,9414,9415,9416,9417,9418,9419,9420,9421,9422,9423,13169,13171,13173,
+ 13184,13185,13186,13187,13188,13189,13190,13191,13194,13195,13196,13200,13201,13202,13203,13204,13225,13226,13227,13228,
+ 13236,13237,13238,13239,13240,13241,13242,13243,13244,13245,13246,13247,13248,13249,13251,13254,13255,13256,13257,13259,
+ 13261,13262,13271,13273,13274,13276,13277,64256,64257,64258,64259,64260,64261,64262,64275,64276,64277,64278,64279,65313,
+ 65314,65315,65316,65317,65318,65319,65320,65321,65322,65323,65324,65325,65326,65327,65328,65329,65330,65331,65332,65333,
+ 65334,65335,65336,65337,65338,66560,66561,66562,66563,66564,66565,66566,66567,66568,66569,66570,66571,66572,66573,66574,
+ 66575,66576,66577,66578,66579,66580,66581,66582,66583,66584,66585,66586,66587,66588,66589,66590,66591,66592,66593,66594,
+ 66595,66596,66597,119808,119809,119810,119811,119812,119813,119814,119815,119816,119817,119818,119819,119820,119821,119822,119823,119824,
+ 119825,119826,119827,119828,119829,119830,119831,119832,119833,119860,119861,119862,119863,119864,119865,119866,119867,119868,119869,119870,
+ 119871,119872,119873,119874,119875,119876,119877,119878,119879,119880,119881,119882,119883,119884,119885,119912,119913,119914,119915,119916,
+ 119917,119918,119919,119920,119921,119922,119923,119924,119925,119926,119927,119928,119929,119930,119931,119932,119933,119934,119935,119936,
+ 119937,119964,119966,119967,119970,119973,119974,119977,119978,119979,119980,119982,119983,119984,119985,119986,119987,119988,119989,120016,
+ 120017,120018,120019,120020,120021,120022,120023,120024,120025,120026,120027,120028,120029,120030,120031,120032,120033,120034,120035,120036,
+ 120037,120038,120039,120040,120041,120068,120069,120071,120072,120073,120074,120077,120078,120079,120080,120081,120082,120083,120084,120086,
+ 120087,120088,120089,120090,120091,120092,120120,120121,120123,120124,120125,120126,120128,120129,120130,120131,120132,120134,120138,120139,
+ 120140,120141,120142,120143,120144,120172,120173,120174,120175,120176,120177,120178,120179,120180,120181,120182,120183,120184,120185,120186,
+ 120187,120188,120189,120190,120191,120192,120193,120194,120195,120196,120197,120224,120225,120226,120227,120228,120229,120230,120231,120232,
+ 120233,120234,120235,120236,120237,120238,120239,120240,120241,120242,120243,120244,120245,120246,120247,120248,120249,120276,120277,120278,
+ 120279,120280,120281,120282,120283,120284,120285,120286,120287,120288,120289,120290,120291,120292,120293,120294,120295,120296,120297,120298,
+ 120299,120300,120301,120328,120329,120330,120331,120332,120333,120334,120335,120336,120337,120338,120339,120340,120341,120342,120343,120344,
+ 120345,120346,120347,120348,120349,120350,120351,120352,120353,120380,120381,120382,120383,120384,120385,120386,120387,120388,120389,120390,
+ 120391,120392,120393,120394,120395,120396,120397,120398,120399,120400,120401,120402,120403,120404,120405,120432,120433,120434,120435,120436,
+ 120437,120438,120439,120440,120441,120442,120443,120444,120445,120446,120447,120448,120449,120450,120451,120452,120453,120454,120455,120456,
+ 120457,120488,120489,120490,120491,120492,120493,120494,120495,120496,120497,120498,120499,120500,120501,120502,120503,120504,120505,120506,
+ 120507,120508,120509,120510,120511,120512,120531,120546,120547,120548,120549,120550,120551,120552,120553,120554,120555,120556,120557,120558,
+ 120559,120560,120561,120562,120563,120564,120565,120566,120567,120568,120569,120570,120589,120604,120605,120606,120607,120608,120609,120610,
+ 120611,120612,120613,120614,120615,120616,120617,120618,120619,120620,120621,120622,120623,120624,120625,120626,120627,120628,120647,120662,
+ 120663,120664,120665,120666,120667,120668,120669,120670,120671,120672,120673,120674,120675,120676,120677,120678,120679,120680,120681,120682,
+ 120683,120684,120685,120686,120705,120720,120721,120722,120723,120724,120725,120726,120727,120728,120729,120730,120731,120732,120733,120734,
+ 120735,120736,120737,120738,120739,120740,120741,120742,120743,120744,120763};
+
+ public static final int[][] b2data = {
+ {97},{98},{99},{100},{101},{102},{103},{104},{105},{106},{107},{108},{109},{110},{111},{112},{113},{114},{115},{116},
+ {117},{118},{119},{120},{121},{122},{956},{224},{225},{226},{227},{228},{229},{230},{231},{232},{233},{234},{235},{236},
+ {237},{238},{239},{240},{241},{242},{243},{244},{245},{246},{248},{249},{250},{251},{252},{253},{254},{115,115},{257},{259},
+ {261},{263},{265},{267},{269},{271},{273},{275},{277},{279},{281},{283},{285},{287},{289},{291},{293},{295},{297},{299},
+ {301},{303},{105,775},{307},{309},{311},{314},{316},{318},{320},{322},{324},{326},{328},{700,110},{331},{333},{335},{337},{339},
+ {341},{343},{345},{347},{349},{351},{353},{355},{357},{359},{361},{363},{365},{367},{369},{371},{373},{375},{255},{378},
+ {380},{382},{115},{595},{387},{389},{596},{392},{598},{599},{396},{477},{601},{603},{402},{608},{611},{617},{616},{409},
+ {623},{626},{629},{417},{419},{421},{640},{424},{643},{429},{648},{432},{650},{651},{436},{438},{658},{441},{445},{454},
+ {454},{457},{457},{460},{460},{462},{464},{466},{468},{470},{472},{474},{476},{479},{481},{483},{485},{487},{489},{491},
+ {493},{495},{106,780},{499},{499},{501},{405},{447},{505},{507},{509},{511},{513},{515},{517},{519},{521},{523},{525},{527},
+ {529},{531},{533},{535},{537},{539},{541},{543},{414},{547},{549},{551},{553},{555},{557},{559},{561},{563},{953},{32,953},
+ {940},{941},{942},{943},{972},{973},{974},{953,776,769},{945},{946},{947},{948},{949},{950},{951},{952},{953},{954},{955},{956},
+ {957},{958},{959},{960},{961},{963},{964},{965},{966},{967},{968},{969},{970},{971},{965,776,769},{963},{946},{952},{965},{973},
+ {971},{966},{960},{985},{987},{989},{991},{993},{995},{997},{999},{1001},{1003},{1005},{1007},{954},{961},{963},{952},{949},
+ {1104},{1105},{1106},{1107},{1108},{1109},{1110},{1111},{1112},{1113},{1114},{1115},{1116},{1117},{1118},{1119},{1072},{1073},{1074},{1075},
+ {1076},{1077},{1078},{1079},{1080},{1081},{1082},{1083},{1084},{1085},{1086},{1087},{1088},{1089},{1090},{1091},{1092},{1093},{1094},{1095},
+ {1096},{1097},{1098},{1099},{1100},{1101},{1102},{1103},{1121},{1123},{1125},{1127},{1129},{1131},{1133},{1135},{1137},{1139},{1141},{1143},
+ {1145},{1147},{1149},{1151},{1153},{1163},{1165},{1167},{1169},{1171},{1173},{1175},{1177},{1179},{1181},{1183},{1185},{1187},{1189},{1191},
+ {1193},{1195},{1197},{1199},{1201},{1203},{1205},{1207},{1209},{1211},{1213},{1215},{1218},{1220},{1222},{1224},{1226},{1228},{1230},{1233},
+ {1235},{1237},{1239},{1241},{1243},{1245},{1247},{1249},{1251},{1253},{1255},{1257},{1259},{1261},{1263},{1265},{1267},{1269},{1273},{1281},
+ {1283},{1285},{1287},{1289},{1291},{1293},{1295},{1377},{1378},{1379},{1380},{1381},{1382},{1383},{1384},{1385},{1386},{1387},{1388},{1389},
+ {1390},{1391},{1392},{1393},{1394},{1395},{1396},{1397},{1398},{1399},{1400},{1401},{1402},{1403},{1404},{1405},{1406},{1407},{1408},{1409},
+ {1410},{1411},{1412},{1413},{1414},{1381,1410},{7681},{7683},{7685},{7687},{7689},{7691},{7693},{7695},{7697},{7699},{7701},{7703},{7705},{7707},
+ {7709},{7711},{7713},{7715},{7717},{7719},{7721},{7723},{7725},{7727},{7729},{7731},{7733},{7735},{7737},{7739},{7741},{7743},{7745},{7747},
+ {7749},{7751},{7753},{7755},{7757},{7759},{7761},{7763},{7765},{7767},{7769},{7771},{7773},{7775},{7777},{7779},{7781},{7783},{7785},{7787},
+ {7789},{7791},{7793},{7795},{7797},{7799},{7801},{7803},{7805},{7807},{7809},{7811},{7813},{7815},{7817},{7819},{7821},{7823},{7825},{7827},
+ {7829},{104,817},{116,776},{119,778},{121,778},{97,702},{7777},{7841},{7843},{7845},{7847},{7849},{7851},{7853},{7855},{7857},{7859},{7861},{7863},{7865},
+ {7867},{7869},{7871},{7873},{7875},{7877},{7879},{7881},{7883},{7885},{7887},{7889},{7891},{7893},{7895},{7897},{7899},{7901},{7903},{7905},
+ {7907},{7909},{7911},{7913},{7915},{7917},{7919},{7921},{7923},{7925},{7927},{7929},{7936},{7937},{7938},{7939},{7940},{7941},{7942},{7943},
+ {7952},{7953},{7954},{7955},{7956},{7957},{7968},{7969},{7970},{7971},{7972},{7973},{7974},{7975},{7984},{7985},{7986},{7987},{7988},{7989},
+ {7990},{7991},{8000},{8001},{8002},{8003},{8004},{8005},{965,787},{965,787,768},{965,787,769},{965,787,834},{8017},{8019},{8021},{8023},{8032},{8033},{8034},{8035},
+ {8036},{8037},{8038},{8039},{7936,953},{7937,953},{7938,953},{7939,953},{7940,953},{7941,953},{7942,953},{7943,953},{7936,953},{7937,953},{7938,953},{7939,953},{7940,953},{7941,953},{7942,953},{7943,953},
+ {7968,953},{7969,953},{7970,953},{7971,953},{7972,953},{7973,953},{7974,953},{7975,953},{7968,953},{7969,953},{7970,953},{7971,953},{7972,953},{7973,953},{7974,953},{7975,953},{8032,953},{8033,953},{8034,953},{8035,953},
+ {8036,953},{8037,953},{8038,953},{8039,953},{8032,953},{8033,953},{8034,953},{8035,953},{8036,953},{8037,953},{8038,953},{8039,953},{8048,953},{945,953},{940,953},{945,834},{945,834,953},{8112},{8113},{8048},
+ {8049},{945,953},{953},{8052,953},{951,953},{942,953},{951,834},{951,834,953},{8050},{8051},{8052},{8053},{951,953},{953,776,768},{953,776,769},{953,834},{953,776,834},{8144},{8145},{8054},
+ {8055},{965,776,768},{965,776,769},{961,787},{965,834},{965,776,834},{8160},{8161},{8058},{8059},{8165},{8060,953},{969,953},{974,953},{969,834},{969,834,953},{8056},{8057},{8060},{8061},
+ {969,953},{114,115},{99},{176,99},{603},{176,102},{104},{104},{104},{105},{105},{108},{110},{110,111},{112},{113},{114},{114},{114},{115,109},
+ {116,101,108},{116,109},{122},{969},{122},{107},{229},{98},{99},{101},{102},{109},{947},{960},{100},{8560},{8561},{8562},{8563},{8564},
+ {8565},{8566},{8567},{8568},{8569},{8570},{8571},{8572},{8573},{8574},{8575},{9424},{9425},{9426},{9427},{9428},{9429},{9430},{9431},{9432},
+ {9433},{9434},{9435},{9436},{9437},{9438},{9439},{9440},{9441},{9442},{9443},{9444},{9445},{9446},{9447},{9448},{9449},{104,112,97},{97,117},{111,118},
+ {112,97},{110,97},{956,97},{109,97},{107,97},{107,98},{109,98},{103,98},{112,102},{110,102},{956,102},{104,122},{107,104,122},{109,104,122},{103,104,122},{116,104,122},{112,97},{107,112,97},{109,112,97},{103,112,97},
+ {112,118},{110,118},{956,118},{109,118},{107,118},{109,118},{112,119},{110,119},{956,119},{109,119},{107,119},{109,119},{107,969},{109,969},{98,113},{99,8725,107,103},{99,111,46},{100,98},{103,121},{104,112},
+ {107,107},{107,109},{112,104},{112,112,109},{112,114},{115,118},{119,98},{102,102},{102,105},{102,108},{102,102,105},{102,102,108},{115,116},{115,116},{1396,1398},{1396,1381},{1396,1387},{1406,1398},{1396,1389},{65345},
+ {65346},{65347},{65348},{65349},{65350},{65351},{65352},{65353},{65354},{65355},{65356},{65357},{65358},{65359},{65360},{65361},{65362},{65363},{65364},{65365},
+ {65366},{65367},{65368},{65369},{65370},{66600},{66601},{66602},{66603},{66604},{66605},{66606},{66607},{66608},{66609},{66610},{66611},{66612},{66613},{66614},
+ {66615},{66616},{66617},{66618},{66619},{66620},{66621},{66622},{66623},{66624},{66625},{66626},{66627},{66628},{66629},{66630},{66631},{66632},{66633},{66634},
+ {66635},{66636},{66637},{97},{98},{99},{100},{101},{102},{103},{104},{105},{106},{107},{108},{109},{110},{111},{112},{113},
+ {114},{115},{116},{117},{118},{119},{120},{121},{122},{97},{98},{99},{100},{101},{102},{103},{104},{105},{106},{107},
+ {108},{109},{110},{111},{112},{113},{114},{115},{116},{117},{118},{119},{120},{121},{122},{97},{98},{99},{100},{101},
+ {102},{103},{104},{105},{106},{107},{108},{109},{110},{111},{112},{113},{114},{115},{116},{117},{118},{119},{120},{121},
+ {122},{97},{99},{100},{103},{106},{107},{110},{111},{112},{113},{115},{116},{117},{118},{119},{120},{121},{122},{97},
+ {98},{99},{100},{101},{102},{103},{104},{105},{106},{107},{108},{109},{110},{111},{112},{113},{114},{115},{116},{117},
+ {118},{119},{120},{121},{122},{97},{98},{100},{101},{102},{103},{106},{107},{108},{109},{110},{111},{112},{113},{115},
+ {116},{117},{118},{119},{120},{121},{97},{98},{100},{101},{102},{103},{105},{106},{107},{108},{109},{111},{115},{116},
+ {117},{118},{119},{120},{121},{97},{98},{99},{100},{101},{102},{103},{104},{105},{106},{107},{108},{109},{110},{111},
+ {112},{113},{114},{115},{116},{117},{118},{119},{120},{121},{122},{97},{98},{99},{100},{101},{102},{103},{104},{105},
+ {106},{107},{108},{109},{110},{111},{112},{113},{114},{115},{116},{117},{118},{119},{120},{121},{122},{97},{98},{99},
+ {100},{101},{102},{103},{104},{105},{106},{107},{108},{109},{110},{111},{112},{113},{114},{115},{116},{117},{118},{119},
+ {120},{121},{122},{97},{98},{99},{100},{101},{102},{103},{104},{105},{106},{107},{108},{109},{110},{111},{112},{113},
+ {114},{115},{116},{117},{118},{119},{120},{121},{122},{97},{98},{99},{100},{101},{102},{103},{104},{105},{106},{107},
+ {108},{109},{110},{111},{112},{113},{114},{115},{116},{117},{118},{119},{120},{121},{122},{97},{98},{99},{100},{101},
+ {102},{103},{104},{105},{106},{107},{108},{109},{110},{111},{112},{113},{114},{115},{116},{117},{118},{119},{120},{121},
+ {122},{945},{946},{947},{948},{949},{950},{951},{952},{953},{954},{955},{956},{957},{958},{959},{960},{961},{952},{963},
+ {964},{965},{966},{967},{968},{969},{963},{945},{946},{947},{948},{949},{950},{951},{952},{953},{954},{955},{956},{957},
+ {958},{959},{960},{961},{952},{963},{964},{965},{966},{967},{968},{969},{963},{945},{946},{947},{948},{949},{950},{951},
+ {952},{953},{954},{955},{956},{957},{958},{959},{960},{961},{952},{963},{964},{965},{966},{967},{968},{969},{963},{945},
+ {946},{947},{948},{949},{950},{951},{952},{953},{954},{955},{956},{957},{958},{959},{960},{961},{952},{963},{964},{965},
+ {966},{967},{968},{969},{963},{945},{946},{947},{948},{949},{950},{951},{952},{953},{954},{955},{956},{957},{958},{959},
+ {960},{961},{952},{963},{964},{965},{966},{967},{968},{969},{963}};
+
+
+  /**
+   * Looks up the replacement sequence for a code point.
+   *
+   * @param c the code point to look up
+   * @return the entry of {@code b2data} at the index reported by
+   *         {@code CharUtils.get_index(b2index, c)}, or {@code null} when
+   *         that index is negative; the array returned is the table entry
+   *         itself, not a copy
+   */
+  public static final int[] B2(int c) {
+    final int slot = CharUtils.get_index(b2index, c);
+    if (slot < 0) {
+      return null;
+    }
+    return b2data[slot];
+  }
+
+  /**
+   * Tests a code point against the {@code B1} table.
+   *
+   * @param c the code point to test
+   * @return the result of {@code CharUtils.invset_contains(B1, c)}
+   */
+  public static boolean isB1(int c) {
+    final boolean listed = CharUtils.invset_contains(B1, c);
+    return listed;
+  }
+
+  /**
+   * Tests whether a code point is prohibited.
+   *
+   * @param c the code point to test
+   * @return {@code true} when the low 16 bits of {@code c} are 0xFFFE or
+   *         0xFFFF, or when {@code CharUtils.invset_contains(PROHIBITED, c)}
+   *         reports a match
+   */
+  public static boolean isProhibited(int c) {
+    // Masking with 0xFFFE ignores the lowest bit, so this one comparison
+    // matches both ...FFFE and ...FFFF.
+    final boolean endsInFffeOrFfff = (c & 0xFFFE) == 0xFFFE;
+    return endsInFffeOrFfff || CharUtils.invset_contains(PROHIBITED, c);
+  }
+
+
+  /**
+   * Tests a code point against the {@code RandAL} table.
+   *
+   * @param c the code point to test
+   * @return the result of {@code CharUtils.invset_contains(RandAL, c)}
+   */
+  public static boolean isRandAL(int c) {
+    final boolean listed = CharUtils.invset_contains(RandAL, c);
+    return listed;
+  }
+
+  /**
+   * Tests a code point against the {@code notLCat} table. The table lists
+   * the code points that are excluded, so the lookup result is inverted.
+   *
+   * @param c the code point to test
+   * @return {@code true} when {@code CharUtils.invset_contains(notLCat, c)}
+   *         reports no match
+   */
+  public static boolean isLCat(int c) {
+    final boolean excluded = CharUtils.invset_contains(notLCat, c);
+    return !excluded;
+  }
+}
Added: incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/text/Normalizer.java
URL: http://svn.apache.org/viewvc/incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/text/Normalizer.java?rev=607801&view=auto
==============================================================================
--- incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/text/Normalizer.java (added)
+++ incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/text/Normalizer.java Mon Dec 31 20:59:44 2007
@@ -0,0 +1,171 @@
+/*
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements. The ASF licenses this file to You
+* under the Apache License, Version 2.0 (the "License"); you may not
+* use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License. For additional information regarding
+* copyright in this work, please see the NOTICE file in the top level
+* directory of this distribution.
+*/
+package org.apache.abdera.i18n.text;
+
+import java.io.IOException;
+
+import org.apache.abdera.i18n.text.data.UnicodeCharacterDatabase;
+
+
+/**
+ * Performs Unicode Normalization (Form D, C, KD and KC).
+ */
+public final class Normalizer {
+
+  /** Feature bits that distinguish the normalization forms from each other. */
+  private enum Mask {
+    NONE(0),
+    COMPATIBILITY(1),
+    COMPOSITION(2);
+
+    // Explicit bit value, so the flags do not depend on declaration order.
+    private final int bit;
+
+    Mask(int bit) {
+      this.bit = bit;
+    }
+  }
+
+  /** The normalization forms supported by {@link Normalizer#normalize}. */
+  public enum Form {
+    D,
+    C(Mask.COMPOSITION),
+    KD(Mask.COMPATIBILITY),
+    KC(Mask.COMPATIBILITY, Mask.COMPOSITION);
+
+    // Bitwise OR of the Mask bits passed to the constructor.
+    private final int mask;
+
+    Form(Mask... masks) {
+      int bits = 0;
+      for (Mask m : masks) {
+        bits |= m.bit;
+      }
+      this.mask = bits;
+    }
+
+    /** @return true for the forms built with the COMPATIBILITY bit (KD, KC) */
+    public boolean isCompatibility() {
+      return (mask & Mask.COMPATIBILITY.bit) != 0;
+    }
+
+    /** @return true for the forms built without the COMPATIBILITY bit (D, C) */
+    public boolean isCanonical() {
+      return !isCompatibility();
+    }
+
+    /** @return true for the forms built with the COMPOSITION bit (C, KC) */
+    public boolean isComposition() {
+      return (mask & Mask.COMPOSITION.bit) != 0;
+    }
+  }
+
+  private Normalizer() {}
+
+  /**
+   * Normalize the string using NFKC
+   */
+  public static String normalize(CharSequence source) {
+    return normalize(source, Form.KC);
+  }
+
+  /**
+   * Normalize the string using the specified Form
+   */
+  public static String normalize(
+    CharSequence source,
+    Form form) {
+    return normalize(source, form, new StringBuilder());
+  }
+
+  /**
+   * Normalize the string into the given StringBuilder using the given Form.
+   *
+   * The builder is used as the working buffer: reordering scans backwards from
+   * its end and composition starts at its index 0, so any content already in
+   * it takes part in both steps. Pass an empty builder for a plain result.
+   *
+   * @param source the text to normalize; an empty source leaves buf untouched
+   * @param form the normalization form to apply
+   * @param buf the working/output buffer
+   * @return the content of buf after normalization
+   * @throws RuntimeException wrapping any IOException raised while
+   *         decomposing or composing
+   */
+  public static String normalize(
+    CharSequence source,
+    Form form,
+    StringBuilder buf) {
+    if (source.length() != 0) {
+      try {
+        decompose(source, form, buf);
+        compose(form, buf);
+      } catch (IOException e) {
+        throw new RuntimeException(e);
+      }
+    }
+    return buf.toString();
+  }
+
+  /**
+   * Decomposes every code point of source and inserts the resulting code
+   * points into buf at the position chosen by findInsertionPoint.
+   */
+  private static void decompose(
+    CharSequence source,
+    Form form,
+    StringBuilder buf)
+      throws IOException {
+    StringBuilder internal = new StringBuilder();
+    CodepointIterator ci = CodepointIterator.forCharSequence(source);
+    boolean canonical = form.isCanonical();
+    while (ci.hasNext()) {
+      Codepoint c = ci.next();
+      // Scratch buffer is reused for each source code point.
+      internal.setLength(0);
+      UnicodeCharacterDatabase.decompose(c.getValue(), canonical, internal);
+      CodepointIterator ii = CodepointIterator.forCharSequence(internal);
+      while (ii.hasNext()) {
+        Codepoint ch = ii.next();
+        int i = findInsertionPoint(buf, ch.getValue());
+        buf.insert(i, CharUtils.toString(ch.getValue()));
+      }
+    }
+  }
+
+  /**
+   * Finds the char index in buf at which code point c must be inserted.
+   *
+   * A code point with canonical class 0 goes at the end. Any other code point
+   * is moved back past every trailing code point whose canonical class is
+   * greater than its own.
+   */
+  private static int findInsertionPoint(
+    StringBuilder buf, int c) {
+    int cc = UnicodeCharacterDatabase.getCanonicalClass(c);
+    int i = buf.length();
+    if (cc != 0) {
+      while (i > 0) {
+        // Read the whole code point that ends at i, pair-aware.
+        int prev = buf.codePointBefore(i);
+        if (UnicodeCharacterDatabase.getCanonicalClass(prev) <= cc) break;
+        // Step over the code point just examined. Stepping by the width of c
+        // instead could leave i in the middle of a surrogate pair.
+        i -= Character.charCount(prev);
+      }
+    }
+    return i;
+  }
+
+  /**
+   * Composes the decomposed content of buf in place when the form asks for
+   * composition; otherwise leaves buf unchanged.
+   *
+   * pos is the index of the code point currently being composed onto (lc),
+   * cpos is the write cursor, dpos is the read cursor. The buffer is
+   * truncated to cpos at the end.
+   */
+  private static void compose(
+    Form form,
+    StringBuilder buf)
+      throws IOException {
+    // Nothing to compose in an empty buffer; reading index 0 would throw.
+    if (!form.isComposition() || buf.length() == 0) return;
+    int pos = 0;
+    int lc = CharUtils.codepointAt(buf, pos).getValue();
+    int cpos = CharUtils.length(lc);
+    int lcc = UnicodeCharacterDatabase.getCanonicalClass(lc);
+    // A leading code point with a non-zero class must never be composed onto;
+    // 256 can be neither 0 nor less than any class compared against it below.
+    if (lcc != 0) lcc = 256;
+    int len = buf.length();
+    int c;
+    for (int dpos = cpos; dpos < buf.length(); dpos += CharUtils.length(c)) {
+      c = CharUtils.codepointAt(buf, dpos).getValue();
+      int cc = UnicodeCharacterDatabase.getCanonicalClass(c);
+      int composite = UnicodeCharacterDatabase.getPairComposition(lc, c);
+      // 0xFFFF is treated here as "no composition for this pair".
+      if (composite != '\uFFFF' && (lcc < cc || lcc == 0)) {
+        CharUtils.setChar(buf, pos, composite);
+        lc = composite;
+      } else {
+        if (cc == 0) {
+          // c starts a new run; later code points compose onto it.
+          pos = cpos;
+          lc = c;
+        }
+        lcc = cc;
+        CharUtils.setChar(buf, cpos, c);
+        // setChar may have changed the buffer length; keep the read cursor
+        // aligned with the shifted content.
+        if (buf.length() != len) {
+          dpos += buf.length() - len;
+          len = buf.length();
+        }
+        cpos += CharUtils.length(c);
+      }
+    }
+    buf.setLength(cpos);
+  }
+
+}
Added: incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/text/Punycode.java
URL: http://svn.apache.org/viewvc/incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/text/Punycode.java?rev=607801&view=auto
==============================================================================
--- incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/text/Punycode.java (added)
+++ incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/text/Punycode.java Mon Dec 31 20:59:44 2007
@@ -0,0 +1,206 @@
+/*
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements. The ASF licenses this file to You
+* under the Apache License, Version 2.0 (the "License"); you may not
+* use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License. For additional information regarding
+* copyright in this work, please see the NOTICE file in the top level
+* directory of this distribution.
+*/
+package org.apache.abdera.i18n.text;
+
+import java.io.IOException;
+
+/**
+ * Implementation of the Punycode encoding scheme used by IDNA
+ */
+public final class Punycode {
+
+  static final int base = 0x24; // 36
+  static final int tmin = 0x01; // 1
+  static final int tmax = 0x1A; // 26
+  static final int skew = 0x26; // 38
+  static final int damp = 0x02BC; // 700
+  static final int initial_bias = 0x48; // 72
+  static final int initial_n = 0x80; // 128
+  static final int delimiter = 0x2D; // '-'
+
+  Punycode() {}
+
+  /** @return true when cp is below 0x80 */
+  private static boolean basic(int cp) {
+    return cp < 0x80;
+  }
+
+  /** @return true when cp is the delimiter character */
+  private static boolean delim(int cp) {
+    return cp == delimiter;
+  }
+
+  /**
+   * @return true when bcp is an uppercase ASCII letter. The range is tested
+   *         on both sides: with signed ints "(bcp - 65) < 26" alone is also
+   *         true for every value below 'A'.
+   */
+  private static boolean flagged(int bcp) {
+    return bcp >= 'A' && bcp <= 'Z';
+  }
+
+  /**
+   * Maps a character to its digit value: letters of either case give 0..25,
+   * '0'..'9' give 26..35.
+   *
+   * @return the digit value, or base when cp is not a valid digit character
+   */
+  private static int decode_digit(int cp) {
+    // Explicit two-sided range checks; one-sided subtraction tests would
+    // accept characters below each range and yield negative digits.
+    if (cp >= '0' && cp <= '9') return cp - 22;
+    if (cp >= 'A' && cp <= 'Z') return cp - 'A';
+    if (cp >= 'a' && cp <= 'z') return cp - 'a';
+    return base;
+  }
+
+  private static int t(boolean c) {
+    return (c) ? 1 : 0;
+  }
+
+  /**
+   * Maps a digit value 0..35 to its character: 0..25 give letters, 26..35
+   * give '0'..'9'. Letters are uppercase when upper is true.
+   */
+  private static int encode_digit(int d, boolean upper) {
+    return (d + 22 + 75 * t(d < 26)) - (t(upper) << 5);
+  }
+
+  /**
+   * Forces the case of a basic code point: ASCII letters become uppercase
+   * when upper is true and lowercase otherwise; other values are returned
+   * unchanged.
+   */
+  private static int encode_basic(int bcp, boolean upper) {
+    if (bcp >= 'a' && bcp <= 'z') bcp -= 32;
+    if (!upper && bcp >= 'A' && bcp <= 'Z') bcp += 32;
+    return bcp;
+  }
+
+  /** Bias adaptation function. */
+  private static int adapt(int delta, int numpoints, boolean firsttime) {
+    int k;
+    delta = (firsttime) ? delta / damp : delta >> 1;
+    delta += delta / numpoints;
+    for (k = 0; delta > ((base - tmin) * tmax) / 2; k += base) {
+      delta /= base - tmin;
+    }
+    return k + (base - tmin + 1) * delta / (delta + skew);
+  }
+
+  /**
+   * Encodes the given characters as Punycode.
+   *
+   * @param chars the UTF-16 input
+   * @param case_flags optional; when non-null it is indexed by code point
+   *        position and a true entry asks for an uppercase output character
+   *        for that code point
+   * @return the encoded string
+   * @throws IOException with message "Overflow" when the delta computation
+   *         would exceed the int range
+   */
+  public static String encode(
+    char[] chars,
+    boolean[] case_flags)
+      throws IOException {
+    StringBuilder buf = new StringBuilder();
+    CodepointIterator ci = CodepointIterator.forCharArray(chars);
+    int n = initial_n;
+    int delta = 0;
+    int bias = initial_bias;
+    // Number of input code points. The main loop must be bounded by this and
+    // not by chars.length, which counts a surrogate pair twice.
+    int total = 0;
+    while (ci.hasNext()) {
+      int cp = ci.next().getValue();
+      if (basic(cp)) {
+        // Basic code points are copied through, with their case forced when
+        // flags are supplied.
+        buf.append((char) (case_flags != null
+            ? encode_basic(cp, case_flags[total])
+            : cp));
+      }
+      total++;
+    }
+    // Every basic code point is exactly one char, so the buffer length is
+    // also the number of code points handled so far.
+    final int b = buf.length();
+    int h = b;
+    if (b > 0) buf.append((char) delimiter);
+    while (h < total) {
+      // Find the smallest code point that has not been handled yet.
+      ci.position(0);
+      int m = Integer.MAX_VALUE;
+      while (ci.hasNext()) {
+        int cp = ci.next().getValue();
+        if (cp >= n && cp < m) m = cp;
+      }
+      if (m - n > (Integer.MAX_VALUE - delta) / (h + 1))
+        throw new IOException("Overflow");
+      delta += (m - n) * (h + 1);
+      n = m;
+      ci.position(0);
+      for (int j = 0; ci.hasNext(); j++) {
+        int cp = ci.next().getValue();
+        if (cp < n) {
+          // Checked before incrementing: a signed int wraps to a negative
+          // value, never to 0.
+          if (delta == Integer.MAX_VALUE) throw new IOException("Overflow");
+          delta++;
+        }
+        if (cp == n) {
+          // Emit delta as a generalized variable-length integer.
+          int q = delta;
+          for (int k = base; ; k += base) {
+            int t = k <= bias ? tmin : k >= bias + tmax ? tmax : k - bias;
+            if (q < t) break;
+            buf.append((char) encode_digit(t + (q - t) % (base - t), false));
+            q = (q - t) / (base - t);
+          }
+          buf.append((char) encode_digit(
+            q, case_flags != null && case_flags[j]));
+          bias = adapt(delta, h + 1, h == b);
+          delta = 0;
+          ++h;
+        }
+      }
+      ++delta; ++n;
+    }
+    return buf.toString();
+  }
+
+  /**
+   * Encodes a string as Punycode.
+   *
+   * @return the encoded string, or null when s is null or encoding fails
+   *         (the failure is printed to standard error)
+   */
+  public static String encode(String s) {
+    try {
+      if (s == null) return null;
+      return encode(s.toCharArray(), null);
+    } catch (Exception e) {
+      e.printStackTrace();
+      return null;
+    }
+  }
+
+  /**
+   * Decodes a Punycode string.
+   *
+   * @return the decoded string, or null when s is null or decoding fails
+   *         (the failure is printed to standard error)
+   */
+  public static String decode(String s) {
+    try {
+      if (s == null) return null;
+      return decode(s.toCharArray(), null);
+    } catch (Exception e) {
+      e.printStackTrace();
+      return null;
+    }
+  }
+
+  /**
+   * Decodes the given Punycode characters.
+   *
+   * @param chars the encoded input
+   * @param case_flags optional output; when non-null it receives one flag per
+   *        decoded code point (indexed by code point position), true when
+   *        the corresponding input character was an uppercase letter. It
+   *        must be at least as long as the number of decoded code points.
+   * @return the decoded string
+   * @throws IOException "Bad Input"/"Bad input" for malformed input and
+   *         "Overflow" when a value would exceed the int range
+   */
+  public static String decode(
+    char[] chars,
+    boolean[] case_flags)
+      throws IOException {
+    StringBuilder buf = new StringBuilder();
+    int n = initial_n;
+    int i = 0;
+    int bias = initial_bias;
+    // b is the index of the last delimiter, or 0 when there is none.
+    int b = 0;
+    for (int j = 0; j < chars.length; ++j)
+      if (delim(chars[j])) b = j;
+    // Everything before the last delimiter is copied through literally.
+    for (int j = 0; j < b; ++j) {
+      if (!basic(chars[j])) throw new IOException("Bad Input");
+      if (case_flags != null) case_flags[j] = flagged(chars[j]);
+      buf.append(chars[j]);
+    }
+    // out counts the code points in buf; the basic prefix is one char each.
+    int out = b;
+    for (int in = (b > 0) ? b + 1 : 0; in < chars.length; ++out) {
+      // Decode a generalized variable-length integer into i.
+      int oldi = i;
+      int w = 1;
+      for (int k = base; ; k += base) {
+        // ">=": index chars.length is already past the end of the input.
+        if (in >= chars.length) throw new IOException("Bad input");
+        int digit = decode_digit(chars[in++]);
+        if (digit >= base) throw new IOException("Bad input");
+        if (digit > (Integer.MAX_VALUE - i) / w) throw new IOException("Overflow");
+        i += digit * w;
+        int t = (k <= bias) ?
+          tmin :
+          (k >= bias + tmax) ?
+            tmax :
+            k - bias;
+        if (digit < t) break;
+        if (w > Integer.MAX_VALUE / (base - t)) throw new IOException("Overflow");
+        w *= (base - t);
+      }
+      bias = adapt(i - oldi, out + 1, oldi == 0);
+      if (i / (out + 1) > Integer.MAX_VALUE - n) throw new IOException("Overflow");
+      n += i / (out + 1);
+      i %= (out + 1);
+      // A value beyond the Unicode range cannot be stored as a code point.
+      if (n > 0x10FFFF) throw new IOException("Bad input");
+      if (case_flags != null) {
+        // Open a one-slot gap at code point position i for the new flag.
+        System.arraycopy(case_flags, i, case_flags, i + 1, out - i);
+        case_flags[i] = flagged(chars[in - 1]);
+      }
+      // i is a code point position; convert it to a char index, because
+      // earlier insertions may have added surrogate pairs.
+      CharUtils.insert(buf, buf.offsetByCodePoints(0, i), n);
+      i++;
+    }
+    return buf.toString();
+  }
+
+}
Added: incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/text/Sanitizer.java
URL: http://svn.apache.org/viewvc/incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/text/Sanitizer.java?rev=607801&view=auto
==============================================================================
--- incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/text/Sanitizer.java (added)
+++ incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/text/Sanitizer.java Mon Dec 31 20:59:44 2007
@@ -0,0 +1,103 @@
+/*
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements. The ASF licenses this file to You
+* under the Apache License, Version 2.0 (the "License"); you may not
+* use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License. For additional information regarding
+* copyright in this work, please see the NOTICE file in the top level
+* directory of this distribution.
+*/
+package org.apache.abdera.i18n.text;
+
+
+/**
+ * Turns arbitrary text into a string that is safe to use as a slug.
+ */
+public class Sanitizer {
+
+  /** Default pattern; every match is replaced by the filler string. */
+  public static final String SANITIZE_PATTERN = "[^A-Za-z0-9\\%!$&\\\\'()*+,;=_]+";
+
+  /** Sanitizes with no filler, no lowercasing and no normalization. */
+  public static String sanitize(String slug) {
+    return sanitize(slug, null, false, null, SANITIZE_PATTERN);
+  }
+
+  /** Sanitizes with the given filler and the default pattern. */
+  public static String sanitize(String slug, String filler) {
+    return sanitize(slug, filler, false, null, SANITIZE_PATTERN);
+  }
+
+  /** Sanitizes with the given filler, optionally lowercasing first. */
+  public static String sanitize(String slug, String filler, boolean lower) {
+    return sanitize(slug, filler, lower, null, SANITIZE_PATTERN);
+  }
+
+  /** Sanitizes with the given filler and a caller-supplied pattern. */
+  public static String sanitize(String slug, String filler, String pattern) {
+    return sanitize(slug, filler, false, null, pattern);
+  }
+
+  /** Sanitizes with the given filler and pattern, optionally lowercasing. */
+  public static String sanitize(String slug, String filler, boolean lower, String pattern) {
+    return sanitize(slug, filler, lower, null, pattern);
+  }
+
+  /** Sanitizes with the default pattern after optional normalization. */
+  public static String sanitize(
+    String slug,
+    String filler,
+    boolean lower,
+    Normalizer.Form form) {
+    return sanitize(slug, filler, lower, form, SANITIZE_PATTERN);
+  }
+
+  /**
+   * Used to sanitize a string. Optionally lowercases it and applies Unicode
+   * normalization, then replaces each run of whitespace with an underscore.
+   * With a filler, every match of the pattern is then replaced by the filler;
+   * without one, the result is percent-encoded instead.
+   *
+   * @param slug The source string
+   * @param filler The replacement string (or null to percent-encode)
+   * @param lower True if the result should be lowercase
+   * @param form Unicode Normalization form to use (or null)
+   * @param pattern Regular expression whose matches the filler replaces
+   * @return the sanitized string, or null when slug is null
+   */
+  public static String sanitize(
+    String slug,
+    String filler,
+    boolean lower,
+    Normalizer.Form form,
+    String pattern) {
+    if (slug == null) return null;
+    // A fixed locale keeps the result independent of the JVM default
+    // (a Turkish default locale would map "I" to a dotless i).
+    if (lower) slug = slug.toLowerCase(java.util.Locale.ENGLISH);
+    if (form != null) {
+      try {
+        slug =
+          Normalizer.normalize(
+            slug, form);
+      } catch (Exception ignored) {
+        // Best effort: when normalization fails, continue with the
+        // un-normalized text.
+      }
+    }
+    slug = slug.replaceAll("\\s+", "_");
+    if (filler != null) {
+      slug = slug.replaceAll(pattern, filler);
+    } else {
+      slug = UrlEncoding.encode(slug, PathNoDelimFilter);
+    }
+    return slug;
+  }
+
+  /**
+   * Selects the characters that must be percent-encoded.
+   *
+   * UrlEncoding escapes exactly the characters a filter accepts, so this
+   * filter accepts everything EXCEPT the characters that may stay as they are.
+   */
+  private static final Filter PathNoDelimFilter =
+    new Filter() {
+      public boolean accept(int c) {
+        return !(CharUtils.isAlphaDigit(c) ||
+                 c == '-' ||
+                 c == '.' ||
+                 c == '_' ||
+                 c == '~' ||
+                 c == '&' ||
+                 c == '=' ||
+                 c == '+' ||
+                 c == '$' ||
+                 c == ',' ||
+                 c == ';' ||
+                 c == '%');
+      }
+    };
+}
Added: incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/text/UrlEncoding.java
URL: http://svn.apache.org/viewvc/incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/text/UrlEncoding.java?rev=607801&view=auto
==============================================================================
--- incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/text/UrlEncoding.java (added)
+++ incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/text/UrlEncoding.java Mon Dec 31 20:59:44 2007
@@ -0,0 +1,593 @@
+/*
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements. The ASF licenses this file to You
+* under the Apache License, Version 2.0 (the "License"); you may not
+* use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License. For additional information regarding
+* copyright in this work, please see the NOTICE file in the top level
+* directory of this distribution.
+*/
+package org.apache.abdera.i18n.text;
+
+import java.io.ByteArrayInputStream;
+import java.io.FilterInputStream;
+import java.io.FilterOutputStream;
+import java.io.FilterReader;
+import java.io.FilterWriter;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.io.OutputStream;
+import java.io.OutputStreamWriter;
+import java.io.Reader;
+import java.io.UnsupportedEncodingException;
+import java.io.Writer;
+import java.nio.CharBuffer;
+
+
+/**
+ * Performs URL Percent Encoding.
+ *
+ * In every filtered encode variant a character is percent-encoded when at
+ * least one of the supplied filters accepts it; all other characters are
+ * copied through unchanged. With no filters nothing is encoded.
+ */
+public final class UrlEncoding {
+
+  private static final String DEFAULT_ENCODING = "UTF-8";
+  public final static char[] HEX = {
+    '0','1','2','3','4','5','6','7',
+    '8','9','A','B','C','D','E','F'
+  };
+
+  private UrlEncoding() {}
+
+  /** Appends the %XX form of every given byte to sb. */
+  private static void encode(Appendable sb, byte... bytes) {
+    encode(sb,0,bytes.length,bytes);
+  }
+
+  /** Appends the %XX form of at most length bytes, starting at offset. */
+  private static void encode(Appendable sb, int offset, int length, byte... bytes) {
+    try {
+      for (int n = offset, i = 0; n < bytes.length && i < length; n++, i++) {
+        byte c = bytes[n];
+        sb.append("%");
+        sb.append(HEX[(c >> 4) & 0x0f]);
+        sb.append(HEX[(c >> 0) & 0x0f]);
+      }
+    } catch (IOException e) {
+      throw new RuntimeException(e);
+    }
+  }
+
+  /** Encodes the characters as UTF-8 with no filters. */
+  public static String encode(char... chars) {
+    return encode(chars,0,chars.length,DEFAULT_ENCODING,new Filter[0]);
+  }
+
+  /** Encodes the characters as UTF-8 using one filter. */
+  public static String encode(char[] chars, Filter filter) {
+    return encode(chars,0,chars.length,DEFAULT_ENCODING,new Filter[] {filter});
+  }
+
+  /** Encodes the characters as UTF-8 using the given filters. */
+  public static String encode(char[] chars, Filter... filters) {
+    return encode(chars,0,chars.length,DEFAULT_ENCODING,filters);
+  }
+
+  /** Encodes the characters in the given charset with no filters. */
+  public static String encode(char[] chars, String enc) {
+    return encode(chars,0,chars.length,enc,new Filter[0]);
+  }
+
+  /** Encodes the characters in the given charset using one filter. */
+  public static String encode(char[] chars, String enc, Filter filter) {
+    return encode(chars,0,chars.length,enc,new Filter[] {filter});
+  }
+
+  /** Encodes the characters in the given charset using the given filters. */
+  public static String encode(char[] chars, String enc, Filter... filters) {
+    return encode(chars,0,chars.length,enc,filters);
+  }
+
+  /** Encodes a range of the characters as UTF-8 with no filters. */
+  public static String encode(char[] chars, int offset, int length) {
+    return encode(chars,offset,length,DEFAULT_ENCODING,new Filter[0]);
+  }
+
+  /** Encodes a range of the characters in the given charset, no filters. */
+  public static String encode(char[] chars, int offset, int length, String enc) {
+    return encode(chars,offset,length,enc,new Filter[0]);
+  }
+
+  /** Encodes a range of the characters as UTF-8 using one filter. */
+  public static String encode(char[] chars, int offset, int length, Filter filter) {
+    return encode(chars,offset,length,DEFAULT_ENCODING,new Filter[] {filter});
+  }
+
+  /** Encodes a range of the characters as UTF-8 using the given filters. */
+  public static String encode(char[] chars, int offset, int length, Filter... filters) {
+    return encode(chars,offset,length,DEFAULT_ENCODING,filters);
+  }
+
+  /** Encodes a range of the characters in the given charset, one filter. */
+  public static String encode(char[] chars, int offset, int length, String enc, Filter filter) {
+    return encode(chars,offset,length,enc,new Filter[] {filter});
+  }
+
+  /**
+   * Encodes a range of the characters in the given charset using the given
+   * filters.
+   *
+   * @throws RuntimeException wrapping the IOException raised for an
+   *         unsupported charset name
+   */
+  public static String encode(char[] chars, int offset, int length, String enc, Filter... filters) {
+    try {
+      return encode((CharSequence)CharBuffer.wrap(chars,offset,length),enc,filters);
+    } catch (IOException e) {
+      throw new RuntimeException(e);
+    }
+  }
+
+  /** Percent-encodes every byte read from the stream. */
+  public static String encode(InputStream in) throws IOException {
+    StringBuilder buf = new StringBuilder();
+    byte[] chunk = new byte[1024];
+    int r = -1;
+    while((r = in.read(chunk)) > -1)
+      encode(buf,0,r,chunk);
+    return buf.toString();
+  }
+
+  /** Reads the stream as charset text and encodes it as UTF-8, no filters. */
+  public static String encode(
+    InputStream in,
+    String charset) throws IOException {
+    return encode(in,charset,DEFAULT_ENCODING,new Filter[0]);
+  }
+
+  /** Reads the stream as charset text and encodes it as UTF-8, one filter. */
+  public static String encode(
+    InputStream in,
+    String charset,
+    Filter filter)
+      throws IOException {
+    return encode(in,charset,DEFAULT_ENCODING,new Filter[] {filter});
+  }
+
+  /** Reads the stream as charset text and encodes it in enc, no filters. */
+  public static String encode(
+    InputStream in,
+    String charset,
+    String enc) throws IOException {
+    return encode(in,charset,enc,new Filter[0]);
+  }
+
+  /** Reads the stream as charset text and encodes it in enc, one filter. */
+  public static String encode(
+    InputStream in,
+    String charset,
+    String enc,
+    Filter filter)
+      throws IOException {
+    return encode(in,charset,enc,new Filter[] {filter});
+  }
+
+  /** Reads the stream as charset text and encodes it in enc with filters. */
+  public static String encode(
+    InputStream in,
+    String charset,
+    String enc,
+    Filter... filters)
+      throws IOException {
+    return encode(new InputStreamReader(in,charset),enc,filters);
+  }
+
+  /** Reads the stream as charset text and encodes it as UTF-8 with filters. */
+  public static String encode(
+    InputStream in,
+    String charset,
+    Filter... filters)
+      throws IOException {
+    return encode(new InputStreamReader(in,charset),DEFAULT_ENCODING,filters);
+  }
+
+  /** Encodes everything read from the reader as UTF-8, no filters. */
+  public static String encode(
+    Reader reader)
+      throws IOException {
+    return encode(reader,DEFAULT_ENCODING, new Filter[0]);
+  }
+
+  /** Encodes everything read from the readable as UTF-8, no filters. */
+  public static String encode(
+    Readable readable)
+      throws IOException {
+    return encode(readable,DEFAULT_ENCODING, new Filter[0]);
+  }
+
+  /** Encodes everything read from the reader in enc, no filters. */
+  public static String encode(
+    Reader reader,
+    String enc)
+      throws IOException {
+    return encode(reader, enc, new Filter[0]);
+  }
+
+  /** Encodes everything read from the readable in enc, no filters. */
+  public static String encode(
+    Readable readable,
+    String enc)
+      throws IOException {
+    return encode(readable, enc, new Filter[0]);
+  }
+
+  /** Encodes everything read from the reader in enc, one filter. */
+  public static String encode(
+    Reader reader,
+    String enc,
+    Filter filter)
+      throws IOException {
+    return encode(reader,enc,new Filter[] {filter});
+  }
+
+  /** Encodes everything read from the reader as UTF-8, one filter. */
+  public static String encode(
+    Reader reader,
+    Filter filter)
+      throws IOException {
+    return encode(reader,DEFAULT_ENCODING,new Filter[] {filter});
+  }
+
+  /** Encodes everything read from the reader as UTF-8 with filters. */
+  public static String encode(
+    Reader reader,
+    Filter... filters)
+      throws IOException {
+    return encode(reader,DEFAULT_ENCODING,filters);
+  }
+
+  /** Encodes everything read from the readable in enc, one filter. */
+  public static String encode(
+    Readable readable,
+    String enc,
+    Filter filter)
+      throws IOException {
+    return encode(readable,enc,new Filter[] {filter});
+  }
+
+  /** Encodes everything read from the readable as UTF-8, one filter. */
+  public static String encode(
+    Readable readable,
+    Filter filter)
+      throws IOException {
+    return encode(readable,DEFAULT_ENCODING,new Filter[] {filter});
+  }
+
+  /** Encodes everything read from the readable as UTF-8 with filters. */
+  public static String encode(
+    Readable readable,
+    Filter... filters)
+      throws IOException {
+    return encode(readable,DEFAULT_ENCODING,filters);
+  }
+
+  /**
+   * Core encoding loop shared by all character based variants.
+   *
+   * A valid surrogate pair is handled as one code point: the filters see the
+   * combined code point and the pair is either encoded or copied as a whole.
+   * Any other char, including an unpaired surrogate, is handled on its own.
+   */
+  private static void processChars(
+    StringBuilder sb,
+    CharSequence chars,
+    String enc,
+    Filter... filters)
+      throws UnsupportedEncodingException {
+    final int len = chars.length();
+    for (int n = 0; n < len; n++) {
+      char c = chars.charAt(n);
+      boolean pair = Character.isHighSurrogate(c)
+          && n + 1 < len
+          && Character.isLowSurrogate(chars.charAt(n + 1));
+      if (pair) {
+        char low = chars.charAt(++n);
+        if (check(Character.toCodePoint(c, low), filters)) {
+          encode(sb, new String(new char[] {c, low}).getBytes(enc));
+        } else {
+          sb.append(c);
+          sb.append(low);
+        }
+      } else if (check(c, filters)) {
+        encode(sb, String.valueOf(c).getBytes(enc));
+      } else {
+        sb.append(c);
+      }
+    }
+  }
+
+  /** Encodes everything read from the readable in enc with filters. */
+  public static String encode(
+    Readable readable,
+    String enc,
+    Filter... filters)
+      throws IOException {
+    StringBuilder sb = new StringBuilder();
+    CharBuffer chars = CharBuffer.allocate(1024);
+    while (readable.read(chars) > -1) {
+      chars.flip();
+      int len = chars.remaining();
+      // Hold back a trailing high surrogate so that it can be paired with
+      // the first char of the next read.
+      if (len > 0 && Character.isHighSurrogate(chars.get(len - 1))) len--;
+      processChars(sb, chars.subSequence(0, len), enc, filters);
+      chars.position(len);
+      // Moves the held-back char (if any) to the front and makes the rest of
+      // the buffer writable again.
+      chars.compact();
+    }
+    chars.flip();
+    if (chars.hasRemaining()) processChars(sb, chars, enc, filters);
+    return sb.toString();
+  }
+
+  /** Encodes everything read from the reader in enc with filters. */
+  public static String encode(
+    Reader reader,
+    String enc,
+    Filter... filters)
+      throws IOException {
+    StringBuilder sb = new StringBuilder();
+    char[] chunk = new char[1024];
+    // 1 when chunk[0] holds a high surrogate carried over from the last read.
+    int pending = 0;
+    int r = -1;
+    while ((r = reader.read(chunk, pending, chunk.length - pending)) > -1) {
+      int len = pending + r;
+      pending = 0;
+      if (len > 0 && Character.isHighSurrogate(chunk[len - 1])) {
+        pending = 1;
+        len--;
+      }
+      processChars(
+        sb, CharBuffer.wrap(chunk, 0, len),
+        enc, filters);
+      if (pending == 1) chunk[0] = chunk[len];
+    }
+    if (pending == 1)
+      processChars(sb, CharBuffer.wrap(chunk, 0, 1), enc, filters);
+    return sb.toString();
+  }
+
+  /** Percent-encodes every given byte. */
+  public static String encode(byte... bytes) {
+    StringBuilder buf = new StringBuilder();
+    encode(buf,bytes);
+    return buf.toString();
+  }
+
+  /** Percent-encodes at most len bytes, starting at off. */
+  public static String encode(byte[] bytes, int off, int len) {
+    StringBuilder buf = new StringBuilder();
+    encode(buf,off,len,bytes);
+    return buf.toString();
+  }
+
+  /** Encodes the sequence as UTF-8 using Filter.NONOPFILTER. */
+  public static String encode(CharSequence s) {
+    return encode(s,Filter.NONOPFILTER);
+  }
+
+  /** Encodes the sequence as UTF-8 using one filter. */
+  public static String encode(CharSequence s, Filter filter) {
+    return encode(s, new Filter[] {filter});
+  }
+
+  /**
+   * Encodes the sequence as UTF-8 using the given filters.
+   *
+   * @return the encoded string, or null when s is null
+   */
+  public static String encode(CharSequence s, Filter... filters) {
+    try {
+      if (s == null) return null;
+      return encode(s,DEFAULT_ENCODING,filters);
+    } catch (UnsupportedEncodingException e) {
+      return null; // shouldn't happen
+    }
+  }
+
+  /** Encodes a range of the sequence as UTF-8 using Filter.NONOPFILTER. */
+  public static String encode(CharSequence s, int offset, int length) {
+    return encode(s,offset,length,Filter.NONOPFILTER);
+  }
+
+  /** Encodes a range of the sequence as UTF-8 using one filter. */
+  public static String encode(CharSequence s, int offset, int length, Filter filter) {
+    return encode(s,offset,length, new Filter[] {filter});
+  }
+
+  /**
+   * Encodes a range of the sequence as UTF-8 using the given filters.
+   *
+   * @return the encoded string, or null when s is null
+   */
+  public static String encode(CharSequence s, int offset, int length, Filter... filters) {
+    try {
+      if (s == null) return null;
+      return encode(s,offset,length,DEFAULT_ENCODING,filters);
+    } catch (UnsupportedEncodingException e) {
+      return null; // shouldn't happen
+    }
+  }
+
+  /** @return true when at least one filter accepts the code point */
+  private static boolean check(int codepoint, Filter... filters) {
+    for (Filter filter : filters) {
+      if (filter.accept(codepoint)) return true;
+    }
+    return false;
+  }
+
+  /**
+   * Encodes a range of the sequence in the given charset. The range is
+   * clipped to the end of the sequence.
+   *
+   * @return the encoded string, or null when s is null
+   */
+  public static String encode(
+    CharSequence s,
+    int offset,
+    int length,
+    String enc,
+    Filter... filters)
+      throws UnsupportedEncodingException {
+    if (s == null) return null;
+    int end = Math.min(s.length(), offset+length);
+    CharSequence seq = s.subSequence(offset, end);
+    return encode(seq,enc,filters);
+  }
+
+  /**
+   * Encodes the sequence in the given charset using the given filters.
+   *
+   * @return the encoded string, or null when s is null
+   * @throws UnsupportedEncodingException when enc is not a known charset
+   */
+  public static String encode(
+    CharSequence s,
+    String enc,
+    Filter... filters)
+      throws UnsupportedEncodingException {
+    if (s == null) return null;
+    StringBuilder sb = new StringBuilder();
+    processChars(sb, s, enc, filters);
+    return sb.toString();
+  }
+
+  /**
+   * Decodes the percent-escapes in e and interprets the resulting bytes in
+   * the given charset.
+   *
+   * @return the decoded string; when decoding fails (for example because of
+   *         a malformed escape) the input is returned unchanged
+   * @throws UnsupportedEncodingException when enc is not a known charset
+   */
+  public static String decode(String e, String enc)
+    throws UnsupportedEncodingException {
+    DecodingReader r = new DecodingReader(e.getBytes(enc),enc);
+    // The decoded text never has more chars than the escaped text.
+    char[] buf = new char[e.length()];
+    try {
+      int l = r.read(buf);
+      if (l >= 0) e = new String(buf,0,l);
+    } catch (Exception ignored) {
+      // Best effort: fall through and return the input as it was given.
+    }
+    return e;
+  }
+
+  /** Decodes the percent-escapes in e as UTF-8; returns e on any failure. */
+  public static String decode(String e) {
+    try {
+      return decode(e,"utf-8");
+    } catch (Exception ex) {
+      return e;
+    }
+  }
+
+  /** Output stream that percent-encodes every byte written to it. */
+  public static class EncodingOutputStream
+    extends FilterOutputStream {
+
+    public EncodingOutputStream(OutputStream out) {
+      super(out);
+    }
+    @Override
+    public void write(byte[] b, int off, int len) throws IOException {
+      String enc = encode(b,off,len);
+      out.write(enc.getBytes(DEFAULT_ENCODING));
+    }
+    @Override
+    public void write(byte[] b) throws IOException {
+      String enc = encode(b);
+      out.write(enc.getBytes(DEFAULT_ENCODING));
+    }
+    @Override
+    public void write(int b) throws IOException {
+      String enc = encode((byte)b);
+      out.write(enc.getBytes(DEFAULT_ENCODING));
+    }
+  }
+
+  /** Writer that percent-encodes the characters its filters accept. */
+  public static class EncodingWriter
+    extends FilterWriter {
+    private final Filter[] filters;
+    public EncodingWriter(OutputStream out) {
+      this(new OutputStreamWriter(out));
+    }
+    public EncodingWriter(OutputStream out,Filter filter) {
+      this(new OutputStreamWriter(out),filter);
+    }
+    public EncodingWriter(OutputStream out,Filter... filters) {
+      this(new OutputStreamWriter(out),filters);
+    }
+    public EncodingWriter(Writer out) {
+      this(out,new Filter[0]);
+    }
+    public EncodingWriter(Writer out, Filter filter) {
+      this(out,new Filter[] {filter});
+    }
+    public EncodingWriter(Writer out, Filter... filters) {
+      super(out);
+      this.filters = filters;
+    }
+    @Override
+    public void write(char[] b, int off, int len) throws IOException {
+      String enc = encode(b,off,len,filters);
+      out.write(enc.toCharArray());
+    }
+    @Override
+    public void write(char[] b) throws IOException {
+      String enc = encode(b,filters);
+      out.write(enc.toCharArray());
+    }
+    @Override
+    public void write(int b) throws IOException {
+      String enc = encode(new char[] {(char)b},filters);
+      out.write(enc.toCharArray());
+    }
+    @Override
+    public void write(
+      String str,
+      int off,
+      int len)
+        throws IOException {
+      String enc = encode(str,off,len,filters);
+      out.write(enc.toCharArray());
+    }
+  }
+
+  /** Input stream that replaces every %XX escape by the byte it encodes. */
+  public static class DecodingInputStream
+    extends FilterInputStream {
+    public DecodingInputStream(InputStream in) {
+      super(in);
+    }
+    public DecodingInputStream(byte[] in) {
+      super(new ByteArrayInputStream(in));
+    }
+    /**
+     * @return the next decoded byte as a value 0..255, or -1 at end of stream
+     * @throws IOException when a '%' is not followed by two hex digits
+     */
+    @Override
+    public int read() throws IOException {
+      int c = super.read();
+      if (c != '%') return c;
+      // Arguments are evaluated left to right: high digit first, then low.
+      return decode(super.read(), super.read());
+    }
+    @Override
+    public synchronized int read(byte[] b, int off, int len) throws IOException {
+      if (len == 0) return 0;
+      final int end = off + len;
+      int n = off;
+      // Check for room BEFORE reading, so no byte is consumed and dropped.
+      while (n < end) {
+        int i = read();
+        if (i == -1) break;
+        b[n++] = (byte)i;
+      }
+      return (n == off) ? -1 : n - off;
+    }
+    @Override
+    public int read(byte[] b) throws IOException {
+      return read(b,0,b.length);
+    }
+    @Override
+    public long skip(long n) throws IOException {
+      long i = 0;
+      while (i < n && read() != -1) i++;
+      return i;
+    }
+
+  }
+
+  /**
+   * Reader that delivers percent-decoded text.
+   *
+   * When built over bytes, the escapes are decoded on the byte level first
+   * and the result is then interpreted in the charset, so multi-byte escapes
+   * such as %C3%A9 decode correctly. When built over a Reader, each escape is
+   * replaced by the single char 0..255 it encodes.
+   */
+  public static class DecodingReader
+    extends FilterReader {
+    // False when the wrapped reader already delivers decoded text.
+    private final boolean decodeChars;
+    public DecodingReader(byte[] buf)
+      throws UnsupportedEncodingException {
+      this(new ByteArrayInputStream(buf));
+    }
+    public DecodingReader(
+      byte[] buf,String enc)
+        throws UnsupportedEncodingException {
+      this(new ByteArrayInputStream(buf),enc);
+    }
+    public DecodingReader(
+      InputStream in)
+        throws UnsupportedEncodingException {
+      this(in, DEFAULT_ENCODING);
+    }
+    public DecodingReader(
+      InputStream in,
+      String enc)
+        throws UnsupportedEncodingException {
+      this(new InputStreamReader(new DecodingInputStream(in),enc), false);
+    }
+    public DecodingReader(Reader in) {
+      this(in, true);
+    }
+    private DecodingReader(Reader in, boolean decodeChars) {
+      super(in);
+      this.decodeChars = decodeChars;
+    }
+    /**
+     * @return the next decoded char, or -1 at end of stream
+     * @throws IOException when a '%' is not followed by two hex digits
+     */
+    @Override
+    public int read() throws IOException {
+      int c = super.read();
+      if (!decodeChars || c != '%') return c;
+      return decode(super.read(), super.read());
+    }
+    @Override
+    public synchronized int read(char[] b, int off, int len) throws IOException {
+      if (len == 0) return 0;
+      final int end = off + len;
+      int n = off;
+      // Check for room BEFORE reading, so no char is consumed and dropped.
+      while (n < end) {
+        int i = read();
+        if (i == -1) break;
+        b[n++] = (char)i;
+      }
+      return (n == off) ? -1 : n - off;
+    }
+    @Override
+    public int read(char[] b) throws IOException {
+      return read(b,0,b.length);
+    }
+    @Override
+    public long skip(long n) throws IOException {
+      long i = 0;
+      while (i < n && read() != -1) i++;
+      return i;
+    }
+  }
+
+  /** @return the value 0..15 of a hex digit, or -1 for anything else */
+  private static int hexDigit(int c) {
+    if (c >= '0' && c <= '9') return c - '0';
+    if (c >= 'A' && c <= 'F') return c - 'A' + 10;
+    if (c >= 'a' && c <= 'f') return c - 'a' + 10;
+    return -1;
+  }
+
+  /**
+   * Combines the two characters that follow a '%' into the byte they encode.
+   *
+   * @return the decoded value 0..255
+   * @throws IOException when either argument is not a hex digit, which
+   *         includes -1 for a stream that ended inside the escape
+   */
+  private static int decode(int c1, int c2) throws IOException {
+    int hi = hexDigit(c1);
+    int lo = hexDigit(c2);
+    if (hi < 0 || lo < 0)
+      throw new IOException("Malformed percent-encoded sequence");
+    return (hi << 4) | lo;
+  }
+
+}
Added: incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/text/data/CompositionExclusions.txt
URL: http://svn.apache.org/viewvc/incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/text/data/CompositionExclusions.txt?rev=607801&view=auto
==============================================================================
--- incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/text/data/CompositionExclusions.txt (added)
+++ incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/text/data/CompositionExclusions.txt Mon Dec 31 20:59:44 2007
@@ -0,0 +1,197 @@
+# CompositionExclusions-5.0.0.txt
+# Date: 2006-05-23, 12:42:00 PST [KW]
+#
+# This file lists the characters for the Composition Exclusion Table
+# defined in UAX #15, Unicode Normalization Forms.
+#
+# This file is a normative contributory data file in the
+# Unicode Character Database.
+#
+# Copyright (c) 1991-2006 Unicode, Inc.
+# For terms of use, see http://www.unicode.org/terms_of_use.html
+#
+# For more information, see
+# http://www.unicode.org/unicode/reports/tr15/#Primary Exclusion List Table
+#
+# For a full derivation of composition exclusions, see the derived property
+# Full_Composition_Exclusion in DerivedNormalizationProps.txt
+#
+
+# ================================================
+# (1) Script Specifics
+#
+# This list of characters cannot be derived from the UnicodeData.txt file.
+# ================================================
+
+0958 # DEVANAGARI LETTER QA
+0959 # DEVANAGARI LETTER KHHA
+095A # DEVANAGARI LETTER GHHA
+095B # DEVANAGARI LETTER ZA
+095C # DEVANAGARI LETTER DDDHA
+095D # DEVANAGARI LETTER RHA
+095E # DEVANAGARI LETTER FA
+095F # DEVANAGARI LETTER YYA
+09DC # BENGALI LETTER RRA
+09DD # BENGALI LETTER RHA
+09DF # BENGALI LETTER YYA
+0A33 # GURMUKHI LETTER LLA
+0A36 # GURMUKHI LETTER SHA
+0A59 # GURMUKHI LETTER KHHA
+0A5A # GURMUKHI LETTER GHHA
+0A5B # GURMUKHI LETTER ZA
+0A5E # GURMUKHI LETTER FA
+0B5C # ORIYA LETTER RRA
+0B5D # ORIYA LETTER RHA
+0F43 # TIBETAN LETTER GHA
+0F4D # TIBETAN LETTER DDHA
+0F52 # TIBETAN LETTER DHA
+0F57 # TIBETAN LETTER BHA
+0F5C # TIBETAN LETTER DZHA
+0F69 # TIBETAN LETTER KSSA
+0F76 # TIBETAN VOWEL SIGN VOCALIC R
+0F78 # TIBETAN VOWEL SIGN VOCALIC L
+0F93 # TIBETAN SUBJOINED LETTER GHA
+0F9D # TIBETAN SUBJOINED LETTER DDHA
+0FA2 # TIBETAN SUBJOINED LETTER DHA
+0FA7 # TIBETAN SUBJOINED LETTER BHA
+0FAC # TIBETAN SUBJOINED LETTER DZHA
+0FB9 # TIBETAN SUBJOINED LETTER KSSA
+FB1D # HEBREW LETTER YOD WITH HIRIQ
+FB1F # HEBREW LIGATURE YIDDISH YOD YOD PATAH
+FB2A # HEBREW LETTER SHIN WITH SHIN DOT
+FB2B # HEBREW LETTER SHIN WITH SIN DOT
+FB2C # HEBREW LETTER SHIN WITH DAGESH AND SHIN DOT
+FB2D # HEBREW LETTER SHIN WITH DAGESH AND SIN DOT
+FB2E # HEBREW LETTER ALEF WITH PATAH
+FB2F # HEBREW LETTER ALEF WITH QAMATS
+FB30 # HEBREW LETTER ALEF WITH MAPIQ
+FB31 # HEBREW LETTER BET WITH DAGESH
+FB32 # HEBREW LETTER GIMEL WITH DAGESH
+FB33 # HEBREW LETTER DALET WITH DAGESH
+FB34 # HEBREW LETTER HE WITH MAPIQ
+FB35 # HEBREW LETTER VAV WITH DAGESH
+FB36 # HEBREW LETTER ZAYIN WITH DAGESH
+FB38 # HEBREW LETTER TET WITH DAGESH
+FB39 # HEBREW LETTER YOD WITH DAGESH
+FB3A # HEBREW LETTER FINAL KAF WITH DAGESH
+FB3B # HEBREW LETTER KAF WITH DAGESH
+FB3C # HEBREW LETTER LAMED WITH DAGESH
+FB3E # HEBREW LETTER MEM WITH DAGESH
+FB40 # HEBREW LETTER NUN WITH DAGESH
+FB41 # HEBREW LETTER SAMEKH WITH DAGESH
+FB43 # HEBREW LETTER FINAL PE WITH DAGESH
+FB44 # HEBREW LETTER PE WITH DAGESH
+FB46 # HEBREW LETTER TSADI WITH DAGESH
+FB47 # HEBREW LETTER QOF WITH DAGESH
+FB48 # HEBREW LETTER RESH WITH DAGESH
+FB49 # HEBREW LETTER SHIN WITH DAGESH
+FB4A # HEBREW LETTER TAV WITH DAGESH
+FB4B # HEBREW LETTER VAV WITH HOLAM
+FB4C # HEBREW LETTER BET WITH RAFE
+FB4D # HEBREW LETTER KAF WITH RAFE
+FB4E # HEBREW LETTER PE WITH RAFE
+
+# Total code points: 67
+
+# ================================================
+# (2) Post Composition Version precomposed characters
+#
+# These characters cannot be derived solely from the UnicodeData.txt file
+# in this version of Unicode.
+#
+# Note that characters added to the standard after the
+# Composition Version and which have canonical decomposition mappings
+# are not automatically added to this list of Post Composition
+# Version precomposed characters.
+# ================================================
+
+2ADC # FORKING
+1D15E # MUSICAL SYMBOL HALF NOTE
+1D15F # MUSICAL SYMBOL QUARTER NOTE
+1D160 # MUSICAL SYMBOL EIGHTH NOTE
+1D161 # MUSICAL SYMBOL SIXTEENTH NOTE
+1D162 # MUSICAL SYMBOL THIRTY-SECOND NOTE
+1D163 # MUSICAL SYMBOL SIXTY-FOURTH NOTE
+1D164 # MUSICAL SYMBOL ONE HUNDRED TWENTY-EIGHTH NOTE
+1D1BB # MUSICAL SYMBOL MINIMA
+1D1BC # MUSICAL SYMBOL MINIMA BLACK
+1D1BD # MUSICAL SYMBOL SEMIMINIMA WHITE
+1D1BE # MUSICAL SYMBOL SEMIMINIMA BLACK
+1D1BF # MUSICAL SYMBOL FUSA WHITE
+1D1C0 # MUSICAL SYMBOL FUSA BLACK
+
+# Total code points: 14
+
+# ================================================
+# (3) Singleton Decompositions
+#
+# These characters can be derived from the UnicodeData.txt file
+# by including all characters whose canonical decomposition
+# consists of a single character.
+#
+# These characters are simply quoted here for reference.
+# See also Full_Composition_Exclusion in DerivedNormalizationProps.txt
+# ================================================
+
+# 0340..0341 [2] COMBINING GRAVE TONE MARK..COMBINING ACUTE TONE MARK
+# 0343 COMBINING GREEK KORONIS
+# 0374 GREEK NUMERAL SIGN
+# 037E GREEK QUESTION MARK
+# 0387 GREEK ANO TELEIA
+# 1F71 GREEK SMALL LETTER ALPHA WITH OXIA
+# 1F73 GREEK SMALL LETTER EPSILON WITH OXIA
+# 1F75 GREEK SMALL LETTER ETA WITH OXIA
+# 1F77 GREEK SMALL LETTER IOTA WITH OXIA
+# 1F79 GREEK SMALL LETTER OMICRON WITH OXIA
+# 1F7B GREEK SMALL LETTER UPSILON WITH OXIA
+# 1F7D GREEK SMALL LETTER OMEGA WITH OXIA
+# 1FBB GREEK CAPITAL LETTER ALPHA WITH OXIA
+# 1FBE GREEK PROSGEGRAMMENI
+# 1FC9 GREEK CAPITAL LETTER EPSILON WITH OXIA
+# 1FCB GREEK CAPITAL LETTER ETA WITH OXIA
+# 1FD3 GREEK SMALL LETTER IOTA WITH DIALYTIKA AND OXIA
+# 1FDB GREEK CAPITAL LETTER IOTA WITH OXIA
+# 1FE3 GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND OXIA
+# 1FEB GREEK CAPITAL LETTER UPSILON WITH OXIA
+# 1FEE..1FEF [2] GREEK DIALYTIKA AND OXIA..GREEK VARIA
+# 1FF9 GREEK CAPITAL LETTER OMICRON WITH OXIA
+# 1FFB GREEK CAPITAL LETTER OMEGA WITH OXIA
+# 1FFD GREEK OXIA
+# 2000..2001 [2] EN QUAD..EM QUAD
+# 2126 OHM SIGN
+# 212A..212B [2] KELVIN SIGN..ANGSTROM SIGN
+# 2329 LEFT-POINTING ANGLE BRACKET
+# 232A RIGHT-POINTING ANGLE BRACKET
+# F900..FA0D [270] CJK COMPATIBILITY IDEOGRAPH-F900..CJK COMPATIBILITY IDEOGRAPH-FA0D
+# FA10 CJK COMPATIBILITY IDEOGRAPH-FA10
+# FA12 CJK COMPATIBILITY IDEOGRAPH-FA12
+# FA15..FA1E [10] CJK COMPATIBILITY IDEOGRAPH-FA15..CJK COMPATIBILITY IDEOGRAPH-FA1E
+# FA20 CJK COMPATIBILITY IDEOGRAPH-FA20
+# FA22 CJK COMPATIBILITY IDEOGRAPH-FA22
+# FA25..FA26 [2] CJK COMPATIBILITY IDEOGRAPH-FA25..CJK COMPATIBILITY IDEOGRAPH-FA26
+# FA2A..FA2D [4] CJK COMPATIBILITY IDEOGRAPH-FA2A..CJK COMPATIBILITY IDEOGRAPH-FA2D
+# FA30..FA6A [59] CJK COMPATIBILITY IDEOGRAPH-FA30..CJK COMPATIBILITY IDEOGRAPH-FA6A
+# FA70..FAD9 [106] CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COMPATIBILITY IDEOGRAPH-FAD9
+# 2F800..2FA1D [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D
+
+# Total code points: 924
+
+# ================================================
+# (4) Non-Starter Decompositions
+#
+# These characters can be derived from the UnicodeData file
+# by including all characters whose canonical decomposition consists
+# of a sequence of characters, the first of which has a non-zero
+# combining class.
+#
+# These characters are simply quoted here for reference.
+# See also Full_Composition_Exclusion in DerivedNormalizationProps.txt
+# ================================================
+
+# 0344 COMBINING GREEK DIALYTIKA TONOS
+# 0F73 TIBETAN VOWEL SIGN II
+# 0F75 TIBETAN VOWEL SIGN UU
+# 0F81 TIBETAN VOWEL SIGN REVERSED II
+
+# Total code points: 4
+
Added: incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/text/data/Generator.java
URL: http://svn.apache.org/viewvc/incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/text/data/Generator.java?rev=607801&view=auto
==============================================================================
--- incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/text/data/Generator.java (added)
+++ incubator/abdera/java/trunk/dependencies/i18n/src/main/java/org/apache/abdera/i18n/text/data/Generator.java Mon Dec 31 20:59:44 2007
@@ -0,0 +1,341 @@
+/*
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements. The ASF licenses this file to You
+* under the Apache License, Version 2.0 (the "License"); you may not
+* use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License. For additional information regarding
+* copyright in this work, please see the NOTICE file in the top level
+* directory of this distribution.
+*/
+package org.apache.abdera.i18n.text.data;
+
+import java.io.FileInputStream;
+import java.io.InputStream;
+import java.io.PrintWriter;
+import java.net.URL;
+import java.util.ArrayList;
+import java.util.BitSet;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.List;
+import java.util.Scanner;
+import java.util.regex.MatchResult;
+
+/**
+ * Tool for parsing the Unicode Character Database file format and generating
+ * the constants for the UnicodeCharacterDatabase file.
+ *
+ * <p>Usage: <code>Generator &lt;exclusions&gt; &lt;data&gt;</code>, where the
+ * first argument names the composition exclusions list and the second names
+ * the semicolon-delimited character data file. Each argument is resolved as
+ * a classpath resource, then as a local file, then as a URL. The generated
+ * Java source is written to standard output.</p>
+ */
+public class Generator {
+
+  /**
+   * Matches one record of the character data file. Group 1 is the code point
+   * (hex), group 2 the fourth field (combining class) and group 3 the sixth
+   * field (decomposition mapping).
+   */
+  private static final String RECORD =
+    "([^;\\s]*);[^;]*;[^;]*;([^;]*);[^;]*;([^;]*);.*";
+
+  /** A line break is emitted after every PER_LINE-th array element. */
+  private static final int PER_LINE = 20;
+
+  /**
+   * Number of generated methods the large tables are spread across
+   * (presumably to keep each generated method below the JVM's method size
+   * limit -- TODO confirm).
+   */
+  private static final int SETS = 2;
+
+  /**
+   * Entry point.
+   *
+   * @param args args[0] is the exclusions file, args[1] the character data file
+   * @throws IllegalArgumentException if fewer than two arguments are given or
+   *         one of the inputs cannot be located
+   */
+  public static void main(String... args) {
+    if (args.length < 2) {
+      throw new IllegalArgumentException(
+        "Usage: Generator <composition exclusions file> <unicode data file>");
+    }
+    PrintWriter pw = new PrintWriter(System.out);
+    try {
+      BitSet exclusions = getExclusions(args[0]);
+      writeDecomposition(pw, args[1], exclusions);
+    } finally {
+      // System.out is not ours to close; just make sure nothing is left buffered.
+      pw.flush();
+    }
+  }
+
+  /**
+   * Parses the character data file and prints the generated table methods.
+   *
+   * @param pw       destination for the generated source
+   * @param file     location of the character data file
+   * @param excluded code points that must not take part in composition
+   */
+  private static void writeDecomposition(PrintWriter pw, String file, BitSet excluded) {
+    BitSet compat = new BitSet();
+
+    List<Integer> cc_idx = new ArrayList<Integer>();
+    List<Integer> cc_data = new ArrayList<Integer>();
+
+    List<Integer> decomp_idx = new ArrayList<Integer>();
+    List<Integer[]> decomp_data = new ArrayList<Integer[]>();
+
+    List<Integer[]> comps = new ArrayList<Integer[]>();
+
+    List<Integer[]> hanguls = new ArrayList<Integer[]>();
+
+    Scanner s = read(file);
+    try {
+      while (s.hasNextLine()) {
+        if (s.findInLine(RECORD) != null) {
+          MatchResult result = s.match();
+          int codepoint = Integer.parseInt(result.group(1),16);
+          int cc = Integer.parseInt(result.group(2));
+          if (cc != 0) {
+            cc_idx.add(codepoint);
+            cc_data.add(cc);
+          }
+          String dc = result.group(3).trim();
+          if (dc.length() > 0) {
+            // A leading <tag> marks a compatibility decomposition.
+            if (dc.charAt(0) == '<') {
+              compat.set(codepoint);
+            }
+            dc = dc.substring(dc.indexOf('>') + 1).trim();
+            String[] points = dc.split("\\s");
+            List<Integer> list = new ArrayList<Integer>();
+            for (int n = 0; n < points.length; n++) {
+              list.add(Integer.parseInt(points[n],16));
+            }
+            decomp_idx.add(codepoint);
+            decomp_data.add(list.toArray(new Integer[list.size()]));
+
+            if (!compat.get(codepoint) &&
+                !excluded.get(codepoint)) {
+              // The composition key packs (first << 16) | second; a
+              // single-element decomposition uses 0 as the first half.
+              char f = (list.size() > 1) ?
+                (char)list.get(0).intValue() : '\u0000';
+              char l = (list.size() > 1) ?
+                (char)list.get(1).intValue() : (char)list.get(0).intValue();
+              comps.add(new Integer[] {(f << 16) | l,codepoint});
+            }
+          }
+        }
+        // Always consume the rest of the line, terminator included.
+        // findInLine() stops in front of the line separator, so without
+        // this the scanner would never move on to the next record.
+        if (s.hasNextLine()) {
+          s.nextLine();
+        }
+      }
+    } finally {
+      s.close();
+    }
+
+    // Hanguls
+    for (int z = 0; z < 0x2BA4; ++z) {
+      int t = z % 0x001C;
+      char f = (t != 0) ?
+        (char)(0xAC00 + z - t) :
+        (char)(0x1100 + z / 0x024C);
+      char e = (t != 0) ?
+        (char)(0x11A7 + t) :
+        (char)(0x1161 + (z % 0x024C) / 0x001C);
+      int pair = (f << 16) | e;
+      int value = z + 0xAC00;
+      hanguls.add(new Integer[] {pair,value});
+    }
+
+    // Order both pair tables by their packed key.
+    Comparator<Integer[]> comp = new Comparator<Integer[]>() {
+      public int compare(Integer[] o1, Integer[] o2) {
+        int i1 = o1[0];
+        int i2 = o2[0];
+        return i1 < i2 ? -1 :
+               i1 > i2 ? 1 : 0;
+      }
+    };
+    Collections.sort(comps,comp);
+    Collections.sort(hanguls, comp);
+
+    // Each set bit is emitted exactly once; an empty set yields an empty array.
+    List<Integer> compatPoints = new ArrayList<Integer>();
+    for (int i = compat.nextSetBit(0); i >= 0; i = compat.nextSetBit(i+1)) {
+      compatPoints.add(i);
+    }
+
+    writeIntArray(pw, "getCompat", compatPoints);
+    writeIntArray(pw, "getCCIdx", cc_idx);
+    writeIntArray(pw, "getCCData", cc_data);
+    writeIntArray(pw, "getComposeIdx", column(comps, 0));
+    writeIntArray(pw, "getComposeData", column(comps, 1));
+    writeIntArray(pw, "getDecompIdx", decomp_idx);
+
+    int total = decomp_data.size();
+    for (int a = 0; a < SETS; a++) {
+      writeNestedArray(pw, "getDecompData" + (a+1),
+        decomp_data.subList(boundary(total, a), boundary(total, a+1)));
+    }
+    writeConcat(pw, "int[][]", "getDecompData", "][];");
+
+    List<Integer> hangulPairs = column(hanguls, 0);
+    List<Integer> hangulCodepoints = column(hanguls, 1);
+    total = hanguls.size();
+    for (int a = 0; a < SETS; a++) {
+      int from = boundary(total, a);
+      int to = boundary(total, a+1);
+      writeIntArray(pw, "getHangulPairs" + (a+1), hangulPairs.subList(from, to));
+      writeIntArray(pw, "getHangulCodepoints" + (a+1), hangulCodepoints.subList(from, to));
+    }
+    writeConcat(pw, "int[]", "getHangulPairs", "];");
+    writeConcat(pw, "int[]", "getHangulCodepoints", "];");
+    pw.print("\n\n");
+
+    pw.flush();
+  }
+
+  /**
+   * Returns the index at which set number <code>set</code> starts when
+   * <code>total</code> entries are spread over SETS sets. Using the running
+   * product (rather than a fixed chunk size) guarantees that the final set
+   * ends exactly at <code>total</code>, so no entry is dropped when the
+   * count is not a multiple of SETS.
+   */
+  private static int boundary(int total, int set) {
+    return (int)((long)total * set / SETS);
+  }
+
+  /** Extracts one column of a list of rows as a flat list. */
+  private static List<Integer> column(List<Integer[]> rows, int col) {
+    List<Integer> values = new ArrayList<Integer>(rows.size());
+    for (Integer[] row : rows) {
+      values.add(row[col]);
+    }
+    return values;
+  }
+
+  /** Prints a generated method returning the given values as an int[]. */
+  private static void writeIntArray(PrintWriter pw, String name, List<Integer> values) {
+    pw.print(" private static int[] " + name + "() { return new int[] {");
+    for (int i = 0; i < values.size(); i++) {
+      pw.print(values.get(i));
+      if (i % PER_LINE == 0) {
+        pw.print("\n ");
+      }
+      if (i < values.size() - 1) {
+        pw.print(',');
+      }
+    }
+    pw.print("};}\n\n");
+    pw.flush();
+  }
+
+  /** Prints a generated method returning the given rows as an int[][]. */
+  private static void writeNestedArray(PrintWriter pw, String name, List<Integer[]> rows) {
+    pw.print(" private static int[][] " + name + "() { return new int[][] {");
+    for (int i = 0; i < rows.size(); i++) {
+      Integer[] data = rows.get(i);
+      pw.print('{');
+      for (int q = 0; q < data.length; q++) {
+        pw.print(data[q]);
+        if (q < data.length - 1) {
+          pw.print(',');
+        }
+      }
+      pw.print('}');
+      if (i % PER_LINE == 0) {
+        pw.print("\n ");
+      }
+      if (i < rows.size() - 1) {
+        pw.print(',');
+      }
+    }
+    pw.print("};}\n\n");
+    pw.flush();
+  }
+
+  /**
+   * Prints a generated method <code>name()</code> that concatenates the
+   * arrays returned by <code>name1()</code> .. <code>nameN()</code>, N = SETS.
+   *
+   * @param type        the generated array type, e.g. "int[]"
+   * @param name        the generated method name
+   * @param allocSuffix the text closing the "new int[" allocation expression
+   */
+  private static void writeConcat(PrintWriter pw, String type, String name, String allocSuffix) {
+    pw.println(" private static " + type + " " + name + "() {");
+    for (int n = 0; n < SETS; n++) {
+      pw.println(" " + type + " d" + (n+1) + " = " + name + (n+1) + "();");
+    }
+
+    pw.print(" " + type + " d = new int[");
+    for (int n = 0; n < SETS; n++) {
+      pw.print("d" + (n+1) + ".length");
+      if (n < SETS - 1) {
+        pw.print('+');
+      }
+    }
+    pw.println(allocSuffix);
+
+    // The destination offset of each part is the summed length of all the
+    // parts before it, so the output stays correct for any number of sets.
+    String offset = "0";
+    for (int n = 0; n < SETS; n++) {
+      String part = "d" + (n+1);
+      pw.println(" System.arraycopy(" + part + ",0,d," + offset + "," + part + ".length);");
+      offset = (n == 0) ? part + ".length" : offset + "+" + part + ".length";
+    }
+    pw.println(" return d;}");
+    pw.flush();
+  }
+
+  /**
+   * Reads the composition exclusions list. Everything from a '#' to the end
+   * of its line is treated as a delimiter; each remaining non-empty token is
+   * parsed as a hexadecimal code point.
+   *
+   * @param file location of the exclusions file
+   * @return a bit set with one bit set per listed code point
+   */
+  private static BitSet getExclusions(String file) {
+    Scanner s = read(file).useDelimiter("\\s*#.*");
+    BitSet set = new BitSet();
+    try {
+      while (s.hasNext()) {
+        String exc = s.next().trim();
+        if (exc.length() > 0) {
+          int i = Integer.parseInt(exc,16);
+          set.set(i);
+        }
+      }
+    } finally {
+      s.close();
+    }
+    return set;
+  }
+
+  /**
+   * Opens the named input, trying in order: a classpath resource, a local
+   * file, a URL.
+   *
+   * @param f resource name, file path or URL
+   * @return a scanner over the input; never null
+   * @throws IllegalArgumentException if the input cannot be opened by any of
+   *         the three mechanisms
+   */
+  private static Scanner read(String f) {
+    ClassLoader cl = Thread.currentThread().getContextClassLoader();
+    if (cl == null) {
+      cl = Generator.class.getClassLoader();
+    }
+    InputStream in = (cl != null) ? cl.getResourceAsStream(f) : null;
+    if (in == null) {
+      try {
+        in = new FileInputStream(f);
+      } catch (Exception ignored) {
+        // Not a readable local file; fall through and try it as a URL.
+      }
+    }
+    if (in == null) {
+      try {
+        URL url = new URL(f);
+        in = url.openStream();
+      } catch (Exception ignored) {
+        // Not a usable URL either; reported below.
+      }
+    }
+    if (in == null) {
+      // Fail here with a clear message instead of handing back null and
+      // letting the caller trip over a NullPointerException.
+      throw new IllegalArgumentException(
+        "Unable to open '" + f + "' as a classpath resource, file or URL");
+    }
+    return new Scanner(in, "UTF-8");
+  }
+
+}