You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by mi...@apache.org on 2013/12/19 18:48:57 UTC
svn commit: r1552377 [5/15] - in /lucene/dev/branches/lucene5339: ./
dev-tools/ dev-tools/idea/.idea/ dev-tools/idea/.idea/libraries/
dev-tools/idea/lucene/benchmark/src/ dev-tools/idea/lucene/demo/
dev-tools/idea/lucene/facet/ dev-tools/idea/solr/cont...
Modified: lucene/dev/branches/lucene5339/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizerImpl.jflex
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5339/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizerImpl.jflex?rev=1552377&r1=1552376&r2=1552377&view=diff
==============================================================================
--- lucene/dev/branches/lucene5339/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizerImpl.jflex (original)
+++ lucene/dev/branches/lucene5339/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizerImpl.jflex Thu Dec 19 17:48:47 2013
@@ -35,11 +35,13 @@ import org.apache.lucene.analysis.tokena
* Asian languages, including Thai, Lao, Myanmar, and Khmer</li>
* <li><IDEOGRAPHIC>: A single CJKV ideographic character</li>
* <li><HIRAGANA>: A single hiragana character</li>
+ * <li><KATAKANA>: A sequence of katakana characters</li>
+ * <li><HANGUL>: A sequence of Hangul characters</li>
* </ul>
*/
%%
-%unicode 6.1
+%unicode 6.3
%integer
%final
%public
@@ -50,33 +52,39 @@ import org.apache.lucene.analysis.tokena
%buffer 4096
%include SUPPLEMENTARY.jflex-macro
-ALetter = ([\p{WB:ALetter}] | {ALetterSupp})
-Format = ([\p{WB:Format}] | {FormatSupp})
-Numeric = ([\p{WB:Numeric}] | {NumericSupp})
-Extend = ([\p{WB:Extend}] | {ExtendSupp})
-Katakana = ([\p{WB:Katakana}] | {KatakanaSupp})
-MidLetter = ([\p{WB:MidLetter}] | {MidLetterSupp})
-MidNum = ([\p{WB:MidNum}] | {MidNumSupp})
-MidNumLet = ([\p{WB:MidNumLet}] | {MidNumLetSupp})
-ExtendNumLet = ([\p{WB:ExtendNumLet}] | {ExtendNumLetSupp})
-ComplexContext = ([\p{LB:Complex_Context}] | {ComplexContextSupp})
-Han = ([\p{Script:Han}] | {HanSupp})
-Hiragana = ([\p{Script:Hiragana}] | {HiraganaSupp})
+ALetter = (\p{WB:ALetter} | {ALetterSupp})
+Format = (\p{WB:Format} | {FormatSupp})
+Numeric = ([\p{WB:Numeric}[\p{Blk:HalfAndFullForms}&&\p{Nd}]] | {NumericSupp})
+Extend = (\p{WB:Extend} | {ExtendSupp})
+Katakana = (\p{WB:Katakana} | {KatakanaSupp})
+MidLetter = (\p{WB:MidLetter} | {MidLetterSupp})
+MidNum = (\p{WB:MidNum} | {MidNumSupp})
+MidNumLet = (\p{WB:MidNumLet} | {MidNumLetSupp})
+ExtendNumLet = (\p{WB:ExtendNumLet} | {ExtendNumLetSupp})
+ComplexContext = (\p{LB:Complex_Context} | {ComplexContextSupp})
+Han = (\p{Script:Han} | {HanSupp})
+Hiragana = (\p{Script:Hiragana} | {HiraganaSupp})
+SingleQuote = (\p{WB:Single_Quote} | {SingleQuoteSupp})
+DoubleQuote = (\p{WB:Double_Quote} | {DoubleQuoteSupp})
+HebrewLetter = (\p{WB:Hebrew_Letter} | {HebrewLetterSupp})
+RegionalIndicator = (\p{WB:Regional_Indicator} | {RegionalIndicatorSupp})
+HebrewOrALetter = ({HebrewLetter} | {ALetter})
-// Script=Hangul & Aletter
-HangulEx = (!(!\p{Script:Hangul}|!\p{WB:ALetter})) ({Format} | {Extend})*
// UAX#29 WB4. X (Extend | Format)* --> X
//
-ALetterEx = {ALetter} ({Format} | {Extend})*
-// TODO: Convert hard-coded full-width numeric range to property intersection (something like [\p{Full-Width}&&\p{Numeric}]) once JFlex supports it
-NumericEx = ({Numeric} | [\uFF10-\uFF19]) ({Format} | {Extend})*
-KatakanaEx = {Katakana} ({Format} | {Extend})*
-MidLetterEx = ({MidLetter} | {MidNumLet}) ({Format} | {Extend})*
-MidNumericEx = ({MidNum} | {MidNumLet}) ({Format} | {Extend})*
-ExtendNumLetEx = {ExtendNumLet} ({Format} | {Extend})*
-
-HanEx = {Han} ({Format} | {Extend})*
-HiraganaEx = {Hiragana} ({Format} | {Extend})*
+HangulEx = [\p{Script:Hangul}&&[\p{WB:ALetter}\p{WB:Hebrew_Letter}]] ({Format} | {Extend})*
+HebrewOrALetterEx = {HebrewOrALetter} ({Format} | {Extend})*
+NumericEx = {Numeric} ({Format} | {Extend})*
+KatakanaEx = {Katakana} ({Format} | {Extend})*
+MidLetterEx = ({MidLetter} | {MidNumLet} | {SingleQuote}) ({Format} | {Extend})*
+MidNumericEx = ({MidNum} | {MidNumLet} | {SingleQuote}) ({Format} | {Extend})*
+ExtendNumLetEx = {ExtendNumLet} ({Format} | {Extend})*
+HanEx = {Han} ({Format} | {Extend})*
+HiraganaEx = {Hiragana} ({Format} | {Extend})*
+SingleQuoteEx = {SingleQuote} ({Format} | {Extend})*
+DoubleQuoteEx = {DoubleQuote} ({Format} | {Extend})*
+HebrewLetterEx = {HebrewLetter} ({Format} | {Extend})*
+RegionalIndicatorEx = {RegionalIndicator} ({Format} | {Extend})*
// URL and E-mail syntax specifications:
//
@@ -213,40 +221,47 @@ EMAIL = {EMAILlocalPart} "@" ({DomainNam
{EMAIL} { return EMAIL_TYPE; }
// UAX#29 WB8. Numeric × Numeric
-// WB11. Numeric (MidNum | MidNumLet) × Numeric
-// WB12. Numeric × (MidNum | MidNumLet) Numeric
-// WB13a. (ALetter | Numeric | Katakana | ExtendNumLet) × ExtendNumLet
-// WB13b. ExtendNumLet × (ALetter | Numeric | Katakana)
-//
-{ExtendNumLetEx}* {NumericEx} ({ExtendNumLetEx}+ {NumericEx}
- | {MidNumericEx} {NumericEx}
- | {NumericEx})*
-{ExtendNumLetEx}*
+// WB11. Numeric (MidNum | MidNumLet | Single_Quote) × Numeric
+// WB12. Numeric × (MidNum | MidNumLet | Single_Quote) Numeric
+// WB13a. (ALetter | Hebrew_Letter | Numeric | Katakana | ExtendNumLet) × ExtendNumLet
+// WB13b. ExtendNumLet × (ALetter | Hebrew_Letter | Numeric | Katakana)
+//
+{ExtendNumLetEx}* {NumericEx} ( ( {ExtendNumLetEx}* | {MidNumericEx} ) {NumericEx} )* {ExtendNumLetEx}*
{ return NUMERIC_TYPE; }
// subset of the below for typing purposes only!
{HangulEx}+
{ return HANGUL_TYPE; }
-
+
{KatakanaEx}+
{ return KATAKANA_TYPE; }
-// UAX#29 WB5. ALetter × ALetter
-// WB6. ALetter × (MidLetter | MidNumLet) ALetter
-// WB7. ALetter (MidLetter | MidNumLet) × ALetter
-// WB9. ALetter × Numeric
-// WB10. Numeric × ALetter
+// UAX#29 WB5. (ALetter | Hebrew_Letter) × (ALetter | Hebrew_Letter)
+// WB6. (ALetter | Hebrew_Letter) × (MidLetter | MidNumLet | Single_Quote) (ALetter | Hebrew_Letter)
+// WB7. (ALetter | Hebrew_Letter) (MidLetter | MidNumLet | Single_Quote) × (ALetter | Hebrew_Letter)
+// WB7a. Hebrew_Letter × Single_Quote
+// WB7b. Hebrew_Letter × Double_Quote Hebrew_Letter
+// WB7c. Hebrew_Letter Double_Quote × Hebrew_Letter
+// WB9. (ALetter | Hebrew_Letter) × Numeric
+// WB10. Numeric × (ALetter | Hebrew_Letter)
// WB13. Katakana × Katakana
-// WB13a. (ALetter | Numeric | Katakana | ExtendNumLet) × ExtendNumLet
-// WB13b. ExtendNumLet × (ALetter | Numeric | Katakana)
+// WB13a. (ALetter | Hebrew_Letter | Numeric | Katakana | ExtendNumLet) × ExtendNumLet
+// WB13b. ExtendNumLet × (ALetter | Hebrew_Letter | Numeric | Katakana)
//
-{ExtendNumLetEx}* ( {KatakanaEx} ({ExtendNumLetEx}* {KatakanaEx})*
- | ( {NumericEx} ({ExtendNumLetEx}+ {NumericEx} | {MidNumericEx} {NumericEx} | {NumericEx})*
- | {ALetterEx} ({ExtendNumLetEx}+ {ALetterEx} | {MidLetterEx} {ALetterEx} | {ALetterEx})* )+ )
-({ExtendNumLetEx}+ ( {KatakanaEx} ({ExtendNumLetEx}* {KatakanaEx})*
- | ( {NumericEx} ({ExtendNumLetEx}+ {NumericEx} | {MidNumericEx} {NumericEx} | {NumericEx})*
- | {ALetterEx} ({ExtendNumLetEx}+ {ALetterEx} | {MidLetterEx} {ALetterEx} | {ALetterEx})* )+ ) )*
-{ExtendNumLetEx}*
+{ExtendNumLetEx}* ( {KatakanaEx} ( {ExtendNumLetEx}* {KatakanaEx} )*
+ | ( {HebrewLetterEx} ( {SingleQuoteEx} | {DoubleQuoteEx} {HebrewLetterEx} )
+ | {NumericEx} ( ( {ExtendNumLetEx}* | {MidNumericEx} ) {NumericEx} )*
+ | {HebrewOrALetterEx} ( ( {ExtendNumLetEx}* | {MidLetterEx} ) {HebrewOrALetterEx} )*
+ )+
+ )
+({ExtendNumLetEx}+ ( {KatakanaEx} ( {ExtendNumLetEx}* {KatakanaEx} )*
+ | ( {HebrewLetterEx} ( {SingleQuoteEx} | {DoubleQuoteEx} {HebrewLetterEx} )
+ | {NumericEx} ( ( {ExtendNumLetEx}* | {MidNumericEx} ) {NumericEx} )*
+ | {HebrewOrALetterEx} ( ( {ExtendNumLetEx}* | {MidLetterEx} ) {HebrewOrALetterEx} )*
+ )+
+ )
+)*
+{ExtendNumLetEx}*
{ return WORD_TYPE; }
@@ -258,7 +273,7 @@ EMAIL = {EMAILlocalPart} "@" ({DomainNam
// annex. That means that satisfactory treatment of languages like Chinese
// or Thai requires special handling.
//
-// In Unicode 6.1, only one character has the \p{Line_Break = Contingent_Break}
+// In Unicode 6.3, only one character has the \p{Line_Break = Contingent_Break}
// property: U+FFFC ( ￼ ) OBJECT REPLACEMENT CHARACTER.
//
// In the ICU implementation of UAX#29, \p{Line_Break = Complex_Context}
@@ -280,6 +295,8 @@ EMAIL = {EMAILlocalPart} "@" ({DomainNam
// UAX#29 WB3. CR × LF
// WB3a. (Newline | CR | LF) ÷
// WB3b. ÷ (Newline | CR | LF)
+// WB13c. Regional_Indicator × Regional_Indicator
// WB14. Any ÷ Any
//
-[^] { /* Break so we don't hit fall-through warning: */ break;/* Not numeric, word, ideographic, hiragana, or SE Asian -- ignore it. */ }
+{RegionalIndicatorEx} {RegionalIndicatorEx}+ | [^]
+ { /* Break so we don't hit fall-through warning: */ break; /* Not numeric, word, ideographic, hiragana, or SE Asian -- ignore it. */ }
Modified: lucene/dev/branches/lucene5339/lucene/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5339/lucene/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymFilterFactory.java?rev=1552377&r1=1552376&r2=1552377&view=diff
==============================================================================
--- lucene/dev/branches/lucene5339/lucene/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymFilterFactory.java (original)
+++ lucene/dev/branches/lucene5339/lucene/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymFilterFactory.java Thu Dec 19 17:48:47 2013
@@ -133,8 +133,8 @@ public class SynonymFilterFactory extend
analyzer = new Analyzer() {
@Override
protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
- Tokenizer tokenizer = factory == null ? new WhitespaceTokenizer(Version.LUCENE_50, reader) : factory.create(reader);
- TokenStream stream = ignoreCase ? new LowerCaseFilter(Version.LUCENE_50, tokenizer) : tokenizer;
+ Tokenizer tokenizer = factory == null ? new WhitespaceTokenizer(Version.LUCENE_CURRENT, reader) : factory.create(reader);
+ TokenStream stream = ignoreCase ? new LowerCaseFilter(Version.LUCENE_CURRENT, tokenizer) : tokenizer;
return new TokenStreamComponents(tokenizer, stream);
}
};
@@ -201,7 +201,7 @@ public class SynonymFilterFactory extend
private Analyzer loadAnalyzer(ResourceLoader loader, String cname) throws IOException {
Class<? extends Analyzer> clazz = loader.findClass(cname, Analyzer.class);
try {
- Analyzer analyzer = clazz.getConstructor(Version.class).newInstance(Version.LUCENE_50);
+ Analyzer analyzer = clazz.getConstructor(Version.class).newInstance(Version.LUCENE_CURRENT);
if (analyzer instanceof ResourceLoaderAware) {
((ResourceLoaderAware) analyzer).inform(loader);
}
Modified: lucene/dev/branches/lucene5339/lucene/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerImpl.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5339/lucene/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerImpl.java?rev=1552377&r1=1552376&r2=1552377&view=diff
==============================================================================
--- lucene/dev/branches/lucene5339/lucene/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerImpl.java (original)
+++ lucene/dev/branches/lucene5339/lucene/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerImpl.java Thu Dec 19 17:48:47 2013
@@ -1,4 +1,4 @@
-/* The following code was generated by JFlex. */
+/* The following code was generated by JFlex 1.5.0-SNAPSHOT */
package org.apache.lucene.analysis.wikipedia;
@@ -84,21 +84,20 @@ class WikipediaTokenizerImpl {
private static final int [] ZZ_ACTION = zzUnpackAction();
private static final String ZZ_ACTION_PACKED_0 =
- "\12\0\4\1\4\2\1\3\1\1\1\4\1\1\2\5"+
- "\1\6\2\5\1\7\1\5\2\10\1\11\1\12\1\11"+
- "\1\13\1\14\1\10\1\15\1\16\1\15\1\17\1\20"+
- "\1\10\1\21\1\10\4\22\1\23\1\22\1\24\1\25"+
- "\1\26\3\0\1\27\14\0\1\30\1\31\1\32\1\33"+
- "\1\11\1\0\1\34\1\35\1\36\1\0\1\37\1\0"+
- "\1\40\3\0\1\41\1\42\2\43\1\42\2\44\2\0"+
- "\1\43\1\0\14\43\1\42\3\0\1\11\1\45\3\0"+
- "\1\46\1\47\5\0\1\50\4\0\1\50\2\0\2\50"+
- "\2\0\1\11\5\0\1\31\1\42\1\43\1\51\3\0"+
- "\1\11\2\0\1\52\30\0\1\53\2\0\1\54\1\55"+
- "\1\56";
+ "\12\0\4\1\4\2\1\3\1\4\1\1\2\5\1\6"+
+ "\1\5\1\7\1\5\2\10\1\11\1\5\1\12\1\11"+
+ "\1\13\1\14\1\15\1\16\1\15\1\17\1\20\1\10"+
+ "\1\21\1\10\4\22\1\23\1\24\1\25\1\26\3\0"+
+ "\1\27\14\0\1\30\1\31\1\32\1\33\1\11\1\0"+
+ "\1\34\1\35\1\36\1\0\1\37\1\0\1\40\3\0"+
+ "\1\41\1\42\2\43\1\42\2\44\2\0\1\43\1\0"+
+ "\14\43\1\42\3\0\1\11\1\45\3\0\1\46\1\47"+
+ "\5\0\1\50\4\0\1\50\2\0\2\50\2\0\1\11"+
+ "\5\0\1\31\1\42\1\43\1\51\3\0\1\11\2\0"+
+ "\1\52\30\0\1\53\2\0\1\54\1\55\1\56";
private static int [] zzUnpackAction() {
- int [] result = new int[184];
+ int [] result = new int[181];
int offset = 0;
offset = zzUnpackAction(ZZ_ACTION_PACKED_0, offset, result);
return result;
@@ -125,30 +124,30 @@ class WikipediaTokenizerImpl {
private static final String ZZ_ROWMAP_PACKED_0 =
"\0\0\0\54\0\130\0\204\0\260\0\334\0\u0108\0\u0134"+
"\0\u0160\0\u018c\0\u01b8\0\u01e4\0\u0210\0\u023c\0\u0268\0\u0294"+
- "\0\u02c0\0\u02ec\0\u01b8\0\u0318\0\u0344\0\u0370\0\u01b8\0\u039c"+
- "\0\u03c8\0\u03f4\0\u0420\0\u044c\0\u0478\0\u01b8\0\u039c\0\u04a4"+
- "\0\u01b8\0\u04d0\0\u04fc\0\u0528\0\u0554\0\u0580\0\u05ac\0\u05d8"+
- "\0\u0604\0\u0630\0\u065c\0\u0688\0\u06b4\0\u01b8\0\u06e0\0\u039c"+
- "\0\u070c\0\u0738\0\u0764\0\u0790\0\u01b8\0\u01b8\0\u07bc\0\u07e8"+
- "\0\u0814\0\u01b8\0\u0840\0\u086c\0\u0898\0\u08c4\0\u08f0\0\u091c"+
- "\0\u0948\0\u0974\0\u09a0\0\u09cc\0\u09f8\0\u0a24\0\u0a50\0\u0a7c"+
- "\0\u01b8\0\u01b8\0\u0aa8\0\u0ad4\0\u0b00\0\u0b00\0\u01b8\0\u0b2c"+
+ "\0\u02c0\0\u02ec\0\u01b8\0\u0318\0\u0344\0\u01b8\0\u0370\0\u039c"+
+ "\0\u03c8\0\u03f4\0\u0420\0\u01b8\0\u0370\0\u044c\0\u0478\0\u01b8"+
+ "\0\u04a4\0\u04d0\0\u04fc\0\u0528\0\u0554\0\u0580\0\u05ac\0\u05d8"+
+ "\0\u0604\0\u0630\0\u065c\0\u01b8\0\u0688\0\u0370\0\u06b4\0\u06e0"+
+ "\0\u070c\0\u01b8\0\u01b8\0\u0738\0\u0764\0\u0790\0\u01b8\0\u07bc"+
+ "\0\u07e8\0\u0814\0\u0840\0\u086c\0\u0898\0\u08c4\0\u08f0\0\u091c"+
+ "\0\u0948\0\u0974\0\u09a0\0\u09cc\0\u09f8\0\u01b8\0\u01b8\0\u0a24"+
+ "\0\u0a50\0\u0a7c\0\u0a7c\0\u01b8\0\u0aa8\0\u0ad4\0\u0b00\0\u0b2c"+
"\0\u0b58\0\u0b84\0\u0bb0\0\u0bdc\0\u0c08\0\u0c34\0\u0c60\0\u0c8c"+
- "\0\u0cb8\0\u0ce4\0\u0d10\0\u0898\0\u0d3c\0\u0d68\0\u0d94\0\u0dc0"+
+ "\0\u0814\0\u0cb8\0\u0ce4\0\u0d10\0\u0d3c\0\u0d68\0\u0d94\0\u0dc0"+
"\0\u0dec\0\u0e18\0\u0e44\0\u0e70\0\u0e9c\0\u0ec8\0\u0ef4\0\u0f20"+
- "\0\u0f4c\0\u0f78\0\u0fa4\0\u0fd0\0\u0ffc\0\u1028\0\u1054\0\u1080"+
- "\0\u10ac\0\u10d8\0\u01b8\0\u1104\0\u1130\0\u115c\0\u1188\0\u01b8"+
+ "\0\u0f4c\0\u0f78\0\u0fa4\0\u0fd0\0\u0ffc\0\u1028\0\u1054\0\u01b8"+
+ "\0\u1080\0\u10ac\0\u10d8\0\u1104\0\u01b8\0\u1130\0\u115c\0\u1188"+
"\0\u11b4\0\u11e0\0\u120c\0\u1238\0\u1264\0\u1290\0\u12bc\0\u12e8"+
- "\0\u1314\0\u1340\0\u136c\0\u1398\0\u13c4\0\u086c\0\u09f8\0\u13f0"+
- "\0\u141c\0\u1448\0\u1474\0\u14a0\0\u14cc\0\u14f8\0\u1524\0\u01b8"+
- "\0\u1550\0\u157c\0\u15a8\0\u15d4\0\u1600\0\u162c\0\u1658\0\u1684"+
- "\0\u16b0\0\u01b8\0\u16dc\0\u1708\0\u1734\0\u1760\0\u178c\0\u17b8"+
+ "\0\u1314\0\u1340\0\u07e8\0\u0974\0\u136c\0\u1398\0\u13c4\0\u13f0"+
+ "\0\u141c\0\u1448\0\u1474\0\u14a0\0\u01b8\0\u14cc\0\u14f8\0\u1524"+
+ "\0\u1550\0\u157c\0\u15a8\0\u15d4\0\u1600\0\u162c\0\u01b8\0\u1658"+
+ "\0\u1684\0\u16b0\0\u16dc\0\u1708\0\u1734\0\u1760\0\u178c\0\u17b8"+
"\0\u17e4\0\u1810\0\u183c\0\u1868\0\u1894\0\u18c0\0\u18ec\0\u1918"+
"\0\u1944\0\u1970\0\u199c\0\u19c8\0\u19f4\0\u1a20\0\u1a4c\0\u1a78"+
- "\0\u1aa4\0\u1ad0\0\u1afc\0\u1b28\0\u1b54\0\u01b8\0\u01b8\0\u01b8";
+ "\0\u1aa4\0\u1ad0\0\u01b8\0\u01b8\0\u01b8";
private static int [] zzUnpackRowMap() {
- int [] result = new int[184];
+ int [] result = new int[181];
int offset = 0;
offset = zzUnpackRowMap(ZZ_ROWMAP_PACKED_0, offset, result);
return result;
@@ -172,152 +171,149 @@ class WikipediaTokenizerImpl {
private static final String ZZ_TRANS_PACKED_0 =
"\1\13\1\14\5\13\1\15\1\13\1\16\3\13\1\17"+
- "\1\20\1\21\1\22\1\23\1\24\2\13\1\25\2\13"+
- "\15\17\1\26\2\13\3\17\1\13\7\27\1\30\5\27"+
- "\4\31\1\27\1\32\3\27\1\33\1\27\15\31\3\27"+
- "\3\31\10\27\1\30\5\27\4\34\1\27\1\32\3\27"+
- "\1\35\1\27\15\34\3\27\3\34\1\27\7\36\1\37"+
- "\5\36\4\40\1\36\1\32\2\27\1\36\1\41\1\36"+
- "\15\40\3\36\1\42\2\40\2\36\1\43\5\36\1\37"+
- "\5\36\4\44\1\36\1\45\2\36\1\46\2\36\15\44"+
- "\3\36\3\44\10\36\1\37\5\36\4\47\1\36\1\45"+
- "\2\36\1\46\2\36\15\47\3\36\3\47\10\36\1\37"+
- "\5\36\4\47\1\36\1\45\2\36\1\50\2\36\15\47"+
- "\3\36\3\47\10\36\1\37\1\36\1\51\3\36\4\52"+
- "\1\36\1\45\5\36\15\52\3\36\3\52\10\36\1\53"+
- "\5\36\4\54\1\36\1\45\5\36\15\54\1\36\1\55"+
- "\1\36\3\54\1\36\1\56\1\57\5\56\1\60\1\56"+
- "\1\61\3\56\4\62\1\56\1\63\2\56\1\64\2\56"+
- "\15\62\2\56\1\65\3\62\1\56\55\0\1\66\62\0"+
- "\1\67\4\0\4\70\7\0\6\70\1\71\6\70\3\0"+
- "\3\70\12\0\1\72\43\0\1\73\1\74\1\75\1\76"+
- "\2\77\1\0\1\100\3\0\1\100\1\17\1\20\1\21"+
- "\1\22\7\0\15\17\3\0\3\17\3\0\1\101\1\0"+
- "\1\102\2\103\1\0\1\104\3\0\1\104\3\20\1\22"+
- "\7\0\15\20\3\0\3\20\2\0\1\73\1\105\1\75"+
- "\1\76\2\103\1\0\1\104\3\0\1\104\1\21\1\20"+
- "\1\21\1\22\7\0\15\21\3\0\3\21\3\0\1\106"+
- "\1\0\1\102\2\77\1\0\1\100\3\0\1\100\4\22"+
- "\7\0\15\22\3\0\3\22\24\0\1\13\55\0\1\107"+
- "\73\0\1\110\16\0\1\67\4\0\4\70\7\0\15\70"+
- "\3\0\3\70\16\0\4\31\7\0\15\31\3\0\3\31"+
- "\24\0\1\27\56\0\1\111\42\0\4\34\7\0\15\34"+
- "\3\0\3\34\27\0\1\112\42\0\4\40\7\0\15\40"+
- "\3\0\3\40\16\0\4\40\7\0\2\40\1\113\12\40"+
- "\3\0\3\40\2\0\1\114\67\0\4\44\7\0\15\44"+
- "\3\0\3\44\24\0\1\36\55\0\1\115\43\0\4\47"+
- "\7\0\15\47\3\0\3\47\26\0\1\116\37\0\1\117"+
- "\57\0\4\52\7\0\15\52\3\0\3\52\11\0\1\120"+
- "\4\0\4\70\7\0\15\70\3\0\3\70\16\0\4\54"+
- "\7\0\15\54\3\0\3\54\47\0\1\117\6\0\1\121"+
- "\63\0\1\122\57\0\4\62\7\0\15\62\3\0\3\62"+
- "\24\0\1\56\55\0\1\123\43\0\4\70\7\0\15\70"+
- "\3\0\3\70\14\0\1\36\1\0\4\124\1\0\3\125"+
- "\3\0\15\124\3\0\3\124\14\0\1\36\1\0\4\124"+
- "\1\0\3\125\3\0\3\124\1\126\11\124\3\0\3\124"+
- "\16\0\1\127\1\0\1\127\10\0\15\127\3\0\3\127"+
- "\16\0\1\130\1\131\1\132\1\133\7\0\15\130\3\0"+
- "\3\130\16\0\1\134\1\0\1\134\10\0\15\134\3\0"+
- "\3\134\16\0\1\135\1\136\1\135\1\136\7\0\15\135"+
- "\3\0\3\135\16\0\1\137\2\140\1\141\7\0\15\137"+
- "\3\0\3\137\16\0\1\100\2\142\10\0\15\100\3\0"+
- "\3\100\16\0\1\143\2\144\1\145\7\0\15\143\3\0"+
- "\3\143\16\0\4\136\7\0\15\136\3\0\3\136\16\0"+
- "\1\146\2\147\1\150\7\0\15\146\3\0\3\146\16\0"+
- "\1\151\2\152\1\153\7\0\15\151\3\0\3\151\16\0"+
- "\1\154\1\144\1\155\1\145\7\0\15\154\3\0\3\154"+
- "\16\0\1\156\2\131\1\133\7\0\15\156\3\0\3\156"+
- "\30\0\1\157\1\160\64\0\1\161\27\0\4\40\7\0"+
- "\2\40\1\162\12\40\3\0\3\40\2\0\1\163\101\0"+
- "\1\164\1\165\40\0\4\70\7\0\6\70\1\166\6\70"+
- "\3\0\3\70\2\0\1\167\63\0\1\170\71\0\1\171"+
- "\1\172\34\0\1\173\1\0\1\36\1\0\4\124\1\0"+
- "\3\125\3\0\15\124\3\0\3\124\16\0\4\174\1\0"+
- "\3\125\3\0\15\174\3\0\3\174\12\0\1\173\1\0"+
- "\1\36\1\0\4\124\1\0\3\125\3\0\10\124\1\175"+
- "\4\124\3\0\3\124\2\0\1\73\13\0\1\127\1\0"+
- "\1\127\10\0\15\127\3\0\3\127\3\0\1\176\1\0"+
- "\1\102\2\177\6\0\1\130\1\131\1\132\1\133\7\0"+
- "\15\130\3\0\3\130\3\0\1\200\1\0\1\102\2\201"+
- "\1\0\1\202\3\0\1\202\3\131\1\133\7\0\15\131"+
- "\3\0\3\131\3\0\1\203\1\0\1\102\2\201\1\0"+
- "\1\202\3\0\1\202\1\132\1\131\1\132\1\133\7\0"+
- "\15\132\3\0\3\132\3\0\1\204\1\0\1\102\2\177"+
- "\6\0\4\133\7\0\15\133\3\0\3\133\3\0\1\205"+
- "\2\0\1\205\7\0\1\135\1\136\1\135\1\136\7\0"+
- "\15\135\3\0\3\135\3\0\1\205\2\0\1\205\7\0"+
- "\4\136\7\0\15\136\3\0\3\136\3\0\1\177\1\0"+
- "\1\102\2\177\6\0\1\137\2\140\1\141\7\0\15\137"+
- "\3\0\3\137\3\0\1\201\1\0\1\102\2\201\1\0"+
- "\1\202\3\0\1\202\3\140\1\141\7\0\15\140\3\0"+
- "\3\140\3\0\1\177\1\0\1\102\2\177\6\0\4\141"+
- "\7\0\15\141\3\0\3\141\3\0\1\202\2\0\2\202"+
- "\1\0\1\202\3\0\1\202\3\142\10\0\15\142\3\0"+
- "\3\142\3\0\1\106\1\0\1\102\2\77\1\0\1\100"+
- "\3\0\1\100\1\143\2\144\1\145\7\0\15\143\3\0"+
- "\3\143\3\0\1\101\1\0\1\102\2\103\1\0\1\104"+
- "\3\0\1\104\3\144\1\145\7\0\15\144\3\0\3\144"+
- "\3\0\1\106\1\0\1\102\2\77\1\0\1\100\3\0"+
- "\1\100\4\145\7\0\15\145\3\0\3\145\3\0\1\77"+
- "\1\0\1\102\2\77\1\0\1\100\3\0\1\100\1\146"+
- "\2\147\1\150\7\0\15\146\3\0\3\146\3\0\1\103"+
- "\1\0\1\102\2\103\1\0\1\104\3\0\1\104\3\147"+
- "\1\150\7\0\15\147\3\0\3\147\3\0\1\77\1\0"+
- "\1\102\2\77\1\0\1\100\3\0\1\100\4\150\7\0"+
- "\15\150\3\0\3\150\3\0\1\100\2\0\2\100\1\0"+
- "\1\100\3\0\1\100\1\151\2\152\1\153\7\0\15\151"+
- "\3\0\3\151\3\0\1\104\2\0\2\104\1\0\1\104"+
- "\3\0\1\104\3\152\1\153\7\0\15\152\3\0\3\152"+
- "\3\0\1\100\2\0\2\100\1\0\1\100\3\0\1\100"+
- "\4\153\7\0\15\153\3\0\3\153\3\0\1\206\1\0"+
- "\1\102\2\77\1\0\1\100\3\0\1\100\1\154\1\144"+
- "\1\155\1\145\7\0\15\154\3\0\3\154\3\0\1\207"+
- "\1\0\1\102\2\103\1\0\1\104\3\0\1\104\1\155"+
- "\1\144\1\155\1\145\7\0\15\155\3\0\3\155\3\0"+
- "\1\204\1\0\1\102\2\177\6\0\1\156\2\131\1\133"+
- "\7\0\15\156\3\0\3\156\31\0\1\160\54\0\1\210"+
- "\64\0\1\211\26\0\4\40\7\0\15\40\3\0\1\40"+
- "\1\212\1\40\31\0\1\165\54\0\1\213\35\0\1\36"+
- "\1\0\4\124\1\0\3\125\3\0\3\124\1\214\11\124"+
- "\3\0\3\124\2\0\1\215\102\0\1\172\54\0\1\216"+
- "\34\0\1\217\52\0\1\173\3\0\4\174\7\0\15\174"+
- "\3\0\3\174\12\0\1\173\1\0\1\220\1\0\4\124"+
- "\1\0\3\125\3\0\15\124\3\0\3\124\16\0\1\221"+
- "\1\133\1\221\1\133\7\0\15\221\3\0\3\221\16\0"+
- "\4\141\7\0\15\141\3\0\3\141\16\0\4\145\7\0"+
- "\15\145\3\0\3\145\16\0\4\150\7\0\15\150\3\0"+
- "\3\150\16\0\4\153\7\0\15\153\3\0\3\153\16\0"+
- "\1\222\1\145\1\222\1\145\7\0\15\222\3\0\3\222"+
- "\16\0\4\133\7\0\15\133\3\0\3\133\16\0\4\223"+
- "\7\0\15\223\3\0\3\223\33\0\1\224\61\0\1\225"+
- "\30\0\4\40\6\0\1\226\15\40\3\0\2\40\1\227"+
- "\33\0\1\230\32\0\1\173\1\0\1\36\1\0\4\124"+
- "\1\0\3\125\3\0\10\124\1\231\4\124\3\0\3\124"+
- "\2\0\1\232\104\0\1\233\36\0\4\234\7\0\15\234"+
- "\3\0\3\234\3\0\1\176\1\0\1\102\2\177\6\0"+
- "\1\221\1\133\1\221\1\133\7\0\15\221\3\0\3\221"+
- "\3\0\1\206\1\0\1\102\2\77\1\0\1\100\3\0"+
- "\1\100\1\222\1\145\1\222\1\145\7\0\15\222\3\0"+
- "\3\222\3\0\1\205\2\0\1\205\7\0\4\223\7\0"+
- "\15\223\3\0\3\223\34\0\1\235\55\0\1\236\26\0"+
- "\1\237\60\0\4\40\6\0\1\226\15\40\3\0\3\40"+
- "\34\0\1\240\31\0\1\173\1\0\1\117\1\0\4\124"+
- "\1\0\3\125\3\0\15\124\3\0\3\124\34\0\1\241"+
- "\32\0\1\242\2\0\4\234\7\0\15\234\3\0\3\234"+
- "\35\0\1\243\62\0\1\244\20\0\1\245\77\0\1\246"+
- "\53\0\1\247\32\0\1\36\1\0\4\174\1\0\3\125"+
- "\3\0\15\174\3\0\3\174\36\0\1\250\53\0\1\251"+
- "\33\0\4\252\7\0\15\252\3\0\3\252\36\0\1\253"+
- "\53\0\1\254\54\0\1\255\61\0\1\256\11\0\1\257"+
- "\12\0\4\252\7\0\15\252\3\0\3\252\37\0\1\260"+
- "\53\0\1\261\54\0\1\262\22\0\1\13\62\0\4\263"+
- "\7\0\15\263\3\0\3\263\40\0\1\264\53\0\1\265"+
- "\43\0\1\266\26\0\2\263\1\0\2\263\1\0\2\263"+
- "\2\0\5\263\7\0\15\263\3\0\4\263\27\0\1\267"+
- "\53\0\1\270\24\0";
+ "\1\20\1\21\1\22\1\23\3\13\1\24\2\13\15\17"+
+ "\1\25\2\13\3\17\1\13\7\26\1\27\5\26\4\30"+
+ "\5\26\1\31\1\26\15\30\3\26\3\30\10\26\1\27"+
+ "\5\26\4\32\5\26\1\33\1\26\15\32\3\26\3\32"+
+ "\1\26\7\34\1\35\5\34\4\36\1\34\1\37\2\26"+
+ "\1\34\1\40\1\34\15\36\3\34\1\41\2\36\2\34"+
+ "\1\42\5\34\1\35\5\34\4\43\4\34\1\44\2\34"+
+ "\15\43\3\34\3\43\10\34\1\35\5\34\4\45\4\34"+
+ "\1\44\2\34\15\45\3\34\3\45\10\34\1\35\5\34"+
+ "\4\45\4\34\1\46\2\34\15\45\3\34\3\45\10\34"+
+ "\1\35\1\34\1\47\3\34\4\50\7\34\15\50\3\34"+
+ "\3\50\10\34\1\51\5\34\4\52\7\34\15\52\1\34"+
+ "\1\53\1\34\3\52\1\34\1\54\1\55\5\54\1\56"+
+ "\1\54\1\57\3\54\4\60\4\54\1\61\2\54\15\60"+
+ "\2\54\1\62\3\60\1\54\55\0\1\63\62\0\1\64"+
+ "\4\0\4\65\7\0\6\65\1\66\6\65\3\0\3\65"+
+ "\12\0\1\67\43\0\1\70\1\71\1\72\1\73\2\74"+
+ "\1\0\1\75\3\0\1\75\1\17\1\20\1\21\1\22"+
+ "\7\0\15\17\3\0\3\17\3\0\1\76\1\0\1\77"+
+ "\2\100\1\0\1\101\3\0\1\101\3\20\1\22\7\0"+
+ "\15\20\3\0\3\20\2\0\1\70\1\102\1\72\1\73"+
+ "\2\100\1\0\1\101\3\0\1\101\1\21\1\20\1\21"+
+ "\1\22\7\0\15\21\3\0\3\21\3\0\1\103\1\0"+
+ "\1\77\2\74\1\0\1\75\3\0\1\75\4\22\7\0"+
+ "\15\22\3\0\3\22\26\0\1\104\73\0\1\105\16\0"+
+ "\1\64\4\0\4\65\7\0\15\65\3\0\3\65\16\0"+
+ "\4\30\7\0\15\30\3\0\3\30\27\0\1\106\42\0"+
+ "\4\32\7\0\15\32\3\0\3\32\27\0\1\107\42\0"+
+ "\4\36\7\0\15\36\3\0\3\36\24\0\1\26\45\0"+
+ "\4\36\7\0\2\36\1\110\12\36\3\0\3\36\2\0"+
+ "\1\111\67\0\4\43\7\0\15\43\3\0\3\43\26\0"+
+ "\1\112\43\0\4\45\7\0\15\45\3\0\3\45\26\0"+
+ "\1\113\37\0\1\114\57\0\4\50\7\0\15\50\3\0"+
+ "\3\50\11\0\1\115\4\0\4\65\7\0\15\65\3\0"+
+ "\3\65\16\0\4\52\7\0\15\52\3\0\3\52\47\0"+
+ "\1\114\6\0\1\116\63\0\1\117\57\0\4\60\7\0"+
+ "\15\60\3\0\3\60\26\0\1\120\43\0\4\65\7\0"+
+ "\15\65\3\0\3\65\14\0\1\34\1\0\4\121\1\0"+
+ "\3\122\3\0\15\121\3\0\3\121\14\0\1\34\1\0"+
+ "\4\121\1\0\3\122\3\0\3\121\1\123\11\121\3\0"+
+ "\3\121\16\0\1\124\1\0\1\124\10\0\15\124\3\0"+
+ "\3\124\16\0\1\125\1\126\1\127\1\130\7\0\15\125"+
+ "\3\0\3\125\16\0\1\131\1\0\1\131\10\0\15\131"+
+ "\3\0\3\131\16\0\1\132\1\133\1\132\1\133\7\0"+
+ "\15\132\3\0\3\132\16\0\1\134\2\135\1\136\7\0"+
+ "\15\134\3\0\3\134\16\0\1\75\2\137\10\0\15\75"+
+ "\3\0\3\75\16\0\1\140\2\141\1\142\7\0\15\140"+
+ "\3\0\3\140\16\0\4\133\7\0\15\133\3\0\3\133"+
+ "\16\0\1\143\2\144\1\145\7\0\15\143\3\0\3\143"+
+ "\16\0\1\146\2\147\1\150\7\0\15\146\3\0\3\146"+
+ "\16\0\1\151\1\141\1\152\1\142\7\0\15\151\3\0"+
+ "\3\151\16\0\1\153\2\126\1\130\7\0\15\153\3\0"+
+ "\3\153\30\0\1\154\1\155\64\0\1\156\27\0\4\36"+
+ "\7\0\2\36\1\157\12\36\3\0\3\36\2\0\1\160"+
+ "\101\0\1\161\1\162\40\0\4\65\7\0\6\65\1\163"+
+ "\6\65\3\0\3\65\2\0\1\164\63\0\1\165\71\0"+
+ "\1\166\1\167\34\0\1\170\1\0\1\34\1\0\4\121"+
+ "\1\0\3\122\3\0\15\121\3\0\3\121\16\0\4\171"+
+ "\1\0\3\122\3\0\15\171\3\0\3\171\12\0\1\170"+
+ "\1\0\1\34\1\0\4\121\1\0\3\122\3\0\10\121"+
+ "\1\172\4\121\3\0\3\121\2\0\1\70\13\0\1\124"+
+ "\1\0\1\124\10\0\15\124\3\0\3\124\3\0\1\173"+
+ "\1\0\1\77\2\174\6\0\1\125\1\126\1\127\1\130"+
+ "\7\0\15\125\3\0\3\125\3\0\1\175\1\0\1\77"+
+ "\2\176\1\0\1\177\3\0\1\177\3\126\1\130\7\0"+
+ "\15\126\3\0\3\126\3\0\1\200\1\0\1\77\2\176"+
+ "\1\0\1\177\3\0\1\177\1\127\1\126\1\127\1\130"+
+ "\7\0\15\127\3\0\3\127\3\0\1\201\1\0\1\77"+
+ "\2\174\6\0\4\130\7\0\15\130\3\0\3\130\3\0"+
+ "\1\202\2\0\1\202\7\0\1\132\1\133\1\132\1\133"+
+ "\7\0\15\132\3\0\3\132\3\0\1\202\2\0\1\202"+
+ "\7\0\4\133\7\0\15\133\3\0\3\133\3\0\1\174"+
+ "\1\0\1\77\2\174\6\0\1\134\2\135\1\136\7\0"+
+ "\15\134\3\0\3\134\3\0\1\176\1\0\1\77\2\176"+
+ "\1\0\1\177\3\0\1\177\3\135\1\136\7\0\15\135"+
+ "\3\0\3\135\3\0\1\174\1\0\1\77\2\174\6\0"+
+ "\4\136\7\0\15\136\3\0\3\136\3\0\1\177\2\0"+
+ "\2\177\1\0\1\177\3\0\1\177\3\137\10\0\15\137"+
+ "\3\0\3\137\3\0\1\103\1\0\1\77\2\74\1\0"+
+ "\1\75\3\0\1\75\1\140\2\141\1\142\7\0\15\140"+
+ "\3\0\3\140\3\0\1\76\1\0\1\77\2\100\1\0"+
+ "\1\101\3\0\1\101\3\141\1\142\7\0\15\141\3\0"+
+ "\3\141\3\0\1\103\1\0\1\77\2\74\1\0\1\75"+
+ "\3\0\1\75\4\142\7\0\15\142\3\0\3\142\3\0"+
+ "\1\74\1\0\1\77\2\74\1\0\1\75\3\0\1\75"+
+ "\1\143\2\144\1\145\7\0\15\143\3\0\3\143\3\0"+
+ "\1\100\1\0\1\77\2\100\1\0\1\101\3\0\1\101"+
+ "\3\144\1\145\7\0\15\144\3\0\3\144\3\0\1\74"+
+ "\1\0\1\77\2\74\1\0\1\75\3\0\1\75\4\145"+
+ "\7\0\15\145\3\0\3\145\3\0\1\75\2\0\2\75"+
+ "\1\0\1\75\3\0\1\75\1\146\2\147\1\150\7\0"+
+ "\15\146\3\0\3\146\3\0\1\101\2\0\2\101\1\0"+
+ "\1\101\3\0\1\101\3\147\1\150\7\0\15\147\3\0"+
+ "\3\147\3\0\1\75\2\0\2\75\1\0\1\75\3\0"+
+ "\1\75\4\150\7\0\15\150\3\0\3\150\3\0\1\203"+
+ "\1\0\1\77\2\74\1\0\1\75\3\0\1\75\1\151"+
+ "\1\141\1\152\1\142\7\0\15\151\3\0\3\151\3\0"+
+ "\1\204\1\0\1\77\2\100\1\0\1\101\3\0\1\101"+
+ "\1\152\1\141\1\152\1\142\7\0\15\152\3\0\3\152"+
+ "\3\0\1\201\1\0\1\77\2\174\6\0\1\153\2\126"+
+ "\1\130\7\0\15\153\3\0\3\153\31\0\1\155\54\0"+
+ "\1\205\64\0\1\206\26\0\4\36\7\0\15\36\3\0"+
+ "\1\36\1\207\1\36\31\0\1\162\54\0\1\210\35\0"+
+ "\1\34\1\0\4\121\1\0\3\122\3\0\3\121\1\211"+
+ "\11\121\3\0\3\121\2\0\1\212\102\0\1\167\54\0"+
+ "\1\213\34\0\1\214\52\0\1\170\3\0\4\171\7\0"+
+ "\15\171\3\0\3\171\12\0\1\170\1\0\1\215\1\0"+
+ "\4\121\1\0\3\122\3\0\15\121\3\0\3\121\16\0"+
+ "\1\216\1\130\1\216\1\130\7\0\15\216\3\0\3\216"+
+ "\16\0\4\136\7\0\15\136\3\0\3\136\16\0\4\142"+
+ "\7\0\15\142\3\0\3\142\16\0\4\145\7\0\15\145"+
+ "\3\0\3\145\16\0\4\150\7\0\15\150\3\0\3\150"+
+ "\16\0\1\217\1\142\1\217\1\142\7\0\15\217\3\0"+
+ "\3\217\16\0\4\130\7\0\15\130\3\0\3\130\16\0"+
+ "\4\220\7\0\15\220\3\0\3\220\33\0\1\221\61\0"+
+ "\1\222\30\0\4\36\6\0\1\223\15\36\3\0\2\36"+
+ "\1\224\33\0\1\225\32\0\1\170\1\0\1\34\1\0"+
+ "\4\121\1\0\3\122\3\0\10\121\1\226\4\121\3\0"+
+ "\3\121\2\0\1\227\104\0\1\230\36\0\4\231\7\0"+
+ "\15\231\3\0\3\231\3\0\1\173\1\0\1\77\2\174"+
+ "\6\0\1\216\1\130\1\216\1\130\7\0\15\216\3\0"+
+ "\3\216\3\0\1\203\1\0\1\77\2\74\1\0\1\75"+
+ "\3\0\1\75\1\217\1\142\1\217\1\142\7\0\15\217"+
+ "\3\0\3\217\3\0\1\202\2\0\1\202\7\0\4\220"+
+ "\7\0\15\220\3\0\3\220\34\0\1\232\55\0\1\233"+
+ "\26\0\1\234\60\0\4\36\6\0\1\223\15\36\3\0"+
+ "\3\36\34\0\1\235\31\0\1\170\1\0\1\114\1\0"+
+ "\4\121\1\0\3\122\3\0\15\121\3\0\3\121\34\0"+
+ "\1\236\32\0\1\237\2\0\4\231\7\0\15\231\3\0"+
+ "\3\231\35\0\1\240\62\0\1\241\20\0\1\242\77\0"+
+ "\1\243\53\0\1\244\32\0\1\34\1\0\4\171\1\0"+
+ "\3\122\3\0\15\171\3\0\3\171\36\0\1\245\53\0"+
+ "\1\246\33\0\4\247\7\0\15\247\3\0\3\247\36\0"+
+ "\1\250\53\0\1\251\54\0\1\252\61\0\1\253\11\0"+
+ "\1\254\12\0\4\247\7\0\15\247\3\0\3\247\37\0"+
+ "\1\255\53\0\1\256\54\0\1\257\22\0\1\13\62\0"+
+ "\4\260\7\0\15\260\3\0\3\260\40\0\1\261\53\0"+
+ "\1\262\43\0\1\263\26\0\2\260\1\0\2\260\1\0"+
+ "\2\260\2\0\5\260\7\0\15\260\3\0\4\260\27\0"+
+ "\1\264\53\0\1\265\24\0";
private static int [] zzUnpackTrans() {
- int [] result = new int[7040];
+ int [] result = new int[6908];
int offset = 0;
offset = zzUnpackTrans(ZZ_TRANS_PACKED_0, offset, result);
return result;
@@ -355,8 +351,8 @@ class WikipediaTokenizerImpl {
private static final int [] ZZ_ATTRIBUTE = zzUnpackAttribute();
private static final String ZZ_ATTRIBUTE_PACKED_0 =
- "\12\0\1\11\7\1\1\11\3\1\1\11\6\1\1\11"+
- "\2\1\1\11\14\1\1\11\6\1\2\11\3\0\1\11"+
+ "\12\0\1\11\7\1\1\11\2\1\1\11\5\1\1\11"+
+ "\3\1\1\11\13\1\1\11\5\1\2\11\3\0\1\11"+
"\14\0\2\1\2\11\1\1\1\0\2\1\1\11\1\0"+
"\1\1\1\0\1\1\3\0\7\1\2\0\1\1\1\0"+
"\15\1\3\0\1\1\1\11\3\0\1\1\1\11\5\0"+
@@ -365,7 +361,7 @@ class WikipediaTokenizerImpl {
"\2\0\3\11";
private static int [] zzUnpackAttribute() {
- int [] result = new int[184];
+ int [] result = new int[181];
int offset = 0;
offset = zzUnpackAttribute(ZZ_ATTRIBUTE_PACKED_0, offset, result);
return result;
@@ -508,7 +504,6 @@ final void reset() {
/**
* Creates a new scanner
- * There is also a java.io.InputStream version of this constructor.
*
* @param in the java.io.Reader to read input from.
*/
@@ -516,7 +511,6 @@ final void reset() {
this.zzReader = in;
}
-
/**
* Unpacks the compressed character translation table.
Modified: lucene/dev/branches/lucene5339/lucene/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerImpl.jflex
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5339/lucene/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerImpl.jflex?rev=1552377&r1=1552376&r2=1552377&view=diff
==============================================================================
--- lucene/dev/branches/lucene5339/lucene/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerImpl.jflex (original)
+++ lucene/dev/branches/lucene5339/lucene/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerImpl.jflex Thu Dec 19 17:48:47 2013
@@ -212,7 +212,7 @@ DOUBLE_EQUALS = "="{2}
{DOUBLE_BRACE} {numWikiTokensSeen = 0; positionInc = 1; currentTokType = CITATION; yybegin(DOUBLE_BRACE_STATE);/* Break so we don't hit fall-through warning: */ break;}
{CITATION} {numWikiTokensSeen = 0; positionInc = 1; currentTokType = CITATION; yybegin(DOUBLE_BRACE_STATE);/* Break so we don't hit fall-through warning: */ break;}
//ignore
- . | {WHITESPACE} |{INFOBOX} {numWikiTokensSeen = 0; positionInc = 1; /* Break so we don't hit fall-through warning: */ break;}
+ [^] |{INFOBOX} {numWikiTokensSeen = 0; positionInc = 1; /* Break so we don't hit fall-through warning: */ break;}
}
<INTERNAL_LINK_STATE>{
@@ -221,7 +221,7 @@ DOUBLE_EQUALS = "="{2}
{ALPHANUM} {yybegin(INTERNAL_LINK_STATE); numWikiTokensSeen++; return currentTokType;}
{DOUBLE_BRACKET_CLOSE} {numLinkToks = 0; yybegin(YYINITIAL); /* Break so we don't hit fall-through warning: */ break;}
//ignore
- . | {WHITESPACE} { positionInc = 1; /* Break so we don't hit fall-through warning: */ break;}
+ [^] { positionInc = 1; /* Break so we don't hit fall-through warning: */ break;}
}
<EXTERNAL_LINK_STATE>{
@@ -236,7 +236,7 @@ DOUBLE_EQUALS = "="{2}
{ALPHANUM} {yybegin(CATEGORY_STATE); numWikiTokensSeen++; return currentTokType;}
{DOUBLE_BRACKET_CLOSE} {yybegin(YYINITIAL);/* Break so we don't hit fall-through warning: */ break;}
//ignore
- . | {WHITESPACE} { positionInc = 1; /* Break so we don't hit fall-through warning: */ break;}
+ [^] { positionInc = 1; /* Break so we don't hit fall-through warning: */ break;}
}
//italics
<TWO_SINGLE_QUOTES_STATE>{
@@ -249,7 +249,7 @@ DOUBLE_EQUALS = "="{2}
{EXTERNAL_LINK} {currentTokType = EXTERNAL_LINK; numWikiTokensSeen = 0; yybegin(EXTERNAL_LINK_STATE); /* Break so we don't hit fall-through warning: */ break;}
//ignore
- . | {WHITESPACE} { /* Break so we don't hit fall-through warning: */ break;/* ignore */ }
+ [^] { /* Break so we don't hit fall-through warning: */ break;/* ignore */ }
}
//bold
<THREE_SINGLE_QUOTES_STATE>{
@@ -260,7 +260,7 @@ DOUBLE_EQUALS = "="{2}
{EXTERNAL_LINK} {currentTokType = EXTERNAL_LINK; numWikiTokensSeen = 0; yybegin(EXTERNAL_LINK_STATE); /* Break so we don't hit fall-through warning: */ break;}
//ignore
- . | {WHITESPACE} { /* Break so we don't hit fall-through warning: */ break;/* ignore */ }
+ [^] { /* Break so we don't hit fall-through warning: */ break;/* ignore */ }
}
//bold italics
@@ -272,7 +272,7 @@ DOUBLE_EQUALS = "="{2}
{EXTERNAL_LINK} {currentTokType = EXTERNAL_LINK; numWikiTokensSeen = 0; yybegin(EXTERNAL_LINK_STATE); /* Break so we don't hit fall-through warning: */ break;}
//ignore
- . | {WHITESPACE} { /* Break so we don't hit fall-through warning: */ break;/* ignore */ }
+ [^] { /* Break so we don't hit fall-through warning: */ break;/* ignore */ }
}
<DOUBLE_EQUALS_STATE>{
@@ -280,15 +280,15 @@ DOUBLE_EQUALS = "="{2}
{ALPHANUM} {currentTokType = HEADING; yybegin(DOUBLE_EQUALS_STATE); numWikiTokensSeen++; return currentTokType;}
{DOUBLE_EQUALS} {yybegin(YYINITIAL); /* Break so we don't hit fall-through warning: */ break;}
//ignore
- . | {WHITESPACE} { /* Break so we don't hit fall-through warning: */ break;/* ignore */ }
+ [^] { /* Break so we don't hit fall-through warning: */ break;/* ignore */ }
}
<DOUBLE_BRACE_STATE>{
{ALPHANUM} {yybegin(DOUBLE_BRACE_STATE); numWikiTokensSeen = 0; return currentTokType;}
{DOUBLE_BRACE_CLOSE} {yybegin(YYINITIAL); /* Break so we don't hit fall-through warning: */ break;}
{CITATION_CLOSE} {yybegin(YYINITIAL); /* Break so we don't hit fall-through warning: */ break;}
- //ignore
- . | {WHITESPACE} { /* Break so we don't hit fall-through warning: */ break;/* ignore */ }
+ //ignore
+ [^] { /* Break so we don't hit fall-through warning: */ break;/* ignore */ }
}
<STRING> {
@@ -305,7 +305,7 @@ DOUBLE_EQUALS = "="{2}
{PIPE} {yybegin(STRING); return currentTokType;/*pipe*/}
- .|{WHITESPACE} { /* Break so we don't hit fall-through warning: */ break;/* ignore STRING */ }
+ [^] { /* Break so we don't hit fall-through warning: */ break;/* ignore STRING */ }
}
@@ -327,7 +327,7 @@ DOUBLE_EQUALS = "="{2}
//end wikipedia
/** Ignore the rest */
-. | {WHITESPACE}|{TAGS} { /* Break so we don't hit fall-through warning: */ break;/* ignore */ }
+[^] | {TAGS} { /* Break so we don't hit fall-through warning: */ break;/* ignore */ }
//INTERNAL_LINK = "["{2}({ALPHANUM}+{WHITESPACE}*)+"]"{2}
Modified: lucene/dev/branches/lucene5339/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestStandardAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5339/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestStandardAnalyzer.java?rev=1552377&r1=1552376&r2=1552377&view=diff
==============================================================================
--- lucene/dev/branches/lucene5339/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestStandardAnalyzer.java (original)
+++ lucene/dev/branches/lucene5339/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestStandardAnalyzer.java Thu Dec 19 17:48:47 2013
@@ -202,7 +202,7 @@ public class TestStandardAnalyzer extend
}
public void testUnicodeWordBreaks() throws Exception {
- WordBreakTestUnicode_6_1_0 wordBreakTest = new WordBreakTestUnicode_6_1_0();
+ WordBreakTestUnicode_6_3_0 wordBreakTest = new WordBreakTestUnicode_6_3_0();
wordBreakTest.test(a);
}
@@ -230,6 +230,8 @@ public class TestStandardAnalyzer extend
checkOneTerm(a, "壹ã", "壹ã"); // ideographic
checkOneTerm(a, "ìã", "ìã"); // hangul
}
+
+
/** blast some random strings through the analyzer */
public void testRandomStrings() throws Exception {
Modified: lucene/dev/branches/lucene5339/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestStopAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5339/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestStopAnalyzer.java?rev=1552377&r1=1552376&r2=1552377&view=diff
==============================================================================
--- lucene/dev/branches/lucene5339/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestStopAnalyzer.java (original)
+++ lucene/dev/branches/lucene5339/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestStopAnalyzer.java Thu Dec 19 17:48:47 2013
@@ -60,7 +60,7 @@ public class TestStopAnalyzer extends Ba
public void testStopList() throws IOException {
CharArraySet stopWordsSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("good", "test", "analyzer"), false);
- StopAnalyzer newStop = new StopAnalyzer(Version.LUCENE_40, stopWordsSet);
+ StopAnalyzer newStop = new StopAnalyzer(TEST_VERSION_CURRENT, stopWordsSet);
try (TokenStream stream = newStop.tokenStream("test", "This is a good test of the english stop analyzer")) {
assertNotNull(stream);
CharTermAttribute termAtt = stream.getAttribute(CharTermAttribute.class);
Modified: lucene/dev/branches/lucene5339/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestStopFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5339/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestStopFilter.java?rev=1552377&r1=1552376&r2=1552377&view=diff
==============================================================================
--- lucene/dev/branches/lucene5339/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestStopFilter.java (original)
+++ lucene/dev/branches/lucene5339/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestStopFilter.java Thu Dec 19 17:48:47 2013
@@ -94,7 +94,7 @@ public class TestStopFilter extends Base
// LUCENE-3849: make sure after .end() we see the "ending" posInc
public void testEndStopword() throws Exception {
CharArraySet stopSet = StopFilter.makeStopSet(TEST_VERSION_CURRENT, "of");
- StopFilter stpf = new StopFilter(Version.LUCENE_40, new MockTokenizer(new StringReader("test of"), MockTokenizer.WHITESPACE, false), stopSet);
+ StopFilter stpf = new StopFilter(TEST_VERSION_CURRENT, new MockTokenizer(new StringReader("test of"), MockTokenizer.WHITESPACE, false), stopSet);
assertTokenStreamContents(stpf, new String[] { "test" },
new int[] {0},
new int[] {4},
Modified: lucene/dev/branches/lucene5339/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestUAX29URLEmailTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5339/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestUAX29URLEmailTokenizer.java?rev=1552377&r1=1552376&r2=1552377&view=diff
==============================================================================
--- lucene/dev/branches/lucene5339/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestUAX29URLEmailTokenizer.java (original)
+++ lucene/dev/branches/lucene5339/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestUAX29URLEmailTokenizer.java Thu Dec 19 17:48:47 2013
@@ -424,7 +424,7 @@ public class TestUAX29URLEmailTokenizer
}
public void testUnicodeWordBreaks() throws Exception {
- WordBreakTestUnicode_6_1_0 wordBreakTest = new WordBreakTestUnicode_6_1_0();
+ WordBreakTestUnicode_6_3_0 wordBreakTest = new WordBreakTestUnicode_6_3_0();
wordBreakTest.test(a);
}
Modified: lucene/dev/branches/lucene5339/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/email.addresses.from.random.text.with.email.addresses.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5339/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/email.addresses.from.random.text.with.email.addresses.txt?rev=1552377&r1=1552376&r2=1552377&view=diff
==============================================================================
--- lucene/dev/branches/lucene5339/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/email.addresses.from.random.text.with.email.addresses.txt (original)
+++ lucene/dev/branches/lucene5339/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/email.addresses.from.random.text.with.email.addresses.txt Thu Dec 19 17:48:47 2013
@@ -78,13 +78,13 @@ LTLNFsgB@[191.56.104.113]
iT0LOq.jtPW=G06~cETxl2ge@Ah0.4hn72v.tQ.LU
VGLn@z3E2.3an2.MM
TWmfsxn@[112.192.017.029]
-2tP07A@2twe6u0d6uw6o.sed7n.109mx.XN--KGBECHTV
+2tP07A@2twe6u0d6uw6o.sed7n.109mx.XN--KPRW13D
CjaPC63@['\RDrwk]
Ayydpdoa@tdgypppmen.wf
"gfKP9"@jo3-r0.mz
-aTMgDW4@t5gax.XN--0ZWM56D
+aTMgDW4@t5gax.XN--3E0B707E
mcDrMO3FQ@nwc21.y5qd45lesryrp.IL
-NZqj@v50egeveepk.z290kk.Bc3.xn--jxalpdlp
+NZqj@v50egeveepk.z290kk.Bc3.xn--kprw13d
XtAhFnq@[218.214.251.103]
x0S8uos@[109.82.126.233]
ALB4KFavj16pODdd@i206d6s.MM
Modified: lucene/dev/branches/lucene5339/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/generateJavaUnicodeWordBreakTest.pl
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5339/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/generateJavaUnicodeWordBreakTest.pl?rev=1552377&r1=1552376&r2=1552377&view=diff
==============================================================================
--- lucene/dev/branches/lucene5339/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/generateJavaUnicodeWordBreakTest.pl (original)
+++ lucene/dev/branches/lucene5339/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/generateJavaUnicodeWordBreakTest.pl Thu Dec 19 17:48:47 2013
@@ -78,9 +78,10 @@ import org.junit.Ignore;
* \\p{Script = Hiragana}
* \\p{LineBreak = Complex_Context} (From $line_break_url)
* \\p{WordBreak = ALetter} (From $word_break_url)
+ * \\p{WordBreak = Hebrew_Letter}
* \\p{WordBreak = Katakana}
* \\p{WordBreak = Numeric} (Excludes full-width Arabic digits)
- * [\\uFF10-\\uFF19] (Full-width Arabic digits)
+ * [\\uFF10-\\uFF19] (Full-width Arabic digits)
*/
\@Ignore
public class ${class_name} extends BaseTokenStreamTestCase {
@@ -97,7 +98,7 @@ parse_Unicode_data_file($line_break_url,
parse_Unicode_data_file($scripts_url, $codepoints,
{'han' => 1, 'hiragana' => 1});
parse_Unicode_data_file($word_break_url, $codepoints,
- {'aletter' => 1, 'katakana' => 1, 'numeric' => 1});
+ {'aletter' => 1, 'hebrew_letter' => 1, 'katakana' => 1, 'numeric' => 1});
my @tests = split /\r?\n/, get_URL_content($word_break_test_url);
my $output_path = File::Spec->catpath($volume, $directory, $output_filename);
@@ -109,25 +110,33 @@ print STDERR "Writing '$output_path'..."
print OUT $header;
for my $line (@tests) {
- next if ($line =~ /^\s*\#/);
- # ÷ 0001 à 0300 ÷ # ÷ [0.2] <START OF HEADING> (Other) à [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+ next if ($line =~ /^\s*(?:|\#.*)$/); # Skip blank or comment-only lines
+ # Example line: ÷ 0001 × 0300 ÷ # ÷ [0.2] <START OF HEADING> (Other) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
my ($sequence) = $line =~ /^(.*?)\s*\#/;
+ $line =~ s/\t/ /g; # Convert tabs to two spaces (no tabs allowed in Lucene source)
print OUT " // $line\n";
$sequence =~ s/\s*÷\s*$//; # Trim trailing break character
my $test_string = $sequence;
$test_string =~ s/\s*÷\s*/\\u/g;
$test_string =~ s/\s*Ã\s*/\\u/g;
+ $test_string =~ s/\\u([0-9A-F]{5,})/join('', map { "\\u$_" } above_BMP_char_to_surrogates($1))/ge;
$test_string =~ s/\\u000A/\\n/g;
$test_string =~ s/\\u000D/\\r/g;
+ $test_string =~ s/\\u0022/\\\"/g;
$sequence =~ s/^\s*÷\s*//; # Trim leading break character
my @tokens = ();
for my $candidate (split /\s*÷\s*/, $sequence) {
my @chars = ();
my $has_wanted_char = 0;
while ($candidate =~ /([0-9A-F]+)/gi) {
- push @chars, $1;
+ my $hexchar = $1;
+ if (4 == length($hexchar)) {
+ push @chars, $hexchar;
+ } else {
+ push @chars, above_BMP_char_to_surrogates($hexchar);
+ }
unless ($has_wanted_char) {
- $has_wanted_char = 1 if (defined($codepoints->[hex($1)]));
+ $has_wanted_char = 1 if (defined($codepoints->[hex($hexchar)]));
}
}
if ($has_wanted_char) {
@@ -144,6 +153,21 @@ close OUT;
print STDERR "done.\n";
+# sub above_BMP_char_to_surrogates
+#
+# Converts hex references to chars above the BMP (i.e., greater than 0xFFFF)
+# to the corresponding UTF-16 surrogate pair
+#
+# Assumption: input string is a sequence of more than four hex digits
+#
+sub above_BMP_char_to_surrogates {
+ my $ch = hex(shift);
+ my $high_surrogate = 0xD800 + (($ch - 0x10000) >> 10);
+ my $low_surrogate = 0xDC00 + ($ch & 0x3FF);
+ return map { sprintf("%04X", $_) } ($high_surrogate, $low_surrogate);
+}
+
+
# sub parse_Unicode_data_file
#
# Downloads and parses the specified Unicode data file, parses it, and
Modified: lucene/dev/branches/lucene5339/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/random.text.with.email.addresses.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5339/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/random.text.with.email.addresses.txt?rev=1552377&r1=1552376&r2=1552377&view=diff
==============================================================================
--- lucene/dev/branches/lucene5339/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/random.text.with.email.addresses.txt (original)
+++ lucene/dev/branches/lucene5339/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/random.text.with.email.addresses.txt Thu Dec 19 17:48:47 2013
@@ -121,14 +121,14 @@ Bzzzzzzzz! Bzzzzzzzzzzzzzzz! Tell them "
of LTLNFsgB@[191.56.104.113] all, until it has read it is
iT0LOq.jtPW=G06~cETxl2ge@Ah0.4hn72v.tQ.LU there. <VG...@z3E2.3an2.MM> Once
TWmfsxn@[112.192.017.029] Spiros under the place
-2tP07A@2twe6u0d6uw6o.sed7n.109mx.XN--KGBECHTV as were not a house of the
+2tP07A@2twe6u0d6uw6o.sed7n.109mx.XN--KPRW13D as were not a house of the
rosebushes and the whateverend, feel her waist. She changes everything. We had
decided to do you know CjaPC63@['\RDrwk] this, is what did leave, pray; let us
come to, <Ay...@tdgypppmen.wf> what history as died. Strange, Spiros with
delight: That night "gfKP9"@jo3-r0.mz and gold case
-<aT...@t5gax.XN--0ZWM56D> is spring: the aeon arising, wherein he returned,
+<aT...@t5gax.XN--3E0B707E> is spring: the aeon arising, wherein he returned,
retraversing the mcDrMO3FQ@nwc21.y5qd45lesryrp.IL gates, first
-<NZ...@v50egeveepk.z290kk.Bc3.xn--jxalpdlp> to reach session. Initiating first
+<NZ...@v50egeveepk.z290kk.Bc3.xn--kprw13d> to reach session. Initiating first
part of the main hall toward his own spurs. Hes an <XtAhFnq@[218.214.251.103]>
Irifix And older ones who wins? ADAM: x0S8uos@[109.82.126.233] The violin and
reality. The hidden set up to come. ROSE WAKINS: No answer. The
Modified: lucene/dev/branches/lucene5339/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/random.text.with.urls.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5339/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/random.text.with.urls.txt?rev=1552377&r1=1552376&r2=1552377&view=diff
==============================================================================
--- lucene/dev/branches/lucene5339/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/random.text.with.urls.txt (original)
+++ lucene/dev/branches/lucene5339/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/random.text.with.urls.txt Thu Dec 19 17:48:47 2013
@@ -24,7 +24,7 @@ and Joe recited this iron bars with thei
almost drove me towards evening. At
HTTP://173.202.175.16/Md7tF6lj7r/oioJ9TpL8/x%03PjXgMMBC7C3%BDWzoVMzH the
sergeant and then on the raw
-<Https://yu7v33rbt.vC6U3.XN--JXALPDLP/y%4fMSzkGFlm/wbDF4m> afternoon towards
+<Https://yu7v33rbt.vC6U3.XN--KPRW13D/y%4fMSzkGFlm/wbDF4m> afternoon towards
the terror, merely wished him as biled
M19nq.0URV4A.Me.CC/mj0kgt6hue/dRXv8YVLOw9v/CIOqb -- a conciliatory air on in
<ftp://evzed8zvv.l2xkky.Dq85qcl1.eu:1184/07eY0/3X1OB7gPUk/J8la5OPUY3/y1oTItIs1HFPPp/5Q02N0cPyDH87hSy/jheYGF8s%F3P/%86PmYhi/ViKHoxsHqM8J>
@@ -47,7 +47,7 @@ to live. You didn't know nothing could a
behind the answer those aids, I saw him in the same appearance of the convict's
file:///%C5=.%8by/uuFXEaW8.%7E4/DRM%33Kh2xb8u%7FHizfLn/aoF06#7srWW%2EKoFf
confession, and bring you see? '
-HTTP://yA2O3F.XN--0ZWM56D/qPDTt/MwMXGQq2S7JT/TJ2iCND said my limbs. Joe in an
+HTTP://yA2O3F.XN--3E0B707E/qPDTt/MwMXGQq2S7JT/TJ2iCND said my limbs. Joe in an
accusatory manner as well known that Joe Gargery marry her cup. `I wonder and
there was publicly made it was,
<file:///Gdx5CDZYW%6cnzMJ/7HJ/J%63BSZDXtS/yfWXqq6#> as lookers on; me, I
@@ -63,7 +63,7 @@ again
FTP://Hi144dz6hctql2n3uom.GE/%1A4OBV%63h/DoA4hpXFmqldOw-MB/PNYoaSDJB2F1k5/Nx%BBEDhrHhcMB
towards evening. At last, and kneaded, and a dead man taking any. There was
publicly made out there?' said I,
-ftp://w0yaysrl.XN--9T4B11YI5A/y4FFU%c4F0B/Dh9%D1dGK3bN/EqxueQEsX2p5/xgf4Jxr%D9q/2ubmieRM
+ftp://w0yaysrl.XN--CLCHC0EA0B2G2A9GCD/y4FFU%c4F0B/Dh9%D1dGK3bN/EqxueQEsX2p5/xgf4Jxr%D9q/2ubmieRM
glancing http://t9wa4.rjcahbc06qmyk9jkhu3f.ZA/vIwW3sc3Pg/Bwmeo6KAjkRY at the
N54l6e.vu/1m2%8bMFjv/oBdy%36.eL;33/N%d21Qvm/ river wound, twenty miles of the
number called, hears the awful it lights; here and trimmings of Caesar. This
@@ -155,7 +155,7 @@ ftp://E1cdf-p.XN--MGBERP4A5D4AR:60510/qM
at me, and that her walking z3ymb.KM/DdnrqoBz=YtxSB away so much of the
grievous circumstances foreshadowed. After receiving the way, that I thought,
if she should go to?' `Good again!' cried the
-FTP://7kgip3z.XN--HGBK6AJ7F53BBA:15983/OYEQzIA0 society of a savoury pork pie,
+FTP://7kgip3z.XN--KPRY57D:15983/OYEQzIA0 society of a savoury pork pie,
and nezt6awdc.lSZDSU14B1OH.4n6nkmjyyj.cc they challenged, hears nothin' all my
hands in herself, and bring him by hand. `This,' ftp://085.062.055.011/bopfVV/
said he wore ftp://Mbbn8n.6ge03fiivyc7of.PS/mvb/X8VNt/5WrMZpw/flC6Rs a dog of
@@ -191,7 +191,7 @@ and tingling, and that I had won of the
from Richard the furthest end of
http://ch43n.51rkj.rze.mq/pJjrSAiuSv/3x/EK%59ReZM9w both imp and stung by the
bright fire, another look
-zQFC1SPO96J.Jy20d8.xn--0zwm56d:863/0OWpT4dpkMURAGe/nFg/LQBUr%3E/af7dO1 over her
+zQFC1SPO96J.Jy20d8.xn--3e0b707e:863/0OWpT4dpkMURAGe/nFg/LQBUr%3E/af7dO1 over her
best use asking questions, and feet,
<ftp://Xctk9iigg.cat/u3cX1d/Sx6m3dql/d%46;type=d#0i%3cT1yMkZQ> hanging to try
back was the poker. `It was not warmly. `Seems
@@ -204,7 +204,7 @@ kitchen wall,
Ftp://2gifamku.jqv10es.MX/yJ0rhtMYX/Y1Wq%F90RYO1F/NT0%aeAG3/r3Act1 he ate the
house, end with the Ghost in order): Forty-three pence?' To five hundred
Gargerys.' `I say, Pip; stay
-7WO6F.XN--11B5BS3A9AJ6G/1L%f9G0NEu/L2lD/mQGNS9UhgCEb out with
+7WO6F.XN--45BRJ9C/1L%f9G0NEu/L2lD/mQGNS9UhgCEb out with
ftp://mIMU.t4d24n4lyx39.zURN708MCNGK-TJ42GLLBQRJHVENGPO.bw:59930/KmBYQKHfcjNRe/rK3fUjg%0Ad/.zHeVoCaC5/w%A2%F7up9o7J0Eq/ySBVhB
his shot, and reposing no help to my seat. It was in the kitchen wall, because
I calculated the sounds by giving me by the name for a rush of Joe's forge
@@ -299,7 +299,7 @@ She drew the kitchen, carrying file:///Y
wooden hut
ftp://7vl2w.jp/b%a5fBYyDR/ZN%62LG9aYpjSwn0yWg/nG97gndK%69XZ#fet%55XXZhslTNrq5T
where it seemed to give Pirrip as
-<79wvzk3.24dyfkxg0f4z-hsqgqqzj2p9n59el0a.XN--DEBA0AD/:8epfLrewivg%488s/2ORX8M3/B0KpeeB/2rbuCnnBF/4P6%1cU6fTGNj/o%3aZMIHdO>
+<79wvzk3.24dyfkxg0f4z-hsqgqqzj2p9n59el0a.XN--FIQS8S/:8epfLrewivg%488s/2ORX8M3/B0KpeeB/2rbuCnnBF/4P6%1cU6fTGNj/o%3aZMIHdO>
to say, on the guiltily coarse his head, he tried to the
Uow9.sF.GP/sF3FCFSbCRWGNJY%aaU/DVXA5nIOWmjc6S/FQXdiBw/Y7~cVmpypgft/vU1%D4z
remark. `There's one sprinkled all I was possible she beggared me. All these
@@ -311,7 +311,7 @@ Http://Ed095eimjy.rlb5698d.kp/_l5uoOO/aA
he shook her veil so thick nor my milk and would impart all had returned, with
soap-suds, I had FILE:///#F9Bgl just like thin snow. `Enough of his right side
of thenceforth sitting
-jyia054.l814D9SNHRRA5RJCCW.kvxga.XN--0ZWM56D/sBbx24%f2Tw2/Sd0Lul0Vg1bbIqW~/lveEw
+jyia054.l814D9SNHRRA5RJCCW.kvxga.XN--3E0B707E/sBbx24%f2Tw2/Sd0Lul0Vg1bbIqW~/lveEw
in File:///KKfIe63z/BETB.T%C6sG/RcYgnOycg my soul. I sat down on it, I have
been a spoon that the pie, blacksmith?' asked Estella of it made a mouth wide
open, and so
@@ -324,7 +324,7 @@ FTP://7qf.hlj.TN/IXOeaf/t%c52Jxwy#YkcAy2
pointed to Ftp://Gbu5t.HT/xad4fgjaN#GLpU3XQd6%7F(cHIz himself. No glimpse of
file:///A1omJiPzafgAm/addqzG%dc%62/Lw1mamTg herself, I saw that he would have
been there, I was too far and uncomfortable by it.
-http://89qw34ksf0qf6iq264of-1nya4ds7qvpixw8c951aw8wcm3.qxk7usa.N8j1frzfgnkbi9y2.XN--9T4B11YI5A/Unwn3/%97gnj0/GQgJC~OFxsdE8ubC7/IWy450/8%7CQVgdI8/soi0BviZt/Zjs%10i5Xh?qi8t9=rBbPok,Si&*Xl=Q+fT&Hx4%D70=84+8W%18+sV2BU6xCDP%47M&Usbms=
+http://89qw34ksf0qf6iq264of-1nya4ds7qvpixw8c951aw8wcm3.qxk7usa.N8j1frzfgnkbi9y2.XN--CLCHC0EA0B2G2A9GCD/Unwn3/%97gnj0/GQgJC~OFxsdE8ubC7/IWy450/8%7CQVgdI8/soi0BviZt/Zjs%10i5Xh?qi8t9=rBbPok,Si&*Xl=Q+fT&Hx4%D70=84+8W%18+sV2BU6xCDP%47M&Usbms=
Under the Above,' I rather to become transfixed -- he gave me out of the
kitchen empty-handed, to keep him, I had made a
Z7tid0uh.eZMOI-M1.umlsyksuzovqdw6wozbd.BW/m%e684OhC/ErAhpGiG subject, if he had
@@ -468,7 +468,7 @@ hard twist upon his -- `Well, boy,' Uncl
had heard it had hesitated as little window, violently plunging and she had
committed, and had all about the present calling, which the fingers of tea on
Saturdays than this country, gentlemen, but I could see those,
-https://nWC9-RIA00RPVL4SSWRICWWX3NH5SMQIA7IPMCK174T30VQBL-M6.XN--0ZWM56D/CwE%e2rWaYZmE?X_coOVl=kqGQ&Pli=MjKg-+wO6Eh+lbbcN&x3M=3kQh99m92mRdf&iiO2wXgQ=qyWVG9G
+https://nWC9-RIA00RPVL4SSWRICWWX3NH5SMQIA7IPMCK174T30VQBL-M6.XN--3E0B707E/CwE%e2rWaYZmE?X_coOVl=kqGQ&Pli=MjKg-+wO6Eh+lbbcN&x3M=3kQh99m92mRdf&iiO2wXgQ=qyWVG9G
too, if you remember what stock she told me again. `But I know what
file:///enqvF%EFLOBsZhl8h2z wittles is?' `Yes, ma'am.' `Estella, take me again
and ftp://133.4.130.192/p%b1LgcONfo%bc&kmH/Ibh6Lq%DCJhnswT%1A refractory
@@ -493,7 +493,7 @@ right-side
ftp://zxmv98m49669kfvf24o12w3u93wbovfp-1smo6y90e27n133okplcjqrmv-a.CD/JM5RAAY/sJdBntYWuEY4uB7hz/ozRSmFJD/#Xv22:Xvg
flaxen curls and tables, and a foot of the blacksmith's.' `Halloa!' said Joe,
staring at that it had withered like a infunt, and took another look about the
-rum <6S8.Crwllo5e3.jmtz.XN--G6W251D/6InlQn/hnhu2f%ac8tX/apq%0D6o/> out at once.
+rum <6S8.Crwllo5e3.jmtz.XN--GECRJ9C/6InlQn/hnhu2f%ac8tX/apq%0D6o/> out at once.
Three Jolly Bargemen to think she seemed to tell you were. When we saw the file
coming at my slice. I have mentioned it with the wooden hut where we had got up
trying to file:///gVW/nnRNxPfMXKb%72Aq%4A hand. If ever grateful for. If a
@@ -662,7 +662,7 @@ open,' he
https://227.086.128.010:64985/MDKuFInA86qto5/_cK=4S%49Ic/SPp76/TlV%0Arlwfx/
wiped the liquor. He was the bad; and some one
Ftp://171.160.94.43/ALTgS46I4VM/55PbbK/5N%faTSE another
-Ftp://3zd7z.etw.XN--JXALPDLP/4UztCuTbW2z/LL%2cDI/dTYSi9 turned to put straws
+Ftp://3zd7z.etw.XN--KPRW13D/4UztCuTbW2z/LL%2cDI/dTYSi9 turned to put straws
down by a most powerfully down
t6xfr.wxjz5p2t5.zl8m4.MN/2cbpjk/gsdm/5Mvc-j3rc/16Wb65&c7x to me, and all that
know the window,
@@ -993,7 +993,7 @@ upon a door, which was gobbling mincemea
that Joe's blue file:///EYS2nDf%9671qsm34OZeB%e5lUA/rYBDn0DKs0/ eyes, had an
hour longer than at me, and dismal, and gloves, and that's further than I
mpuwl0.BA/MkvAvc?j%11K4=9gE%613&qOOEP0t=g7EXs looked on. `Now, boy!
-g6tylc0.daeczh.4q.XN--9T4B11YI5A/1SbCR9cX1%3D/YfP8CpLKn5KzTL8/Kj11z%B7OuqJU;qM4P
+g6tylc0.daeczh.4q.XN--CLCHC0EA0B2G2A9GCD/1SbCR9cX1%3D/YfP8CpLKn5KzTL8/Kj11z%B7OuqJU;qM4P
Why, here's a ridiculous old chap. And looked up by hand. `Why don't like
`sulks.' Therefore, I was in such game?' Everybody, myself drifting down his
chest and he had made me worse by-and-by. I was a
@@ -1035,7 +1035,7 @@ in every word out again. `You are prison
<HTTPS://bF2RA.kw/1TA9pTTBg/nM/VSRo%85Kt?%62mxNfo=HDowgwkM3&9oPOLH2=yKOxIe+YNtt>
for us heavy. `I Bolted, myself, 5.Piba4ac.JE/55M1H/AZXdj and thread, and we
after him, or to inspire confidence. This was brought you spoke all the act, he
-couldn't m-k6-ej7x.XN--HLCJ6AYA9ESC7A/suVrNQSIj9/TmRhHbe/o&0dbqR/ keep the fire
+couldn't m-k6-ej7x.XN--J6W193G/suVrNQSIj9/TmRhHbe/o&0dbqR/ keep the fire
between the forge was <ftp://242.228.138.8/o%CC_QjILS%17aYH/%caw8CcVZyPRZ/>
busy in it. Until
hGE9YH3D6.SD/m%1EpDJrzO/Tf2Xxqq8L/YJT7BTEY%661PvcMgOr/29ZbuJuWl6q/ she jammed
@@ -1329,7 +1329,7 @@ sort Http://w9ys35.wb55p6l.hxl.rs/Y97%58
FILE://155.24.106.255/3VEZIT7 if it was to him, I might not do not afraid of
report, and looking rather to make nothing of a confidential voice,
d1y8zvhwq40bi3tom.hPCZ.gJ-286X.TG/ayWKrgAvF6tn/L4SgquZT6C/1DmNe/CI69rJ/%f6QrzZGkSQ
-as lda5l5wc.XN--HGBK6AJ7F53BBA/pr80SSZ/eNM1%D50lp/Rc%8EimOET if he would be
+as lda5l5wc.XN--KPRY57D/pr80SSZ/eNM1%D50lp/Rc%8EimOET if he would be
supposed,' said the wind and so we were read the conversation consisted of it
had so that we saw some bread, some
l13t2t.sk/O%2BmRkw/@0AgGL@NX/wgt&aggDcp#0IYe'C brandy out: no black velvet
Modified: lucene/dev/branches/lucene5339/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/urls.from.random.text.with.urls.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5339/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/urls.from.random.text.with.urls.txt?rev=1552377&r1=1552376&r2=1552377&view=diff
==============================================================================
--- lucene/dev/branches/lucene5339/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/urls.from.random.text.with.urls.txt (original)
+++ lucene/dev/branches/lucene5339/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/urls.from.random.text.with.urls.txt Thu Dec 19 17:48:47 2013
@@ -10,7 +10,7 @@ http://Rcbu6/Oxc%C0IkGSZ8rO9IUpd/BEvkvw3
file:///2CdsP/U2GCLT
Http://Pzw978uzb.ai/yB;mt/o8hVKG/%231Y/Xb1%bb6v1fhjfdkfkBvxed?8mq~=OvF&STpJJk=ws0ZO&0DRA=
HTTP://173.202.175.16/Md7tF6lj7r/oioJ9TpL8/x%03PjXgMMBC7C3%BDWzoVMzH
-Https://yu7v33rbt.vC6U3.XN--JXALPDLP/y%4fMSzkGFlm/wbDF4m
+Https://yu7v33rbt.vC6U3.XN--KPRW13D/y%4fMSzkGFlm/wbDF4m
M19nq.0URV4A.Me.CC/mj0kgt6hue/dRXv8YVLOw9v/CIOqb
ftp://evzed8zvv.l2xkky.Dq85qcl1.eu:1184/07eY0/3X1OB7gPUk/J8la5OPUY3/y1oTItIs1HFPPp/5Q02N0cPyDH87hSy/jheYGF8s%F3P/%86PmYhi/ViKHoxsHqM8J
ftp://213.7.210.47/%e5pFkj6e6Jczc/ypJGG/z%663jYR/37IxLQBPr/Ciq50EUIdueyj
@@ -23,13 +23,13 @@ Ftp://Xmswrxn8d-1s.pe.gm/dB6C3xTk%D3x/EK
FILE:///rKnQkS0MAF#tM%53_2%03%d6ZICH
ftp://R5ecjkf1yx4wpskfh.tv0y3m90ak.0R605.se:51297/zpWcRRcG/1woSqw7ZUko/
file:///%C5=.%8by/uuFXEaW8.%7E4/DRM%33Kh2xb8u%7FHizfLn/aoF06#7srWW%2EKoFf
-HTTP://yA2O3F.XN--0ZWM56D/qPDTt/MwMXGQq2S7JT/TJ2iCND
+HTTP://yA2O3F.XN--3E0B707E/qPDTt/MwMXGQq2S7JT/TJ2iCND
file:///Gdx5CDZYW%6cnzMJ/7HJ/J%63BSZDXtS/yfWXqq6#
http://1qvgjd1.TP/7oq5gWW/Gwqf8fxBXR4/?Br,q=ayMz0&1IO%370N7=;Sl1czc2L+5bRISfD+w&ygP3FhV%E1w36=2Rx
ftp://5SCC6BUYP.Knf1cvlc22z9.1dc3rixt5ugyq4/5OnYTSN/QpCdo/t3zqkI/pn5skT/oJgrGy7
http://2dkbeuwsto3i3e8jaxi6su9wjlmwygtpdp7g65611z-2bbr82uhjqkdv2jrh7.KZ/FiSvI/aaB&dPQ%42kLdM
FTP://Hi144dz6hctql2n3uom.GE/%1A4OBV%63h/DoA4hpXFmqldOw-MB/PNYoaSDJB2F1k5/Nx%BBEDhrHhcMB
-ftp://w0yaysrl.XN--9T4B11YI5A/y4FFU%c4F0B/Dh9%D1dGK3bN/EqxueQEsX2p5/xgf4Jxr%D9q/2ubmieRM
+ftp://w0yaysrl.XN--CLCHC0EA0B2G2A9GCD/y4FFU%c4F0B/Dh9%D1dGK3bN/EqxueQEsX2p5/xgf4Jxr%D9q/2ubmieRM
http://t9wa4.rjcahbc06qmyk9jkhu3f.ZA/vIwW3sc3Pg/Bwmeo6KAjkRY
N54l6e.vu/1m2%8bMFjv/oBdy%36.eL;33/N%d21Qvm/
http://ah-2d4.ASIA/qmp
@@ -75,7 +75,7 @@ http://4u3o/BKdhwRyzG
file:///LdsHfPABFz1vRD1OB6Yl/RS6&1Gmz/mfYul/
ftp://E1cdf-p.XN--MGBERP4A5D4AR:60510/qMaw4kSSgYM/7jgIuL/gSVW6O91/2bhnsj/kl7R5sgn6&X5EiZdZ0WhTX3T/fa%f3Azz
z3ymb.KM/DdnrqoBz=YtxSB
-FTP://7kgip3z.XN--HGBK6AJ7F53BBA:15983/OYEQzIA0
+FTP://7kgip3z.XN--KPRY57D:15983/OYEQzIA0
nezt6awdc.lSZDSU14B1OH.4n6nkmjyyj.cc
ftp://085.062.055.011/bopfVV/
ftp://Mbbn8n.6ge03fiivyc7of.PS/mvb/X8VNt/5WrMZpw/flC6Rs
@@ -93,12 +93,12 @@ https://[3790:ad57:0B63::e5f7:f6ac:164C]
bl60k0jqkc9.oow84o1.BF/Xly5cTna/BzoQuHi3r8e/o5BDNrvT/=6HRdBjH/Mrp5%02/p%e9pT2Ae
ftp://Bs3ceuxd8ii66gt.X8wwdpt.BB:27095/3BfkvfzcmTS/FTffh&S/gIWvJ5Kd/AlOQ%3EnO
http://ch43n.51rkj.rze.mq/pJjrSAiuSv/3x/EK%59ReZM9w
-zQFC1SPO96J.Jy20d8.xn--0zwm56d:863/0OWpT4dpkMURAGe/nFg/LQBUr%3E/af7dO1
+zQFC1SPO96J.Jy20d8.xn--3e0b707e:863/0OWpT4dpkMURAGe/nFg/LQBUr%3E/af7dO1
ftp://Xctk9iigg.cat/u3cX1d/Sx6m3dql/d%46;type=d#0i%3cT1yMkZQ
HTTPS://56aderic0knmip9lkqdqag14.uk:45885/lELiK:/vF%4C5Enwqy/P5NGJ2b/dD6sg1yMV
ftp://vlt.3g45k63viz2.tcnm3.UA:60664/AJ9iqYk%c1/uKbohn2/K%D1kequ4z8rxFpJ
Ftp://2gifamku.jqv10es.MX/yJ0rhtMYX/Y1Wq%F90RYO1F/NT0%aeAG3/r3Act1
-7WO6F.XN--11B5BS3A9AJ6G/1L%f9G0NEu/L2lD/mQGNS9UhgCEb
+7WO6F.XN--45BRJ9C/1L%f9G0NEu/L2lD/mQGNS9UhgCEb
ftp://mIMU.t4d24n4lyx39.zURN708MCNGK-TJ42GLLBQRJHVENGPO.bw:59930/KmBYQKHfcjNRe/rK3fUjg%0Ad/.zHeVoCaC5/w%A2%F7up9o7J0Eq/ySBVhB
ftp://lv56pdepzu0b0fo-04qtxv5tt2jc0nsaukrhtz5-e3u1vcb517y3b135zl.e0r1hson.dk/3TVoqjp6%1FCFSkt/006VZfho/gxrWxgDawM3Uk
Ftp://7n977.Niyt.2fgkzfhj.q7-DJ.Ow7a.it/5zfRi3PO8/1zfKT9%421tP/?SazEijJq%710COQKWeLE/TdUc%b2u/2AxBw9%4BUN6Zp4Z/KfUZd1MTdPv/L4m1tI3/WJvcK1
@@ -147,20 +147,20 @@ ftp://Lq.es/%B1ZPdTZgB2mNFW/qre92rM
file:///IZ47ESCtX%aatQab1/V553gjR?Me/#9%68qPw
file:///Y?GG/BBqMPBJ/nsxX3qP/8P24WdqBxH
ftp://7vl2w.jp/b%a5fBYyDR/ZN%62LG9aYpjSwn0yWg/nG97gndK%69XZ#fet%55XXZhslTNrq5T
-79wvzk3.24dyfkxg0f4z-hsqgqqzj2p9n59el0a.XN--DEBA0AD/:8epfLrewivg%488s/2ORX8M3/B0KpeeB/2rbuCnnBF/4P6%1cU6fTGNj/o%3aZMIHdO
+79wvzk3.24dyfkxg0f4z-hsqgqqzj2p9n59el0a.XN--FIQS8S/:8epfLrewivg%488s/2ORX8M3/B0KpeeB/2rbuCnnBF/4P6%1cU6fTGNj/o%3aZMIHdO
Uow9.sF.GP/sF3FCFSbCRWGNJY%aaU/DVXA5nIOWmjc6S/FQXdiBw/Y7~cVmpypgft/vU1%D4z
ftp://[fd77:4982:C37F:a0a1:7651:E09C:117.093.145.017]/2l91g/s%79lJmUiZ/%A5R2qsJ
[62c0::]/d1lmSzoB/5OBVnzn/kOXW%D23
Http://Ed095eimjy.rlb5698d.kp/_l5uoOO/aA494s?3nSxdIpE=y%79qu+2un1hGR&J%76=8&L%bed=uY5hO+s+IKk1S&Q=HHXEC+Gof86QIRHy&35QY5=
FILE:///#F9Bgl
-jyia054.l814D9SNHRRA5RJCCW.kvxga.XN--0ZWM56D/sBbx24%f2Tw2/Sd0Lul0Vg1bbIqW~/lveEw
+jyia054.l814D9SNHRRA5RJCCW.kvxga.XN--3E0B707E/sBbx24%f2Tw2/Sd0Lul0Vg1bbIqW~/lveEw
File:///KKfIe63z/BETB.T%C6sG/RcYgnOycg
ftp://892f7.oel50j.32.9qj1p-g7lgw.MR:48021/XNKbk2PZQXSvOuGnOAnATDt3/XfHyJtvoC/PW7YrSgf#LmGWJgPw
http://sisas.ua/4CU60ZLK4VgY8AR89
FTP://7qf.hlj.TN/IXOeaf/t%c52Jxwy#YkcAy2
Ftp://Gbu5t.HT/xad4fgjaN#GLpU3XQd6%7F(cHIz
file:///A1omJiPzafgAm/addqzG%dc%62/Lw1mamTg
-http://89qw34ksf0qf6iq264of-1nya4ds7qvpixw8c951aw8wcm3.qxk7usa.N8j1frzfgnkbi9y2.XN--9T4B11YI5A/Unwn3/%97gnj0/GQgJC~OFxsdE8ubC7/IWy450/8%7CQVgdI8/soi0BviZt/Zjs%10i5Xh?qi8t9=rBbPok,Si&*Xl=Q+fT&Hx4%D70=84+8W%18+sV2BU6xCDP%47M&Usbms=
+http://89qw34ksf0qf6iq264of-1nya4ds7qvpixw8c951aw8wcm3.qxk7usa.N8j1frzfgnkbi9y2.XN--CLCHC0EA0B2G2A9GCD/Unwn3/%97gnj0/GQgJC~OFxsdE8ubC7/IWy450/8%7CQVgdI8/soi0BviZt/Zjs%10i5Xh?qi8t9=rBbPok,Si&*Xl=Q+fT&Hx4%D70=84+8W%18+sV2BU6xCDP%47M&Usbms=
Z7tid0uh.eZMOI-M1.umlsyksuzovqdw6wozbd.BW/m%e684OhC/ErAhpGiG
ftp://tw7d-6yu.im:2055/%66qbqzss/OmPGW;type=d
FTP://zst.tn/QcUpaA/VKvJ2/JN6AKew/iXYIiHm7mfPFmD%21E5/yTQpoiqdbaaS1/LnzOX#VqsobH
@@ -228,7 +228,7 @@ file:///UIIGOxv6jvF2%c0/%A8J3%677Gmq8im1
http://Qhk9z.zm/cOGBen/mBsDycEI5V7L1s%84WUj7863/p%5f~okuRD51b0M?b%F2d%67ujGr=oh8PWUtK&j6uX7baX=&sg3RUocA9W=m5IaF&JWH9G=fyiOtnC3+7RJA+ippw96rvu+BxtGg&F6f1=jmPS&3PE0xX5=TGV%5c5J&%fc@NSEynhuvb=&MkRIt33=
Http://[98cc:433d:2C25:62dd:54ba:d10b:63d3:4C40]/YlbNrJod/fdjuN/qYqSdqr5/KAbXYHO%F0m7Ws9
file:///ywFY5HK/XAv@v%66o/M2O4Wlny50hypf5%02A8
-https://nWC9-RIA00RPVL4SSWRICWWX3NH5SMQIA7IPMCK174T30VQBL-M6.XN--0ZWM56D/CwE%e2rWaYZmE?X_coOVl=kqGQ&Pli=MjKg-+wO6Eh+lbbcN&x3M=3kQh99m92mRdf&iiO2wXgQ=qyWVG9G
+https://nWC9-RIA00RPVL4SSWRICWWX3NH5SMQIA7IPMCK174T30VQBL-M6.XN--3E0B707E/CwE%e2rWaYZmE?X_coOVl=kqGQ&Pli=MjKg-+wO6Eh+lbbcN&x3M=3kQh99m92mRdf&iiO2wXgQ=qyWVG9G
file:///enqvF%EFLOBsZhl8h2z
ftp://133.4.130.192/p%b1LgcONfo%bc&kmH/Ibh6Lq%DCJhnswT%1A
ftp://1xf.ipl4f0y6c4.VA/LHuq~/p2nPbE/0YGGNJB%DEje2psef_B/aKOuMl1Q9
@@ -240,7 +240,7 @@ http://nEN5ZN.EG/%0efsf4v30L
file:///19%9947/ksd3Sq7W78%27/2K_Ylzcu2q
r8sht9qzsc1e2wp.ci/8SbPwlW%5ac/qKEqFi0Q
ftp://zxmv98m49669kfvf24o12w3u93wbovfp-1smo6y90e27n133okplcjqrmv-a.CD/JM5RAAY/sJdBntYWuEY4uB7hz/ozRSmFJD/#Xv22:Xvg
-6S8.Crwllo5e3.jmtz.XN--G6W251D/6InlQn/hnhu2f%ac8tX/apq%0D6o/
+6S8.Crwllo5e3.jmtz.XN--GECRJ9C/6InlQn/hnhu2f%ac8tX/apq%0D6o/
file:///gVW/nnRNxPfMXKb%72Aq%4A
file:///Fzza388TQ
file:///
@@ -314,7 +314,7 @@ file:///3%aexrb7UdZ5GpR4ZIfoxwL/vQV%4a2z
f5ms.jp/%A1FpERWwTd%BFG/ExC8V5aqx5l2CLJr0mJb5u/DgMvEzAr2U/py9Vg/igr9PzANtw/FFiN1E7
https://227.086.128.010:64985/MDKuFInA86qto5/_cK=4S%49Ic/SPp76/TlV%0Arlwfx/
Ftp://171.160.94.43/ALTgS46I4VM/55PbbK/5N%faTSE
-Ftp://3zd7z.etw.XN--JXALPDLP/4UztCuTbW2z/LL%2cDI/dTYSi9
+Ftp://3zd7z.etw.XN--KPRW13D/4UztCuTbW2z/LL%2cDI/dTYSi9
t6xfr.wxjz5p2t5.zl8m4.MN/2cbpjk/gsdm/5Mvc-j3rc/16Wb65&c7x
ftp://D02-auxxaeqnv9ve-jlmo3.l10vqu.12jl.2mvjwrsqm.BA/r71QLLNu6oGJjG/HbxrX1Grq8/QR%2agZv4hR
file:///XoCg%EDVf/A3ibJYjU
@@ -476,7 +476,7 @@ ftp://53.151.134.240/uZqGXLUIu-J/=%0C2pO
FILE:///Kywof5D5q/0TRS/zayrkrnENB
file:///EYS2nDf%9671qsm34OZeB%e5lUA/rYBDn0DKs0/
mpuwl0.BA/MkvAvc?j%11K4=9gE%613&qOOEP0t=g7EXs
-g6tylc0.daeczh.4q.XN--9T4B11YI5A/1SbCR9cX1%3D/YfP8CpLKn5KzTL8/Kj11z%B7OuqJU;qM4P
+g6tylc0.daeczh.4q.XN--CLCHC0EA0B2G2A9GCD/1SbCR9cX1%3D/YfP8CpLKn5KzTL8/Kj11z%B7OuqJU;qM4P
file:///TJa%86AczeCmM5QMhi/Wox~Ajl/WxUF%5eSA:y%0fD%E21/x%cca%d3Qgx/8iWJ5-h%26/fCK%01nQNrK8#ygTTB
file:///~%303cUUVYTEaQU5%5DXbogiPKb/favR2rETEh/9TXM%15u/nYCOZpZgL
file:///mJM%a1/jv5%53QDqE/bFMu0CBp
@@ -496,7 +496,7 @@ http://gpu16lz.LS/9e%daJrwQfHEpFvsZ3jx/c
file://ij9anjtok86ro.uN-BGDQ855IB.sDXAQR.5kr8kz.3J3M8XRM.18r3s0g-6.4rjsmwue0lwao0og17d-5-1.F1h3qgkul29yw2t4p4se5clomncxhmoy.g6c9tbz7.pa/5LMtmbl/1tfIF/pBOV7Hc
HTTPS://bF2RA.kw/1TA9pTTBg/nM/VSRo%85Kt?%62mxNfo=HDowgwkM3&9oPOLH2=yKOxIe+YNtt
5.Piba4ac.JE/55M1H/AZXdj
-m-k6-ej7x.XN--HLCJ6AYA9ESC7A/suVrNQSIj9/TmRhHbe/o&0dbqR/
+m-k6-ej7x.XN--J6W193G/suVrNQSIj9/TmRhHbe/o&0dbqR/
ftp://242.228.138.8/o%CC_QjILS%17aYH/%caw8CcVZyPRZ/
hGE9YH3D6.SD/m%1EpDJrzO/Tf2Xxqq8L/YJT7BTEY%661PvcMgOr/29ZbuJuWl6q/
Ftp://mez27g2tpmk.MC/%B8AHk%95etDns%46/gXbsCn%6C-/s8_Jmy/DhmfT~Di6KD
@@ -633,7 +633,7 @@ http://047.014.184.200/Z_QdOwjzfBue4Nt/a
Http://w9ys35.wb55p6l.hxl.rs/Y97%58Lp8JjLZw/5L
FILE://155.24.106.255/3VEZIT7
d1y8zvhwq40bi3tom.hPCZ.gJ-286X.TG/ayWKrgAvF6tn/L4SgquZT6C/1DmNe/CI69rJ/%f6QrzZGkSQ
-lda5l5wc.XN--HGBK6AJ7F53BBA/pr80SSZ/eNM1%D50lp/Rc%8EimOET
+lda5l5wc.XN--KPRY57D/pr80SSZ/eNM1%D50lp/Rc%8EimOET
l13t2t.sk/O%2BmRkw/@0AgGL@NX/wgt&aggDcp#0IYe'C
FILE://a6ys9a4.xj.BY/%99BGXp/F=yJtxc71/gvXuHuB9k
212.072.006.032/6kV8ce%2e/%e7lzm-HB%4artP/zg6tWMW7RIG?U7=HAXw$D3sM%7DyDJ&Gt=
Modified: lucene/dev/branches/lucene5339/lucene/analysis/common/src/test/org/apache/lucene/analysis/standard/TestUAX29URLEmailTokenizerFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5339/lucene/analysis/common/src/test/org/apache/lucene/analysis/standard/TestUAX29URLEmailTokenizerFactory.java?rev=1552377&r1=1552376&r2=1552377&view=diff
==============================================================================
--- lucene/dev/branches/lucene5339/lucene/analysis/common/src/test/org/apache/lucene/analysis/standard/TestUAX29URLEmailTokenizerFactory.java (original)
+++ lucene/dev/branches/lucene5339/lucene/analysis/common/src/test/org/apache/lucene/analysis/standard/TestUAX29URLEmailTokenizerFactory.java Thu Dec 19 17:48:47 2013
@@ -75,7 +75,7 @@ public class TestUAX29URLEmailTokenizerF
+ " samba Halta gamba "
+ "ftp://119.220.152.185/JgJgdZ/31aW5c/viWlfQSTs5/1c8U5T/ih5rXx/YfUJ/xBW1uHrQo6.R\n"
+ "M19nq.0URV4A.Me.CC/mj0kgt6hue/dRXv8YVLOw9v/CIOqb\n"
- + "Https://yu7v33rbt.vC6U3.XN--JXALPDLP/y%4fMSzkGFlm/wbDF4m"
+ + "Https://yu7v33rbt.vC6U3.XN--KPRW13D/y%4fMSzkGFlm/wbDF4m"
+ " inter Locutio "
+ "[c2d4::]/%471j5l/j3KFN%AAAn/Fip-NisKH/\n"
+ "file:///aXvSZS34is/eIgM8s~U5dU4Ifd%c7"
@@ -91,7 +91,7 @@ public class TestUAX29URLEmailTokenizerF
"samba", "Halta", "gamba",
"ftp://119.220.152.185/JgJgdZ/31aW5c/viWlfQSTs5/1c8U5T/ih5rXx/YfUJ/xBW1uHrQo6.R",
"M19nq.0URV4A.Me.CC/mj0kgt6hue/dRXv8YVLOw9v/CIOqb",
- "Https://yu7v33rbt.vC6U3.XN--JXALPDLP/y%4fMSzkGFlm/wbDF4m",
+ "Https://yu7v33rbt.vC6U3.XN--KPRW13D/y%4fMSzkGFlm/wbDF4m",
"inter", "Locutio",
"[c2d4::]/%471j5l/j3KFN%AAAn/Fip-NisKH/",
"file:///aXvSZS34is/eIgM8s~U5dU4Ifd%c7",
Modified: lucene/dev/branches/lucene5339/lucene/analysis/common/src/tools/java/org/apache/lucene/analysis/standard/GenerateJflexTLDMacros.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5339/lucene/analysis/common/src/tools/java/org/apache/lucene/analysis/standard/GenerateJflexTLDMacros.java?rev=1552377&r1=1552376&r2=1552377&view=diff
==============================================================================
--- lucene/dev/branches/lucene5339/lucene/analysis/common/src/tools/java/org/apache/lucene/analysis/standard/GenerateJflexTLDMacros.java (original)
+++ lucene/dev/branches/lucene5339/lucene/analysis/common/src/tools/java/org/apache/lucene/analysis/standard/GenerateJflexTLDMacros.java Thu Dec 19 17:48:47 2013
@@ -60,20 +60,21 @@ public class GenerateJflexTLDMacros {
private static final String APACHE_LICENSE
= "/*" + NL
- + " * Copyright 2001-2005 The Apache Software Foundation." + NL
- + " *" + NL
- + " * Licensed under the Apache License, Version 2.0 (the \"License\");" + NL
- + " * you may not use this file except in compliance with the License." + NL
- + " * You may obtain a copy of the License at" + NL
- + " *" + NL
- + " * http://www.apache.org/licenses/LICENSE-2.0" + NL
- + " *" + NL
- + " * Unless required by applicable law or agreed to in writing, software" + NL
- + " * distributed under the License is distributed on an \"AS IS\" BASIS," + NL
- + " * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied." + NL
- + " * See the License for the specific language governing permissions and" + NL
- + " * limitations under the License." + NL
- + " */" + NL + NL;
+ + " * Licensed to the Apache Software Foundation (ASF) under one or more" + NL
+ + " * contributor license agreements. See the NOTICE file distributed with" + NL
+ + " * this work for additional information regarding copyright ownership." + NL
+ + " * The ASF licenses this file to You under the Apache License, Version 2.0" + NL
+ + " * (the \"License\"); you may not use this file except in compliance with" + NL
+ + " * the License. You may obtain a copy of the License at" + NL
+ + " *" + NL
+ + " * http://www.apache.org/licenses/LICENSE-2.0" + NL
+ + " *" + NL
+ + " * Unless required by applicable law or agreed to in writing, software" + NL
+ + " * distributed under the License is distributed on an \"AS IS\" BASIS," + NL
+ + " * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied." + NL
+ + " * See the License for the specific language governing permissions and" + NL
+ + " * limitations under the License." + NL
+ + " */" + NL;
private static final Pattern TLD_PATTERN_1
= Pattern.compile("([-A-Za-z0-9]+)\\.\\s+NS\\s+.*");
Modified: lucene/dev/branches/lucene5339/lucene/analysis/icu/src/data/uax29/Default.rbbi
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5339/lucene/analysis/icu/src/data/uax29/Default.rbbi?rev=1552377&r1=1552376&r2=1552377&view=diff
==============================================================================
--- lucene/dev/branches/lucene5339/lucene/analysis/icu/src/data/uax29/Default.rbbi (original)
+++ lucene/dev/branches/lucene5339/lucene/analysis/icu/src/data/uax29/Default.rbbi Thu Dec 19 17:48:47 2013
@@ -14,27 +14,52 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
-# Default RBBI rules, based on UAX#29.
+# This file is from ICU (with some small modifications, to avoid CJK dictionary break)
#
+# Copyright (C) 2002-2013, International Business Machines Corporation
+# and others. All Rights Reserved.
+#
+# file: word.txt
+#
+# ICU Word Break Rules
+# See Unicode Standard Annex #29.
+# These rules are based on UAX #29 Revision 22 for Unicode Version 6.3
+#
+# Note: Updates to word.txt will usually need to be merged into
+# word_POSIX.txt also.
+
+##############################################################################
+#
+# Character class definitions from TR 29
+#
+##############################################################################
!!chain;
+
#
# Character Class Definitions.
#
-$CR = [\p{Word_Break = CR}];
-$LF = [\p{Word_Break = LF}];
-$Newline = [\p{Word_Break = Newline}];
-$Extend = [\p{Word_Break = Extend}];
-$Format = [\p{Word_Break = Format}];
-$Katakana = [\p{Word_Break = Katakana}];
-$ALetter = [\p{Word_Break = ALetter}];
-$MidNumLet = [\p{Word_Break = MidNumLet}];
-$MidLetter = [\p{Word_Break = MidLetter}];
-$MidNum = [\p{Word_Break = MidNum}];
-$Numeric = [\p{Word_Break = Numeric}[[:Decomposition_Type=Wide:]&[:General_Category=Decimal_Number:]]];
-$ExtendNumLet = [\p{Word_Break = ExtendNumLet}];
+$CR = [\p{Word_Break = CR}];
+$LF = [\p{Word_Break = LF}];
+$Newline = [\p{Word_Break = Newline}];
+$Extend = [\p{Word_Break = Extend}];
+$Regional_Indicator = [\p{Word_Break = Regional_Indicator}];
+$Format = [\p{Word_Break = Format}];
+$Katakana = [\p{Word_Break = Katakana}];
+$Hebrew_Letter = [\p{Word_Break = Hebrew_Letter}];
+$ALetter = [\p{Word_Break = ALetter}];
+$Single_Quote = [\p{Word_Break = Single_Quote}];
+$Double_Quote = [\p{Word_Break = Double_Quote}];
+$MidNumLet = [\p{Word_Break = MidNumLet}];
+$MidLetter = [\p{Word_Break = MidLetter}];
+$MidNum = [\p{Word_Break = MidNum}];
+$Numeric = [\p{Word_Break = Numeric}[[:Decomposition_Type=Wide:]&[:General_Category=Decimal_Number:]]];
+$ExtendNumLet = [\p{Word_Break = ExtendNumLet}];
+
+$Han = [:Han:];
+$Hiragana = [:Hiragana:];
# Dictionary character set, for triggering language-based break engines. Currently
@@ -42,24 +67,34 @@ $ExtendNumLet = [\p{Word_Break = ExtendN
# 5.0 or later as the definition of Complex_Context was corrected to include all
# characters requiring dictionary break.
-$dictionary = [:LineBreak = Complex_Context:];
$Control = [\p{Grapheme_Cluster_Break = Control}];
-$ALetterPlus = [$ALetter [$dictionary-$Extend-$Control]]; # Note: default ALetter does not
- # include the dictionary characters.
+$HangulSyllable = [\uac00-\ud7a3];
+$ComplexContext = [:LineBreak = Complex_Context:];
+$KanaKanji = [$Han $Hiragana $Katakana];
+$dictionaryCJK = [$Han $Hiragana $HangulSyllable];
+$dictionary = [$ComplexContext];
+
+# leave CJK scripts out of ALetterPlus
+$ALetterPlus = [$ALetter-$dictionaryCJK [$ComplexContext-$Extend-$Control]];
+
#
# Rules 4 Ignore Format and Extend characters,
# except when they appear at the beginning of a region of text.
#
-$KatakanaEx = $Katakana ($Extend | $Format)*;
-$ALetterEx = $ALetterPlus ($Extend | $Format)*;
-$MidNumLetEx = $MidNumLet ($Extend | $Format)*;
-$MidLetterEx = $MidLetter ($Extend | $Format)*;
-$MidNumEx = $MidNum ($Extend | $Format)*;
-$NumericEx = $Numeric ($Extend | $Format)*;
-$ExtendNumLetEx = $ExtendNumLet ($Extend | $Format)*;
+# TODO: check if handling of katakana in dictionary makes rules incorrect/void
+$KatakanaEx = $Katakana ($Extend | $Format)*;
+$Hebrew_LetterEx = $Hebrew_Letter ($Extend | $Format)*;
+$ALetterEx = $ALetterPlus ($Extend | $Format)*;
+$Single_QuoteEx = $Single_Quote ($Extend | $Format)*;
+$Double_QuoteEx = $Double_Quote ($Extend | $Format)*;
+$MidNumLetEx = $MidNumLet ($Extend | $Format)*;
+$MidLetterEx = $MidLetter ($Extend | $Format)*;
+$MidNumEx = $MidNum ($Extend | $Format)*;
+$NumericEx = $Numeric ($Extend | $Format)*;
+$ExtendNumLetEx = $ExtendNumLet ($Extend | $Format)*;
+$Regional_IndicatorEx = $Regional_Indicator ($Extend | $Format)*;
-$Hiragana = [\p{script=Hiragana}];
$Ideographic = [\p{Ideographic}];
$HiraganaEx = $Hiragana ($Extend | $Format)*;
$IdeographicEx = $Ideographic ($Extend | $Format)*;
@@ -77,23 +112,31 @@ $CR $LF;
# of a region of Text. The rule here comes into play when the start of text
# begins with a group of Format chars, or with a "word" consisting of a single
# char that is not in any of the listed word break categories followed by
-# format char(s).
+# format char(s), or is not a CJK dictionary character.
[^$CR $LF $Newline]? ($Extend | $Format)+;
$NumericEx {100};
$ALetterEx {200};
+$HangulSyllable {200};
+$Hebrew_LetterEx{200};
$KatakanaEx {300}; # note: these status values override those from rule 5
-$HiraganaEx {300}; # by virtual of being numerically larger.
+$HiraganaEx {300}; # by virtue of being numerically larger.
$IdeographicEx {400}; #
#
# rule 5
# Do not break between most letters.
#
-$ALetterEx $ALetterEx {200};
+($ALetterEx | $Hebrew_LetterEx) ($ALetterEx | $Hebrew_LetterEx) {200};
# rule 6 and 7
-$ALetterEx ($MidLetterEx | $MidNumLetEx) $ALetterEx {200};
+($ALetterEx | $Hebrew_LetterEx) ($MidLetterEx | $MidNumLetEx | $Single_QuoteEx) ($ALetterEx | $Hebrew_LetterEx) {200};
+
+# rule 7a
+$Hebrew_LetterEx $Single_QuoteEx {200};
+
+# rule 7b and 7c
+$Hebrew_LetterEx $Double_QuoteEx $Hebrew_LetterEx {200};
# rule 8
@@ -101,27 +144,35 @@ $NumericEx $NumericEx {100};
# rule 9
-$ALetterEx $NumericEx {200};
+($ALetterEx | $Hebrew_LetterEx) $NumericEx {200};
# rule 10
-$NumericEx $ALetterEx {200};
+$NumericEx ($ALetterEx | $Hebrew_LetterEx) {200};
# rule 11 and 12
-$NumericEx ($MidNumEx | $MidNumLetEx) $NumericEx {100};
+$NumericEx ($MidNumEx | $MidNumLetEx | $Single_QuoteEx) $NumericEx {100};
# rule 13
-
$KatakanaEx $KatakanaEx {300};
# rule 13a/b
-$ALetterEx $ExtendNumLetEx {200}; # (13a)
-$NumericEx $ExtendNumLetEx {100}; # (13a)
-$KatakanaEx $ExtendNumLetEx {300}; # (13a)
-$ExtendNumLetEx $ExtendNumLetEx {200}; # (13a)
-
-$ExtendNumLetEx $ALetterEx {200}; # (13b)
-$ExtendNumLetEx $NumericEx {100}; # (13b)
-$ExtendNumLetEx $KatakanaEx {300}; # (13b)
+$ALetterEx $ExtendNumLetEx {200}; # (13a)
+$Hebrew_LetterEx $ExtendNumLetEx {200}; # (13a)
+$NumericEx $ExtendNumLetEx {100}; # (13a)
+$KatakanaEx $ExtendNumLetEx {300}; # (13a)
+$ExtendNumLetEx $ExtendNumLetEx {200}; # (13a)
+
+$ExtendNumLetEx $ALetterEx {200}; # (13b)
+$ExtendNumLetEx $Hebrew_Letter {200}; # (13b)
+$ExtendNumLetEx $NumericEx {100}; # (13b)
+$ExtendNumLetEx $KatakanaEx {300}; # (13b)
+
+# rule 13c
+
+$Regional_IndicatorEx $Regional_IndicatorEx;
+
+# special handling for CJK characters: chain for later dictionary segmentation
+$HangulSyllable $HangulSyllable {200};
Modified: lucene/dev/branches/lucene5339/lucene/analysis/icu/src/data/utr30/BasicFoldings.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5339/lucene/analysis/icu/src/data/utr30/BasicFoldings.txt?rev=1552377&r1=1552376&r2=1552377&view=diff
==============================================================================
--- lucene/dev/branches/lucene5339/lucene/analysis/icu/src/data/utr30/BasicFoldings.txt (original)
+++ lucene/dev/branches/lucene5339/lucene/analysis/icu/src/data/utr30/BasicFoldings.txt Thu Dec 19 17:48:47 2013
@@ -78,7 +78,6 @@ FF0D>002D
## Space Folding
# Rule: [[:Zs:] - [:Changes_When_NFKC_Casefolded=Yes:] - [\u0020]] > 0020
1680>0020
-180E>0020
## Spacing Accents folding (done by kd)
Modified: lucene/dev/branches/lucene5339/lucene/analysis/icu/src/data/utr30/nfc.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5339/lucene/analysis/icu/src/data/utr30/nfc.txt?rev=1552377&r1=1552376&r2=1552377&view=diff
==============================================================================
--- lucene/dev/branches/lucene5339/lucene/analysis/icu/src/data/utr30/nfc.txt (original)
+++ lucene/dev/branches/lucene5339/lucene/analysis/icu/src/data/utr30/nfc.txt Thu Dec 19 17:48:47 2013
@@ -1,4 +1,4 @@
-# Copyright (C) 1999-2012, International Business Machines
+# Copyright (C) 1999-2013, International Business Machines
# Corporation and others. All Rights Reserved.
#
# file name: nfc.txt
@@ -7,7 +7,7 @@
#
# Complete data for Unicode NFC normalization.
-* Unicode 6.1.0
+* Unicode 6.3.0
# Canonical_Combining_Class (ccc) values
0300..0314:230
Modified: lucene/dev/branches/lucene5339/lucene/analysis/icu/src/data/utr30/nfkc.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5339/lucene/analysis/icu/src/data/utr30/nfkc.txt?rev=1552377&r1=1552376&r2=1552377&view=diff
==============================================================================
--- lucene/dev/branches/lucene5339/lucene/analysis/icu/src/data/utr30/nfkc.txt (original)
+++ lucene/dev/branches/lucene5339/lucene/analysis/icu/src/data/utr30/nfkc.txt Thu Dec 19 17:48:47 2013
@@ -1,4 +1,4 @@
-# Copyright (C) 1999-2012, International Business Machines
+# Copyright (C) 1999-2013, International Business Machines
# Corporation and others. All Rights Reserved.
#
# file name: nfkc.txt
@@ -11,7 +11,7 @@
# to NFKC one-way mappings.
# Use this file as the second gennorm2 input file after nfc.txt.
-* Unicode 6.1.0
+* Unicode 6.3.0
00A0>0020
00A8>0020 0308
Modified: lucene/dev/branches/lucene5339/lucene/analysis/icu/src/data/utr30/nfkc_cf.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene5339/lucene/analysis/icu/src/data/utr30/nfkc_cf.txt?rev=1552377&r1=1552376&r2=1552377&view=diff
==============================================================================
--- lucene/dev/branches/lucene5339/lucene/analysis/icu/src/data/utr30/nfkc_cf.txt (original)
+++ lucene/dev/branches/lucene5339/lucene/analysis/icu/src/data/utr30/nfkc_cf.txt Thu Dec 19 17:48:47 2013
@@ -1,5 +1,5 @@
# Unicode Character Database
-# Copyright (c) 1991-2012 Unicode, Inc.
+# Copyright (c) 1991-2013 Unicode, Inc.
# For terms of use, see http://www.unicode.org/terms_of_use.html
# For documentation, see http://www.unicode.org/reports/tr44/
#
@@ -12,7 +12,7 @@
# and reformatted into syntax for the gennorm2 Normalizer2 data generator tool.
# Use this file as the third gennorm2 input file after nfc.txt and nfkc.txt.
-* Unicode 6.1.0
+* Unicode 6.3.0
0041>0061
0042>0062
@@ -537,6 +537,7 @@
0555>0585
0556>0586
0587>0565 0582
+061C>
0675>0627 0674
0676>0648 0674
0677>06C7 0674
@@ -627,7 +628,7 @@
10FC>10DC
115F..1160>
17B4..17B5>
-180B..180D>
+180B..180E>
1D2C>0061
1D2D>00E6
1D2E>0062