You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by sa...@apache.org on 2013/12/06 17:51:45 UTC

svn commit: r1548595 [4/6] - in /lucene/dev/trunk/lucene: ./ analysis/common/ analysis/common/src/java/org/apache/lucene/analysis/charfilter/ analysis/common/src/java/org/apache/lucene/analysis/standard/ analysis/common/src/java/org/apache/lucene/analy...

Modified: lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizerImpl.jflex
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizerImpl.jflex?rev=1548595&r1=1548594&r2=1548595&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizerImpl.jflex (original)
+++ lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizerImpl.jflex Fri Dec  6 16:51:43 2013
@@ -35,11 +35,13 @@ import org.apache.lucene.analysis.tokena
  *       Asian languages, including Thai, Lao, Myanmar, and Khmer</li>
  *   <li>&lt;IDEOGRAPHIC&gt;: A single CJKV ideographic character</li>
  *   <li>&lt;HIRAGANA&gt;: A single hiragana character</li>
+ *   <li>&lt;KATAKANA&gt;: A sequence of katakana characters</li>
+ *   <li>&lt;HANGUL&gt;: A sequence of Hangul characters</li>
  * </ul>
  */
 %%
 
-%unicode 6.1
+%unicode 6.3
 %integer
 %final
 %public
@@ -50,33 +52,39 @@ import org.apache.lucene.analysis.tokena
 %buffer 4096
 
 %include SUPPLEMENTARY.jflex-macro
-ALetter = ([\p{WB:ALetter}] | {ALetterSupp})
-Format =  ([\p{WB:Format}] | {FormatSupp})
-Numeric = ([\p{WB:Numeric}] | {NumericSupp})
-Extend =  ([\p{WB:Extend}] | {ExtendSupp})
-Katakana = ([\p{WB:Katakana}] | {KatakanaSupp})
-MidLetter = ([\p{WB:MidLetter}] | {MidLetterSupp})
-MidNum = ([\p{WB:MidNum}] | {MidNumSupp})
-MidNumLet = ([\p{WB:MidNumLet}] | {MidNumLetSupp})
-ExtendNumLet = ([\p{WB:ExtendNumLet}] | {ExtendNumLetSupp})
-ComplexContext = ([\p{LB:Complex_Context}] | {ComplexContextSupp})
-Han = ([\p{Script:Han}] | {HanSupp})
-Hiragana = ([\p{Script:Hiragana}] | {HiraganaSupp})
+ALetter           = (\p{WB:ALetter}                                     | {ALetterSupp})
+Format            = (\p{WB:Format}                                      | {FormatSupp})
+Numeric           = ([\p{WB:Numeric}[\p{Blk:HalfAndFullForms}&&\p{Nd}]] | {NumericSupp})
+Extend            = (\p{WB:Extend}                                      | {ExtendSupp})
+Katakana          = (\p{WB:Katakana}                                    | {KatakanaSupp})
+MidLetter         = (\p{WB:MidLetter}                                   | {MidLetterSupp})
+MidNum            = (\p{WB:MidNum}                                      | {MidNumSupp})
+MidNumLet         = (\p{WB:MidNumLet}                                   | {MidNumLetSupp})
+ExtendNumLet      = (\p{WB:ExtendNumLet}                                | {ExtendNumLetSupp})
+ComplexContext    = (\p{LB:Complex_Context}                             | {ComplexContextSupp})
+Han               = (\p{Script:Han}                                     | {HanSupp})
+Hiragana          = (\p{Script:Hiragana}                                | {HiraganaSupp})
+SingleQuote       = (\p{WB:Single_Quote}                                | {SingleQuoteSupp})
+DoubleQuote       = (\p{WB:Double_Quote}                                | {DoubleQuoteSupp})
+HebrewLetter      = (\p{WB:Hebrew_Letter}                               | {HebrewLetterSupp})
+RegionalIndicator = (\p{WB:Regional_Indicator}                          | {RegionalIndicatorSupp})
+HebrewOrALetter   = ({HebrewLetter} | {ALetter})
 
-// Script=Hangul & Aletter
-HangulEx       = (!(!\p{Script:Hangul}|!\p{WB:ALetter})) ({Format} | {Extend})*
 // UAX#29 WB4. X (Extend | Format)* --> X
 //
-ALetterEx      = {ALetter}                     ({Format} | {Extend})*
-// TODO: Convert hard-coded full-width numeric range to property intersection (something like [\p{Full-Width}&&\p{Numeric}]) once JFlex supports it
-NumericEx      = ({Numeric} | [\uFF10-\uFF19]) ({Format} | {Extend})*
-KatakanaEx     = {Katakana}                    ({Format} | {Extend})* 
-MidLetterEx    = ({MidLetter} | {MidNumLet})   ({Format} | {Extend})* 
-MidNumericEx   = ({MidNum} | {MidNumLet})      ({Format} | {Extend})*
-ExtendNumLetEx = {ExtendNumLet}                ({Format} | {Extend})*
-
-HanEx = {Han} ({Format} | {Extend})*
-HiraganaEx = {Hiragana} ({Format} | {Extend})*
+HangulEx            = [\p{Script:Hangul}&&[\p{WB:ALetter}\p{WB:Hebrew_Letter}]] ({Format} | {Extend})*
+HebrewOrALetterEx   = {HebrewOrALetter}                                         ({Format} | {Extend})*
+NumericEx           = {Numeric}                                                 ({Format} | {Extend})*
+KatakanaEx          = {Katakana}                                                ({Format} | {Extend})* 
+MidLetterEx         = ({MidLetter} | {MidNumLet} | {SingleQuote})               ({Format} | {Extend})* 
+MidNumericEx        = ({MidNum} | {MidNumLet} | {SingleQuote})                  ({Format} | {Extend})*
+ExtendNumLetEx      = {ExtendNumLet}                                            ({Format} | {Extend})*
+HanEx               = {Han}                                                     ({Format} | {Extend})*
+HiraganaEx          = {Hiragana}                                                ({Format} | {Extend})*
+SingleQuoteEx       = {SingleQuote}                                             ({Format} | {Extend})*                                            
+DoubleQuoteEx       = {DoubleQuote}                                             ({Format} | {Extend})*
+HebrewLetterEx      = {HebrewLetter}                                            ({Format} | {Extend})*
+RegionalIndicatorEx = {RegionalIndicator}                                       ({Format} | {Extend})*
 
 // URL and E-mail syntax specifications:
 //
@@ -213,40 +221,47 @@ EMAIL = {EMAILlocalPart} "@" ({DomainNam
 {EMAIL} { return EMAIL_TYPE; }
 
 // UAX#29 WB8.   Numeric × Numeric
-//        WB11.  Numeric (MidNum | MidNumLet) × Numeric
-//        WB12.  Numeric × (MidNum | MidNumLet) Numeric
-//        WB13a. (ALetter | Numeric | Katakana | ExtendNumLet) × ExtendNumLet
-//        WB13b. ExtendNumLet × (ALetter | Numeric | Katakana)
-//
-{ExtendNumLetEx}* {NumericEx} ({ExtendNumLetEx}+ {NumericEx} 
-                              | {MidNumericEx} {NumericEx} 
-                              | {NumericEx})*
-{ExtendNumLetEx}* 
+//        WB11.  Numeric (MidNum | MidNumLet | Single_Quote) × Numeric
+//        WB12.  Numeric × (MidNum | MidNumLet | Single_Quote) Numeric
+//        WB13a. (ALetter | Hebrew_Letter | Numeric | Katakana | ExtendNumLet) × ExtendNumLet
+//        WB13b. ExtendNumLet × (ALetter | Hebrew_Letter | Numeric | Katakana) 
+//
+{ExtendNumLetEx}* {NumericEx} ( ( {ExtendNumLetEx}* | {MidNumericEx} ) {NumericEx} )* {ExtendNumLetEx}* 
   { return NUMERIC_TYPE; }
 
 // subset of the below for typing purposes only!
 {HangulEx}+
   { return HANGUL_TYPE; }
-
+  
 {KatakanaEx}+
   { return KATAKANA_TYPE; }
 
-// UAX#29 WB5.   ALetter × ALetter
-//        WB6.   ALetter × (MidLetter | MidNumLet) ALetter
-//        WB7.   ALetter (MidLetter | MidNumLet) × ALetter
-//        WB9.   ALetter × Numeric
-//        WB10.  Numeric × ALetter
+// UAX#29 WB5.   (ALetter | Hebrew_Letter) × (ALetter | Hebrew_Letter)
+//        WB6.   (ALetter | Hebrew_Letter) × (MidLetter | MidNumLet | Single_Quote) (ALetter | Hebrew_Letter)
+//        WB7.   (ALetter | Hebrew_Letter) (MidLetter | MidNumLet | Single_Quote) × (ALetter | Hebrew_Letter)
+//        WB7a.  Hebrew_Letter × Single_Quote
+//        WB7b.  Hebrew_Letter × Double_Quote Hebrew_Letter
+//        WB7c.  Hebrew_Letter Double_Quote × Hebrew_Letter
+//        WB9.   (ALetter | Hebrew_Letter) × Numeric
+//        WB10.  Numeric × (ALetter | Hebrew_Letter)
 //        WB13.  Katakana × Katakana
-//        WB13a. (ALetter | Numeric | Katakana | ExtendNumLet) × ExtendNumLet
-//        WB13b. ExtendNumLet × (ALetter | Numeric | Katakana)
+//        WB13a. (ALetter | Hebrew_Letter | Numeric | Katakana | ExtendNumLet) × ExtendNumLet
+//        WB13b. ExtendNumLet × (ALetter | Hebrew_Letter | Numeric | Katakana) 
 //
-{ExtendNumLetEx}*  ( {KatakanaEx} ({ExtendNumLetEx}* {KatakanaEx})* 
-                   | ( {NumericEx}  ({ExtendNumLetEx}+ {NumericEx} | {MidNumericEx} {NumericEx} | {NumericEx})*
-                     | {ALetterEx}  ({ExtendNumLetEx}+ {ALetterEx} | {MidLetterEx}  {ALetterEx} | {ALetterEx})* )+ ) 
-({ExtendNumLetEx}+ ( {KatakanaEx} ({ExtendNumLetEx}* {KatakanaEx})* 
-                   | ( {NumericEx}  ({ExtendNumLetEx}+ {NumericEx} | {MidNumericEx} {NumericEx} | {NumericEx})*
-                     | {ALetterEx}  ({ExtendNumLetEx}+ {ALetterEx} | {MidLetterEx}  {ALetterEx} | {ALetterEx})* )+ ) )*
-{ExtendNumLetEx}*  
+{ExtendNumLetEx}*  ( {KatakanaEx}          ( {ExtendNumLetEx}*   {KatakanaEx}                            )*
+                   | ( {HebrewLetterEx}    ( {SingleQuoteEx}     | {DoubleQuoteEx} {HebrewLetterEx}      )
+                     | {NumericEx}         ( ( {ExtendNumLetEx}* | {MidNumericEx} )  {NumericEx}         )*
+                     | {HebrewOrALetterEx} ( ( {ExtendNumLetEx}* | {MidLetterEx}  )  {HebrewOrALetterEx} )*
+                     )+
+                   )
+({ExtendNumLetEx}+ ( {KatakanaEx}          ( {ExtendNumLetEx}*   {KatakanaEx}                            )*
+                   | ( {HebrewLetterEx}    ( {SingleQuoteEx}     | {DoubleQuoteEx}   {HebrewLetterEx}    )
+                     | {NumericEx}         ( ( {ExtendNumLetEx}* | {MidNumericEx} )  {NumericEx}         )*
+                     | {HebrewOrALetterEx} ( ( {ExtendNumLetEx}* | {MidLetterEx}  )  {HebrewOrALetterEx} )*
+                     )+
+                   )
+)*
+{ExtendNumLetEx}* 
   { return WORD_TYPE; }
 
 
@@ -258,7 +273,7 @@ EMAIL = {EMAILlocalPart} "@" ({DomainNam
 //    annex.  That means that satisfactory treatment of languages like Chinese
 //    or Thai requires special handling.
 // 
-// In Unicode 6.1, only one character has the \p{Line_Break = Contingent_Break}
+// In Unicode 6.3, only one character has the \p{Line_Break = Contingent_Break}
 // property: U+FFFC ( ￼ ) OBJECT REPLACEMENT CHARACTER.
 //
 // In the ICU implementation of UAX#29, \p{Line_Break = Complex_Context}
@@ -280,6 +295,8 @@ EMAIL = {EMAILlocalPart} "@" ({DomainNam
 // UAX#29 WB3.   CR × LF
 //        WB3a.  (Newline | CR | LF) ÷
 //        WB3b.  ÷ (Newline | CR | LF)
+//        WB13c. Regional_Indicator × Regional_Indicator
 //        WB14.  Any ÷ Any
 //
-[^] { /* Break so we don't hit fall-through warning: */ break;/* Not numeric, word, ideographic, hiragana, or SE Asian -- ignore it. */ }
+{RegionalIndicatorEx} {RegionalIndicatorEx}+ | [^]
+  { /* Break so we don't hit fall-through warning: */ break; /* Not numeric, word, ideographic, hiragana, or SE Asian -- ignore it. */ }

Modified: lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerImpl.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerImpl.java?rev=1548595&r1=1548594&r2=1548595&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerImpl.java (original)
+++ lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerImpl.java Fri Dec  6 16:51:43 2013
@@ -1,4 +1,4 @@
-/* The following code was generated by JFlex. */
+/* The following code was generated by JFlex 1.5.0-SNAPSHOT */
 
 package org.apache.lucene.analysis.wikipedia;
 
@@ -84,21 +84,20 @@ class WikipediaTokenizerImpl {
   private static final int [] ZZ_ACTION = zzUnpackAction();
 
   private static final String ZZ_ACTION_PACKED_0 =
-    "\12\0\4\1\4\2\1\3\1\1\1\4\1\1\2\5"+
-    "\1\6\2\5\1\7\1\5\2\10\1\11\1\12\1\11"+
-    "\1\13\1\14\1\10\1\15\1\16\1\15\1\17\1\20"+
-    "\1\10\1\21\1\10\4\22\1\23\1\22\1\24\1\25"+
-    "\1\26\3\0\1\27\14\0\1\30\1\31\1\32\1\33"+
-    "\1\11\1\0\1\34\1\35\1\36\1\0\1\37\1\0"+
-    "\1\40\3\0\1\41\1\42\2\43\1\42\2\44\2\0"+
-    "\1\43\1\0\14\43\1\42\3\0\1\11\1\45\3\0"+
-    "\1\46\1\47\5\0\1\50\4\0\1\50\2\0\2\50"+
-    "\2\0\1\11\5\0\1\31\1\42\1\43\1\51\3\0"+
-    "\1\11\2\0\1\52\30\0\1\53\2\0\1\54\1\55"+
-    "\1\56";
+    "\12\0\4\1\4\2\1\3\1\4\1\1\2\5\1\6"+
+    "\1\5\1\7\1\5\2\10\1\11\1\5\1\12\1\11"+
+    "\1\13\1\14\1\15\1\16\1\15\1\17\1\20\1\10"+
+    "\1\21\1\10\4\22\1\23\1\24\1\25\1\26\3\0"+
+    "\1\27\14\0\1\30\1\31\1\32\1\33\1\11\1\0"+
+    "\1\34\1\35\1\36\1\0\1\37\1\0\1\40\3\0"+
+    "\1\41\1\42\2\43\1\42\2\44\2\0\1\43\1\0"+
+    "\14\43\1\42\3\0\1\11\1\45\3\0\1\46\1\47"+
+    "\5\0\1\50\4\0\1\50\2\0\2\50\2\0\1\11"+
+    "\5\0\1\31\1\42\1\43\1\51\3\0\1\11\2\0"+
+    "\1\52\30\0\1\53\2\0\1\54\1\55\1\56";
 
   private static int [] zzUnpackAction() {
-    int [] result = new int[184];
+    int [] result = new int[181];
     int offset = 0;
     offset = zzUnpackAction(ZZ_ACTION_PACKED_0, offset, result);
     return result;
@@ -125,30 +124,30 @@ class WikipediaTokenizerImpl {
   private static final String ZZ_ROWMAP_PACKED_0 =
     "\0\0\0\54\0\130\0\204\0\260\0\334\0\u0108\0\u0134"+
     "\0\u0160\0\u018c\0\u01b8\0\u01e4\0\u0210\0\u023c\0\u0268\0\u0294"+
-    "\0\u02c0\0\u02ec\0\u01b8\0\u0318\0\u0344\0\u0370\0\u01b8\0\u039c"+
-    "\0\u03c8\0\u03f4\0\u0420\0\u044c\0\u0478\0\u01b8\0\u039c\0\u04a4"+
-    "\0\u01b8\0\u04d0\0\u04fc\0\u0528\0\u0554\0\u0580\0\u05ac\0\u05d8"+
-    "\0\u0604\0\u0630\0\u065c\0\u0688\0\u06b4\0\u01b8\0\u06e0\0\u039c"+
-    "\0\u070c\0\u0738\0\u0764\0\u0790\0\u01b8\0\u01b8\0\u07bc\0\u07e8"+
-    "\0\u0814\0\u01b8\0\u0840\0\u086c\0\u0898\0\u08c4\0\u08f0\0\u091c"+
-    "\0\u0948\0\u0974\0\u09a0\0\u09cc\0\u09f8\0\u0a24\0\u0a50\0\u0a7c"+
-    "\0\u01b8\0\u01b8\0\u0aa8\0\u0ad4\0\u0b00\0\u0b00\0\u01b8\0\u0b2c"+
+    "\0\u02c0\0\u02ec\0\u01b8\0\u0318\0\u0344\0\u01b8\0\u0370\0\u039c"+
+    "\0\u03c8\0\u03f4\0\u0420\0\u01b8\0\u0370\0\u044c\0\u0478\0\u01b8"+
+    "\0\u04a4\0\u04d0\0\u04fc\0\u0528\0\u0554\0\u0580\0\u05ac\0\u05d8"+
+    "\0\u0604\0\u0630\0\u065c\0\u01b8\0\u0688\0\u0370\0\u06b4\0\u06e0"+
+    "\0\u070c\0\u01b8\0\u01b8\0\u0738\0\u0764\0\u0790\0\u01b8\0\u07bc"+
+    "\0\u07e8\0\u0814\0\u0840\0\u086c\0\u0898\0\u08c4\0\u08f0\0\u091c"+
+    "\0\u0948\0\u0974\0\u09a0\0\u09cc\0\u09f8\0\u01b8\0\u01b8\0\u0a24"+
+    "\0\u0a50\0\u0a7c\0\u0a7c\0\u01b8\0\u0aa8\0\u0ad4\0\u0b00\0\u0b2c"+
     "\0\u0b58\0\u0b84\0\u0bb0\0\u0bdc\0\u0c08\0\u0c34\0\u0c60\0\u0c8c"+
-    "\0\u0cb8\0\u0ce4\0\u0d10\0\u0898\0\u0d3c\0\u0d68\0\u0d94\0\u0dc0"+
+    "\0\u0814\0\u0cb8\0\u0ce4\0\u0d10\0\u0d3c\0\u0d68\0\u0d94\0\u0dc0"+
     "\0\u0dec\0\u0e18\0\u0e44\0\u0e70\0\u0e9c\0\u0ec8\0\u0ef4\0\u0f20"+
-    "\0\u0f4c\0\u0f78\0\u0fa4\0\u0fd0\0\u0ffc\0\u1028\0\u1054\0\u1080"+
-    "\0\u10ac\0\u10d8\0\u01b8\0\u1104\0\u1130\0\u115c\0\u1188\0\u01b8"+
+    "\0\u0f4c\0\u0f78\0\u0fa4\0\u0fd0\0\u0ffc\0\u1028\0\u1054\0\u01b8"+
+    "\0\u1080\0\u10ac\0\u10d8\0\u1104\0\u01b8\0\u1130\0\u115c\0\u1188"+
     "\0\u11b4\0\u11e0\0\u120c\0\u1238\0\u1264\0\u1290\0\u12bc\0\u12e8"+
-    "\0\u1314\0\u1340\0\u136c\0\u1398\0\u13c4\0\u086c\0\u09f8\0\u13f0"+
-    "\0\u141c\0\u1448\0\u1474\0\u14a0\0\u14cc\0\u14f8\0\u1524\0\u01b8"+
-    "\0\u1550\0\u157c\0\u15a8\0\u15d4\0\u1600\0\u162c\0\u1658\0\u1684"+
-    "\0\u16b0\0\u01b8\0\u16dc\0\u1708\0\u1734\0\u1760\0\u178c\0\u17b8"+
+    "\0\u1314\0\u1340\0\u07e8\0\u0974\0\u136c\0\u1398\0\u13c4\0\u13f0"+
+    "\0\u141c\0\u1448\0\u1474\0\u14a0\0\u01b8\0\u14cc\0\u14f8\0\u1524"+
+    "\0\u1550\0\u157c\0\u15a8\0\u15d4\0\u1600\0\u162c\0\u01b8\0\u1658"+
+    "\0\u1684\0\u16b0\0\u16dc\0\u1708\0\u1734\0\u1760\0\u178c\0\u17b8"+
     "\0\u17e4\0\u1810\0\u183c\0\u1868\0\u1894\0\u18c0\0\u18ec\0\u1918"+
     "\0\u1944\0\u1970\0\u199c\0\u19c8\0\u19f4\0\u1a20\0\u1a4c\0\u1a78"+
-    "\0\u1aa4\0\u1ad0\0\u1afc\0\u1b28\0\u1b54\0\u01b8\0\u01b8\0\u01b8";
+    "\0\u1aa4\0\u1ad0\0\u01b8\0\u01b8\0\u01b8";
 
   private static int [] zzUnpackRowMap() {
-    int [] result = new int[184];
+    int [] result = new int[181];
     int offset = 0;
     offset = zzUnpackRowMap(ZZ_ROWMAP_PACKED_0, offset, result);
     return result;
@@ -172,152 +171,149 @@ class WikipediaTokenizerImpl {
 
   private static final String ZZ_TRANS_PACKED_0 =
     "\1\13\1\14\5\13\1\15\1\13\1\16\3\13\1\17"+
-    "\1\20\1\21\1\22\1\23\1\24\2\13\1\25\2\13"+
-    "\15\17\1\26\2\13\3\17\1\13\7\27\1\30\5\27"+
-    "\4\31\1\27\1\32\3\27\1\33\1\27\15\31\3\27"+
-    "\3\31\10\27\1\30\5\27\4\34\1\27\1\32\3\27"+
-    "\1\35\1\27\15\34\3\27\3\34\1\27\7\36\1\37"+
-    "\5\36\4\40\1\36\1\32\2\27\1\36\1\41\1\36"+
-    "\15\40\3\36\1\42\2\40\2\36\1\43\5\36\1\37"+
-    "\5\36\4\44\1\36\1\45\2\36\1\46\2\36\15\44"+
-    "\3\36\3\44\10\36\1\37\5\36\4\47\1\36\1\45"+
-    "\2\36\1\46\2\36\15\47\3\36\3\47\10\36\1\37"+
-    "\5\36\4\47\1\36\1\45\2\36\1\50\2\36\15\47"+
-    "\3\36\3\47\10\36\1\37\1\36\1\51\3\36\4\52"+
-    "\1\36\1\45\5\36\15\52\3\36\3\52\10\36\1\53"+
-    "\5\36\4\54\1\36\1\45\5\36\15\54\1\36\1\55"+
-    "\1\36\3\54\1\36\1\56\1\57\5\56\1\60\1\56"+
-    "\1\61\3\56\4\62\1\56\1\63\2\56\1\64\2\56"+
-    "\15\62\2\56\1\65\3\62\1\56\55\0\1\66\62\0"+
-    "\1\67\4\0\4\70\7\0\6\70\1\71\6\70\3\0"+
-    "\3\70\12\0\1\72\43\0\1\73\1\74\1\75\1\76"+
-    "\2\77\1\0\1\100\3\0\1\100\1\17\1\20\1\21"+
-    "\1\22\7\0\15\17\3\0\3\17\3\0\1\101\1\0"+
-    "\1\102\2\103\1\0\1\104\3\0\1\104\3\20\1\22"+
-    "\7\0\15\20\3\0\3\20\2\0\1\73\1\105\1\75"+
-    "\1\76\2\103\1\0\1\104\3\0\1\104\1\21\1\20"+
-    "\1\21\1\22\7\0\15\21\3\0\3\21\3\0\1\106"+
-    "\1\0\1\102\2\77\1\0\1\100\3\0\1\100\4\22"+
-    "\7\0\15\22\3\0\3\22\24\0\1\13\55\0\1\107"+
-    "\73\0\1\110\16\0\1\67\4\0\4\70\7\0\15\70"+
-    "\3\0\3\70\16\0\4\31\7\0\15\31\3\0\3\31"+
-    "\24\0\1\27\56\0\1\111\42\0\4\34\7\0\15\34"+
-    "\3\0\3\34\27\0\1\112\42\0\4\40\7\0\15\40"+
-    "\3\0\3\40\16\0\4\40\7\0\2\40\1\113\12\40"+
-    "\3\0\3\40\2\0\1\114\67\0\4\44\7\0\15\44"+
-    "\3\0\3\44\24\0\1\36\55\0\1\115\43\0\4\47"+
-    "\7\0\15\47\3\0\3\47\26\0\1\116\37\0\1\117"+
-    "\57\0\4\52\7\0\15\52\3\0\3\52\11\0\1\120"+
-    "\4\0\4\70\7\0\15\70\3\0\3\70\16\0\4\54"+
-    "\7\0\15\54\3\0\3\54\47\0\1\117\6\0\1\121"+
-    "\63\0\1\122\57\0\4\62\7\0\15\62\3\0\3\62"+
-    "\24\0\1\56\55\0\1\123\43\0\4\70\7\0\15\70"+
-    "\3\0\3\70\14\0\1\36\1\0\4\124\1\0\3\125"+
-    "\3\0\15\124\3\0\3\124\14\0\1\36\1\0\4\124"+
-    "\1\0\3\125\3\0\3\124\1\126\11\124\3\0\3\124"+
-    "\16\0\1\127\1\0\1\127\10\0\15\127\3\0\3\127"+
-    "\16\0\1\130\1\131\1\132\1\133\7\0\15\130\3\0"+
-    "\3\130\16\0\1\134\1\0\1\134\10\0\15\134\3\0"+
-    "\3\134\16\0\1\135\1\136\1\135\1\136\7\0\15\135"+
-    "\3\0\3\135\16\0\1\137\2\140\1\141\7\0\15\137"+
-    "\3\0\3\137\16\0\1\100\2\142\10\0\15\100\3\0"+
-    "\3\100\16\0\1\143\2\144\1\145\7\0\15\143\3\0"+
-    "\3\143\16\0\4\136\7\0\15\136\3\0\3\136\16\0"+
-    "\1\146\2\147\1\150\7\0\15\146\3\0\3\146\16\0"+
-    "\1\151\2\152\1\153\7\0\15\151\3\0\3\151\16\0"+
-    "\1\154\1\144\1\155\1\145\7\0\15\154\3\0\3\154"+
-    "\16\0\1\156\2\131\1\133\7\0\15\156\3\0\3\156"+
-    "\30\0\1\157\1\160\64\0\1\161\27\0\4\40\7\0"+
-    "\2\40\1\162\12\40\3\0\3\40\2\0\1\163\101\0"+
-    "\1\164\1\165\40\0\4\70\7\0\6\70\1\166\6\70"+
-    "\3\0\3\70\2\0\1\167\63\0\1\170\71\0\1\171"+
-    "\1\172\34\0\1\173\1\0\1\36\1\0\4\124\1\0"+
-    "\3\125\3\0\15\124\3\0\3\124\16\0\4\174\1\0"+
-    "\3\125\3\0\15\174\3\0\3\174\12\0\1\173\1\0"+
-    "\1\36\1\0\4\124\1\0\3\125\3\0\10\124\1\175"+
-    "\4\124\3\0\3\124\2\0\1\73\13\0\1\127\1\0"+
-    "\1\127\10\0\15\127\3\0\3\127\3\0\1\176\1\0"+
-    "\1\102\2\177\6\0\1\130\1\131\1\132\1\133\7\0"+
-    "\15\130\3\0\3\130\3\0\1\200\1\0\1\102\2\201"+
-    "\1\0\1\202\3\0\1\202\3\131\1\133\7\0\15\131"+
-    "\3\0\3\131\3\0\1\203\1\0\1\102\2\201\1\0"+
-    "\1\202\3\0\1\202\1\132\1\131\1\132\1\133\7\0"+
-    "\15\132\3\0\3\132\3\0\1\204\1\0\1\102\2\177"+
-    "\6\0\4\133\7\0\15\133\3\0\3\133\3\0\1\205"+
-    "\2\0\1\205\7\0\1\135\1\136\1\135\1\136\7\0"+
-    "\15\135\3\0\3\135\3\0\1\205\2\0\1\205\7\0"+
-    "\4\136\7\0\15\136\3\0\3\136\3\0\1\177\1\0"+
-    "\1\102\2\177\6\0\1\137\2\140\1\141\7\0\15\137"+
-    "\3\0\3\137\3\0\1\201\1\0\1\102\2\201\1\0"+
-    "\1\202\3\0\1\202\3\140\1\141\7\0\15\140\3\0"+
-    "\3\140\3\0\1\177\1\0\1\102\2\177\6\0\4\141"+
-    "\7\0\15\141\3\0\3\141\3\0\1\202\2\0\2\202"+
-    "\1\0\1\202\3\0\1\202\3\142\10\0\15\142\3\0"+
-    "\3\142\3\0\1\106\1\0\1\102\2\77\1\0\1\100"+
-    "\3\0\1\100\1\143\2\144\1\145\7\0\15\143\3\0"+
-    "\3\143\3\0\1\101\1\0\1\102\2\103\1\0\1\104"+
-    "\3\0\1\104\3\144\1\145\7\0\15\144\3\0\3\144"+
-    "\3\0\1\106\1\0\1\102\2\77\1\0\1\100\3\0"+
-    "\1\100\4\145\7\0\15\145\3\0\3\145\3\0\1\77"+
-    "\1\0\1\102\2\77\1\0\1\100\3\0\1\100\1\146"+
-    "\2\147\1\150\7\0\15\146\3\0\3\146\3\0\1\103"+
-    "\1\0\1\102\2\103\1\0\1\104\3\0\1\104\3\147"+
-    "\1\150\7\0\15\147\3\0\3\147\3\0\1\77\1\0"+
-    "\1\102\2\77\1\0\1\100\3\0\1\100\4\150\7\0"+
-    "\15\150\3\0\3\150\3\0\1\100\2\0\2\100\1\0"+
-    "\1\100\3\0\1\100\1\151\2\152\1\153\7\0\15\151"+
-    "\3\0\3\151\3\0\1\104\2\0\2\104\1\0\1\104"+
-    "\3\0\1\104\3\152\1\153\7\0\15\152\3\0\3\152"+
-    "\3\0\1\100\2\0\2\100\1\0\1\100\3\0\1\100"+
-    "\4\153\7\0\15\153\3\0\3\153\3\0\1\206\1\0"+
-    "\1\102\2\77\1\0\1\100\3\0\1\100\1\154\1\144"+
-    "\1\155\1\145\7\0\15\154\3\0\3\154\3\0\1\207"+
-    "\1\0\1\102\2\103\1\0\1\104\3\0\1\104\1\155"+
-    "\1\144\1\155\1\145\7\0\15\155\3\0\3\155\3\0"+
-    "\1\204\1\0\1\102\2\177\6\0\1\156\2\131\1\133"+
-    "\7\0\15\156\3\0\3\156\31\0\1\160\54\0\1\210"+
-    "\64\0\1\211\26\0\4\40\7\0\15\40\3\0\1\40"+
-    "\1\212\1\40\31\0\1\165\54\0\1\213\35\0\1\36"+
-    "\1\0\4\124\1\0\3\125\3\0\3\124\1\214\11\124"+
-    "\3\0\3\124\2\0\1\215\102\0\1\172\54\0\1\216"+
-    "\34\0\1\217\52\0\1\173\3\0\4\174\7\0\15\174"+
-    "\3\0\3\174\12\0\1\173\1\0\1\220\1\0\4\124"+
-    "\1\0\3\125\3\0\15\124\3\0\3\124\16\0\1\221"+
-    "\1\133\1\221\1\133\7\0\15\221\3\0\3\221\16\0"+
-    "\4\141\7\0\15\141\3\0\3\141\16\0\4\145\7\0"+
-    "\15\145\3\0\3\145\16\0\4\150\7\0\15\150\3\0"+
-    "\3\150\16\0\4\153\7\0\15\153\3\0\3\153\16\0"+
-    "\1\222\1\145\1\222\1\145\7\0\15\222\3\0\3\222"+
-    "\16\0\4\133\7\0\15\133\3\0\3\133\16\0\4\223"+
-    "\7\0\15\223\3\0\3\223\33\0\1\224\61\0\1\225"+
-    "\30\0\4\40\6\0\1\226\15\40\3\0\2\40\1\227"+
-    "\33\0\1\230\32\0\1\173\1\0\1\36\1\0\4\124"+
-    "\1\0\3\125\3\0\10\124\1\231\4\124\3\0\3\124"+
-    "\2\0\1\232\104\0\1\233\36\0\4\234\7\0\15\234"+
-    "\3\0\3\234\3\0\1\176\1\0\1\102\2\177\6\0"+
-    "\1\221\1\133\1\221\1\133\7\0\15\221\3\0\3\221"+
-    "\3\0\1\206\1\0\1\102\2\77\1\0\1\100\3\0"+
-    "\1\100\1\222\1\145\1\222\1\145\7\0\15\222\3\0"+
-    "\3\222\3\0\1\205\2\0\1\205\7\0\4\223\7\0"+
-    "\15\223\3\0\3\223\34\0\1\235\55\0\1\236\26\0"+
-    "\1\237\60\0\4\40\6\0\1\226\15\40\3\0\3\40"+
-    "\34\0\1\240\31\0\1\173\1\0\1\117\1\0\4\124"+
-    "\1\0\3\125\3\0\15\124\3\0\3\124\34\0\1\241"+
-    "\32\0\1\242\2\0\4\234\7\0\15\234\3\0\3\234"+
-    "\35\0\1\243\62\0\1\244\20\0\1\245\77\0\1\246"+
-    "\53\0\1\247\32\0\1\36\1\0\4\174\1\0\3\125"+
-    "\3\0\15\174\3\0\3\174\36\0\1\250\53\0\1\251"+
-    "\33\0\4\252\7\0\15\252\3\0\3\252\36\0\1\253"+
-    "\53\0\1\254\54\0\1\255\61\0\1\256\11\0\1\257"+
-    "\12\0\4\252\7\0\15\252\3\0\3\252\37\0\1\260"+
-    "\53\0\1\261\54\0\1\262\22\0\1\13\62\0\4\263"+
-    "\7\0\15\263\3\0\3\263\40\0\1\264\53\0\1\265"+
-    "\43\0\1\266\26\0\2\263\1\0\2\263\1\0\2\263"+
-    "\2\0\5\263\7\0\15\263\3\0\4\263\27\0\1\267"+
-    "\53\0\1\270\24\0";
+    "\1\20\1\21\1\22\1\23\3\13\1\24\2\13\15\17"+
+    "\1\25\2\13\3\17\1\13\7\26\1\27\5\26\4\30"+
+    "\5\26\1\31\1\26\15\30\3\26\3\30\10\26\1\27"+
+    "\5\26\4\32\5\26\1\33\1\26\15\32\3\26\3\32"+
+    "\1\26\7\34\1\35\5\34\4\36\1\34\1\37\2\26"+
+    "\1\34\1\40\1\34\15\36\3\34\1\41\2\36\2\34"+
+    "\1\42\5\34\1\35\5\34\4\43\4\34\1\44\2\34"+
+    "\15\43\3\34\3\43\10\34\1\35\5\34\4\45\4\34"+
+    "\1\44\2\34\15\45\3\34\3\45\10\34\1\35\5\34"+
+    "\4\45\4\34\1\46\2\34\15\45\3\34\3\45\10\34"+
+    "\1\35\1\34\1\47\3\34\4\50\7\34\15\50\3\34"+
+    "\3\50\10\34\1\51\5\34\4\52\7\34\15\52\1\34"+
+    "\1\53\1\34\3\52\1\34\1\54\1\55\5\54\1\56"+
+    "\1\54\1\57\3\54\4\60\4\54\1\61\2\54\15\60"+
+    "\2\54\1\62\3\60\1\54\55\0\1\63\62\0\1\64"+
+    "\4\0\4\65\7\0\6\65\1\66\6\65\3\0\3\65"+
+    "\12\0\1\67\43\0\1\70\1\71\1\72\1\73\2\74"+
+    "\1\0\1\75\3\0\1\75\1\17\1\20\1\21\1\22"+
+    "\7\0\15\17\3\0\3\17\3\0\1\76\1\0\1\77"+
+    "\2\100\1\0\1\101\3\0\1\101\3\20\1\22\7\0"+
+    "\15\20\3\0\3\20\2\0\1\70\1\102\1\72\1\73"+
+    "\2\100\1\0\1\101\3\0\1\101\1\21\1\20\1\21"+
+    "\1\22\7\0\15\21\3\0\3\21\3\0\1\103\1\0"+
+    "\1\77\2\74\1\0\1\75\3\0\1\75\4\22\7\0"+
+    "\15\22\3\0\3\22\26\0\1\104\73\0\1\105\16\0"+
+    "\1\64\4\0\4\65\7\0\15\65\3\0\3\65\16\0"+
+    "\4\30\7\0\15\30\3\0\3\30\27\0\1\106\42\0"+
+    "\4\32\7\0\15\32\3\0\3\32\27\0\1\107\42\0"+
+    "\4\36\7\0\15\36\3\0\3\36\24\0\1\26\45\0"+
+    "\4\36\7\0\2\36\1\110\12\36\3\0\3\36\2\0"+
+    "\1\111\67\0\4\43\7\0\15\43\3\0\3\43\26\0"+
+    "\1\112\43\0\4\45\7\0\15\45\3\0\3\45\26\0"+
+    "\1\113\37\0\1\114\57\0\4\50\7\0\15\50\3\0"+
+    "\3\50\11\0\1\115\4\0\4\65\7\0\15\65\3\0"+
+    "\3\65\16\0\4\52\7\0\15\52\3\0\3\52\47\0"+
+    "\1\114\6\0\1\116\63\0\1\117\57\0\4\60\7\0"+
+    "\15\60\3\0\3\60\26\0\1\120\43\0\4\65\7\0"+
+    "\15\65\3\0\3\65\14\0\1\34\1\0\4\121\1\0"+
+    "\3\122\3\0\15\121\3\0\3\121\14\0\1\34\1\0"+
+    "\4\121\1\0\3\122\3\0\3\121\1\123\11\121\3\0"+
+    "\3\121\16\0\1\124\1\0\1\124\10\0\15\124\3\0"+
+    "\3\124\16\0\1\125\1\126\1\127\1\130\7\0\15\125"+
+    "\3\0\3\125\16\0\1\131\1\0\1\131\10\0\15\131"+
+    "\3\0\3\131\16\0\1\132\1\133\1\132\1\133\7\0"+
+    "\15\132\3\0\3\132\16\0\1\134\2\135\1\136\7\0"+
+    "\15\134\3\0\3\134\16\0\1\75\2\137\10\0\15\75"+
+    "\3\0\3\75\16\0\1\140\2\141\1\142\7\0\15\140"+
+    "\3\0\3\140\16\0\4\133\7\0\15\133\3\0\3\133"+
+    "\16\0\1\143\2\144\1\145\7\0\15\143\3\0\3\143"+
+    "\16\0\1\146\2\147\1\150\7\0\15\146\3\0\3\146"+
+    "\16\0\1\151\1\141\1\152\1\142\7\0\15\151\3\0"+
+    "\3\151\16\0\1\153\2\126\1\130\7\0\15\153\3\0"+
+    "\3\153\30\0\1\154\1\155\64\0\1\156\27\0\4\36"+
+    "\7\0\2\36\1\157\12\36\3\0\3\36\2\0\1\160"+
+    "\101\0\1\161\1\162\40\0\4\65\7\0\6\65\1\163"+
+    "\6\65\3\0\3\65\2\0\1\164\63\0\1\165\71\0"+
+    "\1\166\1\167\34\0\1\170\1\0\1\34\1\0\4\121"+
+    "\1\0\3\122\3\0\15\121\3\0\3\121\16\0\4\171"+
+    "\1\0\3\122\3\0\15\171\3\0\3\171\12\0\1\170"+
+    "\1\0\1\34\1\0\4\121\1\0\3\122\3\0\10\121"+
+    "\1\172\4\121\3\0\3\121\2\0\1\70\13\0\1\124"+
+    "\1\0\1\124\10\0\15\124\3\0\3\124\3\0\1\173"+
+    "\1\0\1\77\2\174\6\0\1\125\1\126\1\127\1\130"+
+    "\7\0\15\125\3\0\3\125\3\0\1\175\1\0\1\77"+
+    "\2\176\1\0\1\177\3\0\1\177\3\126\1\130\7\0"+
+    "\15\126\3\0\3\126\3\0\1\200\1\0\1\77\2\176"+
+    "\1\0\1\177\3\0\1\177\1\127\1\126\1\127\1\130"+
+    "\7\0\15\127\3\0\3\127\3\0\1\201\1\0\1\77"+
+    "\2\174\6\0\4\130\7\0\15\130\3\0\3\130\3\0"+
+    "\1\202\2\0\1\202\7\0\1\132\1\133\1\132\1\133"+
+    "\7\0\15\132\3\0\3\132\3\0\1\202\2\0\1\202"+
+    "\7\0\4\133\7\0\15\133\3\0\3\133\3\0\1\174"+
+    "\1\0\1\77\2\174\6\0\1\134\2\135\1\136\7\0"+
+    "\15\134\3\0\3\134\3\0\1\176\1\0\1\77\2\176"+
+    "\1\0\1\177\3\0\1\177\3\135\1\136\7\0\15\135"+
+    "\3\0\3\135\3\0\1\174\1\0\1\77\2\174\6\0"+
+    "\4\136\7\0\15\136\3\0\3\136\3\0\1\177\2\0"+
+    "\2\177\1\0\1\177\3\0\1\177\3\137\10\0\15\137"+
+    "\3\0\3\137\3\0\1\103\1\0\1\77\2\74\1\0"+
+    "\1\75\3\0\1\75\1\140\2\141\1\142\7\0\15\140"+
+    "\3\0\3\140\3\0\1\76\1\0\1\77\2\100\1\0"+
+    "\1\101\3\0\1\101\3\141\1\142\7\0\15\141\3\0"+
+    "\3\141\3\0\1\103\1\0\1\77\2\74\1\0\1\75"+
+    "\3\0\1\75\4\142\7\0\15\142\3\0\3\142\3\0"+
+    "\1\74\1\0\1\77\2\74\1\0\1\75\3\0\1\75"+
+    "\1\143\2\144\1\145\7\0\15\143\3\0\3\143\3\0"+
+    "\1\100\1\0\1\77\2\100\1\0\1\101\3\0\1\101"+
+    "\3\144\1\145\7\0\15\144\3\0\3\144\3\0\1\74"+
+    "\1\0\1\77\2\74\1\0\1\75\3\0\1\75\4\145"+
+    "\7\0\15\145\3\0\3\145\3\0\1\75\2\0\2\75"+
+    "\1\0\1\75\3\0\1\75\1\146\2\147\1\150\7\0"+
+    "\15\146\3\0\3\146\3\0\1\101\2\0\2\101\1\0"+
+    "\1\101\3\0\1\101\3\147\1\150\7\0\15\147\3\0"+
+    "\3\147\3\0\1\75\2\0\2\75\1\0\1\75\3\0"+
+    "\1\75\4\150\7\0\15\150\3\0\3\150\3\0\1\203"+
+    "\1\0\1\77\2\74\1\0\1\75\3\0\1\75\1\151"+
+    "\1\141\1\152\1\142\7\0\15\151\3\0\3\151\3\0"+
+    "\1\204\1\0\1\77\2\100\1\0\1\101\3\0\1\101"+
+    "\1\152\1\141\1\152\1\142\7\0\15\152\3\0\3\152"+
+    "\3\0\1\201\1\0\1\77\2\174\6\0\1\153\2\126"+
+    "\1\130\7\0\15\153\3\0\3\153\31\0\1\155\54\0"+
+    "\1\205\64\0\1\206\26\0\4\36\7\0\15\36\3\0"+
+    "\1\36\1\207\1\36\31\0\1\162\54\0\1\210\35\0"+
+    "\1\34\1\0\4\121\1\0\3\122\3\0\3\121\1\211"+
+    "\11\121\3\0\3\121\2\0\1\212\102\0\1\167\54\0"+
+    "\1\213\34\0\1\214\52\0\1\170\3\0\4\171\7\0"+
+    "\15\171\3\0\3\171\12\0\1\170\1\0\1\215\1\0"+
+    "\4\121\1\0\3\122\3\0\15\121\3\0\3\121\16\0"+
+    "\1\216\1\130\1\216\1\130\7\0\15\216\3\0\3\216"+
+    "\16\0\4\136\7\0\15\136\3\0\3\136\16\0\4\142"+
+    "\7\0\15\142\3\0\3\142\16\0\4\145\7\0\15\145"+
+    "\3\0\3\145\16\0\4\150\7\0\15\150\3\0\3\150"+
+    "\16\0\1\217\1\142\1\217\1\142\7\0\15\217\3\0"+
+    "\3\217\16\0\4\130\7\0\15\130\3\0\3\130\16\0"+
+    "\4\220\7\0\15\220\3\0\3\220\33\0\1\221\61\0"+
+    "\1\222\30\0\4\36\6\0\1\223\15\36\3\0\2\36"+
+    "\1\224\33\0\1\225\32\0\1\170\1\0\1\34\1\0"+
+    "\4\121\1\0\3\122\3\0\10\121\1\226\4\121\3\0"+
+    "\3\121\2\0\1\227\104\0\1\230\36\0\4\231\7\0"+
+    "\15\231\3\0\3\231\3\0\1\173\1\0\1\77\2\174"+
+    "\6\0\1\216\1\130\1\216\1\130\7\0\15\216\3\0"+
+    "\3\216\3\0\1\203\1\0\1\77\2\74\1\0\1\75"+
+    "\3\0\1\75\1\217\1\142\1\217\1\142\7\0\15\217"+
+    "\3\0\3\217\3\0\1\202\2\0\1\202\7\0\4\220"+
+    "\7\0\15\220\3\0\3\220\34\0\1\232\55\0\1\233"+
+    "\26\0\1\234\60\0\4\36\6\0\1\223\15\36\3\0"+
+    "\3\36\34\0\1\235\31\0\1\170\1\0\1\114\1\0"+
+    "\4\121\1\0\3\122\3\0\15\121\3\0\3\121\34\0"+
+    "\1\236\32\0\1\237\2\0\4\231\7\0\15\231\3\0"+
+    "\3\231\35\0\1\240\62\0\1\241\20\0\1\242\77\0"+
+    "\1\243\53\0\1\244\32\0\1\34\1\0\4\171\1\0"+
+    "\3\122\3\0\15\171\3\0\3\171\36\0\1\245\53\0"+
+    "\1\246\33\0\4\247\7\0\15\247\3\0\3\247\36\0"+
+    "\1\250\53\0\1\251\54\0\1\252\61\0\1\253\11\0"+
+    "\1\254\12\0\4\247\7\0\15\247\3\0\3\247\37\0"+
+    "\1\255\53\0\1\256\54\0\1\257\22\0\1\13\62\0"+
+    "\4\260\7\0\15\260\3\0\3\260\40\0\1\261\53\0"+
+    "\1\262\43\0\1\263\26\0\2\260\1\0\2\260\1\0"+
+    "\2\260\2\0\5\260\7\0\15\260\3\0\4\260\27\0"+
+    "\1\264\53\0\1\265\24\0";
 
   private static int [] zzUnpackTrans() {
-    int [] result = new int[7040];
+    int [] result = new int[6908];
     int offset = 0;
     offset = zzUnpackTrans(ZZ_TRANS_PACKED_0, offset, result);
     return result;
@@ -355,8 +351,8 @@ class WikipediaTokenizerImpl {
   private static final int [] ZZ_ATTRIBUTE = zzUnpackAttribute();
 
   private static final String ZZ_ATTRIBUTE_PACKED_0 =
-    "\12\0\1\11\7\1\1\11\3\1\1\11\6\1\1\11"+
-    "\2\1\1\11\14\1\1\11\6\1\2\11\3\0\1\11"+
+    "\12\0\1\11\7\1\1\11\2\1\1\11\5\1\1\11"+
+    "\3\1\1\11\13\1\1\11\5\1\2\11\3\0\1\11"+
     "\14\0\2\1\2\11\1\1\1\0\2\1\1\11\1\0"+
     "\1\1\1\0\1\1\3\0\7\1\2\0\1\1\1\0"+
     "\15\1\3\0\1\1\1\11\3\0\1\1\1\11\5\0"+
@@ -365,7 +361,7 @@ class WikipediaTokenizerImpl {
     "\2\0\3\11";
 
   private static int [] zzUnpackAttribute() {
-    int [] result = new int[184];
+    int [] result = new int[181];
     int offset = 0;
     offset = zzUnpackAttribute(ZZ_ATTRIBUTE_PACKED_0, offset, result);
     return result;
@@ -508,7 +504,6 @@ final void reset() {
 
   /**
    * Creates a new scanner
-   * There is also a java.io.InputStream version of this constructor.
    *
    * @param   in  the java.io.Reader to read input from.
    */
@@ -516,7 +511,6 @@ final void reset() {
     this.zzReader = in;
   }
 
-  
 
   /** 
    * Unpacks the compressed character translation table.

Modified: lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerImpl.jflex
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerImpl.jflex?rev=1548595&r1=1548594&r2=1548595&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerImpl.jflex (original)
+++ lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerImpl.jflex Fri Dec  6 16:51:43 2013
@@ -212,7 +212,7 @@ DOUBLE_EQUALS = "="{2}
   {DOUBLE_BRACE} {numWikiTokensSeen = 0; positionInc = 1; currentTokType = CITATION; yybegin(DOUBLE_BRACE_STATE);/* Break so we don't hit fall-through warning: */ break;}
   {CITATION} {numWikiTokensSeen = 0; positionInc = 1; currentTokType = CITATION; yybegin(DOUBLE_BRACE_STATE);/* Break so we don't hit fall-through warning: */ break;}
 //ignore
-  . | {WHITESPACE} |{INFOBOX}                                               {numWikiTokensSeen = 0;  positionInc = 1; /* Break so we don't hit fall-through warning: */ break;}
+  [^] |{INFOBOX}                                               {numWikiTokensSeen = 0;  positionInc = 1; /* Break so we don't hit fall-through warning: */ break;}
 }
 
 <INTERNAL_LINK_STATE>{
@@ -221,7 +221,7 @@ DOUBLE_EQUALS = "="{2}
   {ALPHANUM} {yybegin(INTERNAL_LINK_STATE); numWikiTokensSeen++; return currentTokType;}
   {DOUBLE_BRACKET_CLOSE} {numLinkToks = 0; yybegin(YYINITIAL); /* Break so we don't hit fall-through warning: */ break;}
   //ignore
-  . | {WHITESPACE}                                               { positionInc = 1; /* Break so we don't hit fall-through warning: */ break;}
+  [^]                                               { positionInc = 1; /* Break so we don't hit fall-through warning: */ break;}
 }
 
 <EXTERNAL_LINK_STATE>{
@@ -236,7 +236,7 @@ DOUBLE_EQUALS = "="{2}
   {ALPHANUM} {yybegin(CATEGORY_STATE); numWikiTokensSeen++; return currentTokType;}
   {DOUBLE_BRACKET_CLOSE} {yybegin(YYINITIAL);/* Break so we don't hit fall-through warning: */ break;}
   //ignore
-  . | {WHITESPACE}                                               { positionInc = 1; /* Break so we don't hit fall-through warning: */ break;}
+  [^]                                               { positionInc = 1; /* Break so we don't hit fall-through warning: */ break;}
 }
 //italics
 <TWO_SINGLE_QUOTES_STATE>{
@@ -249,7 +249,7 @@ DOUBLE_EQUALS = "="{2}
    {EXTERNAL_LINK} {currentTokType = EXTERNAL_LINK; numWikiTokensSeen = 0; yybegin(EXTERNAL_LINK_STATE); /* Break so we don't hit fall-through warning: */ break;}
 
    //ignore
-  . | {WHITESPACE}                                               { /* Break so we don't hit fall-through warning: */ break;/* ignore */ }
+   [^]                                               { /* Break so we don't hit fall-through warning: */ break;/* ignore */ }
 }
 //bold
 <THREE_SINGLE_QUOTES_STATE>{
@@ -260,7 +260,7 @@ DOUBLE_EQUALS = "="{2}
    {EXTERNAL_LINK} {currentTokType = EXTERNAL_LINK; numWikiTokensSeen = 0; yybegin(EXTERNAL_LINK_STATE); /* Break so we don't hit fall-through warning: */ break;}
 
    //ignore
-  . | {WHITESPACE}                                               { /* Break so we don't hit fall-through warning: */ break;/* ignore */ }
+   [^]                                               { /* Break so we don't hit fall-through warning: */ break;/* ignore */ }
 
 }
 //bold italics
@@ -272,7 +272,7 @@ DOUBLE_EQUALS = "="{2}
    {EXTERNAL_LINK} {currentTokType = EXTERNAL_LINK; numWikiTokensSeen = 0; yybegin(EXTERNAL_LINK_STATE); /* Break so we don't hit fall-through warning: */ break;}
 
    //ignore
-  . | {WHITESPACE}                                               { /* Break so we don't hit fall-through warning: */ break;/* ignore */ }
+   [^]                                               { /* Break so we don't hit fall-through warning: */ break;/* ignore */ }
 }
 
 <DOUBLE_EQUALS_STATE>{
@@ -280,15 +280,15 @@ DOUBLE_EQUALS = "="{2}
  {ALPHANUM} {currentTokType = HEADING; yybegin(DOUBLE_EQUALS_STATE); numWikiTokensSeen++; return currentTokType;}
  {DOUBLE_EQUALS} {yybegin(YYINITIAL); /* Break so we don't hit fall-through warning: */ break;}
   //ignore
-  . | {WHITESPACE}                                               { /* Break so we don't hit fall-through warning: */ break;/* ignore */ }
+  [^]                                               { /* Break so we don't hit fall-through warning: */ break;/* ignore */ }
 }
 
 <DOUBLE_BRACE_STATE>{
   {ALPHANUM} {yybegin(DOUBLE_BRACE_STATE); numWikiTokensSeen = 0; return currentTokType;}
   {DOUBLE_BRACE_CLOSE} {yybegin(YYINITIAL); /* Break so we don't hit fall-through warning: */ break;}
   {CITATION_CLOSE} {yybegin(YYINITIAL); /* Break so we don't hit fall-through warning: */ break;}
-   //ignore
-  . | {WHITESPACE}                                               { /* Break so we don't hit fall-through warning: */ break;/* ignore */ }
+  //ignore
+  [^]                                               { /* Break so we don't hit fall-through warning: */ break;/* ignore */ }
 }
 
 <STRING> {
@@ -305,7 +305,7 @@ DOUBLE_EQUALS = "="{2}
 
   {PIPE} {yybegin(STRING); return currentTokType;/*pipe*/}
 
-  .|{WHITESPACE}                                              { /* Break so we don't hit fall-through warning: */ break;/* ignore STRING */ }
+  [^]                                              { /* Break so we don't hit fall-through warning: */ break;/* ignore STRING */ }
 }
 
 
@@ -327,7 +327,7 @@ DOUBLE_EQUALS = "="{2}
 //end wikipedia
 
 /** Ignore the rest */
-. | {WHITESPACE}|{TAGS}                                                { /* Break so we don't hit fall-through warning: */ break;/* ignore */ }
+[^] | {TAGS}                                          { /* Break so we don't hit fall-through warning: */ break;/* ignore */ }
 
 
 //INTERNAL_LINK = "["{2}({ALPHANUM}+{WHITESPACE}*)+"]"{2}

Modified: lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestStandardAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestStandardAnalyzer.java?rev=1548595&r1=1548594&r2=1548595&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestStandardAnalyzer.java (original)
+++ lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestStandardAnalyzer.java Fri Dec  6 16:51:43 2013
@@ -202,7 +202,7 @@ public class TestStandardAnalyzer extend
   }
   
   public void testUnicodeWordBreaks() throws Exception {
-    WordBreakTestUnicode_6_1_0 wordBreakTest = new WordBreakTestUnicode_6_1_0();
+    WordBreakTestUnicode_6_3_0 wordBreakTest = new WordBreakTestUnicode_6_3_0();
     wordBreakTest.test(a);
   }
   
@@ -230,6 +230,8 @@ public class TestStandardAnalyzer extend
     checkOneTerm(a, "壹゙", "壹゙"); // ideographic
     checkOneTerm(a, "아゙",  "아゙"); // hangul
   }
+  
+  
 
   /** blast some random strings through the analyzer */
   public void testRandomStrings() throws Exception {

Modified: lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestUAX29URLEmailTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestUAX29URLEmailTokenizer.java?rev=1548595&r1=1548594&r2=1548595&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestUAX29URLEmailTokenizer.java (original)
+++ lucene/dev/trunk/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestUAX29URLEmailTokenizer.java Fri Dec  6 16:51:43 2013
@@ -424,7 +424,7 @@ public class TestUAX29URLEmailTokenizer 
   }
 
   public void testUnicodeWordBreaks() throws Exception {
-    WordBreakTestUnicode_6_1_0 wordBreakTest = new WordBreakTestUnicode_6_1_0();
+    WordBreakTestUnicode_6_3_0 wordBreakTest = new WordBreakTestUnicode_6_3_0();
     wordBreakTest.test(a);
   }