You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by sa...@apache.org on 2010/10/15 07:41:55 UTC

svn commit: r1022826 [1/3] - in /lucene/dev/trunk/modules/analysis: ./ common/src/java/org/apache/lucene/analysis/standard/ common/src/test/org/apache/lucene/analysis/core/

Author: sarowe
Date: Fri Oct 15 05:41:54 2010
New Revision: 1022826

URL: http://svn.apache.org/viewvc?rev=1022826&view=rev
Log:
LUCENE-2699: Update StandardTokenizer and UAX29Tokenizer to Unicode 6.0.0

Added:
    lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/core/WordBreakTestUnicode_6_0_0.java   (with props)
Removed:
    lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/core/WordBreakTestUnicode_5_2_0.java
Modified:
    lucene/dev/trunk/modules/analysis/CHANGES.txt
    lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/ASCIITLD.jflex-macro
    lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/READ_BEFORE_REGENERATING.txt
    lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.java
    lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.jflex
    lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29Tokenizer.java
    lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29Tokenizer.jflex
    lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestStandardAnalyzer.java
    lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestUAX29Tokenizer.java
    lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/core/generateJavaUnicodeWordBreakTest.pl

Modified: lucene/dev/trunk/modules/analysis/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/analysis/CHANGES.txt?rev=1022826&r1=1022825&r2=1022826&view=diff
==============================================================================
--- lucene/dev/trunk/modules/analysis/CHANGES.txt (original)
+++ lucene/dev/trunk/modules/analysis/CHANGES.txt Fri Oct 15 05:41:54 2010
@@ -15,6 +15,9 @@ API Changes
    RFCs.  ClassicTokenizer/Analyzer retains the old StandardTokenizer/Analyzer
    behavior.  (Steven Rowe, Robert Muir, Uwe Schindler)
 
+ * LUCENE-2699: Update StandardTokenizer and UAX29Tokenizer to Unicode 6.0.0.
+   (Steven Rowe)
+   
  * LUCENE-1370: Added ShingleFilter option to output unigrams if no shingles
    can be generated. (Chris Harris via Steven Rowe)
    

Modified: lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/ASCIITLD.jflex-macro
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/ASCIITLD.jflex-macro?rev=1022826&r1=1022825&r2=1022826&view=diff
==============================================================================
--- lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/ASCIITLD.jflex-macro (original)
+++ lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/ASCIITLD.jflex-macro Fri Oct 15 05:41:54 2010
@@ -15,8 +15,8 @@
  */
 
 // Generated from IANA Root Zone Database <http://www.internic.net/zones/root.zone>
-// file version from Sunday, October 3, 2010 11:34:02 AM UTC
-// generated on Sunday, October 3, 2010 1:07:42 PM UTC
+// file version from Tuesday, October 12, 2010 11:34:09 AM UTC
+// generated on Wednesday, October 13, 2010 4:12:27 AM UTC
 // by org.apache.lucene.analysis.standard.GenerateJflexTLDMacros
 
 ASCIITLD = "." (

Modified: lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/READ_BEFORE_REGENERATING.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/READ_BEFORE_REGENERATING.txt?rev=1022826&r1=1022825&r2=1022826&view=diff
==============================================================================
--- lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/READ_BEFORE_REGENERATING.txt (original)
+++ lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/READ_BEFORE_REGENERATING.txt Fri Oct 15 05:41:54 2010
@@ -18,4 +18,4 @@
 
 WARNING: if you change StandardTokenizerImpl*.jflex and need to regenerate
       the tokenizer, only use the trunk version of JFlex 1.5 (with a minimum
-      SVN revision 591) at the moment!
+      SVN revision 597) at the moment!

Modified: lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.java?rev=1022826&r1=1022825&r2=1022826&view=diff
==============================================================================
--- lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.java (original)
+++ lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.java Fri Oct 15 05:41:54 2010
@@ -1,4 +1,4 @@
-/* The following code was generated by JFlex 1.5.0-SNAPSHOT on 10/3/10 9:07 AM */
+/* The following code was generated by JFlex 1.5.0-SNAPSHOT on 10/13/10 12:12 AM */
 
 package org.apache.lucene.analysis.standard;
 
@@ -87,107 +87,109 @@ public final class StandardTokenizerImpl
     "\37\1\1\0\u01ca\1\4\0\14\1\16\0\5\1\7\0\1\1\1\0"+
     "\1\1\21\0\160\2\5\1\1\0\2\1\2\0\4\1\1\6\7\0"+
     "\1\1\1\5\3\1\1\0\1\1\1\0\24\1\1\0\123\1\1\0"+
-    "\213\1\1\0\7\2\234\1\13\0\46\1\2\0\1\1\7\0\47\1"+
+    "\213\1\1\0\7\2\236\1\11\0\46\1\2\0\1\1\7\0\47\1"+
     "\1\0\1\6\7\0\55\2\1\0\1\2\1\0\2\2\1\0\2\2"+
     "\1\0\1\2\10\0\33\1\5\0\4\1\1\5\13\0\4\2\10\0"+
-    "\2\6\2\0\13\2\6\0\52\1\24\2\1\0\12\3\1\0\1\3"+
-    "\1\6\1\0\2\1\1\2\143\1\1\0\1\1\17\2\2\1\2\2"+
-    "\1\0\4\2\2\1\12\3\3\1\2\0\1\1\17\0\1\2\1\1"+
-    "\1\2\36\1\33\2\2\0\131\1\13\2\1\1\16\0\12\3\41\1"+
-    "\11\2\2\1\2\0\1\6\1\0\1\1\5\0\26\1\4\2\1\1"+
-    "\11\2\1\1\3\2\1\1\5\2\322\0\4\2\66\1\2\0\1\2"+
-    "\1\1\21\2\1\0\1\1\5\2\2\0\12\1\2\2\2\0\12\3"+
-    "\1\0\2\1\6\0\7\1\1\0\3\2\1\0\10\1\2\0\2\1"+
-    "\2\0\26\1\1\0\7\1\1\0\1\1\3\0\4\1\2\0\1\2"+
-    "\1\1\7\2\2\0\2\2\2\0\3\2\1\1\10\0\1\2\4\0"+
-    "\2\1\1\0\3\1\2\2\2\0\12\3\2\1\17\0\3\2\1\0"+
-    "\6\1\4\0\2\1\2\0\26\1\1\0\7\1\1\0\2\1\1\0"+
-    "\2\1\1\0\2\1\2\0\1\2\1\0\5\2\4\0\2\2\2\0"+
-    "\3\2\3\0\1\2\7\0\4\1\1\0\1\1\7\0\12\3\2\2"+
-    "\3\1\1\2\13\0\3\2\1\0\11\1\1\0\3\1\1\0\26\1"+
-    "\1\0\7\1\1\0\2\1\1\0\5\1\2\0\1\2\1\1\10\2"+
-    "\1\0\3\2\1\0\3\2\2\0\1\1\17\0\2\1\2\2\2\0"+
-    "\12\3\21\0\3\2\1\0\10\1\2\0\2\1\2\0\26\1\1\0"+
-    "\7\1\1\0\2\1\1\0\5\1\2\0\1\2\1\1\7\2\2\0"+
-    "\2\2\2\0\3\2\10\0\2\2\4\0\2\1\1\0\3\1\2\2"+
-    "\2\0\12\3\1\0\1\1\20\0\1\2\1\1\1\0\6\1\3\0"+
-    "\3\1\1\0\4\1\3\0\2\1\1\0\1\1\1\0\2\1\3\0"+
-    "\2\1\3\0\3\1\3\0\14\1\4\0\5\2\3\0\3\2\1\0"+
-    "\4\2\2\0\1\1\6\0\1\2\16\0\12\3\21\0\3\2\1\0"+
-    "\10\1\1\0\3\1\1\0\27\1\1\0\12\1\1\0\5\1\3\0"+
-    "\1\1\7\2\1\0\3\2\1\0\4\2\7\0\2\2\1\0\2\1"+
-    "\6\0\2\1\2\2\2\0\12\3\22\0\2\2\1\0\10\1\1\0"+
-    "\3\1\1\0\27\1\1\0\12\1\1\0\5\1\2\0\1\2\1\1"+
-    "\7\2\1\0\3\2\1\0\4\2\7\0\2\2\7\0\1\1\1\0"+
-    "\2\1\2\2\2\0\12\3\22\0\2\2\1\0\10\1\1\0\3\1"+
-    "\1\0\27\1\1\0\20\1\3\0\1\1\7\2\1\0\3\2\1\0"+
-    "\4\2\11\0\1\2\10\0\2\1\2\2\2\0\12\3\12\0\6\1"+
-    "\2\0\2\2\1\0\22\1\3\0\30\1\1\0\11\1\1\0\1\1"+
-    "\2\0\7\1\3\0\1\2\4\0\6\2\1\0\1\2\1\0\10\2"+
-    "\22\0\2\2\15\0\60\105\1\106\2\105\7\106\5\0\7\105\10\106"+
-    "\1\0\12\3\47\0\2\105\1\0\1\105\2\0\2\105\1\0\1\105"+
-    "\2\0\1\105\6\0\4\105\1\0\7\105\1\0\3\105\1\0\1\105"+
-    "\1\0\1\105\2\0\2\105\1\0\4\105\1\106\2\105\6\106\1\0"+
-    "\2\106\1\105\2\0\5\105\1\0\1\105\1\0\6\106\2\0\12\3"+
-    "\2\0\2\105\42\0\1\1\27\0\2\2\6\0\12\3\13\0\1\2"+
-    "\1\0\1\2\1\0\1\2\4\0\2\2\10\1\1\0\44\1\4\0"+
-    "\24\2\1\0\2\2\4\1\4\0\10\2\1\0\44\2\11\0\1\2"+
-    "\71\0\53\105\24\106\1\105\12\3\6\0\6\105\4\106\4\105\3\106"+
-    "\1\105\3\106\2\105\7\106\3\105\4\106\15\105\14\106\1\105\1\106"+
-    "\12\3\4\106\2\105\46\1\12\0\53\1\1\0\1\1\3\0\u0149\1"+
-    "\1\0\4\1\2\0\7\1\1\0\1\1\1\0\4\1\2\0\51\1"+
-    "\1\0\4\1\2\0\41\1\1\0\4\1\2\0\7\1\1\0\1\1"+
-    "\1\0\4\1\2\0\17\1\1\0\71\1\1\0\4\1\2\0\103\1"+
-    "\4\0\1\2\40\0\20\1\20\0\125\1\14\0\u026c\1\2\0\21\1"+
-    "\1\0\32\1\5\0\113\1\3\0\3\1\17\0\15\1\1\0\4\1"+
-    "\3\2\13\0\22\1\3\2\13\0\22\1\2\2\14\0\15\1\1\0"+
-    "\3\1\1\0\2\2\14\0\64\105\40\106\3\0\1\105\4\0\1\105"+
-    "\1\106\2\0\12\3\41\0\3\2\2\0\12\3\6\0\130\1\10\0"+
-    "\51\1\1\2\1\1\5\0\106\1\12\0\35\1\3\0\14\2\4\0"+
-    "\14\2\12\0\12\3\36\105\2\0\5\105\13\0\54\105\4\0\21\106"+
-    "\7\105\2\106\6\0\13\3\3\0\2\105\40\0\27\1\5\2\4\0"+
-    "\65\105\12\106\1\0\35\106\2\0\1\2\12\3\6\0\12\3\6\0"+
-    "\16\105\122\0\5\2\57\1\21\2\7\1\4\0\12\3\21\0\11\2"+
-    "\14\0\3\2\36\1\12\2\3\0\2\1\12\3\106\0\44\1\24\2"+
-    "\10\0\12\3\3\0\3\1\12\3\44\1\122\0\3\2\1\0\25\2"+
-    "\4\1\1\2\4\1\1\2\15\0\300\1\47\2\26\0\3\2\u0116\1"+
-    "\2\0\6\1\2\0\46\1\2\0\6\1\2\0\10\1\1\0\1\1"+
-    "\1\0\1\1\1\0\1\1\1\0\37\1\2\0\65\1\1\0\7\1"+
-    "\1\0\1\1\3\0\3\1\1\0\7\1\3\0\4\1\2\0\6\1"+
-    "\4\0\15\1\5\0\3\1\1\0\7\1\17\0\4\2\10\0\2\7"+
-    "\12\0\1\7\2\0\1\5\2\0\5\2\20\0\2\10\3\0\1\6"+
-    "\17\0\1\10\13\0\5\2\5\0\6\2\1\0\1\1\15\0\1\1"+
-    "\20\0\5\1\73\0\41\2\21\0\1\1\4\0\1\1\2\0\12\1"+
-    "\1\0\1\1\3\0\5\1\6\0\1\1\1\0\1\1\1\0\1\1"+
-    "\1\0\4\1\1\0\13\1\2\0\4\1\5\0\5\1\4\0\1\1"+
-    "\21\0\51\1\u032d\0\64\1\u0716\0\57\1\1\0\57\1\1\0\205\1"+
-    "\6\0\4\1\3\2\16\0\46\1\12\0\66\1\11\0\1\1\20\0"+
-    "\27\1\11\0\7\1\1\0\7\1\1\0\7\1\1\0\7\1\1\0"+
-    "\7\1\1\0\7\1\1\0\7\1\1\0\7\1\1\0\40\2\57\0"+
-    "\1\1\120\0\32\107\1\0\131\107\14\0\326\107\57\0\1\1\1\0"+
-    "\1\107\31\0\11\107\6\2\1\0\5\4\2\0\3\107\1\1\1\1"+
-    "\4\0\126\110\2\0\2\2\2\4\3\110\133\4\1\0\4\4\5\0"+
-    "\51\1\3\0\136\1\21\0\30\1\70\0\20\4\320\0\57\4\1\0"+
-    "\130\4\250\0\u19b6\107\112\0\u51cc\107\64\0\u048d\1\103\0\56\1\2\0"+
-    "\u010d\1\3\0\20\1\12\3\2\1\24\0\40\1\2\0\15\1\4\2"+
-    "\11\0\2\2\1\0\31\1\10\0\120\1\2\2\45\0\11\1\2\0"+
-    "\147\1\2\0\2\1\156\0\7\1\1\2\3\1\1\2\4\1\1\2"+
-    "\27\1\5\2\30\0\64\1\14\0\2\2\62\1\21\2\13\0\12\3"+
-    "\6\0\22\2\6\1\3\0\1\1\4\0\12\3\34\1\10\2\2\0"+
-    "\27\1\15\2\14\0\35\1\3\0\4\2\57\1\16\2\16\0\1\1"+
-    "\12\3\46\0\51\1\16\2\11\0\3\1\1\2\10\1\2\2\2\0"+
-    "\12\3\6\0\33\105\1\106\4\0\60\105\1\106\1\105\3\106\2\105"+
-    "\2\106\5\105\2\106\1\105\1\106\1\105\30\0\5\105\340\0\43\1"+
-    "\10\2\1\0\2\2\2\0\12\3\6\0\u2ba4\1\14\0\27\1\4\0"+
-    "\61\1\u2104\0\u012e\107\2\0\76\107\2\0\152\107\46\0\7\1\14\0"+
-    "\5\1\5\0\1\1\1\2\12\1\1\0\15\1\1\0\5\1\1\0"+
-    "\1\1\1\0\2\1\1\0\2\1\1\0\154\1\41\0\u016b\1\22\0"+
-    "\100\1\2\0\66\1\50\0\14\1\4\0\20\2\1\6\2\0\1\5"+
-    "\1\6\13\0\7\2\14\0\2\10\30\0\3\10\1\6\1\0\1\7"+
-    "\1\0\1\6\1\5\32\0\5\1\1\0\207\1\2\0\1\2\7\0"+
-    "\1\7\4\0\1\6\1\0\1\7\1\0\12\3\1\5\1\6\5\0"+
-    "\32\1\4\0\1\10\1\0\32\1\13\0\70\4\2\2\37\1\3\0"+
-    "\6\1\2\0\6\1\2\0\6\1\2\0\3\1\34\0\3\2\4\0";
+    "\2\6\2\0\13\2\5\0\53\1\25\2\12\3\1\0\1\3\1\6"+
+    "\1\0\2\1\1\2\143\1\1\0\1\1\10\2\1\0\6\2\2\1"+
+    "\2\2\1\0\4\2\2\1\12\3\3\1\2\0\1\1\17\0\1\2"+
+    "\1\1\1\2\36\1\33\2\2\0\131\1\13\2\1\1\16\0\12\3"+
+    "\41\1\11\2\2\1\2\0\1\6\1\0\1\1\5\0\26\1\4\2"+
+    "\1\1\11\2\1\1\3\2\1\1\5\2\22\0\31\1\3\2\244\0"+
+    "\4\2\66\1\3\2\1\1\22\2\1\1\7\2\12\1\2\2\2\0"+
+    "\12\3\1\0\7\1\1\0\7\1\1\0\3\2\1\0\10\1\2\0"+
+    "\2\1\2\0\26\1\1\0\7\1\1\0\1\1\3\0\4\1\2\0"+
+    "\1\2\1\1\7\2\2\0\2\2\2\0\3\2\1\1\10\0\1\2"+
+    "\4\0\2\1\1\0\3\1\2\2\2\0\12\3\2\1\17\0\3\2"+
+    "\1\0\6\1\4\0\2\1\2\0\26\1\1\0\7\1\1\0\2\1"+
+    "\1\0\2\1\1\0\2\1\2\0\1\2\1\0\5\2\4\0\2\2"+
+    "\2\0\3\2\3\0\1\2\7\0\4\1\1\0\1\1\7\0\12\3"+
+    "\2\2\3\1\1\2\13\0\3\2\1\0\11\1\1\0\3\1\1\0"+
+    "\26\1\1\0\7\1\1\0\2\1\1\0\5\1\2\0\1\2\1\1"+
+    "\10\2\1\0\3\2\1\0\3\2\2\0\1\1\17\0\2\1\2\2"+
+    "\2\0\12\3\21\0\3\2\1\0\10\1\2\0\2\1\2\0\26\1"+
+    "\1\0\7\1\1\0\2\1\1\0\5\1\2\0\1\2\1\1\7\2"+
+    "\2\0\2\2\2\0\3\2\10\0\2\2\4\0\2\1\1\0\3\1"+
+    "\2\2\2\0\12\3\1\0\1\1\20\0\1\2\1\1\1\0\6\1"+
+    "\3\0\3\1\1\0\4\1\3\0\2\1\1\0\1\1\1\0\2\1"+
+    "\3\0\2\1\3\0\3\1\3\0\14\1\4\0\5\2\3\0\3\2"+
+    "\1\0\4\2\2\0\1\1\6\0\1\2\16\0\12\3\21\0\3\2"+
+    "\1\0\10\1\1\0\3\1\1\0\27\1\1\0\12\1\1\0\5\1"+
+    "\3\0\1\1\7\2\1\0\3\2\1\0\4\2\7\0\2\2\1\0"+
+    "\2\1\6\0\2\1\2\2\2\0\12\3\22\0\2\2\1\0\10\1"+
+    "\1\0\3\1\1\0\27\1\1\0\12\1\1\0\5\1\2\0\1\2"+
+    "\1\1\7\2\1\0\3\2\1\0\4\2\7\0\2\2\7\0\1\1"+
+    "\1\0\2\1\2\2\2\0\12\3\1\0\2\1\17\0\2\2\1\0"+
+    "\10\1\1\0\3\1\1\0\51\1\2\0\1\1\7\2\1\0\3\2"+
+    "\1\0\4\2\1\1\10\0\1\2\10\0\2\1\2\2\2\0\12\3"+
+    "\12\0\6\1\2\0\2\2\1\0\22\1\3\0\30\1\1\0\11\1"+
+    "\1\0\1\1\2\0\7\1\3\0\1\2\4\0\6\2\1\0\1\2"+
+    "\1\0\10\2\22\0\2\2\15\0\60\105\1\106\2\105\7\106\5\0"+
+    "\7\105\10\106\1\0\12\3\47\0\2\105\1\0\1\105\2\0\2\105"+
+    "\1\0\1\105\2\0\1\105\6\0\4\105\1\0\7\105\1\0\3\105"+
+    "\1\0\1\105\1\0\1\105\2\0\2\105\1\0\4\105\1\106\2\105"+
+    "\6\106\1\0\2\106\1\105\2\0\5\105\1\0\1\105\1\0\6\106"+
+    "\2\0\12\3\2\0\2\105\42\0\1\1\27\0\2\2\6\0\12\3"+
+    "\13\0\1\2\1\0\1\2\1\0\1\2\4\0\2\2\10\1\1\0"+
+    "\44\1\4\0\24\2\1\0\2\2\5\1\13\2\1\0\44\2\11\0"+
+    "\1\2\71\0\53\105\24\106\1\105\12\3\6\0\6\105\4\106\4\105"+
+    "\3\106\1\105\3\106\2\105\7\106\3\105\4\106\15\105\14\106\1\105"+
+    "\1\106\12\3\4\106\2\105\46\1\12\0\53\1\1\0\1\1\3\0"+
+    "\u0149\1\1\0\4\1\2\0\7\1\1\0\1\1\1\0\4\1\2\0"+
+    "\51\1\1\0\4\1\2\0\41\1\1\0\4\1\2\0\7\1\1\0"+
+    "\1\1\1\0\4\1\2\0\17\1\1\0\71\1\1\0\4\1\2\0"+
+    "\103\1\2\0\3\2\40\0\20\1\20\0\125\1\14\0\u026c\1\2\0"+
+    "\21\1\1\0\32\1\5\0\113\1\3\0\3\1\17\0\15\1\1\0"+
+    "\4\1\3\2\13\0\22\1\3\2\13\0\22\1\2\2\14\0\15\1"+
+    "\1\0\3\1\1\0\2\2\14\0\64\105\40\106\3\0\1\105\4\0"+
+    "\1\105\1\106\2\0\12\3\41\0\3\2\2\0\12\3\6\0\130\1"+
+    "\10\0\51\1\1\2\1\1\5\0\106\1\12\0\35\1\3\0\14\2"+
+    "\4\0\14\2\12\0\12\3\36\105\2\0\5\105\13\0\54\105\4\0"+
+    "\21\106\7\105\2\106\6\0\12\3\1\105\3\0\2\105\40\0\27\1"+
+    "\5\2\4\0\65\105\12\106\1\0\35\106\2\0\1\2\12\3\6\0"+
+    "\12\3\6\0\16\105\122\0\5\2\57\1\21\2\7\1\4\0\12\3"+
+    "\21\0\11\2\14\0\3\2\36\1\12\2\3\0\2\1\12\3\6\0"+
+    "\46\1\16\2\14\0\44\1\24\2\10\0\12\3\3\0\3\1\12\3"+
+    "\44\1\122\0\3\2\1\0\25\2\4\1\1\2\4\1\1\2\15\0"+
+    "\300\1\47\2\25\0\4\2\u0116\1\2\0\6\1\2\0\46\1\2\0"+
+    "\6\1\2\0\10\1\1\0\1\1\1\0\1\1\1\0\1\1\1\0"+
+    "\37\1\2\0\65\1\1\0\7\1\1\0\1\1\3\0\3\1\1\0"+
+    "\7\1\3\0\4\1\2\0\6\1\4\0\15\1\5\0\3\1\1\0"+
+    "\7\1\17\0\4\2\10\0\2\7\12\0\1\7\2\0\1\5\2\0"+
+    "\5\2\20\0\2\10\3\0\1\6\17\0\1\10\13\0\5\2\5\0"+
+    "\6\2\1\0\1\1\15\0\1\1\20\0\15\1\63\0\41\2\21\0"+
+    "\1\1\4\0\1\1\2\0\12\1\1\0\1\1\3\0\5\1\6\0"+
+    "\1\1\1\0\1\1\1\0\1\1\1\0\4\1\1\0\13\1\2\0"+
+    "\4\1\5\0\5\1\4\0\1\1\21\0\51\1\u032d\0\64\1\u0716\0"+
+    "\57\1\1\0\57\1\1\0\205\1\6\0\4\1\3\2\16\0\46\1"+
+    "\12\0\66\1\11\0\1\1\17\0\1\2\27\1\11\0\7\1\1\0"+
+    "\7\1\1\0\7\1\1\0\7\1\1\0\7\1\1\0\7\1\1\0"+
+    "\7\1\1\0\7\1\1\0\40\2\57\0\1\1\120\0\32\107\1\0"+
+    "\131\107\14\0\326\107\57\0\1\1\1\0\1\107\31\0\11\107\6\2"+
+    "\1\0\5\4\2\0\3\107\1\1\1\1\4\0\126\110\2\0\2\2"+
+    "\2\4\3\110\133\4\1\0\4\4\5\0\51\1\3\0\136\1\21\0"+
+    "\33\1\65\0\20\4\320\0\57\4\1\0\130\4\250\0\u19b6\107\112\0"+
+    "\u51cc\107\64\0\u048d\1\103\0\56\1\2\0\u010d\1\3\0\20\1\12\3"+
+    "\2\1\24\0\57\1\4\2\11\0\2\2\1\0\31\1\10\0\120\1"+
+    "\2\2\45\0\11\1\2\0\147\1\2\0\4\1\1\0\2\1\16\0"+
+    "\12\1\120\0\10\1\1\2\3\1\1\2\4\1\1\2\27\1\5\2"+
+    "\30\0\64\1\14\0\2\2\62\1\21\2\13\0\12\3\6\0\22\2"+
+    "\6\1\3\0\1\1\4\0\12\3\34\1\10\2\2\0\27\1\15\2"+
+    "\14\0\35\1\3\0\4\2\57\1\16\2\16\0\1\1\12\3\46\0"+
+    "\51\1\16\2\11\0\3\1\1\2\10\1\2\2\2\0\12\3\6\0"+
+    "\33\105\1\106\4\0\60\105\1\106\1\105\3\106\2\105\2\106\5\105"+
+    "\2\106\1\105\1\106\1\105\30\0\5\105\41\0\6\1\2\0\6\1"+
+    "\2\0\6\1\11\0\7\1\1\0\7\1\221\0\43\1\10\2\1\0"+
+    "\2\2\2\0\12\3\6\0\u2ba4\1\14\0\27\1\4\0\61\1\u2104\0"+
+    "\u012e\107\2\0\76\107\2\0\152\107\46\0\7\1\14\0\5\1\5\0"+
+    "\1\1\1\2\12\1\1\0\15\1\1\0\5\1\1\0\1\1\1\0"+
+    "\2\1\1\0\2\1\1\0\154\1\41\0\u016b\1\22\0\100\1\2\0"+
+    "\66\1\50\0\14\1\4\0\20\2\1\6\2\0\1\5\1\6\13\0"+
+    "\7\2\14\0\2\10\30\0\3\10\1\6\1\0\1\7\1\0\1\6"+
+    "\1\5\32\0\5\1\1\0\207\1\2\0\1\2\7\0\1\7\4\0"+
+    "\1\6\1\0\1\7\1\0\12\3\1\5\1\6\5\0\32\1\4\0"+
+    "\1\10\1\0\32\1\13\0\70\4\2\2\37\1\3\0\6\1\2\0"+
+    "\6\1\2\0\6\1\2\0\3\1\34\0\3\2\4\0";
 
   /** 
    * Translates characters to character classes
@@ -2440,7 +2442,7 @@ public final class StandardTokenizerImpl
     char [] map = new char[0x10000];
     int i = 0;  /* index in packed string  */
     int j = 0;  /* index in unpacked array */
-    while (i < 2300) {
+    while (i < 2336) {
       int  count = packed.charAt(i++);
       char value = packed.charAt(i++);
       do map[j++] = value; while (--count > 0);
@@ -2713,36 +2715,36 @@ public final class StandardTokenizerImpl
       zzMarkedPos = zzMarkedPosL;
 
       switch (zzAction < 0 ? zzAction : ZZ_ACTION[zzAction]) {
-        case 2: 
-          { return WORD_TYPE;
+        case 1: 
+          { /* Not numeric, word, ideographic, hiragana, or SE Asian -- ignore it. */
           }
         case 9: break;
-        case 4: 
-          { return SOUTH_EAST_ASIAN_TYPE;
+        case 6: 
+          { return HIRAGANA_TYPE;
           }
         case 10: break;
-        case 8: 
-          { return URL_TYPE;
+        case 2: 
+          { return WORD_TYPE;
           }
         case 11: break;
-        case 7: 
-          { return EMAIL_TYPE;
+        case 8: 
+          { return URL_TYPE;
           }
         case 12: break;
         case 5: 
           { return IDEOGRAPHIC_TYPE;
           }
         case 13: break;
-        case 1: 
-          { /* Not numeric, word, ideographic, hiragana, or SE Asian -- ignore it. */
+        case 7: 
+          { return EMAIL_TYPE;
           }
         case 14: break;
         case 3: 
           { return NUMERIC_TYPE;
           }
         case 15: break;
-        case 6: 
-          { return HIRAGANA_TYPE;
+        case 4: 
+          { return SOUTH_EAST_ASIAN_TYPE;
           }
         case 16: break;
         default: 

Modified: lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.jflex
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.jflex?rev=1022826&r1=1022825&r2=1022826&view=diff
==============================================================================
--- lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.jflex (original)
+++ lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.jflex Fri Oct 15 05:41:54 2010
@@ -47,7 +47,7 @@ import org.apache.lucene.analysis.tokena
  */
 %%
 
-%unicode 5.2
+%unicode 6.0
 %integer
 %final
 %public
@@ -234,7 +234,7 @@ EMAIL = {EMAILlocalPart} "@" ({DomainNam
 //    annex.  That means that satisfactory treatment of languages like Chinese
 //    or Thai requires special handling.
 // 
-// In Unicode 5.2, only one character has the \p{Line_Break = Contingent_Break}
+// In Unicode 6.0, only one character has the \p{Line_Break = Contingent_Break}
 // property: U+FFFC ( ￼ ) OBJECT REPLACEMENT CHARACTER.
 //
 // In the ICU implementation of UAX#29, \p{Line_Break = Complex_Context}

Modified: lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29Tokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29Tokenizer.java?rev=1022826&r1=1022825&r2=1022826&view=diff
==============================================================================
--- lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29Tokenizer.java (original)
+++ lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29Tokenizer.java Fri Oct 15 05:41:54 2010
@@ -1,4 +1,4 @@
-/* The following code was generated by JFlex 1.5.0-SNAPSHOT on 10/3/10 9:07 AM */
+/* The following code was generated by JFlex 1.5.0-SNAPSHOT on 10/13/10 12:12 AM */
 
 package org.apache.lucene.analysis.standard;
 
@@ -85,107 +85,109 @@ public final class UAX29Tokenizer extend
     "\1\0\u01ca\1\4\0\14\1\16\0\5\1\7\0\1\1\1\0\1\1"+
     "\21\0\160\2\5\1\1\0\2\1\2\0\4\1\1\6\7\0\1\1"+
     "\1\5\3\1\1\0\1\1\1\0\24\1\1\0\123\1\1\0\213\1"+
-    "\1\0\7\2\234\1\13\0\46\1\2\0\1\1\7\0\47\1\1\0"+
+    "\1\0\7\2\236\1\11\0\46\1\2\0\1\1\7\0\47\1\1\0"+
     "\1\6\7\0\55\2\1\0\1\2\1\0\2\2\1\0\2\2\1\0"+
     "\1\2\10\0\33\1\5\0\4\1\1\5\13\0\4\2\10\0\2\6"+
-    "\2\0\13\2\6\0\52\1\24\2\1\0\12\3\1\0\1\3\1\6"+
-    "\1\0\2\1\1\2\143\1\1\0\1\1\17\2\2\1\2\2\1\0"+
-    "\4\2\2\1\12\3\3\1\2\0\1\1\17\0\1\2\1\1\1\2"+
-    "\36\1\33\2\2\0\131\1\13\2\1\1\16\0\12\3\41\1\11\2"+
-    "\2\1\2\0\1\6\1\0\1\1\5\0\26\1\4\2\1\1\11\2"+
-    "\1\1\3\2\1\1\5\2\322\0\4\2\66\1\2\0\1\2\1\1"+
-    "\21\2\1\0\1\1\5\2\2\0\12\1\2\2\2\0\12\3\1\0"+
-    "\2\1\6\0\7\1\1\0\3\2\1\0\10\1\2\0\2\1\2\0"+
-    "\26\1\1\0\7\1\1\0\1\1\3\0\4\1\2\0\1\2\1\1"+
-    "\7\2\2\0\2\2\2\0\3\2\1\1\10\0\1\2\4\0\2\1"+
-    "\1\0\3\1\2\2\2\0\12\3\2\1\17\0\3\2\1\0\6\1"+
-    "\4\0\2\1\2\0\26\1\1\0\7\1\1\0\2\1\1\0\2\1"+
-    "\1\0\2\1\2\0\1\2\1\0\5\2\4\0\2\2\2\0\3\2"+
-    "\3\0\1\2\7\0\4\1\1\0\1\1\7\0\12\3\2\2\3\1"+
-    "\1\2\13\0\3\2\1\0\11\1\1\0\3\1\1\0\26\1\1\0"+
-    "\7\1\1\0\2\1\1\0\5\1\2\0\1\2\1\1\10\2\1\0"+
-    "\3\2\1\0\3\2\2\0\1\1\17\0\2\1\2\2\2\0\12\3"+
-    "\21\0\3\2\1\0\10\1\2\0\2\1\2\0\26\1\1\0\7\1"+
-    "\1\0\2\1\1\0\5\1\2\0\1\2\1\1\7\2\2\0\2\2"+
-    "\2\0\3\2\10\0\2\2\4\0\2\1\1\0\3\1\2\2\2\0"+
-    "\12\3\1\0\1\1\20\0\1\2\1\1\1\0\6\1\3\0\3\1"+
-    "\1\0\4\1\3\0\2\1\1\0\1\1\1\0\2\1\3\0\2\1"+
-    "\3\0\3\1\3\0\14\1\4\0\5\2\3\0\3\2\1\0\4\2"+
-    "\2\0\1\1\6\0\1\2\16\0\12\3\21\0\3\2\1\0\10\1"+
-    "\1\0\3\1\1\0\27\1\1\0\12\1\1\0\5\1\3\0\1\1"+
-    "\7\2\1\0\3\2\1\0\4\2\7\0\2\2\1\0\2\1\6\0"+
-    "\2\1\2\2\2\0\12\3\22\0\2\2\1\0\10\1\1\0\3\1"+
-    "\1\0\27\1\1\0\12\1\1\0\5\1\2\0\1\2\1\1\7\2"+
-    "\1\0\3\2\1\0\4\2\7\0\2\2\7\0\1\1\1\0\2\1"+
-    "\2\2\2\0\12\3\22\0\2\2\1\0\10\1\1\0\3\1\1\0"+
-    "\27\1\1\0\20\1\3\0\1\1\7\2\1\0\3\2\1\0\4\2"+
-    "\11\0\1\2\10\0\2\1\2\2\2\0\12\3\12\0\6\1\2\0"+
-    "\2\2\1\0\22\1\3\0\30\1\1\0\11\1\1\0\1\1\2\0"+
-    "\7\1\3\0\1\2\4\0\6\2\1\0\1\2\1\0\10\2\22\0"+
-    "\2\2\15\0\60\11\1\12\2\11\7\12\5\0\7\11\10\12\1\0"+
-    "\12\3\47\0\2\11\1\0\1\11\2\0\2\11\1\0\1\11\2\0"+
-    "\1\11\6\0\4\11\1\0\7\11\1\0\3\11\1\0\1\11\1\0"+
-    "\1\11\2\0\2\11\1\0\4\11\1\12\2\11\6\12\1\0\2\12"+
-    "\1\11\2\0\5\11\1\0\1\11\1\0\6\12\2\0\12\3\2\0"+
-    "\2\11\42\0\1\1\27\0\2\2\6\0\12\3\13\0\1\2\1\0"+
-    "\1\2\1\0\1\2\4\0\2\2\10\1\1\0\44\1\4\0\24\2"+
-    "\1\0\2\2\4\1\4\0\10\2\1\0\44\2\11\0\1\2\71\0"+
-    "\53\11\24\12\1\11\12\3\6\0\6\11\4\12\4\11\3\12\1\11"+
-    "\3\12\2\11\7\12\3\11\4\12\15\11\14\12\1\11\1\12\12\3"+
-    "\4\12\2\11\46\1\12\0\53\1\1\0\1\1\3\0\u0149\1\1\0"+
-    "\4\1\2\0\7\1\1\0\1\1\1\0\4\1\2\0\51\1\1\0"+
-    "\4\1\2\0\41\1\1\0\4\1\2\0\7\1\1\0\1\1\1\0"+
-    "\4\1\2\0\17\1\1\0\71\1\1\0\4\1\2\0\103\1\4\0"+
-    "\1\2\40\0\20\1\20\0\125\1\14\0\u026c\1\2\0\21\1\1\0"+
-    "\32\1\5\0\113\1\3\0\3\1\17\0\15\1\1\0\4\1\3\2"+
-    "\13\0\22\1\3\2\13\0\22\1\2\2\14\0\15\1\1\0\3\1"+
-    "\1\0\2\2\14\0\64\11\40\12\3\0\1\11\4\0\1\11\1\12"+
-    "\2\0\12\3\41\0\3\2\2\0\12\3\6\0\130\1\10\0\51\1"+
-    "\1\2\1\1\5\0\106\1\12\0\35\1\3\0\14\2\4\0\14\2"+
-    "\12\0\12\3\36\11\2\0\5\11\13\0\54\11\4\0\21\12\7\11"+
-    "\2\12\6\0\13\3\3\0\2\11\40\0\27\1\5\2\4\0\65\11"+
-    "\12\12\1\0\35\12\2\0\1\2\12\3\6\0\12\3\6\0\16\11"+
-    "\122\0\5\2\57\1\21\2\7\1\4\0\12\3\21\0\11\2\14\0"+
-    "\3\2\36\1\12\2\3\0\2\1\12\3\106\0\44\1\24\2\10\0"+
-    "\12\3\3\0\3\1\12\3\44\1\122\0\3\2\1\0\25\2\4\1"+
-    "\1\2\4\1\1\2\15\0\300\1\47\2\26\0\3\2\u0116\1\2\0"+
-    "\6\1\2\0\46\1\2\0\6\1\2\0\10\1\1\0\1\1\1\0"+
-    "\1\1\1\0\1\1\1\0\37\1\2\0\65\1\1\0\7\1\1\0"+
-    "\1\1\3\0\3\1\1\0\7\1\3\0\4\1\2\0\6\1\4\0"+
-    "\15\1\5\0\3\1\1\0\7\1\17\0\4\2\10\0\2\7\12\0"+
-    "\1\7\2\0\1\5\2\0\5\2\20\0\2\10\3\0\1\6\17\0"+
-    "\1\10\13\0\5\2\5\0\6\2\1\0\1\1\15\0\1\1\20\0"+
-    "\5\1\73\0\41\2\21\0\1\1\4\0\1\1\2\0\12\1\1\0"+
-    "\1\1\3\0\5\1\6\0\1\1\1\0\1\1\1\0\1\1\1\0"+
-    "\4\1\1\0\13\1\2\0\4\1\5\0\5\1\4\0\1\1\21\0"+
-    "\51\1\u032d\0\64\1\u0716\0\57\1\1\0\57\1\1\0\205\1\6\0"+
-    "\4\1\3\2\16\0\46\1\12\0\66\1\11\0\1\1\20\0\27\1"+
-    "\11\0\7\1\1\0\7\1\1\0\7\1\1\0\7\1\1\0\7\1"+
-    "\1\0\7\1\1\0\7\1\1\0\7\1\1\0\40\2\57\0\1\1"+
-    "\120\0\32\13\1\0\131\13\14\0\326\13\57\0\1\1\1\0\1\13"+
-    "\31\0\11\13\6\2\1\0\5\4\2\0\3\13\1\1\1\1\4\0"+
-    "\126\14\2\0\2\2\2\4\3\14\133\4\1\0\4\4\5\0\51\1"+
-    "\3\0\136\1\21\0\30\1\70\0\20\4\320\0\57\4\1\0\130\4"+
-    "\250\0\u19b6\13\112\0\u51cc\13\64\0\u048d\1\103\0\56\1\2\0\u010d\1"+
-    "\3\0\20\1\12\3\2\1\24\0\40\1\2\0\15\1\4\2\11\0"+
-    "\2\2\1\0\31\1\10\0\120\1\2\2\45\0\11\1\2\0\147\1"+
-    "\2\0\2\1\156\0\7\1\1\2\3\1\1\2\4\1\1\2\27\1"+
-    "\5\2\30\0\64\1\14\0\2\2\62\1\21\2\13\0\12\3\6\0"+
-    "\22\2\6\1\3\0\1\1\4\0\12\3\34\1\10\2\2\0\27\1"+
-    "\15\2\14\0\35\1\3\0\4\2\57\1\16\2\16\0\1\1\12\3"+
-    "\46\0\51\1\16\2\11\0\3\1\1\2\10\1\2\2\2\0\12\3"+
-    "\6\0\33\11\1\12\4\0\60\11\1\12\1\11\3\12\2\11\2\12"+
-    "\5\11\2\12\1\11\1\12\1\11\30\0\5\11\340\0\43\1\10\2"+
-    "\1\0\2\2\2\0\12\3\6\0\u2ba4\1\14\0\27\1\4\0\61\1"+
-    "\u2104\0\u012e\13\2\0\76\13\2\0\152\13\46\0\7\1\14\0\5\1"+
-    "\5\0\1\1\1\2\12\1\1\0\15\1\1\0\5\1\1\0\1\1"+
-    "\1\0\2\1\1\0\2\1\1\0\154\1\41\0\u016b\1\22\0\100\1"+
-    "\2\0\66\1\50\0\14\1\4\0\20\2\1\6\2\0\1\5\1\6"+
-    "\13\0\7\2\14\0\2\10\30\0\3\10\1\6\1\0\1\7\1\0"+
-    "\1\6\1\5\32\0\5\1\1\0\207\1\2\0\1\2\7\0\1\7"+
-    "\4\0\1\6\1\0\1\7\1\0\12\3\1\5\1\6\5\0\32\1"+
-    "\4\0\1\10\1\0\32\1\13\0\70\4\2\2\37\1\3\0\6\1"+
-    "\2\0\6\1\2\0\6\1\2\0\3\1\34\0\3\2\4\0";
+    "\2\0\13\2\5\0\53\1\25\2\12\3\1\0\1\3\1\6\1\0"+
+    "\2\1\1\2\143\1\1\0\1\1\10\2\1\0\6\2\2\1\2\2"+
+    "\1\0\4\2\2\1\12\3\3\1\2\0\1\1\17\0\1\2\1\1"+
+    "\1\2\36\1\33\2\2\0\131\1\13\2\1\1\16\0\12\3\41\1"+
+    "\11\2\2\1\2\0\1\6\1\0\1\1\5\0\26\1\4\2\1\1"+
+    "\11\2\1\1\3\2\1\1\5\2\22\0\31\1\3\2\244\0\4\2"+
+    "\66\1\3\2\1\1\22\2\1\1\7\2\12\1\2\2\2\0\12\3"+
+    "\1\0\7\1\1\0\7\1\1\0\3\2\1\0\10\1\2\0\2\1"+
+    "\2\0\26\1\1\0\7\1\1\0\1\1\3\0\4\1\2\0\1\2"+
+    "\1\1\7\2\2\0\2\2\2\0\3\2\1\1\10\0\1\2\4\0"+
+    "\2\1\1\0\3\1\2\2\2\0\12\3\2\1\17\0\3\2\1\0"+
+    "\6\1\4\0\2\1\2\0\26\1\1\0\7\1\1\0\2\1\1\0"+
+    "\2\1\1\0\2\1\2\0\1\2\1\0\5\2\4\0\2\2\2\0"+
+    "\3\2\3\0\1\2\7\0\4\1\1\0\1\1\7\0\12\3\2\2"+
+    "\3\1\1\2\13\0\3\2\1\0\11\1\1\0\3\1\1\0\26\1"+
+    "\1\0\7\1\1\0\2\1\1\0\5\1\2\0\1\2\1\1\10\2"+
+    "\1\0\3\2\1\0\3\2\2\0\1\1\17\0\2\1\2\2\2\0"+
+    "\12\3\21\0\3\2\1\0\10\1\2\0\2\1\2\0\26\1\1\0"+
+    "\7\1\1\0\2\1\1\0\5\1\2\0\1\2\1\1\7\2\2\0"+
+    "\2\2\2\0\3\2\10\0\2\2\4\0\2\1\1\0\3\1\2\2"+
+    "\2\0\12\3\1\0\1\1\20\0\1\2\1\1\1\0\6\1\3\0"+
+    "\3\1\1\0\4\1\3\0\2\1\1\0\1\1\1\0\2\1\3\0"+
+    "\2\1\3\0\3\1\3\0\14\1\4\0\5\2\3\0\3\2\1\0"+
+    "\4\2\2\0\1\1\6\0\1\2\16\0\12\3\21\0\3\2\1\0"+
+    "\10\1\1\0\3\1\1\0\27\1\1\0\12\1\1\0\5\1\3\0"+
+    "\1\1\7\2\1\0\3\2\1\0\4\2\7\0\2\2\1\0\2\1"+
+    "\6\0\2\1\2\2\2\0\12\3\22\0\2\2\1\0\10\1\1\0"+
+    "\3\1\1\0\27\1\1\0\12\1\1\0\5\1\2\0\1\2\1\1"+
+    "\7\2\1\0\3\2\1\0\4\2\7\0\2\2\7\0\1\1\1\0"+
+    "\2\1\2\2\2\0\12\3\1\0\2\1\17\0\2\2\1\0\10\1"+
+    "\1\0\3\1\1\0\51\1\2\0\1\1\7\2\1\0\3\2\1\0"+
+    "\4\2\1\1\10\0\1\2\10\0\2\1\2\2\2\0\12\3\12\0"+
+    "\6\1\2\0\2\2\1\0\22\1\3\0\30\1\1\0\11\1\1\0"+
+    "\1\1\2\0\7\1\3\0\1\2\4\0\6\2\1\0\1\2\1\0"+
+    "\10\2\22\0\2\2\15\0\60\11\1\12\2\11\7\12\5\0\7\11"+
+    "\10\12\1\0\12\3\47\0\2\11\1\0\1\11\2\0\2\11\1\0"+
+    "\1\11\2\0\1\11\6\0\4\11\1\0\7\11\1\0\3\11\1\0"+
+    "\1\11\1\0\1\11\2\0\2\11\1\0\4\11\1\12\2\11\6\12"+
+    "\1\0\2\12\1\11\2\0\5\11\1\0\1\11\1\0\6\12\2\0"+
+    "\12\3\2\0\2\11\42\0\1\1\27\0\2\2\6\0\12\3\13\0"+
+    "\1\2\1\0\1\2\1\0\1\2\4\0\2\2\10\1\1\0\44\1"+
+    "\4\0\24\2\1\0\2\2\5\1\13\2\1\0\44\2\11\0\1\2"+
+    "\71\0\53\11\24\12\1\11\12\3\6\0\6\11\4\12\4\11\3\12"+
+    "\1\11\3\12\2\11\7\12\3\11\4\12\15\11\14\12\1\11\1\12"+
+    "\12\3\4\12\2\11\46\1\12\0\53\1\1\0\1\1\3\0\u0149\1"+
+    "\1\0\4\1\2\0\7\1\1\0\1\1\1\0\4\1\2\0\51\1"+
+    "\1\0\4\1\2\0\41\1\1\0\4\1\2\0\7\1\1\0\1\1"+
+    "\1\0\4\1\2\0\17\1\1\0\71\1\1\0\4\1\2\0\103\1"+
+    "\2\0\3\2\40\0\20\1\20\0\125\1\14\0\u026c\1\2\0\21\1"+
+    "\1\0\32\1\5\0\113\1\3\0\3\1\17\0\15\1\1\0\4\1"+
+    "\3\2\13\0\22\1\3\2\13\0\22\1\2\2\14\0\15\1\1\0"+
+    "\3\1\1\0\2\2\14\0\64\11\40\12\3\0\1\11\4\0\1\11"+
+    "\1\12\2\0\12\3\41\0\3\2\2\0\12\3\6\0\130\1\10\0"+
+    "\51\1\1\2\1\1\5\0\106\1\12\0\35\1\3\0\14\2\4\0"+
+    "\14\2\12\0\12\3\36\11\2\0\5\11\13\0\54\11\4\0\21\12"+
+    "\7\11\2\12\6\0\12\3\1\11\3\0\2\11\40\0\27\1\5\2"+
+    "\4\0\65\11\12\12\1\0\35\12\2\0\1\2\12\3\6\0\12\3"+
+    "\6\0\16\11\122\0\5\2\57\1\21\2\7\1\4\0\12\3\21\0"+
+    "\11\2\14\0\3\2\36\1\12\2\3\0\2\1\12\3\6\0\46\1"+
+    "\16\2\14\0\44\1\24\2\10\0\12\3\3\0\3\1\12\3\44\1"+
+    "\122\0\3\2\1\0\25\2\4\1\1\2\4\1\1\2\15\0\300\1"+
+    "\47\2\25\0\4\2\u0116\1\2\0\6\1\2\0\46\1\2\0\6\1"+
+    "\2\0\10\1\1\0\1\1\1\0\1\1\1\0\1\1\1\0\37\1"+
+    "\2\0\65\1\1\0\7\1\1\0\1\1\3\0\3\1\1\0\7\1"+
+    "\3\0\4\1\2\0\6\1\4\0\15\1\5\0\3\1\1\0\7\1"+
+    "\17\0\4\2\10\0\2\7\12\0\1\7\2\0\1\5\2\0\5\2"+
+    "\20\0\2\10\3\0\1\6\17\0\1\10\13\0\5\2\5\0\6\2"+
+    "\1\0\1\1\15\0\1\1\20\0\15\1\63\0\41\2\21\0\1\1"+
+    "\4\0\1\1\2\0\12\1\1\0\1\1\3\0\5\1\6\0\1\1"+
+    "\1\0\1\1\1\0\1\1\1\0\4\1\1\0\13\1\2\0\4\1"+
+    "\5\0\5\1\4\0\1\1\21\0\51\1\u032d\0\64\1\u0716\0\57\1"+
+    "\1\0\57\1\1\0\205\1\6\0\4\1\3\2\16\0\46\1\12\0"+
+    "\66\1\11\0\1\1\17\0\1\2\27\1\11\0\7\1\1\0\7\1"+
+    "\1\0\7\1\1\0\7\1\1\0\7\1\1\0\7\1\1\0\7\1"+
+    "\1\0\7\1\1\0\40\2\57\0\1\1\120\0\32\13\1\0\131\13"+
+    "\14\0\326\13\57\0\1\1\1\0\1\13\31\0\11\13\6\2\1\0"+
+    "\5\4\2\0\3\13\1\1\1\1\4\0\126\14\2\0\2\2\2\4"+
+    "\3\14\133\4\1\0\4\4\5\0\51\1\3\0\136\1\21\0\33\1"+
+    "\65\0\20\4\320\0\57\4\1\0\130\4\250\0\u19b6\13\112\0\u51cc\13"+
+    "\64\0\u048d\1\103\0\56\1\2\0\u010d\1\3\0\20\1\12\3\2\1"+
+    "\24\0\57\1\4\2\11\0\2\2\1\0\31\1\10\0\120\1\2\2"+
+    "\45\0\11\1\2\0\147\1\2\0\4\1\1\0\2\1\16\0\12\1"+
+    "\120\0\10\1\1\2\3\1\1\2\4\1\1\2\27\1\5\2\30\0"+
+    "\64\1\14\0\2\2\62\1\21\2\13\0\12\3\6\0\22\2\6\1"+
+    "\3\0\1\1\4\0\12\3\34\1\10\2\2\0\27\1\15\2\14\0"+
+    "\35\1\3\0\4\2\57\1\16\2\16\0\1\1\12\3\46\0\51\1"+
+    "\16\2\11\0\3\1\1\2\10\1\2\2\2\0\12\3\6\0\33\11"+
+    "\1\12\4\0\60\11\1\12\1\11\3\12\2\11\2\12\5\11\2\12"+
+    "\1\11\1\12\1\11\30\0\5\11\41\0\6\1\2\0\6\1\2\0"+
+    "\6\1\11\0\7\1\1\0\7\1\221\0\43\1\10\2\1\0\2\2"+
+    "\2\0\12\3\6\0\u2ba4\1\14\0\27\1\4\0\61\1\u2104\0\u012e\13"+
+    "\2\0\76\13\2\0\152\13\46\0\7\1\14\0\5\1\5\0\1\1"+
+    "\1\2\12\1\1\0\15\1\1\0\5\1\1\0\1\1\1\0\2\1"+
+    "\1\0\2\1\1\0\154\1\41\0\u016b\1\22\0\100\1\2\0\66\1"+
+    "\50\0\14\1\4\0\20\2\1\6\2\0\1\5\1\6\13\0\7\2"+
+    "\14\0\2\10\30\0\3\10\1\6\1\0\1\7\1\0\1\6\1\5"+
+    "\32\0\5\1\1\0\207\1\2\0\1\2\7\0\1\7\4\0\1\6"+
+    "\1\0\1\7\1\0\12\3\1\5\1\6\5\0\32\1\4\0\1\10"+
+    "\1\0\32\1\13\0\70\4\2\2\37\1\3\0\6\1\2\0\6\1"+
+    "\2\0\6\1\2\0\3\1\34\0\3\2\4\0";
 
   /** 
    * Translates characters to character classes
@@ -530,7 +532,7 @@ public final class UAX29Tokenizer extend
     char [] map = new char[0x10000];
     int i = 0;  /* index in packed string  */
     int j = 0;  /* index in unpacked array */
-    while (i < 2138) {
+    while (i < 2174) {
       int  count = packed.charAt(i++);
       char value = packed.charAt(i++);
       do map[j++] = value; while (--count > 0);
@@ -803,28 +805,28 @@ public final class UAX29Tokenizer extend
       zzMarkedPos = zzMarkedPosL;
 
       switch (zzAction < 0 ? zzAction : ZZ_ACTION[zzAction]) {
-        case 2: 
-          { if (populateAttributes(WORD_TYPE)) return true;
+        case 5: 
+          { if (populateAttributes(IDEOGRAPHIC_TYPE)) return true;
           }
         case 7: break;
-        case 6: 
-          { if (populateAttributes(HIRAGANA_TYPE)) return true;
+        case 1: 
+          { /* Not numeric, word, ideographic, hiragana, or SE Asian -- ignore it. */
           }
         case 8: break;
-        case 5: 
-          { if (populateAttributes(IDEOGRAPHIC_TYPE)) return true;
+        case 3: 
+          { if (populateAttributes(NUMERIC_TYPE)) return true;
           }
         case 9: break;
-        case 4: 
-          { if (populateAttributes(SOUTH_EAST_ASIAN_TYPE)) return true;
+        case 6: 
+          { if (populateAttributes(HIRAGANA_TYPE)) return true;
           }
         case 10: break;
-        case 3: 
-          { if (populateAttributes(NUMERIC_TYPE)) return true;
+        case 4: 
+          { if (populateAttributes(SOUTH_EAST_ASIAN_TYPE)) return true;
           }
         case 11: break;
-        case 1: 
-          { /* Not numeric, word, ideographic, hiragana, or SE Asian -- ignore it. */
+        case 2: 
+          { if (populateAttributes(WORD_TYPE)) return true;
           }
         case 12: break;
         default: 

Modified: lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29Tokenizer.jflex
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29Tokenizer.jflex?rev=1022826&r1=1022825&r2=1022826&view=diff
==============================================================================
--- lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29Tokenizer.jflex (original)
+++ lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29Tokenizer.jflex Fri Oct 15 05:41:54 2010
@@ -53,7 +53,7 @@ import org.apache.lucene.util.AttributeS
  */
 %%
 
-%unicode 5.2
+%unicode 6.0
 %final
 %public
 %apiprivate
@@ -247,7 +247,7 @@ ExtendNumLetEx = \p{WB:ExtendNumLet}    
 //    annex.  That means that satisfactory treatment of languages like Chinese
 //    or Thai requires special handling.
 // 
-// In Unicode 5.2, only one character has the \p{Line_Break = Contingent_Break}
+// In Unicode 6.0, only one character has the \p{Line_Break = Contingent_Break}
 // property: U+FFFC ( ￼ ) OBJECT REPLACEMENT CHARACTER.
 //
 // In the ICU implementation of UAX#29, \p{Line_Break = Complex_Context}

Modified: lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestStandardAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestStandardAnalyzer.java?rev=1022826&r1=1022825&r2=1022826&view=diff
==============================================================================
--- lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestStandardAnalyzer.java (original)
+++ lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestStandardAnalyzer.java Fri Oct 15 05:41:54 2010
@@ -394,7 +394,7 @@ public class TestStandardAnalyzer extend
   }
 
   public void testUnicodeWordBreaks() throws Exception {
-    WordBreakTestUnicode_5_2_0 wordBreakTest = new WordBreakTestUnicode_5_2_0();
+    WordBreakTestUnicode_6_0_0 wordBreakTest = new WordBreakTestUnicode_6_0_0();
     wordBreakTest.test(a);
   }
 }

Modified: lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestUAX29Tokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestUAX29Tokenizer.java?rev=1022826&r1=1022825&r2=1022826&view=diff
==============================================================================
--- lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestUAX29Tokenizer.java (original)
+++ lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestUAX29Tokenizer.java Fri Oct 15 05:41:54 2010
@@ -198,7 +198,7 @@ public class TestUAX29Tokenizer extends 
   }
   
   public void testUnicodeWordBreaks() throws Exception {
-    WordBreakTestUnicode_5_2_0 wordBreakTest = new WordBreakTestUnicode_5_2_0();
+    WordBreakTestUnicode_6_0_0 wordBreakTest = new WordBreakTestUnicode_6_0_0();
     wordBreakTest.test(a);
   }
 }