You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2011/08/08 13:58:00 UTC

svn commit: r1154936 [1/6] - in /lucene/dev/trunk: lucene/ modules/analysis/common/ modules/analysis/common/src/java/org/apache/lucene/analysis/standard/ modules/analysis/common/src/java/org/apache/lucene/analysis/standard/std31/ modules/analysis/commo...

Author: rmuir
Date: Mon Aug  8 11:57:59 2011
New Revision: 1154936

URL: http://svn.apache.org/viewvc?rev=1154936&view=rev
Log:
LUCENE-3361: port url+email tokenizer to standardtokenizerinterface, fix combining marks bug

Added:
    lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizerImpl.java   (with props)
    lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizerImpl.jflex
      - copied, changed from r1154038, lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizer.jflex
    lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/std31/ASCIITLD.jflex-macro
    lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/std31/SUPPLEMENTARY.jflex-macro
    lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/std31/UAX29URLEmailTokenizerImpl31.java   (with props)
    lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/std31/UAX29URLEmailTokenizerImpl31.jflex
Removed:
    lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizer.jflex
Modified:
    lucene/dev/trunk/lucene/CHANGES.txt
    lucene/dev/trunk/modules/analysis/common/build.xml
    lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/ASCIITLD.jflex-macro
    lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizer.java
    lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/std31/StandardTokenizerImpl31.jflex
    lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestUAX29URLEmailTokenizer.java
    lucene/dev/trunk/solr/core/src/java/org/apache/solr/analysis/UAX29URLEmailTokenizerFactory.java
    lucene/dev/trunk/solr/core/src/test/org/apache/solr/analysis/TestUAX29URLEmailTokenizerFactory.java

Modified: lucene/dev/trunk/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/CHANGES.txt?rev=1154936&r1=1154935&r2=1154936&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/CHANGES.txt (original)
+++ lucene/dev/trunk/lucene/CHANGES.txt Mon Aug  8 11:57:59 2011
@@ -542,10 +542,11 @@ Bug fixes
 * LUCENE-3340: Fixed case where IndexWriter was not flushing at
   exactly maxBufferedDeleteTerms (Mike McCandless)
 
-* LUCENE-3358: StandardTokenizer wrongly discarded combining marks attached
-  to Han or Hiragana characters, this is fixed if you supply Version >= 3.4
-  If you supply a previous lucene version, you get the old buggy behavior
-  for backwards compatibility.  (Trejkaz, Robert Muir)
+* LUCENE-3358, LUCENE-3361: StandardTokenizer and UAX29URLEmailTokenizer 
+  wrongly discarded combining marks attached to Han or Hiragana characters; 
+  this is fixed if you supply Version >= 3.4. If you supply a previous 
+  lucene version, you get the old buggy behavior for backwards compatibility.  
+  (Trejkaz, Robert Muir)
 
 New Features
 

Modified: lucene/dev/trunk/modules/analysis/common/build.xml
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/analysis/common/build.xml?rev=1154936&r1=1154935&r2=1154936&view=diff
==============================================================================
--- lucene/dev/trunk/modules/analysis/common/build.xml (original)
+++ lucene/dev/trunk/modules/analysis/common/build.xml Mon Aug  8 11:57:59 2011
@@ -56,7 +56,7 @@
            nobak="on"/>
   </target>
 
-  <target name="jflex-StandardAnalyzer" depends="init,jflex-check,gen-tlds" if="jflex.present">
+  <target name="jflex-StandardAnalyzer" depends="init,jflex-check" if="jflex.present">
     <taskdef classname="jflex.anttask.JFlexTask" name="jflex">
 			<classpath refid="jflex.classpath"/>
     </taskdef>
@@ -76,9 +76,12 @@
     <taskdef classname="jflex.anttask.JFlexTask" name="jflex">
 			<classpath refid="jflex.classpath"/>
     </taskdef>
-    <jflex file="src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizer.jflex"
+    <jflex file="src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizerImpl.jflex"
            outdir="src/java/org/apache/lucene/analysis/standard"
            nobak="on" />
+    <jflex file="src/java/org/apache/lucene/analysis/standard/std31/UAX29URLEmailTokenizerImpl31.jflex"
+           outdir="src/java/org/apache/lucene/analysis/standard/std31"
+           nobak="on" />
   </target>
   
   <target name="clean-jflex">
@@ -86,7 +89,7 @@
       <fileset dir="src/java/org/apache/lucene/analysis/wikipedia" includes="*.java">
         <containsregexp expression="generated.*by.*JFlex"/>
       </fileset>
-      <fileset dir="src/java/org/apache/lucene/analysis/standard" includes="*.java">
+      <fileset dir="src/java/org/apache/lucene/analysis/standard" includes="**/*.java">
         <containsregexp expression="generated.*by.*JFlex"/>
       </fileset>
     </delete>

Modified: lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/ASCIITLD.jflex-macro
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/ASCIITLD.jflex-macro?rev=1154936&r1=1154935&r2=1154936&view=diff
==============================================================================
--- lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/ASCIITLD.jflex-macro (original)
+++ lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/ASCIITLD.jflex-macro Mon Aug  8 11:57:59 2011
@@ -15,8 +15,8 @@
  */
 
 // Generated from IANA Root Zone Database <http://www.internic.net/zones/root.zone>
-// file version from Wednesday, February 9, 2011 12:34:10 PM UTC
-// generated on Wednesday, February 9, 2011 4:45:18 PM UTC
+// file version from Thursday, August 4, 2011 11:34:20 AM UTC
+// generated on Thursday, August 4, 2011 11:46:19 PM UTC
 // by org.apache.lucene.analysis.standard.GenerateJflexTLDMacros
 
 ASCIITLD = "." (
@@ -288,6 +288,7 @@ ASCIITLD = "." (
 	| [xX][nN]--3[eE]0[bB]707[eE]
 	| [xX][nN]--45[bB][rR][jJ]9[cC]
 	| [xX][nN]--80[aA][kK][hH][bB][yY][kK][nN][jJ]4[fF]
+	| [xX][nN]--90[aA]3[aA][cC]
 	| [xX][nN]--9[tT]4[bB]11[yY][iI]5[aA]
 	| [xX][nN]--[cC][lL][cC][hH][cC]0[eE][aA]0[bB]2[gG]2[aA]9[gG][cC][dD]
 	| [xX][nN]--[dD][eE][bB][aA]0[aA][dD]
@@ -305,9 +306,11 @@ ASCIITLD = "." (
 	| [xX][nN]--[kK][gG][bB][eE][cC][hH][tT][vV]
 	| [xX][nN]--[kK][pP][rR][wW]13[dD]
 	| [xX][nN]--[kK][pP][rR][yY]57[dD]
+	| [xX][nN]--[lL][gG][bB][bB][aA][tT]1[aA][dD]8[jJ]
 	| [xX][nN]--[mM][gG][bB][aA][aA][mM]7[aA]8[hH]
 	| [xX][nN]--[mM][gG][bB][aA][yY][hH]7[gG][pP][aA]
 	| [xX][nN]--[mM][gG][bB][bB][hH]1[aA]71[eE]
+	| [xX][nN]--[mM][gG][bB][cC]0[aA]9[aA][zZ][cC][gG]
 	| [xX][nN]--[mM][gG][bB][eE][rR][pP]4[aA]5[dD]4[aA][rR]
 	| [xX][nN]--[oO]3[cC][wW]4[hH]
 	| [xX][nN]--[oO][gG][bB][pP][fF]8[fF][lL]
@@ -321,6 +324,7 @@ ASCIITLD = "." (
 	| [xX][nN]--[yY][fF][rR][oO]4[iI]67[oO]
 	| [xX][nN]--[yY][gG][bB][iI]2[aA][mM][mM][xX]
 	| [xX][nN]--[zZ][cC][kK][zZ][aA][hH]
+	| [xX][xX][xX]
 	| [yY][eE]
 	| [yY][tT]
 	| [zZ][aA]