You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by sa...@apache.org on 2012/03/19 04:13:53 UTC

svn commit: r1302265 [1/5] - in /lucene/dev/trunk: lucene/ modules/analysis/common/ modules/analysis/common/src/java/org/apache/lucene/analysis/standard/ modules/analysis/common/src/java/org/apache/lucene/analysis/standard/std34/ modules/analysis/commo...

Author: sarowe
Date: Mon Mar 19 03:13:52 2012
New Revision: 1302265

URL: http://svn.apache.org/viewvc?rev=1302265&view=rev
Log:
LUCENE-3880: UAX29URLEmailTokenizer now recognizes emails when the mailto: scheme is prepended.

Added:
    lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/std34/
    lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/std34/ASCIITLD.jflex-macro
      - copied unchanged from r1302114, lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/ASCIITLD.jflex-macro
    lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/std34/SUPPLEMENTARY.jflex-macro
      - copied unchanged from r1302114, lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/SUPPLEMENTARY.jflex-macro
    lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/std34/UAX29URLEmailTokenizerImpl34.java
    lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/std34/UAX29URLEmailTokenizerImpl34.jflex
      - copied, changed from r1302114, lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizerImpl.jflex
Modified:
    lucene/dev/trunk/lucene/CHANGES.txt
    lucene/dev/trunk/modules/analysis/common/build.xml
    lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/ASCIITLD.jflex-macro
    lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizer.java
    lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizerImpl.java
    lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizerImpl.jflex
    lucene/dev/trunk/modules/analysis/common/src/test/org/apache/lucene/analysis/core/TestUAX29URLEmailTokenizer.java

Modified: lucene/dev/trunk/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/CHANGES.txt?rev=1302265&r1=1302264&r2=1302265&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/CHANGES.txt (original)
+++ lucene/dev/trunk/lucene/CHANGES.txt Mon Mar 19 03:13:52 2012
@@ -972,6 +972,9 @@ Bug fixes
 * LUCENE-3876: Fix bug where positions for a document exceeding
   Integer.MAX_VALUE/2 would produce a corrupt index.  
   (Simon Willnauer, Mike Mccandless, Robert Muir)
+
+* LUCENE-3880: UAX29URLEmailTokenizer now recognizes emails when the mailto:
+  scheme is prepended. (Kai Gülzau, Steve Rowe)
     
 Optimizations
 

Modified: lucene/dev/trunk/modules/analysis/common/build.xml
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/analysis/common/build.xml?rev=1302265&r1=1302264&r2=1302265&view=diff
==============================================================================
--- lucene/dev/trunk/modules/analysis/common/build.xml (original)
+++ lucene/dev/trunk/modules/analysis/common/build.xml Mon Mar 19 03:13:52 2012
@@ -98,6 +98,9 @@
     <jflex file="src/java/org/apache/lucene/analysis/standard/std31/UAX29URLEmailTokenizerImpl31.jflex"
            outdir="src/java/org/apache/lucene/analysis/standard/std31"
            nobak="on" />
+    <jflex file="src/java/org/apache/lucene/analysis/standard/std34/UAX29URLEmailTokenizerImpl34.jflex"
+           outdir="src/java/org/apache/lucene/analysis/standard/std34"
+           nobak="on" />
   </target>
   
   <target name="clean-jflex">

Modified: lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/ASCIITLD.jflex-macro
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/ASCIITLD.jflex-macro?rev=1302265&r1=1302264&r2=1302265&view=diff
==============================================================================
--- lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/ASCIITLD.jflex-macro (original)
+++ lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/ASCIITLD.jflex-macro Mon Mar 19 03:13:52 2012
@@ -15,8 +15,8 @@
  */
 
 // Generated from IANA Root Zone Database <http://www.internic.net/zones/root.zone>
-// file version from Thursday, August 4, 2011 11:34:20 AM UTC
-// generated on Thursday, August 4, 2011 11:46:19 PM UTC
+// file version from Sunday, March 18, 2012 4:34:02 AM UTC
+// generated on Sunday, March 18, 2012 4:02:55 PM UTC
 // by org.apache.lucene.analysis.standard.GenerateJflexTLDMacros
 
 ASCIITLD = "." (
@@ -79,6 +79,7 @@ ASCIITLD = "." (
 	| [cC][rR]
 	| [cC][uU]
 	| [cC][vV]
+	| [cC][wW]
 	| [cC][xX]
 	| [cC][yY]
 	| [cC][zZ]
@@ -247,6 +248,7 @@ ASCIITLD = "." (
 	| [sS][tT]
 	| [sS][uU]
 	| [sS][vV]
+	| [sS][xX]
 	| [sS][yY]
 	| [sS][zZ]
 	| [tT][cC]
@@ -288,6 +290,7 @@ ASCIITLD = "." (
 	| [xX][nN]--3[eE]0[bB]707[eE]
 	| [xX][nN]--45[bB][rR][jJ]9[cC]
 	| [xX][nN]--80[aA][kK][hH][bB][yY][kK][nN][jJ]4[fF]
+	| [xX][nN]--80[aA][oO]21[aA]
 	| [xX][nN]--90[aA]3[aA][cC]
 	| [xX][nN]--9[tT]4[bB]11[yY][iI]5[aA]
 	| [xX][nN]--[cC][lL][cC][hH][cC]0[eE][aA]0[bB]2[gG]2[aA]9[gG][cC][dD]

Modified: lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizer.java?rev=1302265&r1=1302264&r2=1302265&view=diff
==============================================================================
--- lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizer.java (original)
+++ lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizer.java Mon Mar 19 03:13:52 2012
@@ -23,8 +23,8 @@ import java.io.InputStreamReader;
 import java.io.Reader;
 
 import org.apache.lucene.analysis.Tokenizer;
-import org.apache.lucene.analysis.standard.std31.StandardTokenizerImpl31;
 import org.apache.lucene.analysis.standard.std31.UAX29URLEmailTokenizerImpl31;
+import org.apache.lucene.analysis.standard.std34.UAX29URLEmailTokenizerImpl34;
 import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
 import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
@@ -128,8 +128,10 @@ public final class UAX29URLEmailTokenize
   }
 
   private static StandardTokenizerInterface getScannerFor(Version matchVersion, Reader input) {
-    if (matchVersion.onOrAfter(Version.LUCENE_34)) {
+    if (matchVersion.onOrAfter(Version.LUCENE_36)) {
       return new UAX29URLEmailTokenizerImpl(input);
+    } else if (matchVersion.onOrAfter(Version.LUCENE_34)) {
+      return new UAX29URLEmailTokenizerImpl34(input);
     } else {
       return new UAX29URLEmailTokenizerImpl31(input);
     }