You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by sa...@apache.org on 2012/03/19 05:14:32 UTC

svn commit: r1302269 [1/5] - in /lucene/dev/branches/branch_3x: ./ lucene/ lucene/core/ lucene/core/src/java/org/apache/lucene/analysis/standard/ lucene/core/src/java/org/apache/lucene/analysis/standard/std34/ lucene/core/src/test/org/apache/lucene/analysis/ solr/

Author: sarowe
Date: Mon Mar 19 04:14:31 2012
New Revision: 1302269

URL: http://svn.apache.org/viewvc?rev=1302269&view=rev
Log:
LUCENE-3880: UAX29URLEmailTokenizer now recognizes emails when the mailto: scheme is prepended.

Added:
    lucene/dev/branches/branch_3x/lucene/core/src/java/org/apache/lucene/analysis/standard/std34/
      - copied from r1302265, lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/std34/
    lucene/dev/branches/branch_3x/lucene/core/src/java/org/apache/lucene/analysis/standard/std34/ASCIITLD.jflex-macro
      - copied unchanged from r1302265, lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/std34/ASCIITLD.jflex-macro
    lucene/dev/branches/branch_3x/lucene/core/src/java/org/apache/lucene/analysis/standard/std34/SUPPLEMENTARY.jflex-macro
      - copied unchanged from r1302265, lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/std34/SUPPLEMENTARY.jflex-macro
    lucene/dev/branches/branch_3x/lucene/core/src/java/org/apache/lucene/analysis/standard/std34/UAX29URLEmailTokenizerImpl34.java
      - copied, changed from r1302265, lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/std34/UAX29URLEmailTokenizerImpl34.java
    lucene/dev/branches/branch_3x/lucene/core/src/java/org/apache/lucene/analysis/standard/std34/UAX29URLEmailTokenizerImpl34.jflex
      - copied unchanged from r1302265, lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/std34/UAX29URLEmailTokenizerImpl34.jflex
Modified:
    lucene/dev/branches/branch_3x/   (props changed)
    lucene/dev/branches/branch_3x/lucene/   (props changed)
    lucene/dev/branches/branch_3x/lucene/CHANGES.txt
    lucene/dev/branches/branch_3x/lucene/core/build.xml
    lucene/dev/branches/branch_3x/lucene/core/src/java/org/apache/lucene/analysis/standard/ASCIITLD.jflex-macro
    lucene/dev/branches/branch_3x/lucene/core/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizer.java
    lucene/dev/branches/branch_3x/lucene/core/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizerImpl.java
    lucene/dev/branches/branch_3x/lucene/core/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizerImpl.jflex
    lucene/dev/branches/branch_3x/lucene/core/src/test/org/apache/lucene/analysis/TestUAX29URLEmailTokenizer.java
    lucene/dev/branches/branch_3x/solr/   (props changed)

Modified: lucene/dev/branches/branch_3x/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/CHANGES.txt?rev=1302269&r1=1302268&r2=1302269&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/CHANGES.txt (original)
+++ lucene/dev/branches/branch_3x/lucene/CHANGES.txt Mon Mar 19 04:14:31 2012
@@ -271,6 +271,9 @@ Bug fixes
 * LUCENE-3876: Fix bug where positions for a document exceeding
   Integer.MAX_VALUE/2 would produce a corrupt index.  
   (Simon Willnauer, Mike Mccandless, Robert Muir)
+
+* LUCENE-3880: UAX29URLEmailTokenizer now recognizes emails when the mailto:
+  scheme is prepended. (Kai Gülzau, Steve Rowe)
     
 Optimizations
 

Modified: lucene/dev/branches/branch_3x/lucene/core/build.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/core/build.xml?rev=1302269&r1=1302268&r2=1302269&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/core/build.xml (original)
+++ lucene/dev/branches/branch_3x/lucene/core/build.xml Mon Mar 19 04:14:31 2012
@@ -92,6 +92,9 @@
     <jflex file="src/java/org/apache/lucene/analysis/standard/std31/UAX29URLEmailTokenizerImpl31.jflex"
            outdir="src/java/org/apache/lucene/analysis/standard/std31"
            nobak="on" />
+    <jflex file="src/java/org/apache/lucene/analysis/standard/std34/UAX29URLEmailTokenizerImpl34.jflex"
+           outdir="src/java/org/apache/lucene/analysis/standard/std34"
+           nobak="on" />
   </target>
 
   <property name="tld.zones" value="http://www.internic.net/zones/root.zone"/>

Modified: lucene/dev/branches/branch_3x/lucene/core/src/java/org/apache/lucene/analysis/standard/ASCIITLD.jflex-macro
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/core/src/java/org/apache/lucene/analysis/standard/ASCIITLD.jflex-macro?rev=1302269&r1=1302268&r2=1302269&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/core/src/java/org/apache/lucene/analysis/standard/ASCIITLD.jflex-macro (original)
+++ lucene/dev/branches/branch_3x/lucene/core/src/java/org/apache/lucene/analysis/standard/ASCIITLD.jflex-macro Mon Mar 19 04:14:31 2012
@@ -15,8 +15,8 @@
  */
 
 // Generated from IANA Root Zone Database <http://www.internic.net/zones/root.zone>
-// file version from Thursday, August 4, 2011 11:34:20 AM UTC
-// generated on Thursday, August 4, 2011 11:46:19 PM UTC
+// file version from Sunday, March 18, 2012 4:34:02 AM UTC
+// generated on Sunday, March 18, 2012 4:02:55 PM UTC
 // by org.apache.lucene.analysis.standard.GenerateJflexTLDMacros
 
 ASCIITLD = "." (
@@ -79,6 +79,7 @@ ASCIITLD = "." (
 	| [cC][rR]
 	| [cC][uU]
 	| [cC][vV]
+	| [cC][wW]
 	| [cC][xX]
 	| [cC][yY]
 	| [cC][zZ]
@@ -247,6 +248,7 @@ ASCIITLD = "." (
 	| [sS][tT]
 	| [sS][uU]
 	| [sS][vV]
+	| [sS][xX]
 	| [sS][yY]
 	| [sS][zZ]
 	| [tT][cC]
@@ -288,6 +290,7 @@ ASCIITLD = "." (
 	| [xX][nN]--3[eE]0[bB]707[eE]
 	| [xX][nN]--45[bB][rR][jJ]9[cC]
 	| [xX][nN]--80[aA][kK][hH][bB][yY][kK][nN][jJ]4[fF]
+	| [xX][nN]--80[aA][oO]21[aA]
 	| [xX][nN]--90[aA]3[aA][cC]
 	| [xX][nN]--9[tT]4[bB]11[yY][iI]5[aA]
 	| [xX][nN]--[cC][lL][cC][hH][cC]0[eE][aA]0[bB]2[gG]2[aA]9[gG][cC][dD]

Modified: lucene/dev/branches/branch_3x/lucene/core/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/core/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizer.java?rev=1302269&r1=1302268&r2=1302269&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/core/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizer.java (original)
+++ lucene/dev/branches/branch_3x/lucene/core/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizer.java Mon Mar 19 04:14:31 2012
@@ -23,8 +23,8 @@ import java.io.InputStreamReader;
 import java.io.Reader;
 
 import org.apache.lucene.analysis.Tokenizer;
-import org.apache.lucene.analysis.standard.std31.StandardTokenizerImpl31;
 import org.apache.lucene.analysis.standard.std31.UAX29URLEmailTokenizerImpl31;
+import org.apache.lucene.analysis.standard.std34.UAX29URLEmailTokenizerImpl34;
 import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
 import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
@@ -200,8 +200,10 @@ public final class UAX29URLEmailTokenize
   }
 
   private static StandardTokenizerInterface getScannerFor(Version matchVersion, Reader input) {
-    if (matchVersion.onOrAfter(Version.LUCENE_34)) {
+    if (matchVersion.onOrAfter(Version.LUCENE_36)) {
       return new UAX29URLEmailTokenizerImpl(input);
+    } else if (matchVersion.onOrAfter(Version.LUCENE_34)) {
+      return new UAX29URLEmailTokenizerImpl34(input);
     } else {
       return new UAX29URLEmailTokenizerImpl31(input);
     }