You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2011/08/08 14:14:23 UTC
svn commit: r1154939 [1/3] - in /lucene/dev/branches/branch_3x: ./ lucene/
lucene/backwards/ lucene/backwards/src/test-framework/
lucene/backwards/src/test/
lucene/src/java/org/apache/lucene/analysis/standard/
lucene/src/java/org/apache/lucene/analysis...
Author: rmuir
Date: Mon Aug 8 12:14:22 2011
New Revision: 1154939
URL: http://svn.apache.org/viewvc?rev=1154939&view=rev
Log:
LUCENE-3361: port url+email tokenizer to standardtokenizerinterface, fix combining marks bug
Added:
lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizerImpl.java
- copied unchanged from r1154936, lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizerImpl.java
lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizerImpl.jflex
- copied unchanged from r1154936, lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizerImpl.jflex
lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/analysis/standard/std31/ASCIITLD.jflex-macro
- copied unchanged from r1154936, lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/std31/ASCIITLD.jflex-macro
lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/analysis/standard/std31/SUPPLEMENTARY.jflex-macro
- copied unchanged from r1154936, lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/std31/SUPPLEMENTARY.jflex-macro
lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/analysis/standard/std31/UAX29URLEmailTokenizerImpl31.java
- copied unchanged from r1154936, lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/std31/UAX29URLEmailTokenizerImpl31.java
lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/analysis/standard/std31/UAX29URLEmailTokenizerImpl31.jflex
- copied unchanged from r1154936, lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/std31/UAX29URLEmailTokenizerImpl31.jflex
Removed:
lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizer.jflex
Modified:
lucene/dev/branches/branch_3x/ (props changed)
lucene/dev/branches/branch_3x/lucene/ (props changed)
lucene/dev/branches/branch_3x/lucene/CHANGES.txt
lucene/dev/branches/branch_3x/lucene/backwards/ (props changed)
lucene/dev/branches/branch_3x/lucene/backwards/src/test/ (props changed)
lucene/dev/branches/branch_3x/lucene/backwards/src/test-framework/ (props changed)
lucene/dev/branches/branch_3x/lucene/build.xml
lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/analysis/standard/ASCIITLD.jflex-macro
lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizer.java
lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/analysis/standard/std31/StandardTokenizerImpl31.jflex
lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/analysis/TestUAX29URLEmailTokenizer.java
lucene/dev/branches/branch_3x/solr/ (props changed)
lucene/dev/branches/branch_3x/solr/core/src/java/org/apache/solr/analysis/UAX29URLEmailTokenizerFactory.java
lucene/dev/branches/branch_3x/solr/core/src/test/org/apache/solr/analysis/TestUAX29URLEmailTokenizerFactory.java
Modified: lucene/dev/branches/branch_3x/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/CHANGES.txt?rev=1154939&r1=1154938&r2=1154939&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/CHANGES.txt (original)
+++ lucene/dev/branches/branch_3x/lucene/CHANGES.txt Mon Aug 8 12:14:22 2011
@@ -33,10 +33,11 @@ Bug fixes
* LUCENE-3340: Fixed case where IndexWriter was not flushing at
exactly maxBufferedDeleteTerms (Mike McCandless)
-* LUCENE-3358: StandardTokenizer wrongly discarded combining marks attached
- to Han or Hiragana characters, this is fixed if you supply Version >= 3.4
- If you supply a previous lucene version, you get the old buggy behavior
- for backwards compatibility. (Trejkaz, Robert Muir)
+* LUCENE-3358, LUCENE-3361: StandardTokenizer and UAX29URLEmailTokenizer
+ wrongly discarded combining marks attached to Han or Hiragana characters.
+ This is fixed if you supply Version >= 3.4. If you supply a previous
+ Lucene version, you get the old buggy behavior for backwards compatibility.
+ (Trejkaz, Robert Muir)
New Features
Modified: lucene/dev/branches/branch_3x/lucene/build.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/build.xml?rev=1154939&r1=1154938&r2=1154939&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/build.xml (original)
+++ lucene/dev/branches/branch_3x/lucene/build.xml Mon Aug 8 12:14:22 2011
@@ -512,7 +512,7 @@
</subant>
</target>
- <target name="jflex-StandardAnalyzer" depends="init,jflex-check,gen-tlds" if="jflex.present">
+ <target name="jflex-StandardAnalyzer" depends="init,jflex-check" if="jflex.present">
<taskdef classname="jflex.anttask.JFlexTask" name="jflex">
<classpath refid="jflex.classpath"/>
</taskdef>
@@ -532,9 +532,12 @@
<taskdef classname="jflex.anttask.JFlexTask" name="jflex">
<classpath refid="jflex.classpath"/>
</taskdef>
- <jflex file="src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizer.jflex"
+ <jflex file="src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizerImpl.jflex"
outdir="src/java/org/apache/lucene/analysis/standard"
nobak="on" />
+ <jflex file="src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizerImpl31.jflex"
+ outdir="src/java/org/apache/lucene/analysis/standard/std31"
+ nobak="on" />
</target>
<property name="tld.zones" value="http://www.internic.net/zones/root.zone"/>
@@ -556,7 +559,7 @@
<target name="clean-jflex">
<delete>
- <fileset dir="src/java/org/apache/lucene/analysis/standard" includes="*.java">
+ <fileset dir="src/java/org/apache/lucene/analysis/standard" includes="**/*.java">
<containsregexp expression="generated.*by.*JFlex"/>
</fileset>
</delete>
Modified: lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/analysis/standard/ASCIITLD.jflex-macro
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/analysis/standard/ASCIITLD.jflex-macro?rev=1154939&r1=1154938&r2=1154939&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/analysis/standard/ASCIITLD.jflex-macro (original)
+++ lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/analysis/standard/ASCIITLD.jflex-macro Mon Aug 8 12:14:22 2011
@@ -15,8 +15,8 @@
*/
// Generated from IANA Root Zone Database <http://www.internic.net/zones/root.zone>
-// file version from Wednesday, February 9, 2011 12:34:10 PM UTC
-// generated on Wednesday, February 9, 2011 4:45:18 PM UTC
+// file version from Thursday, August 4, 2011 11:34:20 AM UTC
+// generated on Thursday, August 4, 2011 11:46:19 PM UTC
// by org.apache.lucene.analysis.standard.GenerateJflexTLDMacros
ASCIITLD = "." (
@@ -288,6 +288,7 @@ ASCIITLD = "." (
| [xX][nN]--3[eE]0[bB]707[eE]
| [xX][nN]--45[bB][rR][jJ]9[cC]
| [xX][nN]--80[aA][kK][hH][bB][yY][kK][nN][jJ]4[fF]
+ | [xX][nN]--90[aA]3[aA][cC]
| [xX][nN]--9[tT]4[bB]11[yY][iI]5[aA]
| [xX][nN]--[cC][lL][cC][hH][cC]0[eE][aA]0[bB]2[gG]2[aA]9[gG][cC][dD]
| [xX][nN]--[dD][eE][bB][aA]0[aA][dD]
@@ -305,9 +306,11 @@ ASCIITLD = "." (
| [xX][nN]--[kK][gG][bB][eE][cC][hH][tT][vV]
| [xX][nN]--[kK][pP][rR][wW]13[dD]
| [xX][nN]--[kK][pP][rR][yY]57[dD]
+ | [xX][nN]--[lL][gG][bB][bB][aA][tT]1[aA][dD]8[jJ]
| [xX][nN]--[mM][gG][bB][aA][aA][mM]7[aA]8[hH]
| [xX][nN]--[mM][gG][bB][aA][yY][hH]7[gG][pP][aA]
| [xX][nN]--[mM][gG][bB][bB][hH]1[aA]71[eE]
+ | [xX][nN]--[mM][gG][bB][cC]0[aA]9[aA][zZ][cC][gG]
| [xX][nN]--[mM][gG][bB][eE][rR][pP]4[aA]5[dD]4[aA][rR]
| [xX][nN]--[oO]3[cC][wW]4[hH]
| [xX][nN]--[oO][gG][bB][pP][fF]8[fF][lL]
@@ -321,6 +324,7 @@ ASCIITLD = "." (
| [xX][nN]--[yY][fF][rR][oO]4[iI]67[oO]
| [xX][nN]--[yY][gG][bB][iI]2[aA][mM][mM][xX]
| [xX][nN]--[zZ][cC][kK][zZ][aA][hH]
+ | [xX][xX][xX]
| [yY][eE]
| [yY][tT]
| [zZ][aA]