You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2012/02/10 12:10:13 UTC
svn commit: r1242743 - in /lucene/dev/branches/branch_3x: ./ lucene/
lucene/contrib/
lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/en/
lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/en/
solr/ solr/core/ solr/c...
Author: rmuir
Date: Fri Feb 10 11:10:12 2012
New Revision: 1242743
URL: http://svn.apache.org/viewvc?rev=1242743&view=rev
Log:
LUCENE-3748: EnglishPossessiveFilter did not work with a proper right quotation mark
Modified:
lucene/dev/branches/branch_3x/ (props changed)
lucene/dev/branches/branch_3x/lucene/ (props changed)
lucene/dev/branches/branch_3x/lucene/contrib/CHANGES.txt
lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/en/EnglishAnalyzer.java
lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/en/EnglishPossessiveFilter.java
lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/en/TestEnglishAnalyzer.java
lucene/dev/branches/branch_3x/solr/ (props changed)
lucene/dev/branches/branch_3x/solr/core/ (props changed)
lucene/dev/branches/branch_3x/solr/core/src/java/org/apache/solr/analysis/EnglishPossessiveFilterFactory.java
Modified: lucene/dev/branches/branch_3x/lucene/contrib/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/CHANGES.txt?rev=1242743&r1=1242742&r2=1242743&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/CHANGES.txt (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/CHANGES.txt Fri Feb 10 11:10:12 2012
@@ -80,6 +80,9 @@ Changes in runtime behavior
the Snowball German2 algorithm to ae/oe/ue and case-folds ß. Add
GalicianMinimalStemFilter for plural removal only. (Robert Muir)
+ * LUCENE-3748: EnglishPossessiveFilter did not work with Unicode right
+ single quotation mark (U+2019). (David Croley via Robert Muir)
+
Optimizations
* SOLR-2888: FSTSuggester refactoring: internal storage is now UTF-8,
Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/en/EnglishAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/en/EnglishAnalyzer.java?rev=1242743&r1=1242742&r2=1242743&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/en/EnglishAnalyzer.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/en/EnglishAnalyzer.java Fri Feb 10 11:10:12 2012
@@ -107,7 +107,7 @@ public final class EnglishAnalyzer exten
TokenStream result = new StandardFilter(matchVersion, source);
// prior to this we get the classic behavior, standardfilter does it for us.
if (matchVersion.onOrAfter(Version.LUCENE_31))
- result = new EnglishPossessiveFilter(result);
+ result = new EnglishPossessiveFilter(matchVersion, result);
result = new LowerCaseFilter(matchVersion, result);
result = new StopFilter(matchVersion, result, stopwords);
if(!stemExclusionSet.isEmpty())
Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/en/EnglishPossessiveFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/en/EnglishPossessiveFilter.java?rev=1242743&r1=1242742&r2=1242743&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/en/EnglishPossessiveFilter.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/en/EnglishPossessiveFilter.java Fri Feb 10 11:10:12 2012
@@ -22,15 +22,34 @@ import java.io.IOException;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+import org.apache.lucene.util.Version;
/**
* TokenFilter that removes possessives (trailing 's) from words.
+ * <a name="version"/>
+ * <p>You must specify the required {@link Version}
+ * compatibility when creating EnglishPossessiveFilter:
+ * <ul>
+ * <li> As of 3.6, U+2019 RIGHT SINGLE QUOTATION MARK and
+ * U+FF07 FULLWIDTH APOSTROPHE are also treated as
+ * quotation marks.
+ * </ul>
*/
public final class EnglishPossessiveFilter extends TokenFilter {
private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
+ private Version matchVersion;
+ /**
+ * @deprecated Use {@link #EnglishPossessiveFilter(Version, TokenStream)} instead.
+ */
+ @Deprecated
public EnglishPossessiveFilter(TokenStream input) {
+ this(Version.LUCENE_35, input);
+ }
+
+ public EnglishPossessiveFilter(Version version, TokenStream input) {
super(input);
+ this.matchVersion = version;
}
@Override
@@ -42,10 +61,12 @@ public final class EnglishPossessiveFilt
final char[] buffer = termAtt.buffer();
final int bufferLength = termAtt.length();
- if (bufferLength >= 2 &&
- buffer[bufferLength-2] == '\'' &&
- (buffer[bufferLength-1] == 's' || buffer[bufferLength-1] == 'S'))
+ if (bufferLength >= 2 &&
+ (buffer[bufferLength-2] == '\'' ||
+ (matchVersion.onOrAfter(Version.LUCENE_36) && (buffer[bufferLength-2] == '\u2019' || buffer[bufferLength-2] == '\uFF07'))) &&
+ (buffer[bufferLength-1] == 's' || buffer[bufferLength-1] == 'S')) {
termAtt.setLength(bufferLength - 2); // Strip last 2 characters off
+ }
return true;
}
Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/en/TestEnglishAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/en/TestEnglishAnalyzer.java?rev=1242743&r1=1242742&r2=1242743&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/en/TestEnglishAnalyzer.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/en/TestEnglishAnalyzer.java Fri Feb 10 11:10:12 2012
@@ -40,6 +40,8 @@ public class TestEnglishAnalyzer extends
assertAnalyzesTo(a, "the", new String[] {});
// possessive removal
checkOneTermReuse(a, "steven's", "steven");
+ checkOneTermReuse(a, "steven\u2019s", "steven");
+ checkOneTermReuse(a, "steven\uFF07s", "steven");
}
/** test use of exclusion set */
Modified: lucene/dev/branches/branch_3x/solr/core/src/java/org/apache/solr/analysis/EnglishPossessiveFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/solr/core/src/java/org/apache/solr/analysis/EnglishPossessiveFilterFactory.java?rev=1242743&r1=1242742&r2=1242743&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/solr/core/src/java/org/apache/solr/analysis/EnglishPossessiveFilterFactory.java (original)
+++ lucene/dev/branches/branch_3x/solr/core/src/java/org/apache/solr/analysis/EnglishPossessiveFilterFactory.java Fri Feb 10 11:10:12 2012
@@ -17,6 +17,8 @@ package org.apache.solr.analysis;
* limitations under the License.
*/
+import java.util.Map;
+
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.en.EnglishPossessiveFilter;
@@ -33,7 +35,14 @@ import org.apache.lucene.analysis.en.Eng
* @version $Id$
*/
public class EnglishPossessiveFilterFactory extends BaseTokenFilterFactory {
+
+ @Override
+ public void init(Map<String,String> args) {
+ super.init(args);
+ assureMatchVersion();
+ }
+
public TokenStream create(TokenStream input) {
- return new EnglishPossessiveFilter(input);
+ return new EnglishPossessiveFilter(luceneMatchVersion, input);
}
}