You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2012/02/10 12:10:13 UTC

svn commit: r1242743 - in /lucene/dev/branches/branch_3x: ./ lucene/ lucene/contrib/ lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/en/ lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/en/ solr/ solr/core/ solr/c...

Author: rmuir
Date: Fri Feb 10 11:10:12 2012
New Revision: 1242743

URL: http://svn.apache.org/viewvc?rev=1242743&view=rev
Log:
LUCENE-3748: EnglishPossessiveFilter did not work with a proper right quotation mark

Modified:
    lucene/dev/branches/branch_3x/   (props changed)
    lucene/dev/branches/branch_3x/lucene/   (props changed)
    lucene/dev/branches/branch_3x/lucene/contrib/CHANGES.txt
    lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/en/EnglishAnalyzer.java
    lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/en/EnglishPossessiveFilter.java
    lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/en/TestEnglishAnalyzer.java
    lucene/dev/branches/branch_3x/solr/   (props changed)
    lucene/dev/branches/branch_3x/solr/core/   (props changed)
    lucene/dev/branches/branch_3x/solr/core/src/java/org/apache/solr/analysis/EnglishPossessiveFilterFactory.java

Modified: lucene/dev/branches/branch_3x/lucene/contrib/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/CHANGES.txt?rev=1242743&r1=1242742&r2=1242743&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/CHANGES.txt (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/CHANGES.txt Fri Feb 10 11:10:12 2012
@@ -80,6 +80,9 @@ Changes in runtime behavior
    the Snowball German2 algorithm to ae/oe/ue and case-folds ß. Add 
    GalicianMinimalStemFilter for plural removal only. (Robert Muir)
 
+ * LUCENE-3748: EnglishPossessiveFilter did not work with Unicode right 
+   single quotation mark (U+2019).  (David Croley via Robert Muir)
+
 Optimizations
 
 * SOLR-2888: FSTSuggester refactoring: internal storage is now UTF-8, 

Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/en/EnglishAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/en/EnglishAnalyzer.java?rev=1242743&r1=1242742&r2=1242743&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/en/EnglishAnalyzer.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/en/EnglishAnalyzer.java Fri Feb 10 11:10:12 2012
@@ -107,7 +107,7 @@ public final class EnglishAnalyzer exten
     TokenStream result = new StandardFilter(matchVersion, source);
     // prior to this we get the classic behavior, standardfilter does it for us.
     if (matchVersion.onOrAfter(Version.LUCENE_31))
-      result = new EnglishPossessiveFilter(result);
+      result = new EnglishPossessiveFilter(matchVersion, result);
     result = new LowerCaseFilter(matchVersion, result);
     result = new StopFilter(matchVersion, result, stopwords);
     if(!stemExclusionSet.isEmpty())

Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/en/EnglishPossessiveFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/en/EnglishPossessiveFilter.java?rev=1242743&r1=1242742&r2=1242743&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/en/EnglishPossessiveFilter.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/java/org/apache/lucene/analysis/en/EnglishPossessiveFilter.java Fri Feb 10 11:10:12 2012
@@ -22,15 +22,34 @@ import java.io.IOException;
 import org.apache.lucene.analysis.TokenFilter;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+import org.apache.lucene.util.Version;
 
 /**
  * TokenFilter that removes possessives (trailing 's) from words.
+ * <a name="version"></a>
+ * <p>You must specify the required {@link Version}
+ * compatibility when creating EnglishPossessiveFilter:
+ * <ul>
+ *    <li> As of 3.6, U+2019 RIGHT SINGLE QUOTATION MARK and
+ *         U+FF07 FULLWIDTH APOSTROPHE are also recognized as
+ *         the apostrophe of a trailing possessive.
+ * </ul>
  */
 public final class EnglishPossessiveFilter extends TokenFilter {
   private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
+  private Version matchVersion;
 
+  /**
+   * @deprecated Use {@link #EnglishPossessiveFilter(Version, TokenStream)} instead.
+   */
+  @Deprecated
   public EnglishPossessiveFilter(TokenStream input) {
+    this(Version.LUCENE_35, input);
+  }
+
+  public EnglishPossessiveFilter(Version version, TokenStream input) {
     super(input);
+    this.matchVersion = version;
   }
 
   @Override
@@ -42,10 +61,12 @@ public final class EnglishPossessiveFilt
     final char[] buffer = termAtt.buffer();
     final int bufferLength = termAtt.length();
     
-    if (bufferLength >= 2 &&
-        buffer[bufferLength-2] == '\'' &&
-        (buffer[bufferLength-1] == 's' || buffer[bufferLength-1] == 'S'))
+    if (bufferLength >= 2 && 
+        (buffer[bufferLength-2] == '\'' || 
+         (matchVersion.onOrAfter(Version.LUCENE_36) && (buffer[bufferLength-2] == '\u2019' || buffer[bufferLength-2] == '\uFF07'))) &&
+        (buffer[bufferLength-1] == 's' || buffer[bufferLength-1] == 'S')) {
       termAtt.setLength(bufferLength - 2); // Strip last 2 characters off
+    }
 
     return true;
   }

Modified: lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/en/TestEnglishAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/en/TestEnglishAnalyzer.java?rev=1242743&r1=1242742&r2=1242743&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/en/TestEnglishAnalyzer.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/analyzers/common/src/test/org/apache/lucene/analysis/en/TestEnglishAnalyzer.java Fri Feb 10 11:10:12 2012
@@ -40,6 +40,8 @@ public class TestEnglishAnalyzer extends
     assertAnalyzesTo(a, "the", new String[] {});
     // possessive removal
     checkOneTermReuse(a, "steven's", "steven");
+    checkOneTermReuse(a, "steven\u2019s", "steven");
+    checkOneTermReuse(a, "steven\uFF07s", "steven");
   }
   
   /** test use of exclusion set */

Modified: lucene/dev/branches/branch_3x/solr/core/src/java/org/apache/solr/analysis/EnglishPossessiveFilterFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/solr/core/src/java/org/apache/solr/analysis/EnglishPossessiveFilterFactory.java?rev=1242743&r1=1242742&r2=1242743&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/solr/core/src/java/org/apache/solr/analysis/EnglishPossessiveFilterFactory.java (original)
+++ lucene/dev/branches/branch_3x/solr/core/src/java/org/apache/solr/analysis/EnglishPossessiveFilterFactory.java Fri Feb 10 11:10:12 2012
@@ -17,6 +17,8 @@ package org.apache.solr.analysis;
  * limitations under the License.
  */
 
+import java.util.Map;
+
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.en.EnglishPossessiveFilter;
 
@@ -33,7 +35,14 @@ import org.apache.lucene.analysis.en.Eng
  * @version $Id$
  */
 public class EnglishPossessiveFilterFactory extends BaseTokenFilterFactory {
+  
+  @Override
+  public void init(Map<String,String> args) {
+    super.init(args);
+    assureMatchVersion();
+  }
+  
   public TokenStream create(TokenStream input) {
-    return new EnglishPossessiveFilter(input);
+    return new EnglishPossessiveFilter(luceneMatchVersion, input);
   }
 }