You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2011/01/17 13:34:39 UTC

svn commit: r1059901 - in /lucene/dev/trunk/solr: contrib/extraction/src/test/resources/solr-extraction/conf/ src/java/org/apache/solr/analysis/ src/test-files/solr/conf/ src/test/org/apache/solr/analysis/

Author: rmuir
Date: Mon Jan 17 12:34:39 2011
New Revision: 1059901

URL: http://svn.apache.org/viewvc?rev=1059901&view=rev
Log:
SOLR-1930: remove analysis API deprecations

Removed:
    lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/BufferedTokenStream.java
    lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/EnglishPorterFilterFactory.java
    lucene/dev/trunk/solr/src/test/org/apache/solr/analysis/EnglishPorterFilterFactoryTest.java
    lucene/dev/trunk/solr/src/test/org/apache/solr/analysis/TestBufferedTokenStream.java
Modified:
    lucene/dev/trunk/solr/contrib/extraction/src/test/resources/solr-extraction/conf/schema.xml
    lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/PatternTokenizerFactory.java
    lucene/dev/trunk/solr/src/test-files/solr/conf/schema-copyfield-test.xml
    lucene/dev/trunk/solr/src/test-files/solr/conf/schema-required-fields.xml
    lucene/dev/trunk/solr/src/test-files/solr/conf/schema.xml
    lucene/dev/trunk/solr/src/test-files/solr/conf/schema12.xml
    lucene/dev/trunk/solr/src/test/org/apache/solr/analysis/SnowballPorterFilterFactoryTest.java

Modified: lucene/dev/trunk/solr/contrib/extraction/src/test/resources/solr-extraction/conf/schema.xml
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/contrib/extraction/src/test/resources/solr-extraction/conf/schema.xml?rev=1059901&r1=1059900&r2=1059901&view=diff
==============================================================================
--- lucene/dev/trunk/solr/contrib/extraction/src/test/resources/solr-extraction/conf/schema.xml (original)
+++ lucene/dev/trunk/solr/contrib/extraction/src/test/resources/solr-extraction/conf/schema.xml Mon Jan 17 12:34:39 2011
@@ -210,13 +210,14 @@
     <fieldtype name="engporterfilt" class="solr.TextField">
       <analyzer>
         <tokenizer class="solr.WhitespaceTokenizerFactory"/>
-        <filter class="solr.EnglishPorterFilterFactory"/>
+        <filter class="solr.PorterStemFilterFactory"/>
       </analyzer>
     </fieldtype>
     <fieldtype name="custengporterfilt" class="solr.TextField">
       <analyzer>
         <tokenizer class="solr.WhitespaceTokenizerFactory"/>
-        <filter class="solr.EnglishPorterFilterFactory" protected="protwords.txt"/>
+        <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
+        <filter class="solr.PorterStemFilterFactory"/>
       </analyzer>
     </fieldtype>
     <fieldtype name="stopfilt" class="solr.TextField">

Modified: lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/PatternTokenizerFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/PatternTokenizerFactory.java?rev=1059901&r1=1059900&r2=1059901&view=diff
==============================================================================
--- lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/PatternTokenizerFactory.java (original)
+++ lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/PatternTokenizerFactory.java Mon Jan 17 12:34:39 2011
@@ -19,13 +19,9 @@ package org.apache.solr.analysis;
 
 import java.io.IOException;
 import java.io.Reader;
-import java.util.ArrayList;
-import java.util.List;
 import java.util.Map;
-import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 
-import org.apache.lucene.analysis.Token;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.pattern.PatternTokenizer;
 import org.apache.solr.common.SolrException;
@@ -104,65 +100,4 @@ public class PatternTokenizerFactory ext
       throw new SolrException( SolrException.ErrorCode.SERVER_ERROR, ex );
     }
   }
-  
-  /**
-   * This behaves just like String.split( ), but returns a list of Tokens
-   * rather then an array of strings
-   * NOTE: This method is not used in 1.4.
-   * @deprecated
-   */
-  @Deprecated
-  public static List<Token> split( Matcher matcher, String input )
-  {
-    int index = 0;
-    int lastNonEmptySize = Integer.MAX_VALUE;
-    ArrayList<Token> matchList = new ArrayList<Token>();
-
-    // Add segments before each match found
-    while(matcher.find()) {
-      String match = input.subSequence(index, matcher.start()).toString();
-      matchList.add( new Token( match, index, matcher.start()) );
-      index = matcher.end();
-      if( match.length() > 0 ) {
-        lastNonEmptySize = matchList.size();
-      }
-    }
-
-    // If no match is found, return the full string
-    if (index == 0) {
-      matchList.add( new Token( input, 0, input.length()) );
-    }
-    else { 
-      String match = input.subSequence(index, input.length()).toString();
-      matchList.add( new Token( match, index, input.length()) );
-      if( match.length() > 0 ) {
-        lastNonEmptySize = matchList.size();
-      }
-    }
-    
-    // Don't use trailing empty strings.  This behavior matches String.split();
-    if( lastNonEmptySize < matchList.size() ) {
-      return matchList.subList( 0, lastNonEmptySize );
-    }
-    return matchList;
-  }
-  
-  /**
-   * Create tokens from the matches in a matcher 
-   * NOTE: This method is not used in 1.4.
-   * @deprecated
-   */
-  @Deprecated
-  public static List<Token> group( Matcher matcher, String input, int group )
-  {
-    ArrayList<Token> matchList = new ArrayList<Token>();
-    while(matcher.find()) {
-      Token t = new Token( 
-        matcher.group(group), 
-        matcher.start(group), 
-        matcher.end(group) );
-      matchList.add( t );
-    }
-    return matchList;
-  }
 }

Modified: lucene/dev/trunk/solr/src/test-files/solr/conf/schema-copyfield-test.xml
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/src/test-files/solr/conf/schema-copyfield-test.xml?rev=1059901&r1=1059900&r2=1059901&view=diff
==============================================================================
--- lucene/dev/trunk/solr/src/test-files/solr/conf/schema-copyfield-test.xml (original)
+++ lucene/dev/trunk/solr/src/test-files/solr/conf/schema-copyfield-test.xml Mon Jan 17 12:34:39 2011
@@ -202,13 +202,14 @@
     <fieldtype name="engporterfilt" class="solr.TextField">
       <analyzer>
         <tokenizer class="solr.WhitespaceTokenizerFactory"/>
-        <filter class="solr.EnglishPorterFilterFactory"/>
+        <filter class="solr.PorterStemFilterFactory"/>
       </analyzer>
     </fieldtype>
     <fieldtype name="custengporterfilt" class="solr.TextField">
       <analyzer>
         <tokenizer class="solr.WhitespaceTokenizerFactory"/>
-        <filter class="solr.EnglishPorterFilterFactory" protected="protwords.txt"/>
+        <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
+        <filter class="solr.PorterStemFilterFactory"/>
       </analyzer>
     </fieldtype>
     <fieldtype name="stopfilt" class="solr.TextField">

Modified: lucene/dev/trunk/solr/src/test-files/solr/conf/schema-required-fields.xml
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/src/test-files/solr/conf/schema-required-fields.xml?rev=1059901&r1=1059900&r2=1059901&view=diff
==============================================================================
--- lucene/dev/trunk/solr/src/test-files/solr/conf/schema-required-fields.xml (original)
+++ lucene/dev/trunk/solr/src/test-files/solr/conf/schema-required-fields.xml Mon Jan 17 12:34:39 2011
@@ -193,13 +193,14 @@
     <fieldtype name="engporterfilt" class="solr.TextField">
       <analyzer>
         <tokenizer class="solr.WhitespaceTokenizerFactory"/>
-        <filter class="solr.EnglishPorterFilterFactory"/>
+        <filter class="solr.PorterStemFilterFactory"/>
       </analyzer>
     </fieldtype>
     <fieldtype name="custengporterfilt" class="solr.TextField">
       <analyzer>
         <tokenizer class="solr.WhitespaceTokenizerFactory"/>
-        <filter class="solr.EnglishPorterFilterFactory" protected="protwords.txt"/>
+        <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
+        <filter class="solr.PorterStemFilterFactory"/>
       </analyzer>
     </fieldtype>
     <fieldtype name="stopfilt" class="solr.TextField">

Modified: lucene/dev/trunk/solr/src/test-files/solr/conf/schema.xml
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/src/test-files/solr/conf/schema.xml?rev=1059901&r1=1059900&r2=1059901&view=diff
==============================================================================
--- lucene/dev/trunk/solr/src/test-files/solr/conf/schema.xml (original)
+++ lucene/dev/trunk/solr/src/test-files/solr/conf/schema.xml Mon Jan 17 12:34:39 2011
@@ -236,13 +236,14 @@
     <fieldtype name="engporterfilt" class="solr.TextField">
       <analyzer>
         <tokenizer class="solr.WhitespaceTokenizerFactory"/>
-        <filter class="solr.EnglishPorterFilterFactory"/>
+        <filter class="solr.PorterStemFilterFactory"/>
       </analyzer>
     </fieldtype>
     <fieldtype name="custengporterfilt" class="solr.TextField">
       <analyzer>
         <tokenizer class="solr.WhitespaceTokenizerFactory"/>
-        <filter class="solr.EnglishPorterFilterFactory" protected="protwords.txt"/>
+        <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
+        <filter class="solr.PorterStemFilterFactory"/>
       </analyzer>
     </fieldtype>
     <fieldtype name="stopfilt" class="solr.TextField">

Modified: lucene/dev/trunk/solr/src/test-files/solr/conf/schema12.xml
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/src/test-files/solr/conf/schema12.xml?rev=1059901&r1=1059900&r2=1059901&view=diff
==============================================================================
--- lucene/dev/trunk/solr/src/test-files/solr/conf/schema12.xml (original)
+++ lucene/dev/trunk/solr/src/test-files/solr/conf/schema12.xml Mon Jan 17 12:34:39 2011
@@ -252,13 +252,14 @@
     <fieldtype name="engporterfilt" class="solr.TextField">
       <analyzer>
         <tokenizer class="solr.WhitespaceTokenizerFactory"/>
-        <filter class="solr.EnglishPorterFilterFactory"/>
+        <filter class="solr.PorterStemFilterFactory"/>
       </analyzer>
     </fieldtype>
     <fieldtype name="custengporterfilt" class="solr.TextField">
       <analyzer>
         <tokenizer class="solr.WhitespaceTokenizerFactory"/>
-        <filter class="solr.EnglishPorterFilterFactory" protected="protwords.txt"/>
+        <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
+        <filter class="solr.PorterStemFilterFactory"/>
       </analyzer>
     </fieldtype>
     <fieldtype name="stopfilt" class="solr.TextField">
@@ -286,14 +287,14 @@
           <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
           <filter class="solr.LowerCaseFilterFactory"/>
           <filter class="solr.StopFilterFactory"/>
-          <filter class="solr.EnglishPorterFilterFactory"/>
+          <filter class="solr.PorterStemFilterFactory"/>
       </analyzer>
       <analyzer type="query">
           <tokenizer class="solr.WhitespaceTokenizerFactory"/>
           <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0"/>
           <filter class="solr.LowerCaseFilterFactory"/>
           <filter class="solr.StopFilterFactory"/>
-          <filter class="solr.EnglishPorterFilterFactory"/>
+          <filter class="solr.PorterStemFilterFactory"/>
       </analyzer>
     </fieldtype>
 
@@ -303,14 +304,14 @@
           <filter class="solr.LowerCaseFilterFactory"/>
           <filter class="solr.WordDelimiterFilterFactory" protected="protwords.txt" splitOnNumerics="0" splitOnCaseChange="0" generateWordParts="1" generateNumberParts="0" catenateWords="0" catenateNumbers="0" catenateAll="0"/>
           <filter class="solr.StopFilterFactory"/>
-          <filter class="solr.EnglishPorterFilterFactory"/>
+          <filter class="solr.PorterStemFilterFactory"/>
       </analyzer>
       <analyzer type="query">
           <tokenizer class="solr.WhitespaceTokenizerFactory"/>
           <filter class="solr.LowerCaseFilterFactory"/>
           <filter class="solr.WordDelimiterFilterFactory" protected="protwords.txt" splitOnNumerics="0" splitOnCaseChange="0" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
           <filter class="solr.StopFilterFactory"/>
-          <filter class="solr.EnglishPorterFilterFactory"/>
+          <filter class="solr.PorterStemFilterFactory"/>
       </analyzer>
     </fieldtype>
 
@@ -375,7 +376,7 @@
         <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1"
                 catenateNumbers="1" catenateAll="0" splitOnCaseChange="0"/>
         <filter class="solr.LowerCaseFilterFactory"/>
-        <filter class="solr.EnglishPorterFilterFactory"/>
+        <filter class="solr.PorterStemFilterFactory"/>
       </analyzer>
       <analyzer type="query">
         <tokenizer class="solr.WhitespaceTokenizerFactory"/>
@@ -384,7 +385,7 @@
         <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0"
                 catenateNumbers="0" catenateAll="0" splitOnCaseChange="0"/>
         <filter class="solr.LowerCaseFilterFactory"/>
-        <filter class="solr.EnglishPorterFilterFactory"/>
+        <filter class="solr.PorterStemFilterFactory"/>
       </analyzer>
     </fieldType>
 
@@ -397,7 +398,7 @@
           <tokenizer class="solr.WhitespaceTokenizerFactory"/>
           <filter class="solr.SynonymFilterFactory"
                   synonyms="synonyms.txt" expand="true" />
-          <filter class="solr.EnglishPorterFilterFactory"/>
+          <filter class="solr.PorterStemFilterFactory"/>
           <filter class="solr.RemoveDuplicatesTokenFilterFactory" />
       </analyzer>
     </fieldtype>

Modified: lucene/dev/trunk/solr/src/test/org/apache/solr/analysis/SnowballPorterFilterFactoryTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/src/test/org/apache/solr/analysis/SnowballPorterFilterFactoryTest.java?rev=1059901&r1=1059900&r2=1059901&view=diff
==============================================================================
--- lucene/dev/trunk/solr/src/test/org/apache/solr/analysis/SnowballPorterFilterFactoryTest.java (original)
+++ lucene/dev/trunk/solr/src/test/org/apache/solr/analysis/SnowballPorterFilterFactoryTest.java Mon Jan 17 12:34:39 2011
@@ -33,7 +33,6 @@ import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
 import java.util.ArrayList;
-import java.util.Collections;
 
 public class SnowballPorterFilterFactoryTest extends BaseTokenTestCase {
 
@@ -59,37 +58,6 @@ public class SnowballPorterFilterFactory
     assertTokenStreamContents(stream, gold);
   }
 
-  /**
-   * Tests the protected words mechanism of EnglishPorterFilterFactory
-   */
-  @Deprecated
-  public void testProtectedOld() throws Exception {
-    EnglishStemmer stemmer = new EnglishStemmer();
-    String[] test = {"The", "fledgling", "banks", "were", "counting", "on", "a", "big", "boom", "in", "banking"};
-    String[] gold = new String[test.length];
-    for (int i = 0; i < test.length; i++) {
-      if (test[i].equals("fledgling") == false && test[i].equals("banks") == false) {
-        stemmer.setCurrent(test[i]);
-        stemmer.stem();
-        gold[i] = stemmer.getCurrent();
-      } else {
-        gold[i] = test[i];
-      }
-    }
-
-    EnglishPorterFilterFactory factory = new EnglishPorterFilterFactory();
-    Map<String, String> args = new HashMap<String, String>(DEFAULT_VERSION_PARAM);
-    args.put(SnowballPorterFilterFactory.PROTECTED_TOKENS, "who-cares.txt");
-    factory.init(args);
-    List<String> lines = new ArrayList<String>();
-    Collections.addAll(lines, "banks", "fledgling");
-    factory.inform(new LinesMockSolrResourceLoader(lines));
-    Tokenizer tokenizer = new WhitespaceTokenizer(DEFAULT_VERSION,
-        new StringReader(StrUtils.join(Arrays.asList(test), ' ')));
-    TokenStream stream = factory.create(tokenizer);
-    assertTokenStreamContents(stream, gold);
-  }
-
   class LinesMockSolrResourceLoader implements ResourceLoader {
     List<String> lines;