You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2011/01/17 13:34:39 UTC
svn commit: r1059901 - in /lucene/dev/trunk/solr:
contrib/extraction/src/test/resources/solr-extraction/conf/
src/java/org/apache/solr/analysis/ src/test-files/solr/conf/
src/test/org/apache/solr/analysis/
Author: rmuir
Date: Mon Jan 17 12:34:39 2011
New Revision: 1059901
URL: http://svn.apache.org/viewvc?rev=1059901&view=rev
Log:
SOLR-1930: remove analysis API deprecations
Removed:
lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/BufferedTokenStream.java
lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/EnglishPorterFilterFactory.java
lucene/dev/trunk/solr/src/test/org/apache/solr/analysis/EnglishPorterFilterFactoryTest.java
lucene/dev/trunk/solr/src/test/org/apache/solr/analysis/TestBufferedTokenStream.java
Modified:
lucene/dev/trunk/solr/contrib/extraction/src/test/resources/solr-extraction/conf/schema.xml
lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/PatternTokenizerFactory.java
lucene/dev/trunk/solr/src/test-files/solr/conf/schema-copyfield-test.xml
lucene/dev/trunk/solr/src/test-files/solr/conf/schema-required-fields.xml
lucene/dev/trunk/solr/src/test-files/solr/conf/schema.xml
lucene/dev/trunk/solr/src/test-files/solr/conf/schema12.xml
lucene/dev/trunk/solr/src/test/org/apache/solr/analysis/SnowballPorterFilterFactoryTest.java
Modified: lucene/dev/trunk/solr/contrib/extraction/src/test/resources/solr-extraction/conf/schema.xml
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/contrib/extraction/src/test/resources/solr-extraction/conf/schema.xml?rev=1059901&r1=1059900&r2=1059901&view=diff
==============================================================================
--- lucene/dev/trunk/solr/contrib/extraction/src/test/resources/solr-extraction/conf/schema.xml (original)
+++ lucene/dev/trunk/solr/contrib/extraction/src/test/resources/solr-extraction/conf/schema.xml Mon Jan 17 12:34:39 2011
@@ -210,13 +210,14 @@
<fieldtype name="engporterfilt" class="solr.TextField">
<analyzer>
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
- <filter class="solr.EnglishPorterFilterFactory"/>
+ <filter class="solr.PorterStemFilterFactory"/>
</analyzer>
</fieldtype>
<fieldtype name="custengporterfilt" class="solr.TextField">
<analyzer>
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
- <filter class="solr.EnglishPorterFilterFactory" protected="protwords.txt"/>
+ <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
+ <filter class="solr.PorterStemFilterFactory"/>
</analyzer>
</fieldtype>
<fieldtype name="stopfilt" class="solr.TextField">
Modified: lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/PatternTokenizerFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/PatternTokenizerFactory.java?rev=1059901&r1=1059900&r2=1059901&view=diff
==============================================================================
--- lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/PatternTokenizerFactory.java (original)
+++ lucene/dev/trunk/solr/src/java/org/apache/solr/analysis/PatternTokenizerFactory.java Mon Jan 17 12:34:39 2011
@@ -19,13 +19,9 @@ package org.apache.solr.analysis;
import java.io.IOException;
import java.io.Reader;
-import java.util.ArrayList;
-import java.util.List;
import java.util.Map;
-import java.util.regex.Matcher;
import java.util.regex.Pattern;
-import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.pattern.PatternTokenizer;
import org.apache.solr.common.SolrException;
@@ -104,65 +100,4 @@ public class PatternTokenizerFactory ext
throw new SolrException( SolrException.ErrorCode.SERVER_ERROR, ex );
}
}
-
- /**
- * This behaves just like String.split( ), but returns a list of Tokens
- * rather then an array of strings
- * NOTE: This method is not used in 1.4.
- * @deprecated
- */
- @Deprecated
- public static List<Token> split( Matcher matcher, String input )
- {
- int index = 0;
- int lastNonEmptySize = Integer.MAX_VALUE;
- ArrayList<Token> matchList = new ArrayList<Token>();
-
- // Add segments before each match found
- while(matcher.find()) {
- String match = input.subSequence(index, matcher.start()).toString();
- matchList.add( new Token( match, index, matcher.start()) );
- index = matcher.end();
- if( match.length() > 0 ) {
- lastNonEmptySize = matchList.size();
- }
- }
-
- // If no match is found, return the full string
- if (index == 0) {
- matchList.add( new Token( input, 0, input.length()) );
- }
- else {
- String match = input.subSequence(index, input.length()).toString();
- matchList.add( new Token( match, index, input.length()) );
- if( match.length() > 0 ) {
- lastNonEmptySize = matchList.size();
- }
- }
-
- // Don't use trailing empty strings. This behavior matches String.split();
- if( lastNonEmptySize < matchList.size() ) {
- return matchList.subList( 0, lastNonEmptySize );
- }
- return matchList;
- }
-
- /**
- * Create tokens from the matches in a matcher
- * NOTE: This method is not used in 1.4.
- * @deprecated
- */
- @Deprecated
- public static List<Token> group( Matcher matcher, String input, int group )
- {
- ArrayList<Token> matchList = new ArrayList<Token>();
- while(matcher.find()) {
- Token t = new Token(
- matcher.group(group),
- matcher.start(group),
- matcher.end(group) );
- matchList.add( t );
- }
- return matchList;
- }
}
Modified: lucene/dev/trunk/solr/src/test-files/solr/conf/schema-copyfield-test.xml
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/src/test-files/solr/conf/schema-copyfield-test.xml?rev=1059901&r1=1059900&r2=1059901&view=diff
==============================================================================
--- lucene/dev/trunk/solr/src/test-files/solr/conf/schema-copyfield-test.xml (original)
+++ lucene/dev/trunk/solr/src/test-files/solr/conf/schema-copyfield-test.xml Mon Jan 17 12:34:39 2011
@@ -202,13 +202,14 @@
<fieldtype name="engporterfilt" class="solr.TextField">
<analyzer>
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
- <filter class="solr.EnglishPorterFilterFactory"/>
+ <filter class="solr.PorterStemFilterFactory"/>
</analyzer>
</fieldtype>
<fieldtype name="custengporterfilt" class="solr.TextField">
<analyzer>
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
- <filter class="solr.EnglishPorterFilterFactory" protected="protwords.txt"/>
+ <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
+ <filter class="solr.PorterStemFilterFactory"/>
</analyzer>
</fieldtype>
<fieldtype name="stopfilt" class="solr.TextField">
Modified: lucene/dev/trunk/solr/src/test-files/solr/conf/schema-required-fields.xml
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/src/test-files/solr/conf/schema-required-fields.xml?rev=1059901&r1=1059900&r2=1059901&view=diff
==============================================================================
--- lucene/dev/trunk/solr/src/test-files/solr/conf/schema-required-fields.xml (original)
+++ lucene/dev/trunk/solr/src/test-files/solr/conf/schema-required-fields.xml Mon Jan 17 12:34:39 2011
@@ -193,13 +193,14 @@
<fieldtype name="engporterfilt" class="solr.TextField">
<analyzer>
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
- <filter class="solr.EnglishPorterFilterFactory"/>
+ <filter class="solr.PorterStemFilterFactory"/>
</analyzer>
</fieldtype>
<fieldtype name="custengporterfilt" class="solr.TextField">
<analyzer>
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
- <filter class="solr.EnglishPorterFilterFactory" protected="protwords.txt"/>
+ <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
+ <filter class="solr.PorterStemFilterFactory"/>
</analyzer>
</fieldtype>
<fieldtype name="stopfilt" class="solr.TextField">
Modified: lucene/dev/trunk/solr/src/test-files/solr/conf/schema.xml
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/src/test-files/solr/conf/schema.xml?rev=1059901&r1=1059900&r2=1059901&view=diff
==============================================================================
--- lucene/dev/trunk/solr/src/test-files/solr/conf/schema.xml (original)
+++ lucene/dev/trunk/solr/src/test-files/solr/conf/schema.xml Mon Jan 17 12:34:39 2011
@@ -236,13 +236,14 @@
<fieldtype name="engporterfilt" class="solr.TextField">
<analyzer>
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
- <filter class="solr.EnglishPorterFilterFactory"/>
+ <filter class="solr.PorterStemFilterFactory"/>
</analyzer>
</fieldtype>
<fieldtype name="custengporterfilt" class="solr.TextField">
<analyzer>
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
- <filter class="solr.EnglishPorterFilterFactory" protected="protwords.txt"/>
+ <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
+ <filter class="solr.PorterStemFilterFactory"/>
</analyzer>
</fieldtype>
<fieldtype name="stopfilt" class="solr.TextField">
Modified: lucene/dev/trunk/solr/src/test-files/solr/conf/schema12.xml
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/src/test-files/solr/conf/schema12.xml?rev=1059901&r1=1059900&r2=1059901&view=diff
==============================================================================
--- lucene/dev/trunk/solr/src/test-files/solr/conf/schema12.xml (original)
+++ lucene/dev/trunk/solr/src/test-files/solr/conf/schema12.xml Mon Jan 17 12:34:39 2011
@@ -252,13 +252,14 @@
<fieldtype name="engporterfilt" class="solr.TextField">
<analyzer>
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
- <filter class="solr.EnglishPorterFilterFactory"/>
+ <filter class="solr.PorterStemFilterFactory"/>
</analyzer>
</fieldtype>
<fieldtype name="custengporterfilt" class="solr.TextField">
<analyzer>
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
- <filter class="solr.EnglishPorterFilterFactory" protected="protwords.txt"/>
+ <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
+ <filter class="solr.PorterStemFilterFactory"/>
</analyzer>
</fieldtype>
<fieldtype name="stopfilt" class="solr.TextField">
@@ -286,14 +287,14 @@
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.StopFilterFactory"/>
- <filter class="solr.EnglishPorterFilterFactory"/>
+ <filter class="solr.PorterStemFilterFactory"/>
</analyzer>
<analyzer type="query">
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0"/>
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.StopFilterFactory"/>
- <filter class="solr.EnglishPorterFilterFactory"/>
+ <filter class="solr.PorterStemFilterFactory"/>
</analyzer>
</fieldtype>
@@ -303,14 +304,14 @@
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.WordDelimiterFilterFactory" protected="protwords.txt" splitOnNumerics="0" splitOnCaseChange="0" generateWordParts="1" generateNumberParts="0" catenateWords="0" catenateNumbers="0" catenateAll="0"/>
<filter class="solr.StopFilterFactory"/>
- <filter class="solr.EnglishPorterFilterFactory"/>
+ <filter class="solr.PorterStemFilterFactory"/>
</analyzer>
<analyzer type="query">
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.WordDelimiterFilterFactory" protected="protwords.txt" splitOnNumerics="0" splitOnCaseChange="0" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
<filter class="solr.StopFilterFactory"/>
- <filter class="solr.EnglishPorterFilterFactory"/>
+ <filter class="solr.PorterStemFilterFactory"/>
</analyzer>
</fieldtype>
@@ -375,7 +376,7 @@
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1"
catenateNumbers="1" catenateAll="0" splitOnCaseChange="0"/>
<filter class="solr.LowerCaseFilterFactory"/>
- <filter class="solr.EnglishPorterFilterFactory"/>
+ <filter class="solr.PorterStemFilterFactory"/>
</analyzer>
<analyzer type="query">
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
@@ -384,7 +385,7 @@
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0"
catenateNumbers="0" catenateAll="0" splitOnCaseChange="0"/>
<filter class="solr.LowerCaseFilterFactory"/>
- <filter class="solr.EnglishPorterFilterFactory"/>
+ <filter class="solr.PorterStemFilterFactory"/>
</analyzer>
</fieldType>
@@ -397,7 +398,7 @@
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
<filter class="solr.SynonymFilterFactory"
synonyms="synonyms.txt" expand="true" />
- <filter class="solr.EnglishPorterFilterFactory"/>
+ <filter class="solr.PorterStemFilterFactory"/>
<filter class="solr.RemoveDuplicatesTokenFilterFactory" />
</analyzer>
</fieldtype>
Modified: lucene/dev/trunk/solr/src/test/org/apache/solr/analysis/SnowballPorterFilterFactoryTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/src/test/org/apache/solr/analysis/SnowballPorterFilterFactoryTest.java?rev=1059901&r1=1059900&r2=1059901&view=diff
==============================================================================
--- lucene/dev/trunk/solr/src/test/org/apache/solr/analysis/SnowballPorterFilterFactoryTest.java (original)
+++ lucene/dev/trunk/solr/src/test/org/apache/solr/analysis/SnowballPorterFilterFactoryTest.java Mon Jan 17 12:34:39 2011
@@ -33,7 +33,6 @@ import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.ArrayList;
-import java.util.Collections;
public class SnowballPorterFilterFactoryTest extends BaseTokenTestCase {
@@ -59,37 +58,6 @@ public class SnowballPorterFilterFactory
assertTokenStreamContents(stream, gold);
}
- /**
- * Tests the protected words mechanism of EnglishPorterFilterFactory
- */
- @Deprecated
- public void testProtectedOld() throws Exception {
- EnglishStemmer stemmer = new EnglishStemmer();
- String[] test = {"The", "fledgling", "banks", "were", "counting", "on", "a", "big", "boom", "in", "banking"};
- String[] gold = new String[test.length];
- for (int i = 0; i < test.length; i++) {
- if (test[i].equals("fledgling") == false && test[i].equals("banks") == false) {
- stemmer.setCurrent(test[i]);
- stemmer.stem();
- gold[i] = stemmer.getCurrent();
- } else {
- gold[i] = test[i];
- }
- }
-
- EnglishPorterFilterFactory factory = new EnglishPorterFilterFactory();
- Map<String, String> args = new HashMap<String, String>(DEFAULT_VERSION_PARAM);
- args.put(SnowballPorterFilterFactory.PROTECTED_TOKENS, "who-cares.txt");
- factory.init(args);
- List<String> lines = new ArrayList<String>();
- Collections.addAll(lines, "banks", "fledgling");
- factory.inform(new LinesMockSolrResourceLoader(lines));
- Tokenizer tokenizer = new WhitespaceTokenizer(DEFAULT_VERSION,
- new StringReader(StrUtils.join(Arrays.asList(test), ' ')));
- TokenStream stream = factory.create(tokenizer);
- assertTokenStreamContents(stream, gold);
- }
-
class LinesMockSolrResourceLoader implements ResourceLoader {
List<String> lines;