You are viewing a plain text version of this content. The canonical link for it is here.
Posted to solr-commits@lucene.apache.org by ho...@apache.org on 2006/07/06 07:39:05 UTC
svn commit: r419443 - in /incubator/solr/trunk: ./ example/solr/conf/
src/java/org/apache/solr/analysis/ src/test/org/apache/solr/
src/test/org/apache/solr/analysis/ src/test/test-files/solr/conf/
Author: hossman
Date: Wed Jul 5 22:39:04 2006
New Revision: 419443
URL: http://svn.apache.org/viewvc?rev=419443&view=rev
Log:
SOLR-11 - BufferedTokenStream and RemoveDuplicatesTokenFilter from SOLR-11-BufferedTokenStream-RemoveDuplicatesTokenFilter.patch plus some additional tests and example config changes
Added:
incubator/solr/trunk/src/java/org/apache/solr/analysis/BufferedTokenStream.java
incubator/solr/trunk/src/java/org/apache/solr/analysis/RemoveDuplicatesTokenFilter.java
incubator/solr/trunk/src/java/org/apache/solr/analysis/RemoveDuplicatesTokenFilterFactory.java
incubator/solr/trunk/src/test/org/apache/solr/analysis/TestBufferedTokenStream.java
incubator/solr/trunk/src/test/org/apache/solr/analysis/TestRemoveDuplicatesTokenFilter.java
Modified:
incubator/solr/trunk/CHANGES.txt
incubator/solr/trunk/example/solr/conf/schema.xml
incubator/solr/trunk/example/solr/conf/synonyms.txt
incubator/solr/trunk/src/test/org/apache/solr/BasicFunctionalityTest.java
incubator/solr/trunk/src/test/test-files/solr/conf/schema.xml
incubator/solr/trunk/src/test/test-files/solr/conf/synonyms.txt
Modified: incubator/solr/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/incubator/solr/trunk/CHANGES.txt?rev=419443&r1=419442&r2=419443&view=diff
==============================================================================
--- incubator/solr/trunk/CHANGES.txt (original)
+++ incubator/solr/trunk/CHANGES.txt Wed Jul 5 22:39:04 2006
@@ -20,7 +20,13 @@
11. new DocSet.andNot(), DocSet.andNotSize() (yonik)
12. Ability to store term vectors. (Note: standard request handler does
not currently do anything with term vectors) (Mike Klaas via yonik, SOLR-23)
-
+13. New abstract BufferedTokenStream for people who want to write
+ Tokenizers or TokenFilters that require arbitrary buffering of the
+ stream. (SOLR-11 / yonik, hossman)
+14. New RemoveDuplicatesTokenFilter - useful in situations where
+ synonyms, stemming, or word-delimiter-ing produce identical tokens at
+ the same position. (SOLR-11 / yonik, hossman)
+
Changes in runtime behavior
1. classes reorganized into different packages, package names changed to Apache
2. force read of document stored fields in QuerySenderListener
Modified: incubator/solr/trunk/example/solr/conf/schema.xml
URL: http://svn.apache.org/viewvc/incubator/solr/trunk/example/solr/conf/schema.xml?rev=419443&r1=419442&r2=419443&view=diff
==============================================================================
--- incubator/solr/trunk/example/solr/conf/schema.xml (original)
+++ incubator/solr/trunk/example/solr/conf/schema.xml Wed Jul 5 22:39:04 2006
@@ -85,6 +85,7 @@
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.StopFilterFactory"/>
<filter class="solr.EnglishPorterFilterFactory"/>
+ <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
</analyzer>
</fieldtype>
<!-- One could also specify an existing Analyzer implementation in Java
@@ -104,7 +105,10 @@
<!-- A text field that uses WordDelimiterFilter to enable splitting and matching of
words on case-change, alpha numeric boundaries, and non-alphanumeric chars
so that a query of "wifi" or "wi fi" could match a document containing "Wi-Fi".
- Synonyms and stopwords are customized by external files, and stemming is enabled -->
+ Synonyms and stopwords are customized by external files, and stemming is enabled.
+ Duplicate tokens at the same position (which may result from Stemmed Synonyms or
+ WordDelim parts) are removed.
+ -->
<fieldtype name="text" class="solr.TextField" positionIncrementGap="100">
<analyzer type="index">
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
@@ -115,6 +119,7 @@
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.EnglishPorterFilterFactory" protected="protwords.txt"/>
+ <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
</analyzer>
<analyzer type="query">
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
@@ -123,6 +128,7 @@
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0"/>
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.EnglishPorterFilterFactory" protected="protwords.txt"/>
+ <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
</analyzer>
</fieldtype>
@@ -137,6 +143,7 @@
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.EnglishPorterFilterFactory" protected="protwords.txt"/>
+ <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
</analyzer>
</fieldtype>
Modified: incubator/solr/trunk/example/solr/conf/synonyms.txt
URL: http://svn.apache.org/viewvc/incubator/solr/trunk/example/solr/conf/synonyms.txt?rev=419443&r1=419442&r2=419443&view=diff
==============================================================================
--- incubator/solr/trunk/example/solr/conf/synonyms.txt (original)
+++ incubator/solr/trunk/example/solr/conf/synonyms.txt Wed Jul 5 22:39:04 2006
@@ -16,3 +16,5 @@
#spelling correction
pixima => pixma
+Television, Televisions, TV, TVs
+
Added: incubator/solr/trunk/src/java/org/apache/solr/analysis/BufferedTokenStream.java
URL: http://svn.apache.org/viewvc/incubator/solr/trunk/src/java/org/apache/solr/analysis/BufferedTokenStream.java?rev=419443&view=auto
==============================================================================
--- incubator/solr/trunk/src/java/org/apache/solr/analysis/BufferedTokenStream.java (added)
+++ incubator/solr/trunk/src/java/org/apache/solr/analysis/BufferedTokenStream.java Wed Jul 5 22:39:04 2006
@@ -0,0 +1,144 @@
+/**
+ * Copyright 2006 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.solr.analysis;
+
+import org.apache.lucene.analysis.Token;
+import org.apache.lucene.analysis.TokenStream;
+
+import java.io.IOException;
+import java.util.LinkedList;
+import java.util.List;
+
+/**
+ * Handles input and output buffering of TokenStream
+ *
+ * <pre>
+ * // Example of a class implementing the rule "A" "B" => "Q" "B"
+ * class MyTokenStream extends BufferedTokenStream {
+ *   public MyTokenStream(TokenStream input) {super(input);}
+ *   protected Token process(Token t) throws IOException {
+ *     if ("A".equals(t.termText())) {
+ *       Token t2 = read();
+ *       if (t2!=null && "B".equals(t2.termText())) t.setTermText("Q");
+ *       if (t2!=null) pushBack(t2);
+ *     }
+ *     return t;
+ *   }
+ * }
+ *
+ * // Example of a class implementing "A" "B" => "A" "A" "B"
+ * class MyTokenStream extends BufferedTokenStream {
+ *   public MyTokenStream(TokenStream input) {super(input);}
+ *   protected Token process(Token t) throws IOException {
+ *     if ("A".equals(t.termText())
+ *         && peek(1)!=null && "B".equals(peek(1).termText()))
+ *       write(t);
+ *     return t;
+ *   }
+ * }
+ * </pre>
+ *
+ * @author yonik
+ * @version $Id$
+ */
+public abstract class BufferedTokenStream extends TokenStream {
+  // in the future, might be faster if we implemented as an array based CircularQueue
+  private final LinkedList<Token> inQueue = new LinkedList<Token>();
+  private final LinkedList<Token> outQueue = new LinkedList<Token>();
+  private final TokenStream input;
+
+  public BufferedTokenStream(TokenStream input) {
+    this.input = input;
+  }
+
+  /**
+   * Process a token.  Subclasses may read more tokens from the input stream,
+   * write more tokens to the output stream, or simply return the next token
+   * to be output.  Subclasses may return null if the token is to be dropped.
+   * If a subclass writes tokens to the output stream and returns a
+   * non-null Token, the returned Token is considered to be at the head of
+   * the token output stream.
+   */
+  protected abstract Token process(Token t) throws IOException;
+
+  public final Token next() throws IOException {
+    while (true) {
+      if (!outQueue.isEmpty()) return outQueue.removeFirst();
+      Token t = read();
+      if (null == t) return null;
+      Token out = process(t);
+      if (null != out) return out;
+      // loop back to top in case process() put something on the output queue
+    }
+  }
+
+  /**
+   * Read a token from the buffered input stream.
+   * @return null at EOS
+   */
+  protected Token read() throws IOException {
+    if (!inQueue.isEmpty()) return inQueue.removeFirst();
+    return input.next();
+  }
+
+  /**
+   * Push a token back into the buffered input stream, such that it will
+   * be returned by a future call to <code>read()</code>
+   */
+  protected void pushBack(Token t) {
+    inQueue.addFirst(t);
+  }
+
+  /**
+   * Peek n tokens ahead in the buffered input stream, without modifying
+   * the stream.
+   * @param n Number of tokens into the input stream to peek, 1 based ...
+   *          0 is invalid
+   * @return a Token which exists in the input stream, or null if the
+   *         stream ends before n tokens; modifications made to a returned
+   *         Token will be "real" if/when it is <code>read()</code>.
+   */
+  protected Token peek(int n) throws IOException {
+    int fillCount = n-inQueue.size();
+    for (int i=0; i < fillCount; i++) {
+      Token t = input.next();
+      if (null==t) return null;   // EOS: tokens buffered so far stay queued
+      inQueue.addLast(t);
+    }
+    return inQueue.get(n-1);
+  }
+
+  /** Write a token to the buffered output stream */
+  protected void write(Token t) {
+    outQueue.addLast(t);
+  }
+
+  /**
+   * Provides direct Iterator access to the buffered output stream.
+   * Modifying any token in this Iterator will affect the resulting stream.
+   */
+  protected Iterable<Token> output() {
+    return outQueue;
+  }
+
+  /**
+   * Closes the wrapped input stream, so closing this stream releases it too.
+   */
+  public void close() throws IOException {
+    input.close();
+  }
+}
Added: incubator/solr/trunk/src/java/org/apache/solr/analysis/RemoveDuplicatesTokenFilter.java
URL: http://svn.apache.org/viewvc/incubator/solr/trunk/src/java/org/apache/solr/analysis/RemoveDuplicatesTokenFilter.java?rev=419443&view=auto
==============================================================================
--- incubator/solr/trunk/src/java/org/apache/solr/analysis/RemoveDuplicatesTokenFilter.java (added)
+++ incubator/solr/trunk/src/java/org/apache/solr/analysis/RemoveDuplicatesTokenFilter.java Wed Jul 5 22:39:04 2006
@@ -0,0 +1,53 @@
+/**
+ * Copyright 2006 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.solr.analysis;
+
+import org.apache.lucene.analysis.Token;
+import org.apache.lucene.analysis.TokenStream;
+
+import java.io.IOException;
+import java.util.LinkedList;
+import java.util.List;
+
+/**
+ * Token filter that drops any Token whose term text repeats that of an
+ * earlier Token stacked on the same position (position increment 0).
+ */
+public class RemoveDuplicatesTokenFilter extends BufferedTokenStream {
+  public RemoveDuplicatesTokenFilter(TokenStream input) {super(input);}
+
+  /** Emits t plus each distinct Token stacked on t's position. */
+  protected Token process(Token t) throws IOException {
+    Token head = t;
+    Token stacked = read();
+    for (; stacked != null && stacked.getPositionIncrement() == 0; stacked = read()) {
+      if (head != null) {        // a stack exists, so head leads the group
+        write(head);
+        head = null;
+      }
+      if (!isQueued(stacked.termText())) write(stacked);
+    }
+    if (stacked != null) pushBack(stacked);  // starts the next position
+    return head;                 // null when the group was queued instead
+  }
+
+  /** True when a Token with this term text is already on the output queue. */
+  private boolean isQueued(String term) {
+    for (Token queued : output()) if (queued.termText().equals(term)) return true;
+    return false;
+  }
+}
Added: incubator/solr/trunk/src/java/org/apache/solr/analysis/RemoveDuplicatesTokenFilterFactory.java
URL: http://svn.apache.org/viewvc/incubator/solr/trunk/src/java/org/apache/solr/analysis/RemoveDuplicatesTokenFilterFactory.java?rev=419443&view=auto
==============================================================================
--- incubator/solr/trunk/src/java/org/apache/solr/analysis/RemoveDuplicatesTokenFilterFactory.java (added)
+++ incubator/solr/trunk/src/java/org/apache/solr/analysis/RemoveDuplicatesTokenFilterFactory.java Wed Jul 5 22:39:04 2006
@@ -0,0 +1,28 @@
+/**
+ * Copyright 2006 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.solr.analysis;
+
+import org.apache.lucene.analysis.TokenStream;
+
+/** Factory that wraps a TokenStream in a RemoveDuplicatesTokenFilter.
+ * @version $Id:$ */
+public class RemoveDuplicatesTokenFilterFactory extends BaseTokenFilterFactory {
+  public TokenStream create(TokenStream input) {
+    final TokenStream deduplicated = new RemoveDuplicatesTokenFilter(input);
+    return deduplicated;
+  }
+}
Modified: incubator/solr/trunk/src/test/org/apache/solr/BasicFunctionalityTest.java
URL: http://svn.apache.org/viewvc/incubator/solr/trunk/src/test/org/apache/solr/BasicFunctionalityTest.java?rev=419443&r1=419442&r2=419443&view=diff
==============================================================================
--- incubator/solr/trunk/src/test/org/apache/solr/BasicFunctionalityTest.java (original)
+++ incubator/solr/trunk/src/test/org/apache/solr/BasicFunctionalityTest.java Wed Jul 5 22:39:04 2006
@@ -17,6 +17,9 @@
package org.apache.solr;
import org.apache.lucene.document.Field;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.BooleanQuery;
+import org.apache.solr.search.*;
import org.apache.solr.request.*;
import org.apache.solr.util.*;
import org.apache.solr.schema.*;
@@ -200,6 +203,16 @@
);
}
+  /** Parsing "TV" against the dedup field must give a two-clause BooleanQuery.
+   *  @see TestRemoveDuplicatesTokenFilter */
+  public void testRemoveDuplicatesTokenFilter() {
+    final Query parsed = QueryParsing.parseQuery("TV", "dedup", h.getCore().getSchema());
+    assertTrue("not boolean?", parsed instanceof BooleanQuery);
+    final BooleanQuery bool = (BooleanQuery) parsed;
+    assertEquals("unexpected number of stemmed synonym tokens", 2, bool.getClauses().length);
+  }
+
+
public void testTermVectorFields() {
IndexSchema ischema = new IndexSchema(getSchemaFile());
Added: incubator/solr/trunk/src/test/org/apache/solr/analysis/TestBufferedTokenStream.java
URL: http://svn.apache.org/viewvc/incubator/solr/trunk/src/test/org/apache/solr/analysis/TestBufferedTokenStream.java?rev=419443&view=auto
==============================================================================
--- incubator/solr/trunk/src/test/org/apache/solr/analysis/TestBufferedTokenStream.java (added)
+++ incubator/solr/trunk/src/test/org/apache/solr/analysis/TestBufferedTokenStream.java Wed Jul 5 22:39:04 2006
@@ -0,0 +1,87 @@
+/**
+ * Copyright 2006 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.solr.analysis;
+
+import junit.framework.TestCase;
+import org.apache.lucene.analysis.Token;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.WhitespaceTokenizer;
+
+import java.io.IOException;
+import java.io.StringReader;
+
+/**
+ * Test that BufferedTokenStream behaves as advertised in subclasses.
+ */
+public class TestBufferedTokenStream extends TestCase {
+
+  /** Example of a class implementing the rule "A" "B" => "Q" "B" */
+  public static class AB_Q_Stream extends BufferedTokenStream {
+    public AB_Q_Stream(TokenStream input) {super(input);}
+    protected Token process(Token t) throws IOException {
+      if ("A".equals(t.termText())) {
+        Token t2 = read();
+        if (t2!=null && "B".equals(t2.termText())) t.setTermText("Q");
+        if (t2!=null) pushBack(t2);
+      }
+      return t;
+    }
+  }
+
+  /** Example of a class implementing "A" "B" => "A" "A" "B" */
+  public static class AB_AAB_Stream extends BufferedTokenStream {
+    public AB_AAB_Stream(TokenStream input) {super(input);}
+    protected Token process(Token t) throws IOException {
+      // peek(1) returns null at end of stream, so guard before dereferencing
+      if ("A".equals(t.termText()) && peek(1)!=null && "B".equals(peek(1).termText()))
+        write(t);
+      return t;
+    }
+  }
+
+  /** Drains the stream, joins the term texts with single spaces, then closes it. */
+  public static String tsToString(TokenStream in) throws IOException {
+    StringBuffer out = new StringBuffer();
+    Token t = in.next();
+    if (null != t) out.append(t.termText());
+    for (t = in.next(); null != t; t = in.next()) {
+      out.append(" ").append(t.termText());
+    }
+    in.close();
+    return out.toString();
+  }
+
+  public void testABQ() throws Exception {
+    final String input = "How now A B brown A cow B like A B thing?";
+    final String expected = "How now Q B brown A cow B like Q B thing?";
+    TokenStream ts = new AB_Q_Stream(new WhitespaceTokenizer(new StringReader(input)));
+    assertEquals(expected, tsToString(ts));
+  }
+
+  public void testABAAB() throws Exception {
+    final String input = "How now A B brown A cow B like A B thing?";
+    final String expected = "How now A A B brown A cow B like A A B thing?";
+    TokenStream ts = new AB_AAB_Stream(new WhitespaceTokenizer(new StringReader(input)));
+    assertEquals(expected, tsToString(ts));
+  }
+
+  /** A trailing "A" has nothing after it to peek at and must pass through as-is. */
+  public void testABAABTrailingA() throws Exception {
+    TokenStream ts = new AB_AAB_Stream(new WhitespaceTokenizer(new StringReader("A B A")));
+    assertEquals("A A B A", tsToString(ts));
+  }
+}
Added: incubator/solr/trunk/src/test/org/apache/solr/analysis/TestRemoveDuplicatesTokenFilter.java
URL: http://svn.apache.org/viewvc/incubator/solr/trunk/src/test/org/apache/solr/analysis/TestRemoveDuplicatesTokenFilter.java?rev=419443&view=auto
==============================================================================
--- incubator/solr/trunk/src/test/org/apache/solr/analysis/TestRemoveDuplicatesTokenFilter.java (added)
+++ incubator/solr/trunk/src/test/org/apache/solr/analysis/TestRemoveDuplicatesTokenFilter.java Wed Jul 5 22:39:04 2006
@@ -0,0 +1,107 @@
+/**
+ * Copyright 2006 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.solr.analysis;
+
+import junit.framework.TestCase;
+import org.apache.lucene.analysis.Token;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.WhitespaceTokenizer;
+
+import java.io.IOException;
+import java.util.Iterator;
+import java.util.Arrays;
+
+public class TestRemoveDuplicatesTokenFilter extends TestCase {
+
+  /** Builds a Token with the given position increment, text and offsets. */
+  public static Token tok(int pos, String t, int start, int end) {
+    final Token made = new Token(t, start, end);
+    made.setPositionIncrement(pos);
+    return made;
+  }
+
+  /** Builds a Token with the given position increment and text, offsets 0 to 0. */
+  public static Token tok(int pos, String t) {
+    return tok(pos, t, 0, 0);
+  }
+
+  /**
+   * Feeds the tokens through a RemoveDuplicatesTokenFilter and asserts that
+   * the stringified result equals <code>expected</code>.
+   */
+  public void testDups(final String expected, final Token... tokens)
+    throws Exception {
+
+    final Iterator<Token> source = Arrays.asList(tokens).iterator();
+    final TokenStream canned = new TokenStream() {
+      public Token next() {
+        if (!source.hasNext()) return null;
+        return source.next();
+      }
+    };
+
+    final String actual =
+      TestBufferedTokenStream.tsToString(new RemoveDuplicatesTokenFilter(canned));
+    assertEquals(expected + " != " + actual, expected, actual);
+  }
+
+  /** Same text at different positions, or different text stacked, is all kept. */
+  public void testNoDups() throws Exception {
+    testDups("A B B C D E",
+             tok(1,"A", 0,  4),
+             tok(1,"B", 5, 10),
+             tok(1,"B",11, 15),
+             tok(1,"C",16, 20),
+             tok(0,"D",16, 20),
+             tok(1,"E",21, 25));
+  }
+
+  /** A second "B" stacked on the first "B" is removed. */
+  public void testSimpleDups() throws Exception {
+    testDups("A B C D E",
+             tok(1,"A", 0,  4),
+             tok(1,"B", 5, 10),
+             tok(0,"B",11, 15),
+             tok(1,"C",16, 20),
+             tok(0,"D",16, 20),
+             tok(1,"E",21, 25));
+  }
+
+  /** Stacked repeats are removed, adjacent or not (J K J yields J K). */
+  public void testComplexDups() throws Exception {
+    testDups("A B C D E F G H I J K",
+             tok(1,"A"),
+             tok(1,"B"),
+             tok(0,"B"),
+             tok(1,"C"),
+             tok(1,"D"),
+             tok(0,"D"),
+             tok(0,"D"),
+             tok(1,"E"),
+             tok(1,"F"),
+             tok(0,"F"),
+             tok(1,"G"),
+             tok(0,"H"),
+             tok(0,"H"),
+             tok(1,"I"),
+             tok(1,"J"),
+             tok(0,"K"),
+             tok(0,"J"));
+  }
+
+
+}
Modified: incubator/solr/trunk/src/test/test-files/solr/conf/schema.xml
URL: http://svn.apache.org/viewvc/incubator/solr/trunk/src/test/test-files/solr/conf/schema.xml?rev=419443&r1=419442&r2=419443&view=diff
==============================================================================
--- incubator/solr/trunk/src/test/test-files/solr/conf/schema.xml (original)
+++ incubator/solr/trunk/src/test/test-files/solr/conf/schema.xml Wed Jul 5 22:39:04 2006
@@ -226,6 +226,19 @@
<filter name="syn" class="solr.SynonymFilterFactory" synonyms="synonyms.txt"/>
</analyzer>
</fieldtype>
+
+ <!-- Demonstrates How RemoveDuplicatesTokenFilter makes stemmed
+ synonyms "better"
+ -->
+ <fieldtype name="dedup" class="solr.TextField">
+ <analyzer>
+ <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <filter class="solr.SynonymFilterFactory"
+ synonyms="synonyms.txt" expand="true" />
+ <filter class="solr.EnglishPorterFilterFactory"/>
+ <filter class="solr.RemoveDuplicatesTokenFilterFactory" />
+ </analyzer>
+ </fieldtype>
<fieldtype name="unstored" class="solr.StrField" indexed="true" stored="false"/>
@@ -296,6 +309,7 @@
<field name="stopfilt" type="stopfilt" indexed="true" stored="true"/>
<field name="custstopfilt" type="custstopfilt" indexed="true" stored="true"/>
<field name="lengthfilt" type="lengthfilt" indexed="true" stored="true"/>
+ <field name="dedup" type="dedup" indexed="true" stored="true"/>
<field name="subword" type="subword" indexed="true" stored="true"/>
Modified: incubator/solr/trunk/src/test/test-files/solr/conf/synonyms.txt
URL: http://svn.apache.org/viewvc/incubator/solr/trunk/src/test/test-files/solr/conf/synonyms.txt?rev=419443&r1=419442&r2=419443&view=diff
==============================================================================
--- incubator/solr/trunk/src/test/test-files/solr/conf/synonyms.txt (original)
+++ incubator/solr/trunk/src/test/test-files/solr/conf/synonyms.txt Wed Jul 5 22:39:04 2006
@@ -3,4 +3,6 @@
c => c1,c2
a\=>a => b\=>b
a\,a => b\,b
-foo,bar,baz
\ No newline at end of file
+foo,bar,baz
+
+Television,TV,Televisions