You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2012/02/20 16:59:53 UTC
svn commit: r1291329 - in /lucene/dev/branches/branch_3x: ./ solr/
solr/core/ solr/core/src/java/org/apache/solr/spelling/
solr/core/src/test-files/solr/conf/
solr/core/src/test/org/apache/solr/spelling/
solr/core/src/test/org/apache/solr/spelling/suggest/
Author: rmuir
Date: Mon Feb 20 15:59:53 2012
New Revision: 1291329
URL: http://svn.apache.org/viewvc?rev=1291329&view=rev
Log:
SOLR-3143: add SuggestQueryConverter for autosuggesters
Added:
lucene/dev/branches/branch_3x/solr/core/src/java/org/apache/solr/spelling/SuggestQueryConverter.java
- copied unchanged from r1291322, lucene/dev/trunk/solr/core/src/java/org/apache/solr/spelling/SuggestQueryConverter.java
lucene/dev/branches/branch_3x/solr/core/src/test-files/solr/conf/phrasesuggest.txt
- copied unchanged from r1291322, lucene/dev/trunk/solr/core/src/test-files/solr/conf/phrasesuggest.txt
lucene/dev/branches/branch_3x/solr/core/src/test-files/solr/conf/schema-phrasesuggest.xml
- copied unchanged from r1291322, lucene/dev/trunk/solr/core/src/test-files/solr/conf/schema-phrasesuggest.xml
lucene/dev/branches/branch_3x/solr/core/src/test-files/solr/conf/solrconfig-phrasesuggest.xml
- copied unchanged from r1291322, lucene/dev/trunk/solr/core/src/test-files/solr/conf/solrconfig-phrasesuggest.xml
lucene/dev/branches/branch_3x/solr/core/src/test/org/apache/solr/spelling/TestSuggestSpellingConverter.java
- copied, changed from r1291322, lucene/dev/trunk/solr/core/src/test/org/apache/solr/spelling/TestSuggestSpellingConverter.java
lucene/dev/branches/branch_3x/solr/core/src/test/org/apache/solr/spelling/suggest/TestPhraseSuggestions.java
- copied unchanged from r1291322, lucene/dev/trunk/solr/core/src/test/org/apache/solr/spelling/suggest/TestPhraseSuggestions.java
Modified:
lucene/dev/branches/branch_3x/ (props changed)
lucene/dev/branches/branch_3x/solr/ (props changed)
lucene/dev/branches/branch_3x/solr/CHANGES.txt
lucene/dev/branches/branch_3x/solr/core/ (props changed)
lucene/dev/branches/branch_3x/solr/core/src/java/org/apache/solr/spelling/SpellingQueryConverter.java
Modified: lucene/dev/branches/branch_3x/solr/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/solr/CHANGES.txt?rev=1291329&r1=1291328&r2=1291329&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/solr/CHANGES.txt (original)
+++ lucene/dev/branches/branch_3x/solr/CHANGES.txt Mon Feb 20 15:59:53 2012
@@ -94,6 +94,9 @@ New Features
* LUCENE-3714: Add WFSTLookupFactory, a suggester that uses a weighted FST
for more fine-grained suggestions. (Mike McCandless, Dawid Weiss, Robert Muir)
+* SOLR-3143: Add SuggestQueryConverter, a QueryConverter intended for
+ auto-suggesters. (Robert Muir)
+
Optimizations
----------------------
* SOLR-1931: Speedup for LukeRequestHandler and admin/schema browser. New parameter
Modified: lucene/dev/branches/branch_3x/solr/core/src/java/org/apache/solr/spelling/SpellingQueryConverter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/solr/core/src/java/org/apache/solr/spelling/SpellingQueryConverter.java?rev=1291329&r1=1291328&r2=1291329&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/solr/core/src/java/org/apache/solr/spelling/SpellingQueryConverter.java (original)
+++ lucene/dev/branches/branch_3x/solr/core/src/java/org/apache/solr/spelling/SpellingQueryConverter.java Mon Feb 20 15:59:53 2012
@@ -18,6 +18,7 @@
package org.apache.solr.spelling;
import java.io.IOException;
+import java.io.Reader;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.Collection;
@@ -100,39 +101,42 @@ public class SpellingQueryConverter exte
Collection<Token> result = new ArrayList<Token>();
//TODO: Extract the words using a simple regex, but not query stuff, and then analyze them to produce the token stream
Matcher matcher = QUERY_REGEX.matcher(original);
- TokenStream stream;
while (matcher.find()) {
String word = matcher.group(0);
if (word.equals("AND") == false && word.equals("OR") == false) {
try {
- stream = analyzer.reusableTokenStream("", new StringReader(word));
- // TODO: support custom attributes
- CharTermAttribute termAtt = stream.addAttribute(CharTermAttribute.class);
- FlagsAttribute flagsAtt = stream.addAttribute(FlagsAttribute.class);
- TypeAttribute typeAtt = stream.addAttribute(TypeAttribute.class);
- PayloadAttribute payloadAtt = stream.addAttribute(PayloadAttribute.class);
- PositionIncrementAttribute posIncAtt = stream.addAttribute(PositionIncrementAttribute.class);
- OffsetAttribute offsetAtt = stream.addAttribute(OffsetAttribute.class);
- stream.reset();
- while (stream.incrementToken()) {
- Token token = new Token();
- token.copyBuffer(termAtt.buffer(), 0, termAtt.length());
- token.setStartOffset(matcher.start() + offsetAtt.startOffset());
- token.setEndOffset(matcher.start() + offsetAtt.endOffset());
- token.setFlags(flagsAtt.getFlags());
- token.setType(typeAtt.type());
- token.setPayload(payloadAtt.getPayload());
- token.setPositionIncrement(posIncAtt.getPositionIncrement());
- result.add(token);
- }
- stream.end();
- stream.close();
+ analyze(result, new StringReader(word), matcher.start());
} catch (IOException e) {
+ // TODO: shouldn't we log something?
}
}
}
return result;
}
-
+
+ protected void analyze(Collection<Token> result, Reader text, int offset) throws IOException { // Runs text through the analyzer field and appends one Token per emitted term to result; offset is added to every token's start/end offset.
+ TokenStream stream = analyzer.reusableTokenStream("", text); // the field name handed to the analyzer is the empty string
+ // TODO: support custom attributes (only the six attributes requested below are copied into each Token)
+ CharTermAttribute termAtt = stream.addAttribute(CharTermAttribute.class);
+ FlagsAttribute flagsAtt = stream.addAttribute(FlagsAttribute.class);
+ TypeAttribute typeAtt = stream.addAttribute(TypeAttribute.class);
+ PayloadAttribute payloadAtt = stream.addAttribute(PayloadAttribute.class);
+ PositionIncrementAttribute posIncAtt = stream.addAttribute(PositionIncrementAttribute.class);
+ OffsetAttribute offsetAtt = stream.addAttribute(OffsetAttribute.class);
+ stream.reset(); // called once, before the first incrementToken()
+ while (stream.incrementToken()) {
+ Token token = new Token(); // a new Token instance is created for every term the stream emits
+ token.copyBuffer(termAtt.buffer(), 0, termAtt.length()); // copy only the first termAtt.length() chars of the attribute's buffer
+ token.setStartOffset(offset + offsetAtt.startOffset()); // offsets reported by the stream are shifted by the caller-supplied offset
+ token.setEndOffset(offset + offsetAtt.endOffset());
+ token.setFlags(flagsAtt.getFlags());
+ token.setType(typeAtt.type());
+ token.setPayload(payloadAtt.getPayload());
+ token.setPositionIncrement(posIncAtt.getPositionIncrement());
+ result.add(token);
+ }
+ stream.end(); // NOTE(review): end() and close() are not in a finally block, so they are skipped if an IOException propagates from the calls above
+ stream.close();
+ }
}
Copied: lucene/dev/branches/branch_3x/solr/core/src/test/org/apache/solr/spelling/TestSuggestSpellingConverter.java (from r1291322, lucene/dev/trunk/solr/core/src/test/org/apache/solr/spelling/TestSuggestSpellingConverter.java)
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/solr/core/src/test/org/apache/solr/spelling/TestSuggestSpellingConverter.java?p2=lucene/dev/branches/branch_3x/solr/core/src/test/org/apache/solr/spelling/TestSuggestSpellingConverter.java&p1=lucene/dev/trunk/solr/core/src/test/org/apache/solr/spelling/TestSuggestSpellingConverter.java&r1=1291322&r2=1291329&rev=1291329&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/test/org/apache/solr/spelling/TestSuggestSpellingConverter.java (original)
+++ lucene/dev/branches/branch_3x/solr/core/src/test/org/apache/solr/spelling/TestSuggestSpellingConverter.java Mon Feb 20 15:59:53 2012
@@ -22,18 +22,18 @@ import java.io.Reader;
import java.util.Collection;
import java.util.regex.Pattern;
-import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.CannedTokenStream;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.analysis.MockTokenizer;
+import org.apache.lucene.analysis.ReusableAnalyzerBase;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
-import org.apache.lucene.analysis.core.KeywordTokenizer;
-import org.apache.lucene.analysis.core.LowerCaseFilter;
-import org.apache.lucene.analysis.miscellaneous.TrimFilter;
-import org.apache.lucene.analysis.pattern.PatternReplaceFilter;
+import org.apache.lucene.analysis.KeywordTokenizer;
+import org.apache.lucene.analysis.LowerCaseFilter;
+import org.apache.solr.analysis.TrimFilter;
+import org.apache.solr.analysis.PatternReplaceFilter;
public class TestSuggestSpellingConverter extends BaseTokenStreamTestCase {
SuggestQueryConverter converter = new SuggestQueryConverter();
@@ -46,7 +46,7 @@ public class TestSuggestSpellingConverte
public void testComplicated() throws Exception {
// lowercases, removes field names, other syntax, collapses runs of whitespace, etc.
- converter.setAnalyzer(new Analyzer() {
+ converter.setAnalyzer(new ReusableAnalyzerBase() {
@Override
protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
Tokenizer tokenizer = new KeywordTokenizer(reader);