You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by jd...@apache.org on 2012/06/04 20:07:05 UTC
svn commit: r1346069 - in /lucene/dev/branches/branch_4x/solr: ./
core/src/java/org/apache/solr/handler/component/
core/src/java/org/apache/solr/spelling/ core/src/test-files/solr/conf/
core/src/test/org/apache/solr/handler/component/ core/src/test/org...
Author: jdyer
Date: Mon Jun 4 18:07:04 2012
New Revision: 1346069
URL: http://svn.apache.org/viewvc?rev=1346069&view=rev
Log:
SOLR-2993: o.a.l.Search.Suggest.WordBreakSpellChecker Solr integration
Added:
lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/spelling/ConjunctionSolrSpellChecker.java
- copied unchanged from r1346058, lucene/dev/trunk/solr/core/src/java/org/apache/solr/spelling/ConjunctionSolrSpellChecker.java
lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/spelling/ResultEntry.java
- copied unchanged from r1346058, lucene/dev/trunk/solr/core/src/java/org/apache/solr/spelling/ResultEntry.java
lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/spelling/WordBreakSolrSpellChecker.java
- copied unchanged from r1346058, lucene/dev/trunk/solr/core/src/java/org/apache/solr/spelling/WordBreakSolrSpellChecker.java
lucene/dev/branches/branch_4x/solr/core/src/test/org/apache/solr/spelling/WordBreakSolrSpellCheckerTest.java
- copied unchanged from r1346058, lucene/dev/trunk/solr/core/src/test/org/apache/solr/spelling/WordBreakSolrSpellCheckerTest.java
Removed:
lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/spelling/RankedSpellPossibility.java
Modified:
lucene/dev/branches/branch_4x/solr/CHANGES.txt (contents, props changed)
lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/handler/component/SpellCheckComponent.java
lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/spelling/PossibilityIterator.java
lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/spelling/QueryConverter.java
lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/spelling/SolrSpellChecker.java
lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/spelling/SpellCheckCollator.java
lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/spelling/SpellingQueryConverter.java
lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/spelling/SuggestQueryConverter.java
lucene/dev/branches/branch_4x/solr/core/src/test-files/solr/conf/solrconfig-spellcheckcomponent.xml
lucene/dev/branches/branch_4x/solr/core/src/test-files/solr/conf/solrconfig.xml
lucene/dev/branches/branch_4x/solr/core/src/test/org/apache/solr/handler/component/DistributedSpellCheckComponentTest.java
lucene/dev/branches/branch_4x/solr/core/src/test/org/apache/solr/spelling/SpellPossibilityIteratorTest.java
lucene/dev/branches/branch_4x/solr/core/src/test/org/apache/solr/spelling/SpellingQueryConverterTest.java
lucene/dev/branches/branch_4x/solr/example/solr/conf/solrconfig.xml
Modified: lucene/dev/branches/branch_4x/solr/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/solr/CHANGES.txt?rev=1346069&r1=1346068&r2=1346069&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/solr/CHANGES.txt (original)
+++ lucene/dev/branches/branch_4x/solr/CHANGES.txt Mon Jun 4 18:07:04 2012
@@ -333,6 +333,12 @@ New Features
UUIDUpdateProcessorFactory
DefaultValueUpdateProcessorFactory
(hossman)
+
+* SOLR-2993: Add WordBreakSolrSpellChecker to offer suggestions by combining adjacent
+ query terms and/or breaking terms into multiple words. This spellchecker can be
+ configured with a traditional checker (e.g. DirectSolrSpellChecker). The results
+ are combined and collations can contain a mix of corrections from both spellcheckers.
+ (James Dyer)
Optimizations
----------------------
Modified: lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/handler/component/SpellCheckComponent.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/handler/component/SpellCheckComponent.java?rev=1346069&r1=1346068&r2=1346069&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/handler/component/SpellCheckComponent.java (original)
+++ lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/handler/component/SpellCheckComponent.java Mon Jun 4 18:07:04 2012
@@ -124,6 +124,7 @@ public class SpellCheckComponent extends
String q = params.get(SPELLCHECK_Q);
SolrSpellChecker spellChecker = getSpellChecker(params);
Collection<Token> tokens = null;
+
if (q != null) {
//we have a spell check param, tokenize it with the query analyzer applicable for this spellchecker
tokens = getTokens(q, spellChecker.getQueryAnalyzer());
@@ -143,7 +144,10 @@ public class SpellCheckComponent extends
float accuracy = params.getFloat(SPELLCHECK_ACCURACY, Float.MIN_VALUE);
Integer alternativeTermCount = params.getInt(SpellingParams.SPELLCHECK_ALTERNATIVE_TERM_COUNT);
Integer maxResultsForSuggest = params.getInt(SpellingParams.SPELLCHECK_MAX_RESULTS_FOR_SUGGEST);
- SolrParams customParams = getCustomParams(getDictionaryName(params), params);
+ ModifiableSolrParams customParams = new ModifiableSolrParams();
+ for (String checkerName : getDictionaryNames(params)) {
+ customParams.add(getCustomParams(checkerName, params));
+ }
Integer hitsInteger = (Integer) rb.rsp.getToLog().get("hits");
long hits = 0;
@@ -173,7 +177,7 @@ public class SpellCheckComponent extends
NamedList suggestions = toNamedList(shardRequest, spellingResult, q,
extendedResults, collate, isCorrectlySpelled);
if (collate) {
- addCollationsToResponse(params, spellingResult, rb, q, suggestions);
+ addCollationsToResponse(params, spellingResult, rb, q, suggestions, spellChecker.isSuggestionsMayOverlap());
}
NamedList response = new SimpleOrderedMap();
response.add("suggestions", suggestions);
@@ -181,14 +185,14 @@ public class SpellCheckComponent extends
} else {
throw new SolrException(SolrException.ErrorCode.NOT_FOUND,
- "Specified dictionary does not exist: " + getDictionaryName(params));
+ "Specified dictionaries do not exist: " + getDictionaryNameAsSingleString(getDictionaryNames(params)));
}
}
}
@SuppressWarnings("unchecked")
protected void addCollationsToResponse(SolrParams params, SpellingResult spellingResult, ResponseBuilder rb, String q,
- NamedList response) {
+ NamedList response, boolean suggestionsMayOverlap) {
int maxCollations = params.getInt(SPELLCHECK_MAX_COLLATIONS, 1);
int maxCollationTries = params.getInt(SPELLCHECK_MAX_COLLATION_TRIES, 0);
int maxCollationEvaluations = params.getInt(SPELLCHECK_MAX_COLLATION_EVALUATIONS, 10000);
@@ -196,8 +200,8 @@ public class SpellCheckComponent extends
boolean shard = params.getBool(ShardParams.IS_SHARD, false);
SpellCheckCollator collator = new SpellCheckCollator();
- List<SpellCheckCollation> collations = collator.collate(spellingResult, q, rb, maxCollations, maxCollationTries, maxCollationEvaluations);
- //by sorting here we guarantee a non-distributed request returns all
+ List<SpellCheckCollation> collations = collator.collate(spellingResult, q, rb, maxCollations, maxCollationTries, maxCollationEvaluations, suggestionsMayOverlap);
+ //by sorting here we guarantee a non-distributed request returns all
//results in the same order as a distributed request would,
//even in cases when the internal rank is the same.
Collections.sort(collations);
@@ -459,13 +463,38 @@ public class SpellCheckComponent extends
}
protected SolrSpellChecker getSpellChecker(SolrParams params) {
- return spellCheckers.get(getDictionaryName(params));
+ String[] dictName = getDictionaryNames(params);
+ if (dictName.length == 1) {
+ return spellCheckers.get(dictName[0]);
+ } else {
+ String singleStr = getDictionaryNameAsSingleString(dictName);
+ SolrSpellChecker ssc = spellCheckers.get(singleStr);
+ if (ssc == null) {
+ ConjunctionSolrSpellChecker cssc = new ConjunctionSolrSpellChecker();
+ for (String dn : dictName) {
+ cssc.addChecker(spellCheckers.get(dn));
+ }
+ ssc = cssc;
+ }
+ return ssc;
+ }
+ }
+
+ private String getDictionaryNameAsSingleString(String[] dictName) {
+ StringBuilder sb = new StringBuilder();
+ for (String dn : dictName) {
+ if (sb.length() > 0) {
+ sb.append(" ");
+ }
+ sb.append(dn);
+ }
+ return sb.toString();
}
- private String getDictionaryName(SolrParams params) {
- String dictName = params.get(SPELLCHECK_DICT);
+ private String[] getDictionaryNames(SolrParams params) {
+ String[] dictName = params.getParams(SPELLCHECK_DICT);
if (dictName == null) {
- dictName = SolrSpellChecker.DEFAULT_DICTIONARY_NAME;
+ return new String[] {SolrSpellChecker.DEFAULT_DICTIONARY_NAME};
}
return dictName;
}
Modified: lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/spelling/PossibilityIterator.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/spelling/PossibilityIterator.java?rev=1346069&r1=1346068&r2=1346069&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/spelling/PossibilityIterator.java (original)
+++ lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/spelling/PossibilityIterator.java Mon Jun 4 18:07:04 2012
@@ -1,4 +1,5 @@
package org.apache.solr.spelling;
+
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
@@ -18,12 +19,16 @@ package org.apache.solr.spelling;
import java.util.ArrayList;
import java.util.Arrays;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.NoSuchElementException;
import java.util.PriorityQueue;
+import java.util.Set;
import org.apache.lucene.analysis.Token;
@@ -37,161 +42,388 @@ import org.apache.lucene.analysis.Token;
* </p>
*
*/
-public class PossibilityIterator implements Iterator<RankedSpellPossibility> {
- private List<List<SpellCheckCorrection>> possibilityList = new ArrayList<List<SpellCheckCorrection>>();
- private Iterator<RankedSpellPossibility> rankedPossibilityIterator = null;
- private int correctionIndex[];
- private boolean done = false;
-
- @SuppressWarnings("unused")
- private PossibilityIterator() {
- throw new AssertionError("You shan't go here.");
- }
-
- /**
- * <p>
- * We assume here that the passed-in inner LinkedHashMaps are already sorted
- * in order of "Best Possible Correction".
- * </p>
- *
- * @param suggestions
- */
- public PossibilityIterator(Map<Token, LinkedHashMap<String, Integer>> suggestions, int maximumRequiredSuggestions, int maxEvaluations) {
- for (Map.Entry<Token, LinkedHashMap<String, Integer>> entry : suggestions.entrySet()) {
- Token token = entry.getKey();
- if(entry.getValue().size()==0) {
- continue;
- }
- List<SpellCheckCorrection> possibleCorrections = new ArrayList<SpellCheckCorrection>();
- for (Map.Entry<String, Integer> entry1 : entry.getValue().entrySet()) {
- SpellCheckCorrection correction = new SpellCheckCorrection();
- correction.setOriginal(token);
- correction.setCorrection(entry1.getKey());
- correction.setNumberOfOccurences(entry1.getValue());
- possibleCorrections.add(correction);
- }
- possibilityList.add(possibleCorrections);
- }
-
- int wrapSize = possibilityList.size();
- if (wrapSize == 0) {
- done = true;
- } else {
- correctionIndex = new int[wrapSize];
- for (int i = 0; i < wrapSize; i++) {
- int suggestSize = possibilityList.get(i).size();
- if (suggestSize == 0) {
- done = true;
- break;
- }
- correctionIndex[i] = 0;
- }
- }
-
- long count = 0;
- PriorityQueue<RankedSpellPossibility> rankedPossibilities = new PriorityQueue<RankedSpellPossibility>();
- while (count < maxEvaluations && internalHasNext()) {
- RankedSpellPossibility rsp = internalNext();
- count++;
-
- if(rankedPossibilities.size() >= maximumRequiredSuggestions && rsp.getRank() >= rankedPossibilities.peek().getRank()) {
- continue;
- }
+public class PossibilityIterator implements
+ Iterator<PossibilityIterator.RankedSpellPossibility> {
+ private List<List<SpellCheckCorrection>> possibilityList = new ArrayList<List<SpellCheckCorrection>>();
+ private Iterator<RankedSpellPossibility> rankedPossibilityIterator = null;
+ private int correctionIndex[];
+ private boolean done = false;
+ private Iterator<List<SpellCheckCorrection>> nextOnes = null;
+ private int nextOnesRank = 0;
+ private int nextOnesIndex = 0;
+ private boolean suggestionsMayOverlap = false;
+
+ @SuppressWarnings("unused")
+ private PossibilityIterator() {
+ throw new AssertionError("You shan't go here.");
+ }
+
+ /**
+ * <p>
+ * We assume here that the passed-in inner LinkedHashMaps are already sorted
+ * in order of "Best Possible Correction".
+ * </p>
+ *
+ * @param suggestions
+ */
+ public PossibilityIterator(
+ Map<Token,LinkedHashMap<String,Integer>> suggestions,
+ int maximumRequiredSuggestions, int maxEvaluations, boolean overlap) {
+ this.suggestionsMayOverlap = overlap;
+ for (Map.Entry<Token,LinkedHashMap<String,Integer>> entry : suggestions
+ .entrySet()) {
+ Token token = entry.getKey();
+ if (entry.getValue().size() == 0) {
+ continue;
+ }
+ List<SpellCheckCorrection> possibleCorrections = new ArrayList<SpellCheckCorrection>();
+ for (Map.Entry<String,Integer> entry1 : entry.getValue().entrySet()) {
+ SpellCheckCorrection correction = new SpellCheckCorrection();
+ correction.setOriginal(token);
+ correction.setCorrection(entry1.getKey());
+ correction.setNumberOfOccurences(entry1.getValue());
+ possibleCorrections.add(correction);
+ }
+ possibilityList.add(possibleCorrections);
+ }
+
+ int wrapSize = possibilityList.size();
+ if (wrapSize == 0) {
+ done = true;
+ } else {
+ correctionIndex = new int[wrapSize];
+ for (int i = 0; i < wrapSize; i++) {
+ int suggestSize = possibilityList.get(i).size();
+ if (suggestSize == 0) {
+ done = true;
+ break;
+ }
+ correctionIndex[i] = 0;
+ }
+ }
+ PriorityQueue<RankedSpellPossibility> rankedPossibilities = new PriorityQueue<RankedSpellPossibility>(
+ 11, new RankComparator());
+ Set<RankedSpellPossibility> removeDuplicates = null;
+ if (suggestionsMayOverlap) {
+ removeDuplicates = new HashSet<RankedSpellPossibility>();
+ }
+ long numEvaluations = 0;
+ while (numEvaluations < maxEvaluations && internalHasNext()) {
+ RankedSpellPossibility rsp = internalNext();
+ numEvaluations++;
+ if (rankedPossibilities.size() >= maximumRequiredSuggestions
+ && rsp.rank >= rankedPossibilities.peek().rank) {
+ continue;
+ }
if (!isSuggestionForReal(rsp)) {
continue;
}
- rankedPossibilities.offer(rsp);
- if(rankedPossibilities.size() > maximumRequiredSuggestions) {
- rankedPossibilities.poll();
- }
- }
-
- RankedSpellPossibility[] rpArr = new RankedSpellPossibility[rankedPossibilities.size()];
- for(int i=rankedPossibilities.size() - 1 ; i>=0 ; i--) {
- rpArr[i] = rankedPossibilities.remove();
- }
- rankedPossibilityIterator = Arrays.asList(rpArr).iterator();
- }
-
+ if (removeDuplicates == null) {
+ rankedPossibilities.offer(rsp);
+ } else {
+ // Needs to be in token-offset order so that the match-and-replace
+ // option for collations can work.
+ Collections.sort(rsp.corrections, new StartOffsetComparator());
+ if (removeDuplicates.add(rsp)) {
+ rankedPossibilities.offer(rsp);
+ }
+ }
+ if (rankedPossibilities.size() > maximumRequiredSuggestions) {
+ RankedSpellPossibility removed = rankedPossibilities.poll();
+ if (removeDuplicates != null) {
+ removeDuplicates.remove(removed);
+ }
+ }
+ }
+
+ RankedSpellPossibility[] rpArr = new RankedSpellPossibility[rankedPossibilities
+ .size()];
+ for (int i = rankedPossibilities.size() - 1; i >= 0; i--) {
+ rpArr[i] = rankedPossibilities.remove();
+ }
+ rankedPossibilityIterator = Arrays.asList(rpArr).iterator();
+ }
+
private boolean isSuggestionForReal(RankedSpellPossibility rsp) {
- for (SpellCheckCorrection corr : rsp.getCorrections()) {
+ for (SpellCheckCorrection corr : rsp.corrections) {
if (!corr.getOriginalAsString().equals(corr.getCorrection())) {
return true;
}
}
return false;
}
-
- private boolean internalHasNext() {
- return !done;
- }
-
- /**
- * <p>
- * This method is converting the independent LinkHashMaps containing various
- * (silo'ed) suggestions for each mis-spelled word into individual
- * "holistic query corrections", aka. "Spell Check Possibility"
- * </p>
- * <p>
- * Rank here is the sum of each selected term's position in its respective
- * LinkedHashMap.
- * </p>
- *
- * @return
- */
- private RankedSpellPossibility internalNext() {
- if (done) {
- throw new NoSuchElementException();
- }
-
- List<SpellCheckCorrection> possibleCorrection = new ArrayList<SpellCheckCorrection>();
- int rank = 0;
- for (int i = 0; i < correctionIndex.length; i++) {
- List<SpellCheckCorrection> singleWordPossibilities = possibilityList.get(i);
- SpellCheckCorrection singleWordPossibility = singleWordPossibilities.get(correctionIndex[i]);
- rank += correctionIndex[i];
-
- if (i == correctionIndex.length - 1) {
- correctionIndex[i]++;
- if (correctionIndex[i] == singleWordPossibilities.size()) {
- correctionIndex[i] = 0;
- if (correctionIndex.length == 1) {
- done = true;
- }
- for (int ii = i - 1; ii >= 0; ii--) {
- correctionIndex[ii]++;
- if (correctionIndex[ii] >= possibilityList.get(ii).size() && ii > 0) {
- correctionIndex[ii] = 0;
- } else {
- break;
- }
- }
- }
- }
- possibleCorrection.add(singleWordPossibility);
- }
-
- if(correctionIndex[0] == possibilityList.get(0).size())
- {
- done = true;
- }
-
- RankedSpellPossibility rsl = new RankedSpellPossibility();
- rsl.setCorrections(possibleCorrection);
- rsl.setRank(rank);
- return rsl;
- }
-
- public boolean hasNext() {
- return rankedPossibilityIterator.hasNext();
- }
-
- public RankedSpellPossibility next() {
- return rankedPossibilityIterator.next();
- }
-
- public void remove() {
- throw new UnsupportedOperationException();
- }
-
+
+ private boolean internalHasNext() {
+ if (nextOnes != null && nextOnes.hasNext()) {
+ return true;
+ }
+ if (done) {
+ return false;
+ }
+ internalNextAdvance();
+ if (nextOnes != null && nextOnes.hasNext()) {
+ return true;
+ }
+ return false;
+ }
+
+ /**
+ * <p>
+ * This method is converting the independent LinkedHashMaps containing various
+ * (silo'ed) suggestions for each mis-spelled word into individual
+ * "holistic query corrections", aka. "Spell Check Possibility"
+ * </p>
+ * <p>
+ * Rank here is the sum of each selected term's position in its respective
+ * LinkedHashMap.
+ * </p>
+ *
+ * @return
+ */
+ private RankedSpellPossibility internalNext() {
+ if (nextOnes != null && nextOnes.hasNext()) {
+ RankedSpellPossibility rsl = new RankedSpellPossibility();
+ rsl.corrections = nextOnes.next();
+ rsl.rank = nextOnesRank;
+ rsl.index = nextOnesIndex++;
+ return rsl;
+ }
+ if (done) {
+ throw new NoSuchElementException();
+ }
+ internalNextAdvance();
+ if (nextOnes != null && nextOnes.hasNext()) {
+ RankedSpellPossibility rsl = new RankedSpellPossibility();
+ rsl.corrections = nextOnes.next();
+ rsl.rank = nextOnesRank;
+ rsl.index = nextOnesIndex++;
+ return rsl;
+ }
+ throw new NoSuchElementException();
+ }
+
+ private void internalNextAdvance() {
+ List<SpellCheckCorrection> possibleCorrection = null;
+ if (nextOnes != null && nextOnes.hasNext()) {
+ possibleCorrection = nextOnes.next();
+ } else {
+ if (done) {
+ throw new NoSuchElementException();
+ }
+ possibleCorrection = new ArrayList<SpellCheckCorrection>();
+ List<List<SpellCheckCorrection>> possibleCorrections = null;
+ int rank = 0;
+ while (!done
+ && (possibleCorrections == null || possibleCorrections.size() == 0)) {
+ rank = 0;
+ for (int i = 0; i < correctionIndex.length; i++) {
+ List<SpellCheckCorrection> singleWordPossibilities = possibilityList
+ .get(i);
+ SpellCheckCorrection singleWordPossibility = singleWordPossibilities
+ .get(correctionIndex[i]);
+ rank += correctionIndex[i];
+ if (i == correctionIndex.length - 1) {
+ correctionIndex[i]++;
+ if (correctionIndex[i] == singleWordPossibilities.size()) {
+ correctionIndex[i] = 0;
+ if (correctionIndex.length == 1) {
+ done = true;
+ }
+ for (int ii = i - 1; ii >= 0; ii--) {
+ correctionIndex[ii]++;
+ if (correctionIndex[ii] >= possibilityList.get(ii).size()
+ && ii > 0) {
+ correctionIndex[ii] = 0;
+ } else {
+ break;
+ }
+ }
+ }
+ }
+ possibleCorrection.add(singleWordPossibility);
+ }
+ if (correctionIndex[0] == possibilityList.get(0).size()) {
+ done = true;
+ }
+ if (suggestionsMayOverlap) {
+ possibleCorrections = separateOverlappingTokens(possibleCorrection);
+ } else {
+ possibleCorrections = new ArrayList<List<SpellCheckCorrection>>(1);
+ possibleCorrections.add(possibleCorrection);
+ }
+ }
+ nextOnes = possibleCorrections.iterator();
+ nextOnesRank = rank;
+ nextOnesIndex = 0;
+ }
+ }
+
+ private List<List<SpellCheckCorrection>> separateOverlappingTokens(
+ List<SpellCheckCorrection> possibleCorrection) {
+ List<List<SpellCheckCorrection>> ret = null;
+ if (possibleCorrection.size() == 1) {
+ ret = new ArrayList<List<SpellCheckCorrection>>(1);
+ ret.add(possibleCorrection);
+ return ret;
+ }
+ ret = new ArrayList<List<SpellCheckCorrection>>();
+ for (int i = 0; i < possibleCorrection.size(); i++) {
+ List<SpellCheckCorrection> c = compatible(possibleCorrection, i);
+ ret.add(c);
+ }
+ return ret;
+ }
+
+ private List<SpellCheckCorrection> compatible(List<SpellCheckCorrection> all,
+ int pos) {
+ List<SpellCheckCorrection> priorPassCompatibles = null;
+ {
+ List<SpellCheckCorrection> firstPassCompatibles = new ArrayList<SpellCheckCorrection>(
+ all.size());
+ SpellCheckCorrection sacred = all.get(pos);
+ firstPassCompatibles.add(sacred);
+ int index = pos;
+ boolean gotOne = false;
+ for (int i = 0; i < all.size() - 1; i++) {
+ index++;
+ if (index == all.size()) {
+ index = 0;
+ }
+ SpellCheckCorrection disposable = all.get(index);
+ if (!conflicts(sacred, disposable)) {
+ firstPassCompatibles.add(disposable);
+ gotOne = true;
+ }
+ }
+ if (!gotOne) {
+ return firstPassCompatibles;
+ }
+ priorPassCompatibles = firstPassCompatibles;
+ }
+
+ {
+ pos = 1;
+ while (true) {
+ if (pos == priorPassCompatibles.size() - 1) {
+ return priorPassCompatibles;
+ }
+ List<SpellCheckCorrection> subsequentPassCompatibles = new ArrayList<SpellCheckCorrection>(
+ priorPassCompatibles.size());
+ SpellCheckCorrection sacred = null;
+ for (int i = 0; i <= pos; i++) {
+ sacred = priorPassCompatibles.get(i);
+ subsequentPassCompatibles.add(sacred);
+ }
+ int index = pos;
+ boolean gotOne = false;
+ for (int i = 0; i < priorPassCompatibles.size() - 1; i++) {
+ index++;
+ if (index == priorPassCompatibles.size()) {
+ break;
+ }
+ SpellCheckCorrection disposable = priorPassCompatibles.get(index);
+ if (!conflicts(sacred, disposable)) {
+ subsequentPassCompatibles.add(disposable);
+ gotOne = true;
+ }
+ }
+ if (!gotOne || pos == priorPassCompatibles.size() - 1) {
+ return subsequentPassCompatibles;
+ }
+ priorPassCompatibles = subsequentPassCompatibles;
+ pos++;
+ }
+ }
+ }
+
+ private boolean conflicts(SpellCheckCorrection c1, SpellCheckCorrection c2) {
+ int s1 = c1.getOriginal().startOffset();
+ int e1 = c1.getOriginal().endOffset();
+ int s2 = c2.getOriginal().startOffset();
+ int e2 = c2.getOriginal().endOffset();
+ if (s2 >= s1 && s2 <= e1) {
+ return true;
+ }
+ if (s1 >= s2 && s1 <= e2) {
+ return true;
+ }
+ return false;
+ }
+
+ public boolean hasNext() {
+ return rankedPossibilityIterator.hasNext();
+ }
+
+ public PossibilityIterator.RankedSpellPossibility next() {
+ return rankedPossibilityIterator.next();
+ }
+
+ public void remove() {
+ throw new UnsupportedOperationException();
+ }
+
+ public class RankedSpellPossibility {
+ public List<SpellCheckCorrection> corrections;
+ public int rank;
+ public int index;
+
+ @Override
+ // hashCode() and equals() only consider the actual correction, not the rank
+ // or index.
+ public int hashCode() {
+ final int prime = 31;
+ int result = 1;
+ result = prime * result
+ + ((corrections == null) ? 0 : corrections.hashCode());
+ return result;
+ }
+
+ @Override
+ // hashCode() and equals() only consider the actual correction, not the rank
+ // or index.
+ public boolean equals(Object obj) {
+ if (this == obj) return true;
+ if (obj == null) return false;
+ if (getClass() != obj.getClass()) return false;
+ RankedSpellPossibility other = (RankedSpellPossibility) obj;
+ if (corrections == null) {
+ if (other.corrections != null) return false;
+ } else if (!corrections.equals(other.corrections)) return false;
+ return true;
+ }
+
+ public String toString() {
+ StringBuilder sb = new StringBuilder();
+ sb.append("rank=").append(rank).append(" (").append(index).append(")");
+ if (corrections != null) {
+ for (SpellCheckCorrection corr : corrections) {
+ sb.append(" ");
+ sb.append(corr.getOriginal()).append(">")
+ .append(corr.getCorrection()).append(" (").append(
+ corr.getNumberOfOccurences()).append(")");
+ }
+ }
+ return sb.toString();
+ }
+ }
+
+ private class StartOffsetComparator implements
+ Comparator<SpellCheckCorrection> {
+ @Override
+ public int compare(SpellCheckCorrection o1, SpellCheckCorrection o2) {
+ return o1.getOriginal().startOffset() - o2.getOriginal().startOffset();
+ }
+ }
+
+ private class RankComparator implements Comparator<RankedSpellPossibility> {
+ // Rank poorer suggestions ahead of better ones for use with a PriorityQueue
+ public int compare(RankedSpellPossibility r1, RankedSpellPossibility r2) {
+ int retval = r2.rank - r1.rank;
+ if (retval == 0) {
+ retval = r2.index - r1.index;
+ }
+ return retval;
+ }
+ }
+
}
Modified: lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/spelling/QueryConverter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/spelling/QueryConverter.java?rev=1346069&r1=1346068&r2=1346069&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/spelling/QueryConverter.java (original)
+++ lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/spelling/QueryConverter.java Mon Jun 4 18:07:04 2012
@@ -50,7 +50,32 @@ public abstract class QueryConverter imp
private NamedList args;
protected Analyzer analyzer;
-
+
+ /**
+ * <p>This term is marked prohibited in the query with the minus sign.</p>
+ *
+ */
+ public static final int PROHIBITED_TERM_FLAG = 16384;
+ /**
+ * <p>This term is marked required in the query with the plus sign.</p>
+ */
+ public static final int REQUIRED_TERM_FLAG = 32768;
+ /**
+ * <p>
+ * This term is directly followed by a boolean operator (AND/OR/NOT)
+ * and this operator differs from the prior boolean operator
+ * in the query (this signifies this term is likely part of a different
+ * query clause than the next term in the query)
+ * </p>
+ */
+ public static final int TERM_PRECEDES_NEW_BOOLEAN_OPERATOR_FLAG = 65536;
+ /**
+ * <p>
+ * This term exists in a query that contains boolean operators
+ * (AND/OR/NOT)
+ * </p>
+ */
+ public static final int TERM_IN_BOOLEAN_QUERY_FLAG = 131072;
public void init(NamedList args) {
this.args = args;
}
Modified: lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/spelling/SolrSpellChecker.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/spelling/SolrSpellChecker.java?rev=1346069&r1=1346068&r2=1346069&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/spelling/SolrSpellChecker.java (original)
+++ lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/spelling/SolrSpellChecker.java Mon Jun 4 18:07:04 2012
@@ -92,7 +92,12 @@ public abstract class SolrSpellChecker {
//just use .5 as a default
}
- StringDistance sd = getStringDistance() == null ? new LevensteinDistance() : getStringDistance();
+ StringDistance sd = null;
+ try {
+ sd = getStringDistance() == null ? new LevensteinDistance() : getStringDistance();
+ } catch(UnsupportedOperationException uoe) {
+ sd = new LevensteinDistance();
+ }
SpellingResult result = new SpellingResult();
for (Map.Entry<String, HashSet<String>> entry : mergeData.origVsSuggested.entrySet()) {
@@ -190,4 +195,8 @@ public abstract class SolrSpellChecker {
* @throws IOException if there is an error producing suggestions
*/
public abstract SpellingResult getSuggestions(SpellingOptions options) throws IOException;
+
+ public boolean isSuggestionsMayOverlap() {
+ return false;
+ }
}
Modified: lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/spelling/SpellCheckCollator.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/spelling/SpellCheckCollator.java?rev=1346069&r1=1346068&r2=1346069&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/spelling/SpellCheckCollator.java (original)
+++ lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/spelling/SpellCheckCollator.java Mon Jun 4 18:07:04 2012
@@ -37,7 +37,7 @@ public class SpellCheckCollator {
private static final Logger LOG = LoggerFactory.getLogger(SpellCheckCollator.class);
public List<SpellCheckCollation> collate(SpellingResult result, String originalQuery, ResponseBuilder ultimateResponse,
- int maxCollations, int maxTries, int maxEvaluations) {
+ int maxCollations, int maxTries, int maxEvaluations, boolean suggestionsMayOverlap) {
List<SpellCheckCollation> collations = new ArrayList<SpellCheckCollation>();
QueryComponent queryComponent = null;
@@ -51,8 +51,10 @@ public class SpellCheckCollator {
}
boolean verifyCandidateWithQuery = true;
+ int maxNumberToIterate = maxTries;
if (maxTries < 1) {
maxTries = 1;
+ maxNumberToIterate = maxCollations;
verifyCandidateWithQuery = false;
}
if (queryComponent == null && verifyCandidateWithQuery) {
@@ -63,11 +65,11 @@ public class SpellCheckCollator {
int tryNo = 0;
int collNo = 0;
- PossibilityIterator possibilityIter = new PossibilityIterator(result.getSuggestions(), maxTries, maxEvaluations);
+ PossibilityIterator possibilityIter = new PossibilityIterator(result.getSuggestions(), maxNumberToIterate, maxEvaluations, suggestionsMayOverlap);
while (tryNo < maxTries && collNo < maxCollations && possibilityIter.hasNext()) {
- RankedSpellPossibility possibility = possibilityIter.next();
- String collationQueryStr = getCollation(originalQuery, possibility.getCorrections());
+ PossibilityIterator.RankedSpellPossibility possibility = possibilityIter.next();
+ String collationQueryStr = getCollation(originalQuery, possibility.corrections);
int hits = 0;
if (verifyCandidateWithQuery) {
@@ -102,10 +104,10 @@ public class SpellCheckCollator {
SpellCheckCollation collation = new SpellCheckCollation();
collation.setCollationQuery(collationQueryStr);
collation.setHits(hits);
- collation.setInternalRank(possibility.getRank());
+ collation.setInternalRank(suggestionsMayOverlap ? ((possibility.rank * 1000) + possibility.index) : possibility.rank);
NamedList<String> misspellingsAndCorrections = new NamedList<String>();
- for (SpellCheckCorrection corr : possibility.getCorrections()) {
+ for (SpellCheckCorrection corr : possibility.corrections) {
misspellingsAndCorrections.add(corr.getOriginal().toString(), corr.getCorrection());
}
collation.setMisspellingsAndCorrections(misspellingsAndCorrections);
@@ -122,16 +124,53 @@ public class SpellCheckCollator {
List<SpellCheckCorrection> corrections) {
StringBuilder collation = new StringBuilder(origQuery);
int offset = 0;
- for (SpellCheckCorrection correction : corrections) {
+ String corr = "";
+ for(int i=0 ; i<corrections.size() ; i++) {
+ SpellCheckCorrection correction = corrections.get(i);
Token tok = correction.getOriginal();
// we are replacing the query in order, but injected terms might cause
// illegal offsets due to previous replacements.
if (tok.getPositionIncrement() == 0)
continue;
- collation.replace(tok.startOffset() + offset, tok.endOffset() + offset,
- correction.getCorrection());
- offset += correction.getCorrection().length()
- - (tok.endOffset() - tok.startOffset());
+ corr = correction.getCorrection();
+ boolean addParenthesis = false;
+ Character requiredOrProhibited = null;
+ int indexOfSpace = corr.indexOf(' ');
+ StringBuilder corrSb = new StringBuilder(corr);
+ int bump = 1;
+
+ //If the correction contains whitespace (because it involved breaking a word in 2+ words),
+ //then be sure all of the new words have the same optional/required/prohibited status in the query.
+ while(indexOfSpace>-1 && indexOfSpace<corr.length()-1) {
+ addParenthesis = true;
+ char previousChar = tok.startOffset()>0 ? collation.charAt(tok.startOffset()-1) : ' ';
+ if(previousChar=='-' || previousChar=='+') {
+ corrSb.insert(indexOfSpace + bump, previousChar);
+ if(requiredOrProhibited==null) {
+ requiredOrProhibited = previousChar;
+ }
+ bump++;
+ } else if ((tok.getFlags() & QueryConverter.TERM_IN_BOOLEAN_QUERY_FLAG) == QueryConverter.TERM_IN_BOOLEAN_QUERY_FLAG) {
+ corrSb.insert(indexOfSpace + bump, "AND ");
+ bump += 4;
+ }
+ indexOfSpace = correction.getCorrection().indexOf(' ', indexOfSpace + bump);
+ }
+
+ int oneForReqOrProhib = 0;
+ if(addParenthesis) {
+ if(requiredOrProhibited!=null) {
+ corrSb.insert(0, requiredOrProhibited);
+ oneForReqOrProhib++;
+ }
+ corrSb.insert(0, '(');
+ corrSb.append(')');
+ }
+ corr = corrSb.toString();
+ int startIndex = tok.startOffset() + offset - oneForReqOrProhib;
+ int endIndex = tok.endOffset() + offset;
+ collation.replace(startIndex, endIndex, corr);
+ offset += corr.length() - oneForReqOrProhib - (tok.endOffset() - tok.startOffset());
}
return collation.toString();
}
Modified: lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/spelling/SpellingQueryConverter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/spelling/SpellingQueryConverter.java?rev=1346069&r1=1346068&r2=1346069&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/spelling/SpellingQueryConverter.java (original)
+++ lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/spelling/SpellingQueryConverter.java Mon Jun 4 18:07:04 2012
@@ -38,8 +38,18 @@ import org.apache.lucene.analysis.tokena
/**
* Converts the query string to a Collection of Lucene tokens using a regular expression.
- * Boolean operators AND and OR are skipped.
- *
+ * Boolean operators AND, OR, NOT are skipped.
+ *
+ * Each term is checked to determine if it is optional, required or prohibited. Required
+ * terms output a {@link Token} with the {@link QueryConverter#REQUIRED_TERM_FLAG} set.
+ * Prohibited terms output a {@link Token} with the {@link QueryConverter#PROHIBITED_TERM_FLAG}
+ * set. If the query uses the plus (+) and minus (-) to denote required and prohibited, this
+ * determination will be accurate. In the case boolean AND/OR/NOTs are used, this
+ * converter makes an uninformed guess as to whether the term would likely behave as if it
+ * is Required or Prohibited and sets the flags accordingly. These flags are used downstream
+ * to generate collations for {@link WordBreakSolrSpellChecker}, in cases where an original
+ * term is split up into multiple Tokens.
+ *
* @since solr 1.3
**/
public class SpellingQueryConverter extends QueryConverter {
@@ -86,8 +96,7 @@ public class SpellingQueryConverter exte
final static String PATTERN = "(?:(?!(" + NMTOKEN + ":|\\d+)))[\\p{L}_\\-0-9]+";
// previous version: Pattern.compile("(?:(?!(\\w+:|\\d+)))\\w+");
protected Pattern QUERY_REGEX = Pattern.compile(PATTERN);
-
-
+
/**
* Converts the original query string to a collection of Lucene Tokens.
* @param original the original query string
@@ -99,37 +108,87 @@ public class SpellingQueryConverter exte
return Collections.emptyList();
}
Collection<Token> result = new ArrayList<Token>();
- //TODO: Extract the words using a simple regex, but not query stuff, and then analyze them to produce the token stream
Matcher matcher = QUERY_REGEX.matcher(original);
- while (matcher.find()) {
- String word = matcher.group(0);
- if (word.equals("AND") == false && word.equals("OR") == false) {
- try {
- analyze(result, new StringReader(word), matcher.start());
- } catch (IOException e) {
- // TODO: shouldn't we log something?
- }
+ String nextWord = null;
+ int nextStartIndex = 0;
+ String lastBooleanOp = null;
+ while (nextWord!=null || matcher.find()) {
+ String word = null;
+ int startIndex = 0;
+ if(nextWord != null) {
+ word = nextWord;
+ startIndex = nextStartIndex;
+ nextWord = null;
+ } else {
+ word = matcher.group(0);
+ startIndex = matcher.start();
+ }
+ if(matcher.find()) {
+ nextWord = matcher.group(0);
+ nextStartIndex = matcher.start();
+ }
+ if("AND".equals(word) || "OR".equals(word) || "NOT".equals(word)) {
+ lastBooleanOp = word;
+ continue;
+ }
+ // treat "AND NOT" as "NOT"...
+ if ("AND".equals(nextWord)
+ && original.length() > nextStartIndex + 7
+ && original.substring(nextStartIndex, nextStartIndex + 7).equals(
+ "AND NOT")) {
+ nextWord = "NOT";
+ }
+
+ int flagValue = 0;
+ if (word.charAt(0) == '-'
+ || (startIndex > 0 && original.charAt(startIndex - 1) == '-')) {
+ flagValue = PROHIBITED_TERM_FLAG;
+ } else if (word.charAt(0) == '+'
+ || (startIndex > 0 && original.charAt(startIndex - 1) == '+')) {
+ flagValue = REQUIRED_TERM_FLAG;
+ //we don't know the default operator so just assume the first operator isn't new.
+ } else if (nextWord != null
+ && lastBooleanOp != null
+ && !nextWord.equals(lastBooleanOp)
+ && ("AND".equals(nextWord) || "OR".equals(nextWord) || "NOT".equals(nextWord))) {
+ flagValue = TERM_PRECEDES_NEW_BOOLEAN_OPERATOR_FLAG;
+ //...unless the 1st boolean operator is a NOT, because only AND/OR can be default.
+ } else if (nextWord != null
+ && lastBooleanOp == null
+ && !nextWord.equals(lastBooleanOp)
+ && ("NOT".equals(nextWord))) {
+ flagValue = TERM_PRECEDES_NEW_BOOLEAN_OPERATOR_FLAG;
+ }
+ try {
+ analyze(result, new StringReader(word), startIndex, flagValue);
+ } catch (IOException e) {
+ // TODO: shouldn't we log something?
+ }
+ }
+ if(lastBooleanOp != null) {
+ for(Token t : result) {
+ int f = t.getFlags();
+ t.setFlags(f |= QueryConverter.TERM_IN_BOOLEAN_QUERY_FLAG);
}
}
return result;
}
- protected void analyze(Collection<Token> result, Reader text, int offset) throws IOException {
+ protected void analyze(Collection<Token> result, Reader text, int offset, int flagsAttValue) throws IOException {
TokenStream stream = analyzer.tokenStream("", text);
// TODO: support custom attributes
CharTermAttribute termAtt = stream.addAttribute(CharTermAttribute.class);
- FlagsAttribute flagsAtt = stream.addAttribute(FlagsAttribute.class);
TypeAttribute typeAtt = stream.addAttribute(TypeAttribute.class);
PayloadAttribute payloadAtt = stream.addAttribute(PayloadAttribute.class);
PositionIncrementAttribute posIncAtt = stream.addAttribute(PositionIncrementAttribute.class);
OffsetAttribute offsetAtt = stream.addAttribute(OffsetAttribute.class);
stream.reset();
- while (stream.incrementToken()) {
+ while (stream.incrementToken()) {
Token token = new Token();
token.copyBuffer(termAtt.buffer(), 0, termAtt.length());
token.setStartOffset(offset + offsetAtt.startOffset());
token.setEndOffset(offset + offsetAtt.endOffset());
- token.setFlags(flagsAtt.getFlags());
+ token.setFlags(flagsAttValue); //overwriting any flags already set...
token.setType(typeAtt.type());
token.setPayload(payloadAtt.getPayload());
token.setPositionIncrement(posIncAtt.getPositionIncrement());
Modified: lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/spelling/SuggestQueryConverter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/spelling/SuggestQueryConverter.java?rev=1346069&r1=1346068&r2=1346069&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/spelling/SuggestQueryConverter.java (original)
+++ lucene/dev/branches/branch_4x/solr/core/src/java/org/apache/solr/spelling/SuggestQueryConverter.java Mon Jun 4 18:07:04 2012
@@ -38,7 +38,7 @@ public class SuggestQueryConverter exten
Collection<Token> result = new ArrayList<Token>();
try {
- analyze(result, new StringReader(original), 0);
+ analyze(result, new StringReader(original), 0, 0);
} catch (IOException e) {
throw new RuntimeException(e);
}
Modified: lucene/dev/branches/branch_4x/solr/core/src/test-files/solr/conf/solrconfig-spellcheckcomponent.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/solr/core/src/test-files/solr/conf/solrconfig-spellcheckcomponent.xml?rev=1346069&r1=1346068&r2=1346069&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/solr/core/src/test-files/solr/conf/solrconfig-spellcheckcomponent.xml (original)
+++ lucene/dev/branches/branch_4x/solr/core/src/test-files/solr/conf/solrconfig-spellcheckcomponent.xml Mon Jun 4 18:07:04 2012
@@ -70,6 +70,14 @@ Config for testing spellcheck component
<str name="field">lowerfilt</str>
</lst>
<lst name="spellchecker">
+ <str name="name">wordbreak</str>
+ <str name="classname">solr.WordBreakSolrSpellChecker</str>
+ <str name="field">lowerfilt</str>
+ <str name="combineWords">true</str>
+ <str name="breakWords">true</str>
+ <int name="maxChanges">10</int>
+ </lst>
+ <lst name="spellchecker">
<str name="name">threshold</str>
<str name="field">lowerfilt</str>
<str name="spellcheckIndexDir">spellcheckerThreshold</str>
@@ -161,5 +169,15 @@ Config for testing spellcheck component
<str>spellcheck</str>
</arr>
</requestHandler>
+ <requestHandler name="spellCheckWithWordbreak" class="org.apache.solr.handler.component.SearchHandler">
+ <lst name="defaults">
+ <str name="spellcheck.dictionary">default</str>
+ <str name="spellcheck.dictionary">wordbreak</str>
+ <str name="spellcheck.count">20</str>
+ </lst>
+ <arr name="last-components">
+ <str>spellcheck</str>
+ </arr>
+ </requestHandler>
</config>
Modified: lucene/dev/branches/branch_4x/solr/core/src/test-files/solr/conf/solrconfig.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/solr/core/src/test-files/solr/conf/solrconfig.xml?rev=1346069&r1=1346068&r2=1346069&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/solr/core/src/test-files/solr/conf/solrconfig.xml (original)
+++ lucene/dev/branches/branch_4x/solr/core/src/test-files/solr/conf/solrconfig.xml Mon Jun 4 18:07:04 2012
@@ -280,6 +280,14 @@
<int name="minQueryLength">3</int>
</lst>
<lst name="spellchecker">
+ <str name="name">wordbreak</str>
+ <str name="classname">solr.WordBreakSolrSpellChecker</str>
+ <str name="field">lowerfilt</str>
+ <str name="combineWords">true</str>
+ <str name="breakWords">true</str>
+ <int name="maxChanges">10</int>
+ </lst>
+ <lst name="spellchecker">
<str name="name">multipleFields</str>
<str name="field">lowerfilt1and2</str>
<str name="spellcheckIndexDir">spellcheckerMultipleFields</str>
@@ -365,6 +373,26 @@
<str>spellcheck</str>
</arr>
</requestHandler>
+ <requestHandler name="spellCheckWithWordbreak" class="org.apache.solr.handler.component.SearchHandler">
+ <lst name="defaults">
+ <str name="spellcheck.dictionary">default</str>
+ <str name="spellcheck.dictionary">wordbreak</str>
+ <str name="spellcheck.count">20</str>
+ </lst>
+ <arr name="last-components">
+ <str>spellcheck</str>
+ </arr>
+ </requestHandler>
+ <requestHandler name="spellCheckWithWordbreak_Direct" class="org.apache.solr.handler.component.SearchHandler">
+ <lst name="defaults">
+ <str name="spellcheck.dictionary">direct</str>
+ <str name="spellcheck.dictionary">wordbreak</str>
+ <str name="spellcheck.count">20</str>
+ </lst>
+ <arr name="last-components">
+ <str>spellcheck</str>
+ </arr>
+ </requestHandler>
<requestHandler name="spellCheckCompRH1" class="org.apache.solr.handler.component.SearchHandler">
<lst name="defaults">
<str name="defType">dismax</str>
Modified: lucene/dev/branches/branch_4x/solr/core/src/test/org/apache/solr/handler/component/DistributedSpellCheckComponentTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/solr/core/src/test/org/apache/solr/handler/component/DistributedSpellCheckComponentTest.java?rev=1346069&r1=1346068&r2=1346069&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/solr/core/src/test/org/apache/solr/handler/component/DistributedSpellCheckComponentTest.java (original)
+++ lucene/dev/branches/branch_4x/solr/core/src/test/org/apache/solr/handler/component/DistributedSpellCheckComponentTest.java Mon Jun 4 18:07:04 2012
@@ -37,6 +37,7 @@ import org.apache.solr.common.util.Named
public class DistributedSpellCheckComponentTest extends BaseDistributedSearchTestCase {
private String requestHandlerName;
+ private String reqHandlerWithWordbreak;
public DistributedSpellCheckComponentTest()
{
@@ -52,7 +53,13 @@ public class DistributedSpellCheckCompon
// this test requires FSDir
saveProp = System.getProperty("solr.directoryFactory");
System.setProperty("solr.directoryFactory", "solr.StandardDirectoryFactory");
- requestHandlerName = random().nextBoolean() ? "spellCheckCompRH" : "spellCheckCompRH_Direct";
+ if(random().nextBoolean()) {
+ requestHandlerName = "spellCheckCompRH";
+ reqHandlerWithWordbreak = "spellCheckWithWordbreak";
+ } else {
+ requestHandlerName = "spellCheckCompRH_Direct";
+ reqHandlerWithWordbreak = "spellCheckWithWordbreak_Direct";
+ }
super.setUp();
}
@@ -141,5 +148,7 @@ public class DistributedSpellCheckCompon
query("q", "lowerfilt:(\"quote red fox\")", "fl", "id,lowerfilt", "spellcheck", "true", "qt", "spellCheckCompRH", "shards.qt", "spellCheckCompRH", SpellCheckComponent.SPELLCHECK_EXTENDED_RESULTS, "true", SpellCheckComponent.SPELLCHECK_COUNT, "10", SpellCheckComponent.SPELLCHECK_COLLATE, "true", SpellCheckComponent.SPELLCHECK_MAX_COLLATION_TRIES, "10", SpellCheckComponent.SPELLCHECK_MAX_COLLATIONS, "1", SpellCheckComponent.SPELLCHECK_COLLATE_EXTENDED_RESULTS, "true", SpellCheckComponent.SPELLCHECK_ALTERNATIVE_TERM_COUNT, "5", SpellCheckComponent.SPELLCHECK_MAX_RESULTS_FOR_SUGGEST, "10");
query("q", "lowerfilt:(\"rod fix\")", "fl", "id,lowerfilt", "spellcheck", "true", "qt", "spellCheckCompRH", "shards.qt", "spellCheckCompRH", SpellCheckComponent.SPELLCHECK_EXTENDED_RESULTS, "true", SpellCheckComponent.SPELLCHECK_COUNT, "10", SpellCheckComponent.SPELLCHECK_COLLATE, "true", SpellCheckComponent.SPELLCHECK_MAX_COLLATION_TRIES, "10", SpellCheckComponent.SPELLCHECK_MAX_COLLATIONS, "1", SpellCheckComponent.SPELLCHECK_COLLATE_EXTENDED_RESULTS, "true", SpellCheckComponent.SPELLCHECK_ALTERNATIVE_TERM_COUNT, "5", SpellCheckComponent.SPELLCHECK_MAX_RESULTS_FOR_SUGGEST, "10");
+
+ query("q", "lowerfilt:(+quock +redfox +jum +ped)", "fl", "id,lowerfilt", "spellcheck", "true", "qt", reqHandlerWithWordbreak, "shards.qt", reqHandlerWithWordbreak, SpellCheckComponent.SPELLCHECK_EXTENDED_RESULTS, "true", SpellCheckComponent.SPELLCHECK_COUNT, "10", SpellCheckComponent.SPELLCHECK_COLLATE, "true", SpellCheckComponent.SPELLCHECK_MAX_COLLATION_TRIES, "0", SpellCheckComponent.SPELLCHECK_MAX_COLLATIONS, "1", SpellCheckComponent.SPELLCHECK_COLLATE_EXTENDED_RESULTS, "true");
}
}
Modified: lucene/dev/branches/branch_4x/solr/core/src/test/org/apache/solr/spelling/SpellPossibilityIteratorTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/solr/core/src/test/org/apache/solr/spelling/SpellPossibilityIteratorTest.java?rev=1346069&r1=1346068&r2=1346069&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/solr/core/src/test/org/apache/solr/spelling/SpellPossibilityIteratorTest.java (original)
+++ lucene/dev/branches/branch_4x/solr/core/src/test/org/apache/solr/spelling/SpellPossibilityIteratorTest.java Mon Jun 4 18:07:04 2012
@@ -16,8 +16,10 @@ package org.apache.solr.spelling;
* limitations under the License.
*/
+import java.util.HashSet;
import java.util.LinkedHashMap;
import java.util.Map;
+import java.util.Set;
import org.apache.lucene.analysis.Token;
import org.apache.solr.SolrTestCaseJ4;
@@ -26,17 +28,22 @@ import org.junit.Before;
import org.junit.Test;
public class SpellPossibilityIteratorTest extends SolrTestCaseJ4 {
-
- private static Map<Token, LinkedHashMap<String, Integer>> suggestions = new LinkedHashMap<Token, LinkedHashMap<String, Integer>>();
- private static Map<Token, LinkedHashMap<String, Integer>> lotsaSuggestions = new LinkedHashMap<Token, LinkedHashMap<String, Integer>>();
-
+ private static final Token TOKEN_AYE = new Token("AYE", 0, 3);
+ private static final Token TOKEN_BEE = new Token("BEE", 4, 7);
+ private static final Token TOKEN_AYE_BEE = new Token("AYE BEE", 0, 7);
+ private static final Token TOKEN_CEE = new Token("CEE", 8, 11);
+
+ private LinkedHashMap<String, Integer> AYE;
+ private LinkedHashMap<String, Integer> BEE;
+ private LinkedHashMap<String, Integer> AYE_BEE;
+ private LinkedHashMap<String, Integer> CEE;
+
@Override
@Before
public void setUp() throws Exception {
super.setUp();
- suggestions.clear();
- LinkedHashMap<String, Integer> AYE = new LinkedHashMap<String, Integer>();
+ AYE = new LinkedHashMap<String, Integer>();
AYE.put("I", 0);
AYE.put("II", 0);
AYE.put("III", 0);
@@ -46,7 +53,7 @@ public class SpellPossibilityIteratorTes
AYE.put("VII", 0);
AYE.put("VIII", 0);
- LinkedHashMap<String, Integer> BEE = new LinkedHashMap<String, Integer>();
+ BEE = new LinkedHashMap<String, Integer>();
BEE.put("alpha", 0);
BEE.put("beta", 0);
BEE.put("gamma", 0);
@@ -57,8 +64,19 @@ public class SpellPossibilityIteratorTes
BEE.put("theta", 0);
BEE.put("iota", 0);
+ AYE_BEE = new LinkedHashMap<String, Integer>();
+ AYE_BEE.put("one-alpha", 0);
+ AYE_BEE.put("two-beta", 0);
+ AYE_BEE.put("three-gamma", 0);
+ AYE_BEE.put("four-delta", 0);
+ AYE_BEE.put("five-epsilon", 0);
+ AYE_BEE.put("six-zeta", 0);
+ AYE_BEE.put("seven-eta", 0);
+ AYE_BEE.put("eight-theta", 0);
+ AYE_BEE.put("nine-iota", 0);
+
- LinkedHashMap<String, Integer> CEE = new LinkedHashMap<String, Integer>();
+ CEE = new LinkedHashMap<String, Integer>();
CEE.put("one", 0);
CEE.put("two", 0);
CEE.put("three", 0);
@@ -69,61 +87,75 @@ public class SpellPossibilityIteratorTes
CEE.put("eight", 0);
CEE.put("nine", 0);
CEE.put("ten", 0);
-
- suggestions.put(new Token("AYE", 0, 2), AYE);
- suggestions.put(new Token("BEE", 0, 2), BEE);
- suggestions.put(new Token("CEE", 0, 2), CEE);
-
- lotsaSuggestions.put(new Token("AYE", 0, 2), AYE);
- lotsaSuggestions.put(new Token("BEE", 0, 2), BEE);
- lotsaSuggestions.put(new Token("CEE", 0, 2), CEE);
-
- lotsaSuggestions.put(new Token("AYE1", 0, 3), AYE);
- lotsaSuggestions.put(new Token("BEE1", 0, 3), BEE);
- lotsaSuggestions.put(new Token("CEE1", 0, 3), CEE);
-
- lotsaSuggestions.put(new Token("AYE2", 0, 3), AYE);
- lotsaSuggestions.put(new Token("BEE2", 0, 3), BEE);
- lotsaSuggestions.put(new Token("CEE2", 0, 3), CEE);
-
- lotsaSuggestions.put(new Token("AYE3", 0, 3), AYE);
- lotsaSuggestions.put(new Token("BEE3", 0, 3), BEE);
- lotsaSuggestions.put(new Token("CEE3", 0, 3), CEE);
-
- lotsaSuggestions.put(new Token("AYE4", 0, 3), AYE);
- lotsaSuggestions.put(new Token("BEE4", 0, 3), BEE);
- lotsaSuggestions.put(new Token("CEE4", 0, 3), CEE);
}
@Test
public void testScalability() throws Exception {
- PossibilityIterator iter = new PossibilityIterator(lotsaSuggestions, 1000, 10000);
+ Map<Token, LinkedHashMap<String, Integer>> lotsaSuggestions = new LinkedHashMap<Token, LinkedHashMap<String, Integer>>();
+ lotsaSuggestions.put(TOKEN_AYE , AYE);
+ lotsaSuggestions.put(TOKEN_BEE , BEE);
+ lotsaSuggestions.put(TOKEN_CEE , CEE);
+
+ lotsaSuggestions.put(new Token("AYE1", 0, 3), AYE);
+ lotsaSuggestions.put(new Token("BEE1", 4, 7), BEE);
+ lotsaSuggestions.put(new Token("CEE1", 8, 11), CEE);
+
+ lotsaSuggestions.put(new Token("AYE2", 0, 3), AYE);
+ lotsaSuggestions.put(new Token("BEE2", 4, 7), BEE);
+ lotsaSuggestions.put(new Token("CEE2", 8, 11), CEE);
+
+ lotsaSuggestions.put(new Token("AYE3", 0, 3), AYE);
+ lotsaSuggestions.put(new Token("BEE3", 4, 7), BEE);
+ lotsaSuggestions.put(new Token("CEE3", 8, 11), CEE);
+
+ lotsaSuggestions.put(new Token("AYE4", 0, 3), AYE);
+ lotsaSuggestions.put(new Token("BEE4", 4, 7), BEE);
+ lotsaSuggestions.put(new Token("CEE4", 8, 11), CEE);
+
+ PossibilityIterator iter = new PossibilityIterator(lotsaSuggestions, 1000, 10000, false);
int count = 0;
while (iter.hasNext()) {
- RankedSpellPossibility rsp = iter.next();
+ PossibilityIterator.RankedSpellPossibility rsp = iter.next();
count++;
}
assertTrue(count==1000);
+
+ lotsaSuggestions.put(new Token("AYE_BEE1", 0, 7), AYE_BEE);
+ lotsaSuggestions.put(new Token("AYE_BEE2", 0, 7), AYE_BEE);
+ lotsaSuggestions.put(new Token("AYE_BEE3", 0, 7), AYE_BEE);
+ lotsaSuggestions.put(new Token("AYE_BEE4", 0, 7), AYE_BEE);
+ iter = new PossibilityIterator(lotsaSuggestions, 1000, 10000, true);
+ count = 0;
+ while (iter.hasNext()) {
+ PossibilityIterator.RankedSpellPossibility rsp = iter.next();
+ count++;
+ }
+ assertTrue(count<100);
}
@Test
public void testSpellPossibilityIterator() throws Exception {
- PossibilityIterator iter = new PossibilityIterator(suggestions, 1000, 10000);
+ Map<Token, LinkedHashMap<String, Integer>> suggestions = new LinkedHashMap<Token, LinkedHashMap<String, Integer>>();
+ suggestions.put(TOKEN_AYE , AYE);
+ suggestions.put(TOKEN_BEE , BEE);
+ suggestions.put(TOKEN_CEE , CEE);
+
+ PossibilityIterator iter = new PossibilityIterator(suggestions, 1000, 10000, false);
int count = 0;
while (iter.hasNext()) {
- RankedSpellPossibility rsp = iter.next();
+ PossibilityIterator.RankedSpellPossibility rsp = iter.next();
if(count==0) {
- assertTrue("I".equals(rsp.getCorrections().get(0).getCorrection()));
- assertTrue("alpha".equals(rsp.getCorrections().get(1).getCorrection()));
- assertTrue("one".equals(rsp.getCorrections().get(2).getCorrection()));
+ assertTrue("I".equals(rsp.corrections.get(0).getCorrection()));
+ assertTrue("alpha".equals(rsp.corrections.get(1).getCorrection()));
+ assertTrue("one".equals(rsp.corrections.get(2).getCorrection()));
}
count++;
}
assertTrue(("Three maps (8*9*10) should return 720 iterations but instead returned " + count), count == 720);
- suggestions.remove(new Token("CEE", 0, 2));
- iter = new PossibilityIterator(suggestions, 100, 10000);
+ suggestions.remove(TOKEN_CEE);
+ iter = new PossibilityIterator(suggestions, 100, 10000, false);
count = 0;
while (iter.hasNext()) {
iter.next();
@@ -131,8 +163,8 @@ public class SpellPossibilityIteratorTes
}
assertTrue(("Two maps (8*9) should return 72 iterations but instead returned " + count), count == 72);
- suggestions.remove(new Token("BEE", 0, 2));
- iter = new PossibilityIterator(suggestions, 5, 10000);
+ suggestions.remove(TOKEN_BEE);
+ iter = new PossibilityIterator(suggestions, 5, 10000, false);
count = 0;
while (iter.hasNext()) {
iter.next();
@@ -140,8 +172,8 @@ public class SpellPossibilityIteratorTes
}
assertTrue(("We requested 5 suggestions but got " + count), count == 5);
- suggestions.remove(new Token("AYE", 0, 2));
- iter = new PossibilityIterator(suggestions, Integer.MAX_VALUE, 10000);
+ suggestions.remove(TOKEN_AYE);
+ iter = new PossibilityIterator(suggestions, Integer.MAX_VALUE, 10000, false);
count = 0;
while (iter.hasNext()) {
iter.next();
@@ -150,4 +182,47 @@ public class SpellPossibilityIteratorTes
assertTrue(("No maps should return 0 iterations but instead returned " + count), count == 0);
}
+
+ /**
+  * Iterates every possibility produced for a suggestion map in which TOKEN_AYE_BEE
+  * covers the same offsets as TOKEN_AYE and TOKEN_BEE, and checks that each possibility
+  * corrects TOKEN_CEE, uses either the combined token or both single tokens (never a
+  * mix), and is never returned twice.
+  */
+ @Test
+ public void testOverlappingTokens() throws Exception {
+   Map<Token, LinkedHashMap<String, Integer>> overlapping = new LinkedHashMap<Token, LinkedHashMap<String, Integer>>();
+   overlapping.put(TOKEN_AYE, AYE);
+   overlapping.put(TOKEN_BEE, BEE);
+   overlapping.put(TOKEN_AYE_BEE, AYE_BEE);
+   overlapping.put(TOKEN_CEE, CEE);
+
+   PossibilityIterator possibilities = new PossibilityIterator(overlapping, Integer.MAX_VALUE, Integer.MAX_VALUE, true);
+   Set<PossibilityIterator.RankedSpellPossibility> seen = new HashSet<PossibilityIterator.RankedSpellPossibility>();
+   int separateTally = 0;
+   int combinedTally = 0;
+   while (possibilities.hasNext()) {
+     PossibilityIterator.RankedSpellPossibility candidate = possibilities.next();
+     boolean sawAye = false;
+     boolean sawBee = false;
+     boolean sawAyeBee = false;
+     boolean sawCee = false;
+     for (SpellCheckCorrection correction : candidate.corrections) {
+       Token original = correction.getOriginal();
+       if (original.equals(TOKEN_AYE)) {
+         sawAye = true;
+       } else if (original.equals(TOKEN_BEE)) {
+         sawBee = true;
+       } else if (original.equals(TOKEN_AYE_BEE)) {
+         sawAyeBee = true;
+       } else if (original.equals(TOKEN_CEE)) {
+         sawCee = true;
+       }
+       // The tallies advance once per correction visited (not once per possibility),
+       // based on whether the combined token has been seen so far in this possibility.
+       if (sawAyeBee) {
+         combinedTally++;
+       } else {
+         separateTally++;
+       }
+     }
+     assertTrue(sawCee);
+     assertTrue(sawAyeBee || (sawAye && sawBee));
+     assertTrue(!sawAyeBee || (!sawAye && !sawBee));
+     assertTrue(seen.add(candidate));
+   }
+   assertTrue(separateTally == 2160);
+   assertTrue(combinedTally == 180);
+ }
}
Modified: lucene/dev/branches/branch_4x/solr/core/src/test/org/apache/solr/spelling/SpellingQueryConverterTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/solr/core/src/test/org/apache/solr/spelling/SpellingQueryConverterTest.java?rev=1346069&r1=1346068&r2=1346069&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/solr/core/src/test/org/apache/solr/spelling/SpellingQueryConverterTest.java (original)
+++ lucene/dev/branches/branch_4x/solr/core/src/test/org/apache/solr/spelling/SpellingQueryConverterTest.java Mon Jun 4 18:07:04 2012
@@ -23,7 +23,9 @@ import org.apache.lucene.util.LuceneTest
import org.apache.solr.common.util.NamedList;
import org.junit.Test;
+import java.util.ArrayList;
import java.util.Collection;
+import java.util.List;
/**
@@ -126,4 +128,75 @@ public class SpellingQueryConverterTest
assertTrue("tokens is null and it shouldn't be", tokens != null);
assertEquals("tokens Size: " + tokens.size() + " is not 2", 2, tokens.size());
}
+
+ /**
+  * Verifies the flags that SpellingQueryConverter puts on each token: required/prohibited
+  * for +/- prefixed terms, the "precedes new boolean operator" flag around AND/OR/NOT
+  * changes, and the "term in boolean query" flag on every token of a query that
+  * contains a boolean operator.
+  */
+ @Test
+ public void testRequiredOrProhibitedFlags() {
+   SpellingQueryConverter converter = new SpellingQueryConverter();
+   converter.init(new NamedList());
+   converter.setAnalyzer(new WhitespaceAnalyzer(TEST_VERSION_CURRENT));
+
+   {
+     List<Token> tokens = new ArrayList<Token>(converter.convert("aaa bbb ccc"));
+     assertTrue("Should have 3 tokens", tokens != null && tokens.size()==3);
+     assertTrue("token 1 should be optional", !hasRequiredFlag(tokens.get(0)) && !hasProhibitedFlag(tokens.get(0)));
+     assertTrue("token 2 should be optional", !hasRequiredFlag(tokens.get(1)) && !hasProhibitedFlag(tokens.get(1)));
+     assertTrue("token 3 should be optional", !hasRequiredFlag(tokens.get(2)) && !hasProhibitedFlag(tokens.get(2)));
+   }
+   {
+     List<Token> tokens = new ArrayList<Token>(converter.convert("+aaa bbb -ccc"));
+     assertTrue("Should have 3 tokens", tokens != null && tokens.size()==3);
+     assertTrue("token 1 should be required", hasRequiredFlag(tokens.get(0)) && !hasProhibitedFlag(tokens.get(0)));
+     assertTrue("token 2 should be optional", !hasRequiredFlag(tokens.get(1)) && !hasProhibitedFlag(tokens.get(1)));
+     assertTrue("token 3 should be prohibited", !hasRequiredFlag(tokens.get(2)) && hasProhibitedFlag(tokens.get(2)));
+   }
+   // In the boolean-operator cases below, each assertion checks the in-boolean-query
+   // flag on the same token whose n.b.o. flag it checks, so all three tokens are covered.
+   {
+     List<Token> tokens = new ArrayList<Token>(converter.convert("aaa AND bbb ccc"));
+     assertTrue("Should have 3 tokens", tokens != null && tokens.size()==3);
+     assertTrue("token 1 doesn't precede n.b.o.", !hasNBOFlag(tokens.get(0)) && hasInBooleanFlag(tokens.get(0)));
+     assertTrue("token 2 doesn't precede n.b.o.", !hasNBOFlag(tokens.get(1)) && hasInBooleanFlag(tokens.get(1)));
+     assertTrue("token 3 doesn't precede n.b.o.", !hasNBOFlag(tokens.get(2)) && hasInBooleanFlag(tokens.get(2)));
+   }
+   {
+     List<Token> tokens = new ArrayList<Token>(converter.convert("aaa OR bbb OR ccc"));
+     assertTrue("Should have 3 tokens", tokens != null && tokens.size()==3);
+     assertTrue("token 1 doesn't precede n.b.o.", !hasNBOFlag(tokens.get(0)) && hasInBooleanFlag(tokens.get(0)));
+     assertTrue("token 2 doesn't precede n.b.o.", !hasNBOFlag(tokens.get(1)) && hasInBooleanFlag(tokens.get(1)));
+     assertTrue("token 3 doesn't precede n.b.o.", !hasNBOFlag(tokens.get(2)) && hasInBooleanFlag(tokens.get(2)));
+   }
+   {
+     List<Token> tokens = new ArrayList<Token>(converter.convert("aaa AND bbb NOT ccc"));
+     assertTrue("Should have 3 tokens", tokens != null && tokens.size()==3);
+     assertTrue("token 1 doesn't precede n.b.o.", !hasNBOFlag(tokens.get(0)) && hasInBooleanFlag(tokens.get(0)));
+     assertTrue("token 2 precedes n.b.o.", hasNBOFlag(tokens.get(1)) && hasInBooleanFlag(tokens.get(1)));
+     assertTrue("token 3 doesn't precede n.b.o.", !hasNBOFlag(tokens.get(2)) && hasInBooleanFlag(tokens.get(2)));
+   }
+   {
+     List<Token> tokens = new ArrayList<Token>(converter.convert("aaa NOT bbb AND ccc"));
+     assertTrue("Should have 3 tokens", tokens != null && tokens.size()==3);
+     assertTrue("token 1 precedes n.b.o.", hasNBOFlag(tokens.get(0)) && hasInBooleanFlag(tokens.get(0)));
+     assertTrue("token 2 precedes n.b.o.", hasNBOFlag(tokens.get(1)) && hasInBooleanFlag(tokens.get(1)));
+     assertTrue("token 3 doesn't precedes n.b.o.", !hasNBOFlag(tokens.get(2)) && hasInBooleanFlag(tokens.get(2)));
+   }
+   {
+     List<Token> tokens = new ArrayList<Token>(converter.convert("aaa AND NOT bbb AND ccc"));
+     assertTrue("Should have 3 tokens", tokens != null && tokens.size()==3);
+     assertTrue("token 1 precedes n.b.o.", hasNBOFlag(tokens.get(0)) && hasInBooleanFlag(tokens.get(0)));
+     assertTrue("token 2 precedes n.b.o.", hasNBOFlag(tokens.get(1)) && hasInBooleanFlag(tokens.get(1)));
+     assertTrue("token 3 doesn't precedes n.b.o.", !hasNBOFlag(tokens.get(2)) && hasInBooleanFlag(tokens.get(2)));
+   }
+
+ }
+
+ private boolean hasRequiredFlag(Token t) {
+ return (t.getFlags() & QueryConverter.REQUIRED_TERM_FLAG) == QueryConverter.REQUIRED_TERM_FLAG;
+ }
+ private boolean hasProhibitedFlag(Token t) {
+ return (t.getFlags() & QueryConverter.PROHIBITED_TERM_FLAG) == QueryConverter.PROHIBITED_TERM_FLAG;
+ }
+ private boolean hasNBOFlag(Token t) {
+ return (t.getFlags() & QueryConverter.TERM_PRECEDES_NEW_BOOLEAN_OPERATOR_FLAG) == QueryConverter.TERM_PRECEDES_NEW_BOOLEAN_OPERATOR_FLAG;
+ }
+ private boolean hasInBooleanFlag(Token t) {
+ return (t.getFlags() & QueryConverter.TERM_IN_BOOLEAN_QUERY_FLAG) == QueryConverter.TERM_IN_BOOLEAN_QUERY_FLAG;
+ }
}
Modified: lucene/dev/branches/branch_4x/solr/example/solr/conf/solrconfig.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/solr/example/solr/conf/solrconfig.xml?rev=1346069&r1=1346068&r2=1346069&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/solr/example/solr/conf/solrconfig.xml (original)
+++ lucene/dev/branches/branch_4x/solr/example/solr/conf/solrconfig.xml Mon Jun 4 18:07:04 2012
@@ -881,10 +881,14 @@
<!-- Spell checking defaults -->
<str name="spellcheck">on</str>
+ <str name="spellcheck.extendedResults">false</str>
+ <str name="spellcheck.count">5</str>
+ <str name="spellcheck.alternativeTermCount">2</str>
+ <str name="spellcheck.maxResultsForSuggest">5</str>
<str name="spellcheck.collate">true</str>
- <str name="spellcheck.onlyMorePopular">false</str>
- <str name="spellcheck.extendedResults">false</str>
- <str name="spellcheck.count">3</str>
+ <str name="spellcheck.collateExtendedResults">true</str>
+ <str name="spellcheck.maxCollationTries">5</str>
+ <str name="spellcheck.maxCollations">3</str>
</lst>
<!-- append spellchecking to our list of components -->
@@ -1171,6 +1175,16 @@
<float name="thresholdTokenFrequency">.01</float>
-->
</lst>
+
+ <!-- a spellchecker that can break or combine words. See "/spell" handler below for usage -->
+ <lst name="spellchecker">
+ <str name="name">wordbreak</str>
+ <str name="classname">solr.WordBreakSolrSpellChecker</str>
+ <str name="field">name</str>
+ <str name="combineWords">true</str>
+ <str name="breakWords">true</str>
+ <int name="maxChanges">10</int>
+ </lst>
<!-- a spellchecker that uses a different distance measure -->
<!--
@@ -1226,9 +1240,21 @@
-->
<requestHandler name="/spell" class="solr.SearchHandler" startup="lazy">
<lst name="defaults">
- <str name="spellcheck.onlyMorePopular">false</str>
- <str name="spellcheck.extendedResults">false</str>
- <str name="spellcheck.count">1</str>
+ <!-- Solr will use suggestions from both the 'default' spellchecker
+ and from the 'wordbreak' spellchecker and combine them.
+ Collations (re-written queries) can include a combination of
+ corrections from both spellcheckers. -->
+ <str name="spellcheck.dictionary">default</str>
+ <str name="spellcheck.dictionary">wordbreak</str>
+ <str name="spellcheck">on</str>
+ <str name="spellcheck.extendedResults">true</str>
+ <str name="spellcheck.count">10</str>
+ <str name="spellcheck.alternativeTermCount">5</str>
+ <str name="spellcheck.maxResultsForSuggest">5</str>
+ <str name="spellcheck.collate">true</str>
+ <str name="spellcheck.collateExtendedResults">true</str>
+ <str name="spellcheck.maxCollationTries">10</str>
+ <str name="spellcheck.maxCollations">5</str>
</lst>
<arr name="last-components">
<str>spellcheck</str>