You are viewing a plain text version of this content. The canonical link for it is here.
Posted to solr-commits@lucene.apache.org by sh...@apache.org on 2009/09/17 10:26:41 UTC
svn commit: r816091 - in /lucene/solr/trunk/src:
java/org/apache/solr/spelling/SpellingQueryConverter.java
test/org/apache/solr/spelling/SpellingQueryConverterTest.java
Author: shalin
Date: Thu Sep 17 08:26:41 2009
New Revision: 816091
URL: http://svn.apache.org/viewvc?rev=816091&view=rev
Log:
SOLR-1407 followup -- Fix value regex, add tests for boost
Modified:
lucene/solr/trunk/src/java/org/apache/solr/spelling/SpellingQueryConverter.java
lucene/solr/trunk/src/test/org/apache/solr/spelling/SpellingQueryConverterTest.java
Modified: lucene/solr/trunk/src/java/org/apache/solr/spelling/SpellingQueryConverter.java
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/src/java/org/apache/solr/spelling/SpellingQueryConverter.java?rev=816091&r1=816090&r2=816091&view=diff
==============================================================================
--- lucene/solr/trunk/src/java/org/apache/solr/spelling/SpellingQueryConverter.java (original)
+++ lucene/solr/trunk/src/java/org/apache/solr/spelling/SpellingQueryConverter.java Thu Sep 17 08:26:41 2009
@@ -76,7 +76,7 @@
NMTOKEN = "([" + sb.toString() + "]|" + SURROGATE_PAIR + ")+";
}
- final static String PATTERN = "(?:(?!(" + NMTOKEN + ":|\\d+)))[^\\s]+";
+ final static String PATTERN = "(?:(?!(" + NMTOKEN + ":|\\d+)))[\\p{L}_\\-0-9]+";
// previous version: Pattern.compile("(?:(?!(\\w+:|\\d+)))\\w+");
protected Pattern QUERY_REGEX = Pattern.compile(PATTERN);
Modified: lucene/solr/trunk/src/test/org/apache/solr/spelling/SpellingQueryConverterTest.java
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/src/test/org/apache/solr/spelling/SpellingQueryConverterTest.java?rev=816091&r1=816090&r2=816091&view=diff
==============================================================================
--- lucene/solr/trunk/src/test/org/apache/solr/spelling/SpellingQueryConverterTest.java (original)
+++ lucene/solr/trunk/src/test/org/apache/solr/spelling/SpellingQueryConverterTest.java Thu Sep 17 08:26:41 2009
@@ -21,10 +21,12 @@
import org.apache.lucene.analysis.WhitespaceAnalyzer;
import org.apache.solr.common.util.NamedList;
import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.assertEquals;
import org.junit.Test;
import org.junit.Assert;
import java.util.Collection;
+import java.util.ArrayList;
/**
@@ -50,22 +52,45 @@
SpellingQueryConverter converter = new SpellingQueryConverter();
converter.init(new NamedList());
converter.setAnalyzer(new WhitespaceAnalyzer());
- Collection<Token> tokens = converter.convert("field_with_underscore:value_with_underscore");
+ String original = "field_with_underscore:value_with_underscore";
+ Collection<Token> tokens = converter.convert(original);
assertTrue("tokens is null and it shouldn't be", tokens != null);
- Assert.assertEquals("tokens Size: " + tokens.size() + " is not 1", 1, tokens.size());
+ assertEquals("tokens Size: " + tokens.size() + " is not 1", 1, tokens.size());
+ assertTrue("Token offsets do not match", isOffsetCorrect(original, tokens));
- tokens = converter.convert("field_with_digits123:value_with_digits123");
+ original = "field_with_digits123:value_with_digits123";
+ tokens = converter.convert(original);
assertTrue("tokens is null and it shouldn't be", tokens != null);
- Assert.assertEquals("tokens Size: " + tokens.size() + " is not 1", 1, tokens.size());
+ assertEquals("tokens Size: " + tokens.size() + " is not 1", 1, tokens.size());
+ assertTrue("Token offsets do not match", isOffsetCorrect(original, tokens));
- tokens = converter.convert("field-with-hyphens:value-with-hyphens");
+ original = "field-with-hyphens:value-with-hyphens";
+ tokens = converter.convert(original);
assertTrue("tokens is null and it shouldn't be", tokens != null);
- Assert.assertEquals("tokens Size: " + tokens.size() + " is not 1", 1, tokens.size());
+ assertEquals("tokens Size: " + tokens.size() + " is not 1", 1, tokens.size());
+ assertTrue("Token offsets do not match", isOffsetCorrect(original, tokens));
// mix 'em up and add some to the value
- tokens = converter.convert("field_with-123s:value_,.|with-hyphens");
+// original = "field_with-123s:value_,.|with-hyphens";
+// tokens = converter.convert(original);
+// assertTrue("tokens is null and it shouldn't be", tokens != null);
+// assertEquals("tokens Size: " + tokens.size() + " is not 1", 1, tokens.size());
+// assertTrue("Token offsets do not match", isOffsetCorrect(original, tokens));
+
+ original = "foo:bar^5.0";
+ tokens = converter.convert(original);
assertTrue("tokens is null and it shouldn't be", tokens != null);
- Assert.assertEquals("tokens Size: " + tokens.size() + " is not 1", 1, tokens.size());
+ assertEquals("tokens Size: " + tokens.size() + " is not 1", 1, tokens.size());
+ assertTrue("Token offsets do not match", isOffsetCorrect(original, tokens));
+ }
+
+ private boolean isOffsetCorrect(String s, Collection<Token> tokens) {
+ for (Token token : tokens) {
+ int start = token.startOffset();
+ int end = token.endOffset();
+ if (!s.substring(start, end).equals(token.term())) return false;
+ }
+ return true;
}
@Test
@@ -77,15 +102,15 @@
// chinese text value
Collection<Token> tokens = converter.convert("text_field:我购买了道具和服装。");
assertTrue("tokens is null and it shouldn't be", tokens != null);
- Assert.assertEquals("tokens Size: " + tokens.size() + " is not 1", 1, tokens.size());
+ assertEquals("tokens Size: " + tokens.size() + " is not 1", 1, tokens.size());
tokens = converter.convert("text_è´field:æè´ä¹°äºéå
·åæè£
ã");
assertTrue("tokens is null and it shouldn't be", tokens != null);
- Assert.assertEquals("tokens Size: " + tokens.size() + " is not 1", 1, tokens.size());
+ assertEquals("tokens Size: " + tokens.size() + " is not 1", 1, tokens.size());
tokens = converter.convert("text_field:æè´xyzä¹°äºéå
·åæè£
ã");
assertTrue("tokens is null and it shouldn't be", tokens != null);
- Assert.assertEquals("tokens Size: " + tokens.size() + " is not 1", 1, tokens.size());
+ assertEquals("tokens Size: " + tokens.size() + " is not 1", 1, tokens.size());
}
@Test
@@ -97,11 +122,11 @@
// two field:value pairs should give two tokens
Collection<Token> tokens = converter.convert("ä¹°text_field:æè´ä¹°äºéå
·åæè£
ã field2:bar");
assertTrue("tokens is null and it shouldn't be", tokens != null);
- Assert.assertEquals("tokens Size: " + tokens.size() + " is not 2", 2, tokens.size());
+ assertEquals("tokens Size: " + tokens.size() + " is not 2", 2, tokens.size());
// a field:value pair and a search term should give two tokens
tokens = converter.convert("text_field:我购买了道具和服装。 bar");
assertTrue("tokens is null and it shouldn't be", tokens != null);
- Assert.assertEquals("tokens Size: " + tokens.size() + " is not 2", 2, tokens.size());
+ assertEquals("tokens Size: " + tokens.size() + " is not 2", 2, tokens.size());
}
}