You are viewing a plain text version of this content. The canonical link for it is here.
Posted to solr-commits@lucene.apache.org by sh...@apache.org on 2009/09/17 10:26:41 UTC

svn commit: r816091 - in /lucene/solr/trunk/src: java/org/apache/solr/spelling/SpellingQueryConverter.java test/org/apache/solr/spelling/SpellingQueryConverterTest.java

Author: shalin
Date: Thu Sep 17 08:26:41 2009
New Revision: 816091

URL: http://svn.apache.org/viewvc?rev=816091&view=rev
Log:
SOLR-1407 followup -- Fix value regex, add tests for boost

Modified:
    lucene/solr/trunk/src/java/org/apache/solr/spelling/SpellingQueryConverter.java
    lucene/solr/trunk/src/test/org/apache/solr/spelling/SpellingQueryConverterTest.java

Modified: lucene/solr/trunk/src/java/org/apache/solr/spelling/SpellingQueryConverter.java
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/src/java/org/apache/solr/spelling/SpellingQueryConverter.java?rev=816091&r1=816090&r2=816091&view=diff
==============================================================================
--- lucene/solr/trunk/src/java/org/apache/solr/spelling/SpellingQueryConverter.java (original)
+++ lucene/solr/trunk/src/java/org/apache/solr/spelling/SpellingQueryConverter.java Thu Sep 17 08:26:41 2009
@@ -76,7 +76,7 @@
     NMTOKEN = "([" + sb.toString() + "]|" + SURROGATE_PAIR + ")+";
   }
 
-  final static String PATTERN = "(?:(?!(" + NMTOKEN + ":|\\d+)))[^\\s]+";
+  final static String PATTERN = "(?:(?!(" + NMTOKEN + ":|\\d+)))[\\p{L}_\\-0-9]+";
   // previous version: Pattern.compile("(?:(?!(\\w+:|\\d+)))\\w+");
   protected Pattern QUERY_REGEX = Pattern.compile(PATTERN);
 

Modified: lucene/solr/trunk/src/test/org/apache/solr/spelling/SpellingQueryConverterTest.java
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/src/test/org/apache/solr/spelling/SpellingQueryConverterTest.java?rev=816091&r1=816090&r2=816091&view=diff
==============================================================================
--- lucene/solr/trunk/src/test/org/apache/solr/spelling/SpellingQueryConverterTest.java (original)
+++ lucene/solr/trunk/src/test/org/apache/solr/spelling/SpellingQueryConverterTest.java Thu Sep 17 08:26:41 2009
@@ -21,10 +21,12 @@
 import org.apache.lucene.analysis.WhitespaceAnalyzer;
 import org.apache.solr.common.util.NamedList;
 import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.assertEquals;
 import org.junit.Test;
 import org.junit.Assert;
 
 import java.util.Collection;
+import java.util.ArrayList;
 
 
 /**
@@ -50,22 +52,45 @@
     SpellingQueryConverter converter = new SpellingQueryConverter();
     converter.init(new NamedList());
     converter.setAnalyzer(new WhitespaceAnalyzer());
-    Collection<Token> tokens = converter.convert("field_with_underscore:value_with_underscore");
+    String original = "field_with_underscore:value_with_underscore";
+    Collection<Token> tokens = converter.convert(original);
     assertTrue("tokens is null and it shouldn't be", tokens != null);
-    Assert.assertEquals("tokens Size: " + tokens.size() + " is not 1", 1, tokens.size());
+    assertEquals("tokens Size: " + tokens.size() + " is not 1", 1, tokens.size());
+    assertTrue("Token offsets do not match", isOffsetCorrect(original, tokens));
 
-    tokens = converter.convert("field_with_digits123:value_with_digits123");
+    original = "field_with_digits123:value_with_digits123";
+    tokens = converter.convert(original);
     assertTrue("tokens is null and it shouldn't be", tokens != null);
-    Assert.assertEquals("tokens Size: " + tokens.size() + " is not 1", 1, tokens.size());
+    assertEquals("tokens Size: " + tokens.size() + " is not 1", 1, tokens.size());
+    assertTrue("Token offsets do not match", isOffsetCorrect(original, tokens));
 
-    tokens = converter.convert("field-with-hyphens:value-with-hyphens");
+    original = "field-with-hyphens:value-with-hyphens";
+    tokens = converter.convert(original);
     assertTrue("tokens is null and it shouldn't be", tokens != null);
-    Assert.assertEquals("tokens Size: " + tokens.size() + " is not 1", 1, tokens.size());
+    assertEquals("tokens Size: " + tokens.size() + " is not 1", 1, tokens.size());
+    assertTrue("Token offsets do not match", isOffsetCorrect(original, tokens));
 
     // mix 'em up and add some to the value
-    tokens = converter.convert("field_with-123s:value_,.|with-hyphens");
+//    original = "field_with-123s:value_,.|with-hyphens";
+//    tokens = converter.convert(original);
+//    assertTrue("tokens is null and it shouldn't be", tokens != null);
+//    assertEquals("tokens Size: " + tokens.size() + " is not 1", 1, tokens.size());
+//    assertTrue("Token offsets do not match", isOffsetCorrect(original, tokens));
+
+    original = "foo:bar^5.0";
+    tokens = converter.convert(original);
     assertTrue("tokens is null and it shouldn't be", tokens != null);
-    Assert.assertEquals("tokens Size: " + tokens.size() + " is not 1", 1, tokens.size());
+    assertEquals("tokens Size: " + tokens.size() + " is not 1", 1, tokens.size());
+    assertTrue("Token offsets do not match", isOffsetCorrect(original, tokens));
+  }
+
+  private boolean isOffsetCorrect(String s, Collection<Token> tokens) {
+    for (Token token : tokens) {
+      int start = token.startOffset();
+      int end = token.endOffset();
+      if (!s.substring(start, end).equals(token.term()))  return false;
+    }
+    return true;
   }
 
   @Test
@@ -77,15 +102,15 @@
     // chinese text value
     Collection<Token> tokens = converter.convert("text_field:我购买了道具和服装。");
     assertTrue("tokens is null and it shouldn't be", tokens != null);
-    Assert.assertEquals("tokens Size: " + tokens.size() + " is not 1", 1, tokens.size());
+    assertEquals("tokens Size: " + tokens.size() + " is not 1", 1, tokens.size());
 
     tokens = converter.convert("text_购field:我购买了道具和服装。");
     assertTrue("tokens is null and it shouldn't be", tokens != null);
-    Assert.assertEquals("tokens Size: " + tokens.size() + " is not 1", 1, tokens.size());
+    assertEquals("tokens Size: " + tokens.size() + " is not 1", 1, tokens.size());
 
     tokens = converter.convert("text_field:我购xyz买了道具和服装。");
     assertTrue("tokens is null and it shouldn't be", tokens != null);
-    Assert.assertEquals("tokens Size: " + tokens.size() + " is not 1", 1, tokens.size());
+    assertEquals("tokens Size: " + tokens.size() + " is not 1", 1, tokens.size());
   }
 
   @Test
@@ -97,11 +122,11 @@
     // two field:value pairs should give two tokens
     Collection<Token> tokens = converter.convert("买text_field:我购买了道具和服装。 field2:bar");
     assertTrue("tokens is null and it shouldn't be", tokens != null);
-    Assert.assertEquals("tokens Size: " + tokens.size() + " is not 2", 2, tokens.size());
+    assertEquals("tokens Size: " + tokens.size() + " is not 2", 2, tokens.size());
 
     // a field:value pair and a search term should give two tokens
     tokens = converter.convert("text_field:我购买了道具和服装。 bar");
     assertTrue("tokens is null and it shouldn't be", tokens != null);
-    Assert.assertEquals("tokens Size: " + tokens.size() + " is not 2", 2, tokens.size());
+    assertEquals("tokens Size: " + tokens.size() + " is not 2", 2, tokens.size());
   }
 }