You are viewing a plain text version of this content. The canonical link for it is here.
Posted to solr-commits@lucene.apache.org by no...@apache.org on 2009/12/11 10:03:30 UTC

svn commit: r889537 - in /lucene/solr/trunk: CHANGES.txt src/common/org/apache/solr/common/params/TermsParams.java src/java/org/apache/solr/handler/component/TermsComponent.java src/test/org/apache/solr/handler/component/TermsComponentTest.java

Author: noble
Date: Fri Dec 11 09:03:28 2009
New Revision: 889537

URL: http://svn.apache.org/viewvc?rev=889537&view=rev
Log:
SOLR-1625 Add regexp support for TermsComponent

Modified:
    lucene/solr/trunk/CHANGES.txt
    lucene/solr/trunk/src/common/org/apache/solr/common/params/TermsParams.java
    lucene/solr/trunk/src/java/org/apache/solr/handler/component/TermsComponent.java
    lucene/solr/trunk/src/test/org/apache/solr/handler/component/TermsComponentTest.java

Modified: lucene/solr/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/CHANGES.txt?rev=889537&r1=889536&r2=889537&view=diff
==============================================================================
--- lucene/solr/trunk/CHANGES.txt (original)
+++ lucene/solr/trunk/CHANGES.txt Fri Dec 11 09:03:28 2009
@@ -53,8 +53,7 @@
 * SOLR-1571: Added unicode collation support though Lucene's CollationKeyFilter
   (Robert Muir via shalin)
 
-* SOLR-785: Distributed Search support for SpellCheckComponent
-  (Matthew Woytowitz, shalin)
+* SOLR-1625: Add regexp support for TermsComponent (Uri Boness via noble)
 
 Optimizations
 ----------------------
@@ -142,8 +141,6 @@
 * SOLR-1608: Extract base class from TestDistributedSearch to make
   it easy to write test cases for other distributed components. (shalin)
 
-* Upgraded to Lucene 2.9-dev r888785 (shalin)
-
 Build
 ----------------------
 

Modified: lucene/solr/trunk/src/common/org/apache/solr/common/params/TermsParams.java
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/src/common/org/apache/solr/common/params/TermsParams.java?rev=889537&r1=889536&r2=889537&view=diff
==============================================================================
--- lucene/solr/trunk/src/common/org/apache/solr/common/params/TermsParams.java (original)
+++ lucene/solr/trunk/src/common/org/apache/solr/common/params/TermsParams.java Fri Dec 11 09:03:28 2009
@@ -17,6 +17,7 @@
 
 package org.apache.solr.common.params;
 
+import java.util.regex.Pattern;
 
 /**
  *
@@ -68,7 +69,32 @@
 
   public static final String TERMS_PREFIX_STR = TERMS_PREFIX + "prefix";
 
-  /**
+  public static final String TERMS_REGEXP_STR = TERMS_PREFIX + "regex";
+
+  public static final String TERMS_REGEXP_FLAG = TERMS_REGEXP_STR + ".flag";
+
+  /** Flag names accepted by the {@link #TERMS_REGEXP_FLAG} parameter;
+   *  each constant carries the matching java.util.regex.Pattern bit. */
+  public static enum TermsRegexpFlag {
+      UNIX_LINES(Pattern.UNIX_LINES),
+      CASE_INSENSITIVE(Pattern.CASE_INSENSITIVE),
+      COMMENTS(Pattern.COMMENTS),
+      MULTILINE(Pattern.MULTILINE),
+      LITERAL(Pattern.LITERAL),
+      DOTALL(Pattern.DOTALL),
+      UNICODE_CASE(Pattern.UNICODE_CASE),
+      CANON_EQ(Pattern.CANON_EQ);
+
+      /** Pattern flag bit for this constant; set once by the constructor. */
+      private final int value;
+
+      TermsRegexpFlag(int value) { this.value = value; }
+
+      /** Returns the java.util.regex.Pattern flag bit wrapped by this constant. */
+      public int getValue() { return value; }
+  }
+
+    /**
    * Optional.  The minimum value of docFreq to be returned.  1 by default
    */
   public static final String TERMS_MINCOUNT = TERMS_PREFIX + "mincount";

Modified: lucene/solr/trunk/src/java/org/apache/solr/handler/component/TermsComponent.java
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/src/java/org/apache/solr/handler/component/TermsComponent.java?rev=889537&r1=889536&r2=889537&view=diff
==============================================================================
--- lucene/solr/trunk/src/java/org/apache/solr/handler/component/TermsComponent.java (original)
+++ lucene/solr/trunk/src/java/org/apache/solr/handler/component/TermsComponent.java Fri Dec 11 09:03:28 2009
@@ -20,7 +20,6 @@
 import org.apache.lucene.index.TermEnum;
 import org.apache.lucene.util.StringHelper;
 import org.apache.solr.common.SolrException;
-import org.apache.solr.common.params.CommonParams;
 import org.apache.solr.common.params.SolrParams;
 import org.apache.solr.common.params.TermsParams;
 import org.apache.solr.common.util.NamedList;
@@ -30,7 +29,7 @@
 import org.apache.solr.util.BoundedTreeSet;
 
 import java.io.IOException;
-
+import java.util.regex.Pattern;
 
 /**
  * Return TermEnum information, useful for things like auto suggest.
@@ -65,6 +64,9 @@
           freqmax = Integer.MAX_VALUE;
         }
         String prefix = params.get(TermsParams.TERMS_PREFIX_STR);
+        String regexp = params.get(TermsParams.TERMS_REGEXP_STR);
+        Pattern pattern = regexp != null ? Pattern.compile(regexp, resolveRegexpFlags(params)) : null;
+
         boolean raw = params.getBool(TermsParams.TERMS_RAW, false);
         for (int j = 0; j < fields.length; j++) {
           String field = StringHelper.intern(fields[j]);
@@ -105,6 +107,11 @@
             // stop if the prefix doesn't match
             if (prefix != null && !indexedText.startsWith(prefix)) break;
 
+            if (pattern != null && !pattern.matcher(indexedText).matches()) {
+                termEnum.next();
+                continue;
+            }
+
             if (upperTerm != null) {
               int upperCmp = theTerm.compareTo(upperTerm);
               // if we are past the upper term, or equal to it (when don't include upper) then stop.
@@ -146,6 +153,22 @@
     }
   }
 
+  /** ORs the Pattern bits of every terms.regex.flag value (0 if none given); an unknown name is a BAD_REQUEST. */
+  int resolveRegexpFlags(SolrParams params) {
+      String[] flagParams = params.getParams(TermsParams.TERMS_REGEXP_FLAG);
+      if (flagParams == null) return 0;
+      int flags = 0;
+      for (String flagParam : flagParams) {
+          try {
+              // Fixed locale: default-locale upper-casing maps 'i' to dotted capital I (U+0130) under Turkish.
+              flags |= TermsParams.TermsRegexpFlag.valueOf(flagParam.toUpperCase(java.util.Locale.ENGLISH)).getValue();
+          } catch (IllegalArgumentException iae) {
+              throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Unknown terms regex flag '" + flagParam + "'");
+          }
+      }
+      return flags;
+  }
+
   public void prepare(ResponseBuilder rb) throws IOException {
     //nothing to do
   }

Modified: lucene/solr/trunk/src/test/org/apache/solr/handler/component/TermsComponentTest.java
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/src/test/org/apache/solr/handler/component/TermsComponentTest.java?rev=889537&r1=889536&r2=889537&view=diff
==============================================================================
--- lucene/solr/trunk/src/test/org/apache/solr/handler/component/TermsComponentTest.java (original)
+++ lucene/solr/trunk/src/test/org/apache/solr/handler/component/TermsComponentTest.java Fri Dec 11 09:03:28 2009
@@ -26,6 +26,7 @@
 import org.apache.solr.request.LocalSolrQueryRequest;
 import org.apache.solr.request.SolrQueryResponse;
 
+import java.util.regex.Pattern;
 
 /**
  *
@@ -210,6 +211,73 @@
     assertTrue("value is null and it shouldn't be", value != null);
   }
 
+  /** Asks for "standardfilt" terms in (bb, bbbb] that match the regex "b.*" and expects exactly one. */
+  public void testRegexp() throws Exception {
+    SolrCore solrCore = h.getCore();
+    TermsComponent component = (TermsComponent) solrCore.getSearchComponent("termsComp");
+    assertTrue("tc is null and it shouldn't be", component != null);
+
+    // Request: exclusive lower bound, inclusive upper bound, plus the regex filter under test.
+    ModifiableSolrParams request = new ModifiableSolrParams();
+    request.add(TermsParams.TERMS, "true");
+    request.add(TermsParams.TERMS_FIELD, "standardfilt");
+    request.add(TermsParams.TERMS_LOWER,  "bb");
+    request.add(TermsParams.TERMS_LOWER_INCLUSIVE, "false");
+    request.add(TermsParams.TERMS_REGEXP_STR, "b.*");
+    request.add(TermsParams.TERMS_UPPER, "bbbb");
+    request.add(TermsParams.TERMS_UPPER_INCLUSIVE, "true");
+    request.add(TermsParams.TERMS_LIMIT, String.valueOf(50));
+
+    SolrRequestHandler termsHandler = solrCore.getRequestHandler("/terms");
+    assertTrue("handler is null and it shouldn't be", termsHandler != null);
+    SolrQueryResponse response = new SolrQueryResponse();
+    response.add("responseHeader", new SimpleOrderedMap());
+    termsHandler.handleRequest(new LocalSolrQueryRequest(solrCore, request), response);
+
+    NamedList perField = (NamedList) response.getValues().get("terms");
+    NamedList matched = (NamedList) perField.get("standardfilt");
+    assertEquals("terms Size: " + matched.size() + " is not: 1", 1, matched.size());
+  }
+
+  /** Every flag name, given in lower case, must resolve to the OR of the corresponding Pattern constants. */
+  public void testRegexpFlagParsing() {
+      int allFlags = Pattern.CASE_INSENSITIVE | Pattern.LITERAL | Pattern.COMMENTS | Pattern.MULTILINE
+              | Pattern.UNIX_LINES | Pattern.UNICODE_CASE | Pattern.DOTALL | Pattern.CANON_EQ;
+      ModifiableSolrParams request = new ModifiableSolrParams();
+      request.add(TermsParams.TERMS_REGEXP_FLAG, "case_insensitive", "literal", "comments", "multiline",
+              "unix_lines", "unicode_case", "dotall", "canon_eq");
+      assertEquals(allFlags, new TermsComponent().resolveRegexpFlags(request));
+  }
+
+  /** Same range as testRegexp, but with regex "B.*" plus the case_insensitive flag; expects exactly one term. */
+  public void testRegexpWithFlags() throws Exception {
+    SolrCore solrCore = h.getCore();
+    TermsComponent component = (TermsComponent) solrCore.getSearchComponent("termsComp");
+    assertTrue("tc is null and it shouldn't be", component != null);
+
+    // Request: upper-case regex that can only match the requested range if the flag is applied.
+    ModifiableSolrParams request = new ModifiableSolrParams();
+    request.add(TermsParams.TERMS, "true");
+    request.add(TermsParams.TERMS_FIELD, "standardfilt");
+    request.add(TermsParams.TERMS_LOWER,  "bb");
+    request.add(TermsParams.TERMS_LOWER_INCLUSIVE, "false");
+    request.add(TermsParams.TERMS_REGEXP_STR, "B.*");
+    request.add(TermsParams.TERMS_REGEXP_FLAG, "case_insensitive");
+    request.add(TermsParams.TERMS_UPPER, "bbbb");
+    request.add(TermsParams.TERMS_UPPER_INCLUSIVE, "true");
+    request.add(TermsParams.TERMS_LIMIT, String.valueOf(50));
+
+    SolrRequestHandler termsHandler = solrCore.getRequestHandler("/terms");
+    assertTrue("handler is null and it shouldn't be", termsHandler != null);
+    SolrQueryResponse response = new SolrQueryResponse();
+    response.add("responseHeader", new SimpleOrderedMap());
+    termsHandler.handleRequest(new LocalSolrQueryRequest(solrCore, request), response);
+
+    NamedList perField = (NamedList) response.getValues().get("terms");
+    NamedList matched = (NamedList) perField.get("standardfilt");
+    assertEquals("terms Size: " + matched.size() + " is not: 1", 1, matched.size());
+  }
+
   public void testSortCount() throws Exception {
     SolrCore core = h.getCore();
     TermsComponent tc = (TermsComponent) core.getSearchComponent("termsComp");