You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by jb...@apache.org on 2016/07/06 21:04:55 UTC

[3/6] lucene-solr:branch_6x: SOLR-9243: Add terms.list parameter to the TermsComponent to fetch the docFreq for a list of terms

SOLR-9243: Add terms.list parameter to the TermsComponent to fetch the docFreq for a list of terms


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/1427f4b2
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/1427f4b2
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/1427f4b2

Branch: refs/heads/branch_6x
Commit: 1427f4b2e7599504dc96c4395fd861ffb8224d26
Parents: 1e1dc91
Author: jbernste <jb...@apache.org>
Authored: Thu Jun 23 10:04:43 2016 -0400
Committer: jbernste <jb...@apache.org>
Committed: Wed Jul 6 16:58:40 2016 -0400

----------------------------------------------------------------------
 .../solr/handler/component/TermsComponent.java  | 85 +++++++++++++++++++-
 .../DistributedTermsComponentTest.java          | 11 ++-
 .../handler/component/TermsComponentTest.java   | 46 ++++++++---
 .../apache/solr/common/params/TermsParams.java  |  6 ++
 4 files changed, 133 insertions(+), 15 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/1427f4b2/solr/core/src/java/org/apache/solr/handler/component/TermsComponent.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/handler/component/TermsComponent.java b/solr/core/src/java/org/apache/solr/handler/component/TermsComponent.java
index a949268..8a0bad3 100644
--- a/solr/core/src/java/org/apache/solr/handler/component/TermsComponent.java
+++ b/solr/core/src/java/org/apache/solr/handler/component/TermsComponent.java
@@ -28,6 +28,7 @@ import org.apache.solr.common.util.StrUtils;
 import org.apache.solr.schema.FieldType;
 import org.apache.solr.schema.StrField;
 import org.apache.solr.request.SimpleFacets.CountPair;
+import org.apache.solr.search.SolrIndexSearcher;
 import org.apache.solr.util.BoundedTreeSet;
 import org.apache.solr.client.solrj.response.TermsResponse;
 
@@ -92,6 +93,12 @@ public class TermsComponent extends SearchComponent {
 
     if (fields == null || fields.length==0) return;
 
+    String termList = params.get(TermsParams.TERMS_LIST);
+    if(termList != null) {
+      fetchTerms(rb.req.getSearcher(), fields, termList, termsResult);
+      return;
+    }
+
     int limit = params.getInt(TermsParams.TERMS_LIMIT, 10);
     if (limit < 0) {
       limit = Integer.MAX_VALUE;
@@ -377,8 +384,12 @@ public class TermsComponent extends SearchComponent {
       NamedList<Object> response = new SimpleOrderedMap<>();
 
       // determine if we are going index or count sort
-      boolean sort = !TermsParams.TERMS_SORT_INDEX.equals(params.get(
-          TermsParams.TERMS_SORT, TermsParams.TERMS_SORT_COUNT));
+      boolean sort = !TermsParams.TERMS_SORT_INDEX.equals(params.get(TermsParams.TERMS_SORT,
+                                                                     TermsParams.TERMS_SORT_COUNT));
+      if(params.get(TermsParams.TERMS_LIST) != null) {
+        //Always use lexical sort when TERMS_LIST is provided
+        sort = false;
+      }
 
       // init minimum frequency
       long freqmin = 1;
@@ -466,6 +477,76 @@ public class TermsComponent extends SearchComponent {
     }
   }
 
+  private void fetchTerms(SolrIndexSearcher indexSearcher,
+                          String[] fields,
+                          String termList,
+                          NamedList result) throws IOException {
+
+    NamedList termsMap = new SimpleOrderedMap();
+    List<LeafReaderContext> leaves = indexSearcher.getTopReaderContext().leaves();
+    String field = fields[0];
+    FieldType fieldType = indexSearcher.getSchema().getField(field).getType();
+    String[] splitTerms = termList.split(",");
+
+    for(int i=0; i<splitTerms.length; i++) {
+      splitTerms[i] = splitTerms[i].trim();
+    }
+
+    Term[] terms = new Term[splitTerms.length];
+    TermContext[] termContexts = new TermContext[terms.length];
+    for(int i=0; i<splitTerms.length; i++) {
+      terms[i] = new Term(field, fieldType.readableToIndexed(splitTerms[i]));
+    }
+
+    Arrays.sort(terms);
+
+    collectTermContext(indexSearcher.getTopReaderContext().reader(), leaves, termContexts, terms);
+
+    for(int i=0; i<terms.length; i++) {
+      if(termContexts[i] != null) {
+        String outTerm = fieldType.indexedToReadable(terms[i].bytes().utf8ToString());
+        int docFreq = termContexts[i].docFreq();
+        termsMap.add(outTerm, docFreq);
+      }
+    }
+
+    result.add(field, termsMap);
+  }
+
+  private void collectTermContext(IndexReader reader,
+                                 List<LeafReaderContext> leaves, TermContext[] contextArray,
+                                 Term[] queryTerms) throws IOException {
+    TermsEnum termsEnum = null;
+    for (LeafReaderContext context : leaves) {
+      final Fields fields = context.reader().fields();
+      for (int i = 0; i < queryTerms.length; i++) {
+        Term term = queryTerms[i];
+        TermContext termContext = contextArray[i];
+        final Terms terms = fields.terms(term.field());
+        if (terms == null) {
+          // field does not exist
+          continue;
+        }
+        termsEnum = terms.iterator();
+        assert termsEnum != null;
+
+        if (termsEnum == TermsEnum.EMPTY) continue;
+        if (termsEnum.seekExact(term.bytes())) {
+          if (termContext == null) {
+            contextArray[i] = new TermContext(reader.getContext(),
+                termsEnum.termState(), context.ord, termsEnum.docFreq(),
+                termsEnum.totalTermFreq());
+          } else {
+            termContext.register(termsEnum.termState(), context.ord,
+                termsEnum.docFreq(), termsEnum.totalTermFreq());
+          }
+
+        }
+
+      }
+    }
+  }
+
   @Override
   public String getDescription() {
     return "A Component for working with Term Enumerators";

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/1427f4b2/solr/core/src/test/org/apache/solr/handler/component/DistributedTermsComponentTest.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/handler/component/DistributedTermsComponentTest.java b/solr/core/src/test/org/apache/solr/handler/component/DistributedTermsComponentTest.java
index bcd2f25..dba7cc4 100644
--- a/solr/core/src/test/org/apache/solr/handler/component/DistributedTermsComponentTest.java
+++ b/solr/core/src/test/org/apache/solr/handler/component/DistributedTermsComponentTest.java
@@ -30,10 +30,10 @@ public class DistributedTermsComponentTest extends BaseDistributedSearchTestCase
   @Test
   public void test() throws Exception {
     del("*:*");
-    index(id, 18, "b_t", "snake spider shark snail slug seal");
-    index(id, 19, "b_t", "snake spider shark snail slug");
-    index(id, 20, "b_t", "snake spider shark snail");
-    index(id, 21, "b_t", "snake spider shark");
+    index(id, 18, "b_t", "snake spider shark snail slug seal", "foo_i", "1");
+    index(id, 19, "b_t", "snake spider shark snail slug", "foo_i", "2");
+    index(id, 20, "b_t", "snake spider shark snail", "foo_i", "3");
+    index(id, 21, "b_t", "snake spider shark", "foo_i", "2");
     index(id, 22, "b_t", "snake spider");
     index(id, 23, "b_t", "snake");
     index(id, 24, "b_t", "ant zebra");
@@ -49,5 +49,8 @@ public class DistributedTermsComponentTest extends BaseDistributedSearchTestCase
     query("qt", "/terms", "shards.qt", "/terms", "terms.limit", 5, "terms", "true", "terms.fl", "b_t", "terms.prefix", "s", "terms.lower", "s", "terms.sort", "index");
     query("qt", "/terms", "shards.qt", "/terms", "terms.limit", 5, "terms", "true", "terms.fl", "b_t", "terms.prefix", "s", "terms.lower", "s", "terms.upper", "sn", "terms.sort", "index");
     query("qt", "/terms", "shards.qt", "/terms", "terms", "true", "terms.fl", "b_t", "terms.sort", "index");
+    query("qt", "/terms", "shards.qt", "/terms", "terms", "true", "terms.fl", "b_t", "terms.list", "snake, zebra, ant, bad");
+    query("qt", "/terms", "shards.qt", "/terms", "terms", "true", "terms.fl", "foo_i", "terms.list", "2, 3, 1");
+
   }
 }

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/1427f4b2/solr/core/src/test/org/apache/solr/handler/component/TermsComponentTest.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/handler/component/TermsComponentTest.java b/solr/core/src/test/org/apache/solr/handler/component/TermsComponentTest.java
index 934a632..19bd4e1 100644
--- a/solr/core/src/test/org/apache/solr/handler/component/TermsComponentTest.java
+++ b/solr/core/src/test/org/apache/solr/handler/component/TermsComponentTest.java
@@ -32,9 +32,9 @@ public class TermsComponentTest extends SolrTestCaseJ4 {
   @BeforeClass
   public static void beforeTest() throws Exception {
     System.setProperty("enable.update.log", "false"); // schema12 doesn't support _version_
-    initCore("solrconfig.xml","schema12.xml");
+    initCore("solrconfig.xml", "schema12.xml");
 
-    assertNull(h.validateUpdate(adoc("id", "0", "lowerfilt", "a", "standardfilt", "a", "foo_i","1")));
+    assertNull(h.validateUpdate(adoc("id", "0", "lowerfilt", "a", "standardfilt", "a", "foo_i", "1")));
     assertNull(h.validateUpdate(adoc("id", "1", "lowerfilt", "a", "standardfilt", "aa", "foo_i","1")));
     assertNull(h.validateUpdate(adoc("id", "2", "lowerfilt", "aa", "standardfilt", "aaa", "foo_i","2")));
     assertNull(h.validateUpdate(adoc("id", "3", "lowerfilt", "aaa", "standardfilt", "abbb")));
@@ -45,7 +45,10 @@ public class TermsComponentTest extends SolrTestCaseJ4 {
     assertNull(h.validateUpdate(adoc("id", "8", "lowerfilt", "baa", "standardfilt", "cccc")));
     assertNull(h.validateUpdate(adoc("id", "9", "lowerfilt", "bbb", "standardfilt", "ccccc")));
 
+
     assertNull(h.validateUpdate(adoc("id", "10", "standardfilt", "ddddd")));
+
+    assertNull(h.validateUpdate(commit()));
     assertNull(h.validateUpdate(adoc("id", "11", "standardfilt", "ddddd")));
     assertNull(h.validateUpdate(adoc("id", "12", "standardfilt", "ddddd")));
     assertNull(h.validateUpdate(adoc("id", "13", "standardfilt", "ddddd")));
@@ -53,6 +56,8 @@ public class TermsComponentTest extends SolrTestCaseJ4 {
     assertNull(h.validateUpdate(adoc("id", "15", "standardfilt", "d")));
     assertNull(h.validateUpdate(adoc("id", "16", "standardfilt", "d")));
 
+    assertNull(h.validateUpdate(commit()));
+
     assertNull(h.validateUpdate(adoc("id", "17", "standardfilt", "snake")));
     assertNull(h.validateUpdate(adoc("id", "18", "standardfilt", "spider")));
     assertNull(h.validateUpdate(adoc("id", "19", "standardfilt", "shark")));
@@ -137,13 +142,13 @@ public class TermsComponentTest extends SolrTestCaseJ4 {
   @Test
   public void testRegexpWithFlags() throws Exception {
     // TODO: there are no uppercase or mixed-case terms in the index!
-    assertQ(req("indent","true", "qt","/terms",  "terms","true",
-        "terms.fl","standardfilt",
-        "terms.lower","a", "terms.lower.incl","false",
-        "terms.upper","c", "terms.upper.incl","true",
-        "terms.regex","B.*",
-        "terms.regex.flag","case_insensitive")
-        ,"count(//lst[@name='standardfilt']/*)=3"               
+    assertQ(req("indent", "true", "qt", "/terms", "terms", "true",
+            "terms.fl", "standardfilt",
+            "terms.lower", "a", "terms.lower.incl", "false",
+            "terms.upper", "c", "terms.upper.incl", "true",
+            "terms.regex", "B.*",
+            "terms.regex.flag", "case_insensitive")
+        , "count(//lst[@name='standardfilt']/*)=3"
     );
   }
 
@@ -163,6 +168,29 @@ public class TermsComponentTest extends SolrTestCaseJ4 {
   }
 
   @Test
+  public void testTermsList() throws Exception {
+    //Terms list always returns in index order
+    assertQ(req("indent","true", "qt","/terms",  "terms","true",
+            "terms.fl","standardfilt",
+            "terms.list","spider, snake, shark, ddddd, bad")
+        ,"count(//lst[@name='standardfilt']/*)=4"
+        ,"//lst[@name='standardfilt']/int[1][@name='ddddd'][.='4']"
+        ,"//lst[@name='standardfilt']/int[2][@name='shark'][.='2']"
+        ,"//lst[@name='standardfilt']/int[3][@name='snake'][.='3']"
+        ,"//lst[@name='standardfilt']/int[4][@name='spider'][.='1']"
+    );
+
+    //Test with numeric terms
+    assertQ(req("indent","true", "qt","/terms",  "terms","true",
+            "terms.fl","foo_i",
+            "terms.list","2, 1")
+        ,"count(//lst[@name='foo_i']/*)=2"
+        ,"//lst[@name='foo_i']/int[1][@name='1'][.='2']"
+        ,"//lst[@name='foo_i']/int[2][@name='2'][.='1']"
+    );
+  }
+
+  @Test
   public void testSortIndex() throws Exception {
     assertQ(req("indent","true", "qt","/terms",  "terms","true",
         "terms.fl","standardfilt",

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/1427f4b2/solr/solrj/src/java/org/apache/solr/common/params/TermsParams.java
----------------------------------------------------------------------
diff --git a/solr/solrj/src/java/org/apache/solr/common/params/TermsParams.java b/solr/solrj/src/java/org/apache/solr/common/params/TermsParams.java
index ff1be5f..470b14d 100644
--- a/solr/solrj/src/java/org/apache/solr/common/params/TermsParams.java
+++ b/solr/solrj/src/java/org/apache/solr/common/params/TermsParams.java
@@ -39,6 +39,12 @@ public interface TermsParams {
   public static final String TERMS_FIELD = TERMS_PREFIX + "fl";
 
   /**
+   * Optional. The list of terms to be retrieved.
+   *
+   */
+  public static final String TERMS_LIST = TERMS_PREFIX + "list";
+
+  /**
    * Optional.  The lower bound term to start at.  The TermEnum will start at the next term after this term in the dictionary.
    *
    * If not specified, the empty string is used