You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by sh...@apache.org on 2017/04/20 12:07:12 UTC

lucene-solr:master: SOLR-10505: Add multi-field support to TermsComponent for terms stats

Repository: lucene-solr
Updated Branches:
  refs/heads/master ffe61ff2a -> 19bcffa03


SOLR-10505: Add multi-field support to TermsComponent for terms stats


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/19bcffa0
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/19bcffa0
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/19bcffa0

Branch: refs/heads/master
Commit: 19bcffa0362b5d8d9d85dbe9c68497d4d81b8436
Parents: ffe61ff
Author: Shai Erera <sh...@apache.org>
Authored: Tue Apr 18 06:33:18 2017 +0300
Committer: Shai Erera <sh...@apache.org>
Committed: Thu Apr 20 15:03:27 2017 +0300

----------------------------------------------------------------------
 solr/CHANGES.txt                                |  2 +
 .../solr/handler/component/TermsComponent.java  | 82 ++++++++++----------
 .../DistributedTermsComponentTest.java          | 10 ++-
 .../handler/component/TermsComponentTest.java   | 27 +++++++
 4 files changed, 75 insertions(+), 46 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/19bcffa0/solr/CHANGES.txt
----------------------------------------------------------------------
diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index 64de854..6d55a3e 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -175,6 +175,8 @@ New Features
   Example: json.facet={x:"stddev(field1)", y:"variance(field2)"}
   (Rustam Hashimov, yonik)
 
+* SOLR-10505: Add multi-field support to TermsComponent when requesting terms' statistics. (Shai Erera)
+
 Optimizations
 ----------------------
 

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/19bcffa0/solr/core/src/java/org/apache/solr/handler/component/TermsComponent.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/handler/component/TermsComponent.java b/solr/core/src/java/org/apache/solr/handler/component/TermsComponent.java
index b05939e..6c89e1d 100644
--- a/solr/core/src/java/org/apache/solr/handler/component/TermsComponent.java
+++ b/solr/core/src/java/org/apache/solr/handler/component/TermsComponent.java
@@ -101,8 +101,8 @@ public class TermsComponent extends SearchComponent {
 
     boolean termStats = params.getBool(TermsParams.TERMS_STATS, false);
 
-    if(termStats) {
-      NamedList<Number> stats = new SimpleOrderedMap();
+    if (termStats) {
+      NamedList<Number> stats = new SimpleOrderedMap<>();
       rb.rsp.add("indexstats", stats);
       collectStats(rb.req.getSearcher(), stats);
     }
@@ -335,7 +335,7 @@ public class TermsComponent extends SearchComponent {
     rb._termsHelper = null;
   }
 
-  private ShardRequest createShardQuery(SolrParams params) {
+  private static ShardRequest createShardQuery(SolrParams params) {
     ShardRequest sreq = new ShardRequest();
     sreq.purpose = ShardRequest.PURPOSE_GET_TERMS;
 
@@ -410,7 +410,7 @@ public class TermsComponent extends SearchComponent {
       }
     }
 
-    public NamedList buildResponse() {
+    public NamedList<Object> buildResponse() {
       NamedList<Object> response = new SimpleOrderedMap<>();
 
       // determine if we are going index or count sort
@@ -480,7 +480,7 @@ public class TermsComponent extends SearchComponent {
     }
 
     // use <int> tags for smaller facet counts (better back compatibility)
-    private Number num(long val) {
+    private static Number num(long val) {
       if (val < Integer.MAX_VALUE) return (int) val;
       else return val;
     }
@@ -515,53 +515,51 @@ public class TermsComponent extends SearchComponent {
     }
   }
 
-  private void fetchTerms(SolrIndexSearcher indexSearcher,
-                          String[] fields,
-                          String termList,
-                          boolean includeTotalTermFreq,
-                          NamedList result) throws IOException {
-
-    String field = fields[0];
-    FieldType fieldType = indexSearcher.getSchema().getField(field).getType();
+  private static void fetchTerms(SolrIndexSearcher indexSearcher, String[] fields, String termList, 
+      boolean includeTotalTermFreq, NamedList<Object> result) throws IOException {
     String[] splitTerms = termList.split(",");
-
-    for(int i=0; i<splitTerms.length; i++) {
+    for (int i = 0; i < splitTerms.length; i++) {
       splitTerms[i] = splitTerms[i].trim();
     }
+    // Sort the terms once
+    Arrays.sort(splitTerms);
 
-    Term[] terms = new Term[splitTerms.length];
-    for(int i=0; i<splitTerms.length; i++) {
-      terms[i] = new Term(field, fieldType.readableToIndexed(splitTerms[i]));
-    }
+    IndexReaderContext topReaderContext = indexSearcher.getTopReaderContext();
+    for (String field : fields) {
+      FieldType fieldType = indexSearcher.getSchema().getField(field).getType();
+
+      // splitTerms is sorted, so this array is sorted too only if readableToIndexed preserves that order (TODO confirm)
+      Term[] terms = new Term[splitTerms.length];
+      for (int i = 0; i < splitTerms.length; i++) {
+        terms[i] = new Term(field, fieldType.readableToIndexed(splitTerms[i]));
+      }
 
-    Arrays.sort(terms);
+      TermContext[] termContexts = new TermContext[terms.length];
+      collectTermContext(topReaderContext, termContexts, terms);
 
-    IndexReaderContext topReaderContext = indexSearcher.getTopReaderContext();
-    TermContext[] termContexts = new TermContext[terms.length];
-    collectTermContext(topReaderContext, termContexts, terms);
-
-    NamedList termsMap = new SimpleOrderedMap();
-    for (int i = 0; i < terms.length; i++) {
-      if (termContexts[i] != null) {
-        String outTerm = fieldType.indexedToReadable(terms[i].bytes().utf8ToString());
-        int docFreq = termContexts[i].docFreq();
-        if (!includeTotalTermFreq) {
-          termsMap.add(outTerm, docFreq);
-        } else {
-          long totalTermFreq = termContexts[i].totalTermFreq();
-          NamedList<Long> termStats = new SimpleOrderedMap<>();
-          termStats.add("docFreq", (long) docFreq);
-          termStats.add("totalTermFreq", totalTermFreq);
-          termsMap.add(outTerm, termStats);
+      NamedList<Object> termsMap = new SimpleOrderedMap<>();
+      for (int i = 0; i < terms.length; i++) {
+        if (termContexts[i] != null) {
+          String outTerm = fieldType.indexedToReadable(terms[i].bytes().utf8ToString());
+          int docFreq = termContexts[i].docFreq();
+          if (!includeTotalTermFreq) {
+            termsMap.add(outTerm, docFreq);
+          } else {
+            long totalTermFreq = termContexts[i].totalTermFreq();
+            NamedList<Long> termStats = new SimpleOrderedMap<>();
+            termStats.add("docFreq", (long) docFreq);
+            termStats.add("totalTermFreq", totalTermFreq);
+            termsMap.add(outTerm, termStats);
+          }
         }
       }
-    }
 
-    result.add(field, termsMap);
+      result.add(field, termsMap);
+    }
   }
 
-  private void collectTermContext(IndexReaderContext topReaderContext, TermContext[] contextArray, Term[] queryTerms)
-      throws IOException {
+  private static void collectTermContext(IndexReaderContext topReaderContext, TermContext[] contextArray,
+      Term[] queryTerms) throws IOException {
     TermsEnum termsEnum = null;
     for (LeafReaderContext context : topReaderContext.leaves()) {
       final Fields fields = context.reader().fields();
@@ -589,7 +587,7 @@ public class TermsComponent extends SearchComponent {
     }
   }
 
-  private void collectStats(SolrIndexSearcher searcher, NamedList<Number> stats) {
+  private static void collectStats(SolrIndexSearcher searcher, NamedList<Number> stats) {
     int numDocs = searcher.getTopReaderContext().reader().numDocs();
     stats.add("numDocs", Long.valueOf(numDocs));
   }

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/19bcffa0/solr/core/src/test/org/apache/solr/handler/component/DistributedTermsComponentTest.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/handler/component/DistributedTermsComponentTest.java b/solr/core/src/test/org/apache/solr/handler/component/DistributedTermsComponentTest.java
index 9c90efb..b3f1f30 100644
--- a/solr/core/src/test/org/apache/solr/handler/component/DistributedTermsComponentTest.java
+++ b/solr/core/src/test/org/apache/solr/handler/component/DistributedTermsComponentTest.java
@@ -34,13 +34,14 @@ public class DistributedTermsComponentTest extends BaseDistributedSearchTestCase
     index(id, 19, "b_t", "snake spider shark snail slug", "foo_i", "2");
     index(id, 20, "b_t", "snake spider shark snail", "foo_i", "3");
     index(id, 21, "b_t", "snake spider shark", "foo_i", "2");
-    index(id, 22, "b_t", "snake spider");
-    index(id, 23, "b_t", "snake");
-    index(id, 24, "b_t", "ant zebra");
-    index(id, 25, "b_t", "zebra");
+    index(id, 22, "b_t", "snake spider", "c_t", "snake spider");
+    index(id, 23, "b_t", "snake", "c_t", "snake");
+    index(id, 24, "b_t", "ant zebra", "c_t", "ant zebra");
+    index(id, 25, "b_t", "zebra", "c_t", "zebra");
     commit();
 
     handle.clear();
+    handle.put("terms", UNORDERED);
 
     query("qt", "/terms", "shards.qt", "/terms", "terms", "true", "terms.fl", "b_t");
     query("qt", "/terms", "shards.qt", "/terms", "terms.limit", 5, "terms", "true", "terms.fl", "b_t", "terms.lower", "s");
@@ -53,5 +54,6 @@ public class DistributedTermsComponentTest extends BaseDistributedSearchTestCase
     query("qt", "/terms", "shards.qt", "/terms", "terms", "true", "terms.fl", "foo_i", "terms.list", "2, 3, 1");
     query("qt", "/terms", "shards.qt", "/terms", "terms", "true", "terms.fl", "foo_i", "terms.stats", "true","terms.list", "2, 3, 1");
     query("qt", "/terms", "shards.qt", "/terms", "terms", "true", "terms.fl", "b_t", "terms.list", "snake, zebra", "terms.ttf", "true");
+    query("qt", "/terms", "shards.qt", "/terms", "terms", "true", "terms.fl", "b_t", "terms.fl", "c_t", "terms.list", "snake, ant, zebra", "terms.ttf", "true");
   }
 }

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/19bcffa0/solr/core/src/test/org/apache/solr/handler/component/TermsComponentTest.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/handler/component/TermsComponentTest.java b/solr/core/src/test/org/apache/solr/handler/component/TermsComponentTest.java
index 7fb5e12..29e54e0 100644
--- a/solr/core/src/test/org/apache/solr/handler/component/TermsComponentTest.java
+++ b/solr/core/src/test/org/apache/solr/handler/component/TermsComponentTest.java
@@ -351,4 +351,31 @@ public class TermsComponentTest extends SolrTestCaseJ4 {
         "//lst[@name='standardfilt']/lst[@name='snake']/long[@name='totalTermFreq'][.='3']");
   }
 
+  @Test
+  public void testDocFreqAndTotalTermFreqForMultipleFields() throws Exception {
+    SolrQueryRequest req = req(
+        "indent","true",
+        "qt", "/terms",
+        "terms", "true",
+        "terms.fl", "lowerfilt",
+        "terms.fl", "standardfilt",
+        "terms.ttf", "true",
+        "terms.list", "a,aa,aaa");
+    assertQ(req,
+        "count(//lst[@name='lowerfilt']/*)=3",
+        "count(//lst[@name='standardfilt']/*)=3",
+        "//lst[@name='lowerfilt']/lst[@name='a']/long[@name='docFreq'][.='2']",
+        "//lst[@name='lowerfilt']/lst[@name='a']/long[@name='totalTermFreq'][.='2']",
+        "//lst[@name='lowerfilt']/lst[@name='aa']/long[@name='docFreq'][.='1']",
+        "//lst[@name='lowerfilt']/lst[@name='aa']/long[@name='totalTermFreq'][.='1']",
+        "//lst[@name='lowerfilt']/lst[@name='aaa']/long[@name='docFreq'][.='1']",
+        "//lst[@name='lowerfilt']/lst[@name='aaa']/long[@name='totalTermFreq'][.='1']",
+        "//lst[@name='standardfilt']/lst[@name='a']/long[@name='docFreq'][.='1']",
+        "//lst[@name='standardfilt']/lst[@name='a']/long[@name='totalTermFreq'][.='1']",
+        "//lst[@name='standardfilt']/lst[@name='aa']/long[@name='docFreq'][.='1']",
+        "//lst[@name='standardfilt']/lst[@name='aa']/long[@name='totalTermFreq'][.='1']",
+        "//lst[@name='standardfilt']/lst[@name='aaa']/long[@name='docFreq'][.='1']",
+        "//lst[@name='standardfilt']/lst[@name='aaa']/long[@name='totalTermFreq'][.='1']");
+  }
+
 }