You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by jb...@apache.org on 2016/07/06 21:04:53 UTC

[1/6] lucene-solr:branch_6x: SOLR-9193: Added terms.limit and distrib=true params to /terms request

Repository: lucene-solr
Updated Branches:
  refs/heads/branch_6x 1e1dc91bb -> e27849052


SOLR-9193: Added terms.limit and distrib=true params to /terms request


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/e2784905
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/e2784905
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/e2784905

Branch: refs/heads/branch_6x
Commit: e27849052ebd7d2314560eb5a1704ca33d442565
Parents: 7a5e6a5
Author: jbernste <jb...@apache.org>
Authored: Tue Jul 5 20:33:58 2016 -0400
Committer: jbernste <jb...@apache.org>
Committed: Wed Jul 6 16:58:40 2016 -0400

----------------------------------------------------------------------
 .../apache/solr/client/solrj/io/stream/ScoreNodesStream.java | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e2784905/solr/solrj/src/java/org/apache/solr/client/solrj/io/stream/ScoreNodesStream.java
----------------------------------------------------------------------
diff --git a/solr/solrj/src/java/org/apache/solr/client/solrj/io/stream/ScoreNodesStream.java b/solr/solrj/src/java/org/apache/solr/client/solrj/io/stream/ScoreNodesStream.java
index 814b69c..9f61baa 100644
--- a/solr/solrj/src/java/org/apache/solr/client/solrj/io/stream/ScoreNodesStream.java
+++ b/solr/solrj/src/java/org/apache/solr/client/solrj/io/stream/ScoreNodesStream.java
@@ -35,7 +35,6 @@ import org.apache.solr.client.solrj.io.stream.expr.Expressible;
 import org.apache.solr.client.solrj.io.stream.expr.StreamExplanation;
 import org.apache.solr.client.solrj.io.stream.expr.StreamExpression;
 import org.apache.solr.client.solrj.io.stream.expr.StreamExpressionNamedParameter;
-import org.apache.solr.client.solrj.io.stream.expr.StreamExpressionValue;
 import org.apache.solr.client.solrj.io.stream.expr.StreamFactory;
 import org.apache.solr.client.solrj.request.QueryRequest;
 import org.apache.solr.common.params.CommonParams;
@@ -48,10 +47,10 @@ import org.apache.solr.common.util.NamedList;
  *
  *  Expression Syntax:
  *
- *  Default function call uses the "count(*)" value for node freq.
+ *  Default function call uses the "count(*)" field for node freq.
  *
  *  You can use a different value for node freq by providing the nodeFreq param
- *  scoreNodes(gatherNodes(...), nodeFreq="min(weight)")
+ *  scoreNodes(gatherNodes(...), termFreq="min(weight)")
  *
  **/
 
@@ -188,6 +187,9 @@ public class ScoreNodesStream extends TupleStream implements Expressible
     params.add(TermsParams.TERMS_FIELD, field);
     params.add(TermsParams.TERMS_STATS, "true");
     params.add(TermsParams.TERMS_LIST, builder.toString());
+    params.add(TermsParams.TERMS_LIMIT, Integer.toString(nodes.size()));
+    params.add("distrib", "true");
+
     QueryRequest request = new QueryRequest(params);
 
 


[6/6] lucene-solr:branch_6x: SOLR-9193: fixing failing tests due to changes in TermsComponent

Posted by jb...@apache.org.
SOLR-9193: fixing failing tests due to changes in TermsComponent


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/ed86e014
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/ed86e014
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/ed86e014

Branch: refs/heads/branch_6x
Commit: ed86e014f61474843a8dc064c912d91d51ff5cba
Parents: 879a245
Author: jbernste <jb...@apache.org>
Authored: Mon Jul 4 22:32:51 2016 -0400
Committer: jbernste <jb...@apache.org>
Committed: Wed Jul 6 16:58:40 2016 -0400

----------------------------------------------------------------------
 .../org/apache/solr/handler/component/TermsComponent.java    | 8 +++++---
 .../apache/solr/handler/component/TermsComponentTest.java    | 2 +-
 .../apache/solr/client/solrj/io/stream/ScoreNodesStream.java | 2 +-
 3 files changed, 7 insertions(+), 5 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ed86e014/solr/core/src/java/org/apache/solr/handler/component/TermsComponent.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/handler/component/TermsComponent.java b/solr/core/src/java/org/apache/solr/handler/component/TermsComponent.java
index 28649db..daf5b5b 100644
--- a/solr/core/src/java/org/apache/solr/handler/component/TermsComponent.java
+++ b/solr/core/src/java/org/apache/solr/handler/component/TermsComponent.java
@@ -67,6 +67,8 @@ public class TermsComponent extends SearchComponent {
     SolrParams params = rb.req.getParams();
     if (params.getBool(TermsParams.TERMS, false)) {
       rb.doTerms = true;
+    } else {
+      return;
     }
 
     // TODO: temporary... this should go in a different component.
@@ -97,7 +99,7 @@ public class TermsComponent extends SearchComponent {
 
     if(termStats) {
       NamedList<Number> stats = new SimpleOrderedMap();
-      rb.rsp.add("stats", stats);
+      rb.rsp.add("indexstats", stats);
       collectStats(rb.req.getSearcher(), stats);
     }
 
@@ -301,7 +303,7 @@ public class TermsComponent extends SearchComponent {
         th.parse(terms);
 
 
-        NamedList<Number> stats = (NamedList<Number>)srsp.getSolrResponse().getResponse().get("stats");
+        NamedList<Number> stats = (NamedList<Number>)srsp.getSolrResponse().getResponse().get("indexstats");
         if(stats != null) {
           th.numDocs += stats.get("numDocs").longValue();
           th.stats = true;
@@ -323,7 +325,7 @@ public class TermsComponent extends SearchComponent {
     if(ti.stats) {
       NamedList<Number> stats = new SimpleOrderedMap();
       stats.add("numDocs", Long.valueOf(ti.numDocs));
-      rb.rsp.add("stats", stats);
+      rb.rsp.add("indexstats", stats);
     }
     rb._termsHelper = null;
   }

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ed86e014/solr/core/src/test/org/apache/solr/handler/component/TermsComponentTest.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/handler/component/TermsComponentTest.java b/solr/core/src/test/org/apache/solr/handler/component/TermsComponentTest.java
index 473b727..0974524 100644
--- a/solr/core/src/test/org/apache/solr/handler/component/TermsComponentTest.java
+++ b/solr/core/src/test/org/apache/solr/handler/component/TermsComponentTest.java
@@ -198,7 +198,7 @@ public class TermsComponentTest extends SolrTestCaseJ4 {
     assertQ(req("indent", "true", "qt", "/terms", "terms", "true",
             "terms.fl", "standardfilt","terms.stats", "true",
             "terms.list", "spider, snake, shark, ddddd, bad")
-        , "//lst[@name='stats']/int[1][@name='numDocs'][.='23']"
+        , "//lst[@name='indexstats']/long[1][@name='numDocs'][.='23']"
     );
   }
 

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ed86e014/solr/solrj/src/java/org/apache/solr/client/solrj/io/stream/ScoreNodesStream.java
----------------------------------------------------------------------
diff --git a/solr/solrj/src/java/org/apache/solr/client/solrj/io/stream/ScoreNodesStream.java b/solr/solrj/src/java/org/apache/solr/client/solrj/io/stream/ScoreNodesStream.java
index f2aa070..0d305fd 100644
--- a/solr/solrj/src/java/org/apache/solr/client/solrj/io/stream/ScoreNodesStream.java
+++ b/solr/solrj/src/java/org/apache/solr/client/solrj/io/stream/ScoreNodesStream.java
@@ -189,7 +189,7 @@ public class ScoreNodesStream extends TupleStream implements Expressible
 
       //Get the response from the terms component
       NamedList response = client.request(request, collection);
-      NamedList<Number> stats = (NamedList<Number>)response.get("stats");
+      NamedList<Number> stats = (NamedList<Number>)response.get("indexstats");
       long numDocs = stats.get("numDocs").longValue();
       NamedList<NamedList<Number>> fields = (NamedList<NamedList<Number>>)response.get("terms");
 


[3/6] lucene-solr:branch_6x: SOLR-9243: Add terms.list parameter to the TermsComponent to fetch the docFreq for a list of terms

Posted by jb...@apache.org.
SOLR-9243: Add terms.list parameter to the TermsComponent to fetch the docFreq for a list of terms


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/1427f4b2
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/1427f4b2
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/1427f4b2

Branch: refs/heads/branch_6x
Commit: 1427f4b2e7599504dc96c4395fd861ffb8224d26
Parents: 1e1dc91
Author: jbernste <jb...@apache.org>
Authored: Thu Jun 23 10:04:43 2016 -0400
Committer: jbernste <jb...@apache.org>
Committed: Wed Jul 6 16:58:40 2016 -0400

----------------------------------------------------------------------
 .../solr/handler/component/TermsComponent.java  | 85 +++++++++++++++++++-
 .../DistributedTermsComponentTest.java          | 11 ++-
 .../handler/component/TermsComponentTest.java   | 46 ++++++++---
 .../apache/solr/common/params/TermsParams.java  |  6 ++
 4 files changed, 133 insertions(+), 15 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/1427f4b2/solr/core/src/java/org/apache/solr/handler/component/TermsComponent.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/handler/component/TermsComponent.java b/solr/core/src/java/org/apache/solr/handler/component/TermsComponent.java
index a949268..8a0bad3 100644
--- a/solr/core/src/java/org/apache/solr/handler/component/TermsComponent.java
+++ b/solr/core/src/java/org/apache/solr/handler/component/TermsComponent.java
@@ -28,6 +28,7 @@ import org.apache.solr.common.util.StrUtils;
 import org.apache.solr.schema.FieldType;
 import org.apache.solr.schema.StrField;
 import org.apache.solr.request.SimpleFacets.CountPair;
+import org.apache.solr.search.SolrIndexSearcher;
 import org.apache.solr.util.BoundedTreeSet;
 import org.apache.solr.client.solrj.response.TermsResponse;
 
@@ -92,6 +93,12 @@ public class TermsComponent extends SearchComponent {
 
     if (fields == null || fields.length==0) return;
 
+    String termList = params.get(TermsParams.TERMS_LIST);
+    if(termList != null) {
+      fetchTerms(rb.req.getSearcher(), fields, termList, termsResult);
+      return;
+    }
+
     int limit = params.getInt(TermsParams.TERMS_LIMIT, 10);
     if (limit < 0) {
       limit = Integer.MAX_VALUE;
@@ -377,8 +384,12 @@ public class TermsComponent extends SearchComponent {
       NamedList<Object> response = new SimpleOrderedMap<>();
 
       // determine if we are going index or count sort
-      boolean sort = !TermsParams.TERMS_SORT_INDEX.equals(params.get(
-          TermsParams.TERMS_SORT, TermsParams.TERMS_SORT_COUNT));
+      boolean sort = !TermsParams.TERMS_SORT_INDEX.equals(params.get(TermsParams.TERMS_SORT,
+                                                                     TermsParams.TERMS_SORT_COUNT));
+      if(params.get(TermsParams.TERMS_LIST) != null) {
+        //Always use lexical sort when TERM_LIST is provided
+        sort = false;
+      }
 
       // init minimum frequency
       long freqmin = 1;
@@ -466,6 +477,76 @@ public class TermsComponent extends SearchComponent {
     }
   }
 
+  private void fetchTerms(SolrIndexSearcher indexSearcher,
+                          String[] fields,
+                          String termList,
+                          NamedList result) throws IOException {
+
+    NamedList termsMap = new SimpleOrderedMap();
+    List<LeafReaderContext> leaves = indexSearcher.getTopReaderContext().leaves();
+    String field = fields[0];
+    FieldType fieldType = indexSearcher.getSchema().getField(field).getType();
+    String[] splitTerms = termList.split(",");
+
+    for(int i=0; i<splitTerms.length; i++) {
+      splitTerms[i] = splitTerms[i].trim();
+    }
+
+    Term[] terms = new Term[splitTerms.length];
+    TermContext[] termContexts = new TermContext[terms.length];
+    for(int i=0; i<splitTerms.length; i++) {
+      terms[i] = new Term(field, fieldType.readableToIndexed(splitTerms[i]));
+    }
+
+    Arrays.sort(terms);
+
+    collectTermContext(indexSearcher.getTopReaderContext().reader(), leaves, termContexts, terms);
+
+    for(int i=0; i<terms.length; i++) {
+      if(termContexts[i] != null) {
+        String outTerm = fieldType.indexedToReadable(terms[i].bytes().utf8ToString());
+        int docFreq = termContexts[i].docFreq();
+        termsMap.add(outTerm, docFreq);
+      }
+    }
+
+    result.add(field, termsMap);
+  }
+
+  private void collectTermContext(IndexReader reader,
+                                 List<LeafReaderContext> leaves, TermContext[] contextArray,
+                                 Term[] queryTerms) throws IOException {
+    TermsEnum termsEnum = null;
+    for (LeafReaderContext context : leaves) {
+      final Fields fields = context.reader().fields();
+      for (int i = 0; i < queryTerms.length; i++) {
+        Term term = queryTerms[i];
+        TermContext termContext = contextArray[i];
+        final Terms terms = fields.terms(term.field());
+        if (terms == null) {
+          // field does not exist
+          continue;
+        }
+        termsEnum = terms.iterator();
+        assert termsEnum != null;
+
+        if (termsEnum == TermsEnum.EMPTY) continue;
+        if (termsEnum.seekExact(term.bytes())) {
+          if (termContext == null) {
+            contextArray[i] = new TermContext(reader.getContext(),
+                termsEnum.termState(), context.ord, termsEnum.docFreq(),
+                termsEnum.totalTermFreq());
+          } else {
+            termContext.register(termsEnum.termState(), context.ord,
+                termsEnum.docFreq(), termsEnum.totalTermFreq());
+          }
+
+        }
+
+      }
+    }
+  }
+
   @Override
   public String getDescription() {
     return "A Component for working with Term Enumerators";

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/1427f4b2/solr/core/src/test/org/apache/solr/handler/component/DistributedTermsComponentTest.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/handler/component/DistributedTermsComponentTest.java b/solr/core/src/test/org/apache/solr/handler/component/DistributedTermsComponentTest.java
index bcd2f25..dba7cc4 100644
--- a/solr/core/src/test/org/apache/solr/handler/component/DistributedTermsComponentTest.java
+++ b/solr/core/src/test/org/apache/solr/handler/component/DistributedTermsComponentTest.java
@@ -30,10 +30,10 @@ public class DistributedTermsComponentTest extends BaseDistributedSearchTestCase
   @Test
   public void test() throws Exception {
     del("*:*");
-    index(id, 18, "b_t", "snake spider shark snail slug seal");
-    index(id, 19, "b_t", "snake spider shark snail slug");
-    index(id, 20, "b_t", "snake spider shark snail");
-    index(id, 21, "b_t", "snake spider shark");
+    index(id, 18, "b_t", "snake spider shark snail slug seal", "foo_i", "1");
+    index(id, 19, "b_t", "snake spider shark snail slug", "foo_i", "2");
+    index(id, 20, "b_t", "snake spider shark snail", "foo_i", "3");
+    index(id, 21, "b_t", "snake spider shark", "foo_i", "2");
     index(id, 22, "b_t", "snake spider");
     index(id, 23, "b_t", "snake");
     index(id, 24, "b_t", "ant zebra");
@@ -49,5 +49,8 @@ public class DistributedTermsComponentTest extends BaseDistributedSearchTestCase
     query("qt", "/terms", "shards.qt", "/terms", "terms.limit", 5, "terms", "true", "terms.fl", "b_t", "terms.prefix", "s", "terms.lower", "s", "terms.sort", "index");
     query("qt", "/terms", "shards.qt", "/terms", "terms.limit", 5, "terms", "true", "terms.fl", "b_t", "terms.prefix", "s", "terms.lower", "s", "terms.upper", "sn", "terms.sort", "index");
     query("qt", "/terms", "shards.qt", "/terms", "terms", "true", "terms.fl", "b_t", "terms.sort", "index");
+    query("qt", "/terms", "shards.qt", "/terms", "terms", "true", "terms.fl", "b_t", "terms.list", "snake, zebra, ant, bad");
+    query("qt", "/terms", "shards.qt", "/terms", "terms", "true", "terms.fl", "foo_i", "terms.list", "2, 3, 1");
+
   }
 }

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/1427f4b2/solr/core/src/test/org/apache/solr/handler/component/TermsComponentTest.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/handler/component/TermsComponentTest.java b/solr/core/src/test/org/apache/solr/handler/component/TermsComponentTest.java
index 934a632..19bd4e1 100644
--- a/solr/core/src/test/org/apache/solr/handler/component/TermsComponentTest.java
+++ b/solr/core/src/test/org/apache/solr/handler/component/TermsComponentTest.java
@@ -32,9 +32,9 @@ public class TermsComponentTest extends SolrTestCaseJ4 {
   @BeforeClass
   public static void beforeTest() throws Exception {
     System.setProperty("enable.update.log", "false"); // schema12 doesn't support _version_
-    initCore("solrconfig.xml","schema12.xml");
+    initCore("solrconfig.xml", "schema12.xml");
 
-    assertNull(h.validateUpdate(adoc("id", "0", "lowerfilt", "a", "standardfilt", "a", "foo_i","1")));
+    assertNull(h.validateUpdate(adoc("id", "0", "lowerfilt", "a", "standardfilt", "a", "foo_i", "1")));
     assertNull(h.validateUpdate(adoc("id", "1", "lowerfilt", "a", "standardfilt", "aa", "foo_i","1")));
     assertNull(h.validateUpdate(adoc("id", "2", "lowerfilt", "aa", "standardfilt", "aaa", "foo_i","2")));
     assertNull(h.validateUpdate(adoc("id", "3", "lowerfilt", "aaa", "standardfilt", "abbb")));
@@ -45,7 +45,10 @@ public class TermsComponentTest extends SolrTestCaseJ4 {
     assertNull(h.validateUpdate(adoc("id", "8", "lowerfilt", "baa", "standardfilt", "cccc")));
     assertNull(h.validateUpdate(adoc("id", "9", "lowerfilt", "bbb", "standardfilt", "ccccc")));
 
+
     assertNull(h.validateUpdate(adoc("id", "10", "standardfilt", "ddddd")));
+
+    assertNull(h.validateUpdate(commit()));
     assertNull(h.validateUpdate(adoc("id", "11", "standardfilt", "ddddd")));
     assertNull(h.validateUpdate(adoc("id", "12", "standardfilt", "ddddd")));
     assertNull(h.validateUpdate(adoc("id", "13", "standardfilt", "ddddd")));
@@ -53,6 +56,8 @@ public class TermsComponentTest extends SolrTestCaseJ4 {
     assertNull(h.validateUpdate(adoc("id", "15", "standardfilt", "d")));
     assertNull(h.validateUpdate(adoc("id", "16", "standardfilt", "d")));
 
+    assertNull(h.validateUpdate(commit()));
+
     assertNull(h.validateUpdate(adoc("id", "17", "standardfilt", "snake")));
     assertNull(h.validateUpdate(adoc("id", "18", "standardfilt", "spider")));
     assertNull(h.validateUpdate(adoc("id", "19", "standardfilt", "shark")));
@@ -137,13 +142,13 @@ public class TermsComponentTest extends SolrTestCaseJ4 {
   @Test
   public void testRegexpWithFlags() throws Exception {
     // TODO: there are no uppercase or mixed-case terms in the index!
-    assertQ(req("indent","true", "qt","/terms",  "terms","true",
-        "terms.fl","standardfilt",
-        "terms.lower","a", "terms.lower.incl","false",
-        "terms.upper","c", "terms.upper.incl","true",
-        "terms.regex","B.*",
-        "terms.regex.flag","case_insensitive")
-        ,"count(//lst[@name='standardfilt']/*)=3"               
+    assertQ(req("indent", "true", "qt", "/terms", "terms", "true",
+            "terms.fl", "standardfilt",
+            "terms.lower", "a", "terms.lower.incl", "false",
+            "terms.upper", "c", "terms.upper.incl", "true",
+            "terms.regex", "B.*",
+            "terms.regex.flag", "case_insensitive")
+        , "count(//lst[@name='standardfilt']/*)=3"
     );
   }
 
@@ -163,6 +168,29 @@ public class TermsComponentTest extends SolrTestCaseJ4 {
   }
 
   @Test
+  public void testTermsList() throws Exception {
+    //Terms list always returns in index order
+    assertQ(req("indent","true", "qt","/terms",  "terms","true",
+            "terms.fl","standardfilt",
+            "terms.list","spider, snake, shark, ddddd, bad")
+        ,"count(//lst[@name='standardfilt']/*)=4"
+        ,"//lst[@name='standardfilt']/int[1][@name='ddddd'][.='4']"
+        ,"//lst[@name='standardfilt']/int[2][@name='shark'][.='2']"
+        ,"//lst[@name='standardfilt']/int[3][@name='snake'][.='3']"
+        ,"//lst[@name='standardfilt']/int[4][@name='spider'][.='1']"
+    );
+
+    //Test with numeric terms
+    assertQ(req("indent","true", "qt","/terms",  "terms","true",
+            "terms.fl","foo_i",
+            "terms.list","2, 1")
+        ,"count(//lst[@name='foo_i']/*)=2"
+        ,"//lst[@name='foo_i']/int[1][@name='1'][.='2']"
+        ,"//lst[@name='foo_i']/int[2][@name='2'][.='1']"
+    );
+  }
+
+  @Test
   public void testSortIndex() throws Exception {
     assertQ(req("indent","true", "qt","/terms",  "terms","true",
         "terms.fl","standardfilt",

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/1427f4b2/solr/solrj/src/java/org/apache/solr/common/params/TermsParams.java
----------------------------------------------------------------------
diff --git a/solr/solrj/src/java/org/apache/solr/common/params/TermsParams.java b/solr/solrj/src/java/org/apache/solr/common/params/TermsParams.java
index ff1be5f..470b14d 100644
--- a/solr/solrj/src/java/org/apache/solr/common/params/TermsParams.java
+++ b/solr/solrj/src/java/org/apache/solr/common/params/TermsParams.java
@@ -39,6 +39,12 @@ public interface TermsParams {
   public static final String TERMS_FIELD = TERMS_PREFIX + "fl";
 
   /**
+   * Optional. The list of terms to be retrieved.
+   *
+   */
+  public static final String TERMS_LIST = TERMS_PREFIX + "list";
+
+  /**
    * Optional.  The lower bound term to start at.  The TermEnum will start at the next term after this term in the dictionary.
    *
    * If not specified, the empty string is used


[4/6] lucene-solr:branch_6x: SOLR-9193: Add scoreNodes Streaming Expression

Posted by jb...@apache.org.
SOLR-9193: Add scoreNodes Streaming Expression

Conflicts:
	solr/core/src/java/org/apache/solr/handler/StreamHandler.java


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/879a245e
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/879a245e
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/879a245e

Branch: refs/heads/branch_6x
Commit: 879a245e4e0b63edaa240e1e138223dd9e86b301
Parents: 1427f4b
Author: jbernste <jb...@apache.org>
Authored: Mon Jul 4 20:35:10 2016 -0400
Committer: jbernste <jb...@apache.org>
Committed: Wed Jul 6 16:58:40 2016 -0400

----------------------------------------------------------------------
 .../org/apache/solr/handler/GraphHandler.java   |   2 +-
 .../org/apache/solr/handler/StreamHandler.java  |   1 +
 .../solr/handler/component/SearchComponent.java |   2 +
 .../solr/handler/component/SearchHandler.java   |   2 +
 .../solr/handler/component/TermsComponent.java  |  27 ++
 solr/core/src/resources/ImplicitPlugins.json    |   6 +
 .../test/org/apache/solr/MinimalSchemaTest.java |   4 +-
 .../test/org/apache/solr/core/SolrCoreTest.java |   1 +
 .../DistributedTermsComponentTest.java          |   2 +
 .../handler/component/TermsComponentTest.java   |  12 +
 .../solrj/io/stream/ScoreNodesStream.java       | 245 +++++++++++++++++++
 .../apache/solr/common/params/TermsParams.java  |   6 +
 .../solrj/io/graph/GraphExpressionTest.java     |  95 +++++++
 13 files changed, 403 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/879a245e/solr/core/src/java/org/apache/solr/handler/GraphHandler.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/handler/GraphHandler.java b/solr/core/src/java/org/apache/solr/handler/GraphHandler.java
index 657e6b3..c4b42d9 100644
--- a/solr/core/src/java/org/apache/solr/handler/GraphHandler.java
+++ b/solr/core/src/java/org/apache/solr/handler/GraphHandler.java
@@ -118,7 +118,7 @@ public class GraphHandler extends RequestHandlerBase implements SolrCoreAware, P
         .withFunctionName("shortestPath", ShortestPathStream.class)
         .withFunctionName("gatherNodes", GatherNodesStream.class)
         .withFunctionName("sort", SortStream.class)
-
+            .withFunctionName("scoreNodes", ScoreNodesStream.class)
 
         // metrics
         .withFunctionName("min", MinMetric.class)

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/879a245e/solr/core/src/java/org/apache/solr/handler/StreamHandler.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/handler/StreamHandler.java b/solr/core/src/java/org/apache/solr/handler/StreamHandler.java
index 91ee096..1e9ba27 100644
--- a/solr/core/src/java/org/apache/solr/handler/StreamHandler.java
+++ b/solr/core/src/java/org/apache/solr/handler/StreamHandler.java
@@ -126,6 +126,7 @@ public class StreamHandler extends RequestHandlerBase implements SolrCoreAware,
       .withFunctionName("select", SelectStream.class)
          .withFunctionName("shortestPath", ShortestPathStream.class)
          .withFunctionName("gatherNodes", GatherNodesStream.class)
+         .withFunctionName("scoreNodes", ScoreNodesStream.class)
 
       // metrics
       .withFunctionName("min", MinMetric.class)

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/879a245e/solr/core/src/java/org/apache/solr/handler/component/SearchComponent.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/handler/component/SearchComponent.java b/solr/core/src/java/org/apache/solr/handler/component/SearchComponent.java
index 7b70708..6ef0ee4 100644
--- a/solr/core/src/java/org/apache/solr/handler/component/SearchComponent.java
+++ b/solr/core/src/java/org/apache/solr/handler/component/SearchComponent.java
@@ -140,6 +140,8 @@ public abstract class SearchComponent implements SolrInfoMBean, NamedListInitial
     map.put(DebugComponent.COMPONENT_NAME, DebugComponent.class);
     map.put(RealTimeGetComponent.COMPONENT_NAME, RealTimeGetComponent.class);
     map.put(ExpandComponent.COMPONENT_NAME, ExpandComponent.class);
+    map.put(TermsComponent.COMPONENT_NAME, TermsComponent.class);
+
     standard_components = Collections.unmodifiableMap(map);
   }
 

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/879a245e/solr/core/src/java/org/apache/solr/handler/component/SearchHandler.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/handler/component/SearchHandler.java b/solr/core/src/java/org/apache/solr/handler/component/SearchHandler.java
index f4a1776..e8362dc 100644
--- a/solr/core/src/java/org/apache/solr/handler/component/SearchHandler.java
+++ b/solr/core/src/java/org/apache/solr/handler/component/SearchHandler.java
@@ -85,6 +85,8 @@ public class SearchHandler extends RequestHandlerBase implements SolrCoreAware ,
     names.add( StatsComponent.COMPONENT_NAME );
     names.add( DebugComponent.COMPONENT_NAME );
     names.add( ExpandComponent.COMPONENT_NAME);
+    names.add( TermsComponent.COMPONENT_NAME);
+
     return names;
   }
 

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/879a245e/solr/core/src/java/org/apache/solr/handler/component/TermsComponent.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/handler/component/TermsComponent.java b/solr/core/src/java/org/apache/solr/handler/component/TermsComponent.java
index 8a0bad3..28649db 100644
--- a/solr/core/src/java/org/apache/solr/handler/component/TermsComponent.java
+++ b/solr/core/src/java/org/apache/solr/handler/component/TermsComponent.java
@@ -93,6 +93,14 @@ public class TermsComponent extends SearchComponent {
 
     if (fields == null || fields.length==0) return;
 
+    boolean termStats = params.getBool(TermsParams.TERMS_STATS, false);
+
+    if(termStats) {
+      NamedList<Number> stats = new SimpleOrderedMap();
+      rb.rsp.add("stats", stats);
+      collectStats(rb.req.getSearcher(), stats);
+    }
+
     String termList = params.get(TermsParams.TERMS_LIST);
     if(termList != null) {
       fetchTerms(rb.req.getSearcher(), fields, termList, termsResult);
@@ -291,6 +299,13 @@ public class TermsComponent extends SearchComponent {
         @SuppressWarnings("unchecked")
         NamedList<NamedList<Number>> terms = (NamedList<NamedList<Number>>) srsp.getSolrResponse().getResponse().get("terms");
         th.parse(terms);
+
+
+        NamedList<Number> stats = (NamedList<Number>)srsp.getSolrResponse().getResponse().get("stats");
+        if(stats != null) {
+          th.numDocs += stats.get("numDocs").longValue();
+          th.stats = true;
+        }
       }
     }
   }
@@ -305,6 +320,11 @@ public class TermsComponent extends SearchComponent {
     NamedList terms = ti.buildResponse();
 
     rb.rsp.add("terms", terms);
+    if(ti.stats) {
+      NamedList<Number> stats = new SimpleOrderedMap();
+      stats.add("numDocs", Long.valueOf(ti.numDocs));
+      rb.rsp.add("stats", stats);
+    }
     rb._termsHelper = null;
   }
 
@@ -331,6 +351,8 @@ public class TermsComponent extends SearchComponent {
     // map to store returned terms
     private HashMap<String, HashMap<String, TermsResponse.Term>> fieldmap;
     private SolrParams params;
+    public long numDocs = 0;
+    public boolean stats;
 
     public TermsHelper() {
       fieldmap = new HashMap<>(5);
@@ -547,6 +569,11 @@ public class TermsComponent extends SearchComponent {
     }
   }
 
+  private void collectStats(SolrIndexSearcher searcher, NamedList<Number> stats) {
+    int numDocs = searcher.getTopReaderContext().reader().numDocs();
+    stats.add("numDocs", Long.valueOf(numDocs));
+  }
+
   @Override
   public String getDescription() {
     return "A Component for working with Term Enumerators";

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/879a245e/solr/core/src/resources/ImplicitPlugins.json
----------------------------------------------------------------------
diff --git a/solr/core/src/resources/ImplicitPlugins.json b/solr/core/src/resources/ImplicitPlugins.json
index 325bf91..58f6b79 100644
--- a/solr/core/src/resources/ImplicitPlugins.json
+++ b/solr/core/src/resources/ImplicitPlugins.json
@@ -104,6 +104,12 @@
         "wt": "json",
         "distrib": false
       }
+    },
+    "/terms": {
+      "class": "solr.SearchHandler",
+      "components": [
+        "terms"
+      ]
     }
   }
 }
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/879a245e/solr/core/src/test/org/apache/solr/MinimalSchemaTest.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/MinimalSchemaTest.java b/solr/core/src/test/org/apache/solr/MinimalSchemaTest.java
index 4926dd4..af058d0 100644
--- a/solr/core/src/test/org/apache/solr/MinimalSchemaTest.java
+++ b/solr/core/src/test/org/apache/solr/MinimalSchemaTest.java
@@ -114,7 +114,9 @@ public class MinimalSchemaTest extends SolrTestCaseJ4 {
             handler.startsWith("/export") ||
             handler.startsWith("/graph") ||
             handler.startsWith("/sql") ||
-            handler.startsWith("/stream")
+            handler.startsWith("/stream") ||
+            handler.startsWith("/terms")
+
             ) {
           continue;
         }

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/879a245e/solr/core/src/test/org/apache/solr/core/SolrCoreTest.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/core/SolrCoreTest.java b/solr/core/src/test/org/apache/solr/core/SolrCoreTest.java
index 049d5e7..75dbf0c 100644
--- a/solr/core/src/test/org/apache/solr/core/SolrCoreTest.java
+++ b/solr/core/src/test/org/apache/solr/core/SolrCoreTest.java
@@ -98,6 +98,7 @@ public class SolrCoreTest extends SolrTestCaseJ4 {
       ++ihCount; assertEquals(pathToClassMap.get("/admin/threads"), "solr.ThreadDumpHandler");
       ++ihCount; assertEquals(pathToClassMap.get("/config"), "solr.SolrConfigHandler");
       ++ihCount; assertEquals(pathToClassMap.get("/export"), "solr.SearchHandler");
+      ++ihCount; assertEquals(pathToClassMap.get("/terms"), "solr.SearchHandler");
       ++ihCount; assertEquals(pathToClassMap.get("/get"), "solr.RealTimeGetHandler");
       ++ihCount; assertEquals(pathToClassMap.get(ReplicationHandler.PATH), "solr.ReplicationHandler");
       ++ihCount; assertEquals(pathToClassMap.get("/schema"), "solr.SchemaHandler");

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/879a245e/solr/core/src/test/org/apache/solr/handler/component/DistributedTermsComponentTest.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/handler/component/DistributedTermsComponentTest.java b/solr/core/src/test/org/apache/solr/handler/component/DistributedTermsComponentTest.java
index dba7cc4..951cd88 100644
--- a/solr/core/src/test/org/apache/solr/handler/component/DistributedTermsComponentTest.java
+++ b/solr/core/src/test/org/apache/solr/handler/component/DistributedTermsComponentTest.java
@@ -51,6 +51,8 @@ public class DistributedTermsComponentTest extends BaseDistributedSearchTestCase
     query("qt", "/terms", "shards.qt", "/terms", "terms", "true", "terms.fl", "b_t", "terms.sort", "index");
     query("qt", "/terms", "shards.qt", "/terms", "terms", "true", "terms.fl", "b_t", "terms.list", "snake, zebra, ant, bad");
     query("qt", "/terms", "shards.qt", "/terms", "terms", "true", "terms.fl", "foo_i", "terms.list", "2, 3, 1");
+    query("qt", "/terms", "shards.qt", "/terms", "terms", "true", "terms.fl", "foo_i", "terms.stats", "true","terms.list", "2, 3, 1");
+
 
   }
 }

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/879a245e/solr/core/src/test/org/apache/solr/handler/component/TermsComponentTest.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/handler/component/TermsComponentTest.java b/solr/core/src/test/org/apache/solr/handler/component/TermsComponentTest.java
index 19bd4e1..473b727 100644
--- a/solr/core/src/test/org/apache/solr/handler/component/TermsComponentTest.java
+++ b/solr/core/src/test/org/apache/solr/handler/component/TermsComponentTest.java
@@ -180,6 +180,7 @@ public class TermsComponentTest extends SolrTestCaseJ4 {
         ,"//lst[@name='standardfilt']/int[4][@name='spider'][.='1']"
     );
 
+
     //Test with numeric terms
     assertQ(req("indent","true", "qt","/terms",  "terms","true",
             "terms.fl","foo_i",
@@ -190,6 +191,17 @@ public class TermsComponentTest extends SolrTestCaseJ4 {
     );
   }
 
+
+  @Test
+  public void testStats() throws Exception {
+    //Terms list always returns in index order
+    assertQ(req("indent", "true", "qt", "/terms", "terms", "true",
+            "terms.fl", "standardfilt","terms.stats", "true",
+            "terms.list", "spider, snake, shark, ddddd, bad")
+        , "//lst[@name='stats']/int[1][@name='numDocs'][.='23']"
+    );
+  }
+
   @Test
   public void testSortIndex() throws Exception {
     assertQ(req("indent","true", "qt","/terms",  "terms","true",

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/879a245e/solr/solrj/src/java/org/apache/solr/client/solrj/io/stream/ScoreNodesStream.java
----------------------------------------------------------------------
diff --git a/solr/solrj/src/java/org/apache/solr/client/solrj/io/stream/ScoreNodesStream.java b/solr/solrj/src/java/org/apache/solr/client/solrj/io/stream/ScoreNodesStream.java
new file mode 100644
index 0000000..f2aa070
--- /dev/null
+++ b/solr/solrj/src/java/org/apache/solr/client/solrj/io/stream/ScoreNodesStream.java
@@ -0,0 +1,245 @@
+package org.apache.solr.client.solrj.io.stream;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Locale;
+import java.util.Map;
+import java.util.HashMap;
+
+import org.apache.solr.client.solrj.impl.CloudSolrClient;
+import org.apache.solr.client.solrj.io.SolrClientCache;
+import org.apache.solr.client.solrj.io.Tuple;
+import org.apache.solr.client.solrj.io.comp.StreamComparator;
+import org.apache.solr.client.solrj.io.stream.expr.Explanation;
+import org.apache.solr.client.solrj.io.stream.expr.Explanation.ExpressionType;
+import org.apache.solr.client.solrj.io.stream.expr.Expressible;
+import org.apache.solr.client.solrj.io.stream.expr.StreamExplanation;
+import org.apache.solr.client.solrj.io.stream.expr.StreamExpression;
+import org.apache.solr.client.solrj.io.stream.expr.StreamExpressionNamedParameter;
+import org.apache.solr.client.solrj.io.stream.expr.StreamExpressionValue;
+import org.apache.solr.client.solrj.io.stream.expr.StreamFactory;
+import org.apache.solr.client.solrj.request.QueryRequest;
+import org.apache.solr.common.params.CommonParams;
+import org.apache.solr.common.params.ModifiableSolrParams;
+import org.apache.solr.common.params.TermsParams;
+import org.apache.solr.common.util.NamedList;
+
+/**
+ *  Iterates over a gatherNodes() expression and scores the node Tuples based on tf-idf.
+ *
+ *  Expression Syntax:
+ *
+ *  Default function call uses the "count(*)" value for node freq.
+ *
+ *  You can use a different value for node freq by providing the termFreq param:
+ *  scoreNodes(gatherNodes(...), termFreq="min(weight)")
+ *
+ **/
+
+public class ScoreNodesStream extends TupleStream implements Expressible
+{
+
+  private static final long serialVersionUID = 1;
+
+  protected String zkHost;
+  private TupleStream stream;
+  private transient SolrClientCache clientCache;
+  private Map<String, Tuple> nodes = new HashMap();
+  private Iterator<Tuple> tuples;
+  private String termFreq;
+
+  public ScoreNodesStream(TupleStream tupleStream, String nodeFreqField) throws IOException {
+    init(tupleStream, nodeFreqField);
+  }
+
+  public ScoreNodesStream(StreamExpression expression, StreamFactory factory) throws IOException {
+    // grab all parameters out
+    List<StreamExpression> streamExpressions = factory.getExpressionOperandsRepresentingTypes(expression, Expressible.class, TupleStream.class);
+    StreamExpressionNamedParameter nodeFreqParam = factory.getNamedOperand(expression, "termFreq");
+
+    String docFreqField = "count(*)";
+    if(nodeFreqParam != null) {
+      docFreqField = nodeFreqParam.getParameter().toString();
+    }
+
+    if(1 != streamExpressions.size()){
+      throw new IOException(String.format(Locale.ROOT,"Invalid expression %s - expecting a single stream but found %d",expression, streamExpressions.size()));
+    }
+
+    zkHost = factory.getDefaultZkHost();
+
+    if(null == zkHost){
+      throw new IOException("zkHost not found");
+    }
+
+    TupleStream stream = factory.constructStream(streamExpressions.get(0));
+
+    init(stream, docFreqField);
+  }
+
+  private void init(TupleStream tupleStream, String termFreq) throws IOException{
+    this.stream = tupleStream;
+    this.termFreq = termFreq;
+  }
+
+  @Override
+  public StreamExpression toExpression(StreamFactory factory) throws IOException{
+    return toExpression(factory, true);
+  }
+
+  private StreamExpression toExpression(StreamFactory factory, boolean includeStreams) throws IOException {
+    // function name
+    StreamExpression expression = new StreamExpression(factory.getFunctionName(this.getClass()));
+
+    // termFreq
+    expression.addParameter(new StreamExpressionNamedParameter("termFreq", termFreq));
+
+    if(includeStreams){
+      // stream
+      if(stream instanceof Expressible){
+        expression.addParameter(((Expressible)stream).toExpression(factory));
+      }
+      else{
+        throw new IOException("This ScoreNodesStream contains a non-expressible TupleStream - it cannot be converted to an expression");
+      }
+    }
+    else{
+      expression.addParameter("<stream>");
+    }
+
+    return expression;
+  }
+
+  @Override
+  public Explanation toExplanation(StreamFactory factory) throws IOException {
+
+    return new StreamExplanation(getStreamNodeId().toString())
+        .withChildren(new Explanation[]{
+            stream.toExplanation(factory)
+        })
+        .withFunctionName(factory.getFunctionName(this.getClass()))
+        .withImplementingClass(this.getClass().getName())
+        .withExpressionType(ExpressionType.STREAM_DECORATOR)
+        .withExpression(toExpression(factory, false).toString());
+  }
+
+  public void setStreamContext(StreamContext context) {
+    this.clientCache = context.getSolrClientCache();
+    this.stream.setStreamContext(context);
+  }
+
+  public List<TupleStream> children() {
+    List<TupleStream> l =  new ArrayList();
+    l.add(stream);
+    return l;
+  }
+
+  public void open() throws IOException {
+    stream.open();
+    Tuple node = null;
+    StringBuilder builder = new StringBuilder();
+    String field = null;
+    String collection = null;
+    while(true) {
+      node = stream.read();
+      if(node.EOF) {
+        break;
+      }
+
+      String nodeId = node.getString("node");
+      nodes.put(nodeId, node);
+      if(builder.length() > 0) {
+        builder.append(",");
+        field = node.getString("field");
+        collection = node.getString("collection");
+      }
+      builder.append(nodeId);
+    }
+
+    CloudSolrClient client = clientCache.getCloudSolrClient(zkHost);
+    ModifiableSolrParams params = new ModifiableSolrParams();
+    params.add(CommonParams.QT, "/terms");
+    params.add(TermsParams.TERMS, "true");
+    params.add(TermsParams.TERMS_FIELD, field);
+    params.add(TermsParams.TERMS_STATS, "true");
+    params.add(TermsParams.TERMS_LIST, builder.toString());
+    QueryRequest request = new QueryRequest(params);
+
+
+    try {
+
+      //Get the response from the terms component
+      NamedList response = client.request(request, collection);
+      NamedList<Number> stats = (NamedList<Number>)response.get("stats");
+      long numDocs = stats.get("numDocs").longValue();
+      NamedList<NamedList<Number>> fields = (NamedList<NamedList<Number>>)response.get("terms");
+
+      int size = fields.size();
+      for(int i=0; i<size; i++) {
+        String fieldName = fields.getName(i);
+        NamedList<Number> terms = fields.get(fieldName);
+        int tsize = terms.size();
+        for(int t=0; t<tsize; t++) {
+          String term = terms.getName(t);
+          Number docFreq = terms.get(term);
+          Tuple tuple = nodes.get(term);
+          Number termFreqValue = (Number)tuple.get(termFreq);
+          float score = termFreqValue.floatValue() * (float) (Math.log((numDocs + 1) / (docFreq.doubleValue() + 1)) + 1.0);
+          tuple.put("nodeScore", score);
+          tuple.put("docFreq", docFreq);
+          tuple.put("numDocs", numDocs);
+        }
+      }
+    } catch (Exception e) {
+      throw new IOException(e);
+    }
+
+    tuples = nodes.values().iterator();
+  }
+
+  public void close() throws IOException {
+    stream.close();
+  }
+
+  public StreamComparator getComparator(){
+    return null;
+  }
+
+  public Tuple read() throws IOException {
+    if(tuples.hasNext()) {
+      return tuples.next();
+    } else {
+      Map map = new HashMap();
+      map.put("EOF", true);
+      return new Tuple(map);
+    }
+  }
+
+  public StreamComparator getStreamSort(){
+    return null;
+  }
+
+  public int getCost() {
+    return 0;
+  }
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/879a245e/solr/solrj/src/java/org/apache/solr/common/params/TermsParams.java
----------------------------------------------------------------------
diff --git a/solr/solrj/src/java/org/apache/solr/common/params/TermsParams.java b/solr/solrj/src/java/org/apache/solr/common/params/TermsParams.java
index 470b14d..d719500 100644
--- a/solr/solrj/src/java/org/apache/solr/common/params/TermsParams.java
+++ b/solr/solrj/src/java/org/apache/solr/common/params/TermsParams.java
@@ -45,6 +45,12 @@ public interface TermsParams {
   public static final String TERMS_LIST = TERMS_PREFIX + "list";
 
   /**
+   * Optional. When true, a "stats" section containing numDocs is added to the response.
+   *
+   */
+  public static final String TERMS_STATS = TERMS_PREFIX + "stats";
+
+  /**
    * Optional.  The lower bound term to start at.  The TermEnum will start at the next term after this term in the dictionary.
    *
    * If not specified, the empty string is used

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/879a245e/solr/solrj/src/test/org/apache/solr/client/solrj/io/graph/GraphExpressionTest.java
----------------------------------------------------------------------
diff --git a/solr/solrj/src/test/org/apache/solr/client/solrj/io/graph/GraphExpressionTest.java b/solr/solrj/src/test/org/apache/solr/client/solrj/io/graph/GraphExpressionTest.java
index 79579d1..9dbd706 100644
--- a/solr/solrj/src/test/org/apache/solr/client/solrj/io/graph/GraphExpressionTest.java
+++ b/solr/solrj/src/test/org/apache/solr/client/solrj/io/graph/GraphExpressionTest.java
@@ -40,6 +40,8 @@ import org.apache.solr.client.solrj.io.comp.ComparatorOrder;
 import org.apache.solr.client.solrj.io.comp.FieldComparator;
 import org.apache.solr.client.solrj.io.stream.CloudSolrStream;
 import org.apache.solr.client.solrj.io.stream.HashJoinStream;
+import org.apache.solr.client.solrj.io.stream.ScoreNodesStream;
+import org.apache.solr.client.solrj.io.stream.SortStream;
 import org.apache.solr.client.solrj.io.stream.StreamContext;
 import org.apache.solr.client.solrj.io.stream.TupleStream;
 import org.apache.solr.client.solrj.io.stream.expr.StreamFactory;
@@ -384,6 +386,94 @@ public class GraphExpressionTest extends SolrCloudTestCase {
 
   }
 
+
+  @Test
+  public void testScoreNodesStream() throws Exception {
+
+
+    new UpdateRequest()
+        .add(id, "0", "basket_s", "basket1", "product_s", "product1", "price_f", "20")
+        .add(id, "1", "basket_s", "basket1", "product_s", "product3", "price_f", "30")
+        .add(id, "2", "basket_s", "basket1", "product_s", "product5", "price_f", "1")
+        .add(id, "3", "basket_s", "basket2", "product_s", "product1", "price_f", "2")
+        .add(id, "4", "basket_s", "basket2", "product_s", "product6", "price_f", "5")
+        .add(id, "5", "basket_s", "basket2", "product_s", "product7", "price_f", "10")
+        .add(id, "6", "basket_s", "basket3", "product_s", "product4", "price_f", "20")
+        .add(id, "7", "basket_s", "basket3", "product_s", "product3", "price_f", "10")
+        .add(id, "8", "basket_s", "basket3", "product_s", "product1", "price_f", "10")
+        .add(id, "9", "basket_s", "basket4", "product_s", "product4", "price_f", "40")
+        .add(id, "10", "basket_s", "basket4", "product_s", "product3", "price_f", "10")
+        .add(id, "11", "basket_s", "basket4", "product_s", "product1", "price_f", "10")
+        .add(id, "12", "basket_s", "basket5", "product_s", "product1", "price_f", "10")
+        .add(id, "13", "basket_s", "basket6", "product_s", "product1", "price_f", "10")
+        .add(id, "14", "basket_s", "basket7", "product_s", "product1", "price_f", "10")
+        .add(id, "15", "basket_s", "basket4", "product_s", "product1", "price_f", "10")
+        .commit(cluster.getSolrClient(), COLLECTION);
+
+    List<Tuple> tuples = null;
+    Set<String> paths = null;
+    TupleStream stream = null;
+    StreamContext context = new StreamContext();
+    SolrClientCache cache = new SolrClientCache();
+    context.setSolrClientCache(cache);
+
+    StreamFactory factory = new StreamFactory()
+        .withCollectionZkHost("collection1", cluster.getZkServer().getZkAddress())
+        .withDefaultZkHost(cluster.getZkServer().getZkAddress())
+        .withFunctionName("gatherNodes", GatherNodesStream.class)
+        .withFunctionName("scoreNodes", ScoreNodesStream.class)
+        .withFunctionName("search", CloudSolrStream.class)
+        .withFunctionName("sort", SortStream.class)
+        .withFunctionName("count", CountMetric.class)
+        .withFunctionName("avg", MeanMetric.class)
+        .withFunctionName("sum", SumMetric.class)
+        .withFunctionName("min", MinMetric.class)
+        .withFunctionName("max", MaxMetric.class);
+
+    String expr = "gatherNodes(collection1, " +
+                               "walk=\"product3->product_s\"," +
+                               "gather=\"basket_s\")";
+
+
+    String expr2 = "sort(by=\"nodeScore desc\", " +
+                         "scoreNodes(gatherNodes(collection1, " +
+                                                 expr+","+
+                                                 "walk=\"node->basket_s\"," +
+                                                 "gather=\"product_s\", " +
+                                                 "count(*), " +
+                                                 "avg(price_f), " +
+                                                 "sum(price_f), " +
+                                                 "min(price_f), " +
+                                                 "max(price_f))))";
+
+    stream = factory.constructStream(expr2);
+
+    context = new StreamContext();
+    context.setSolrClientCache(cache);
+
+    stream.setStreamContext(context);
+
+    tuples = getTuples(stream);
+
+    Tuple tuple0 = tuples.get(0);
+    assert(tuple0.getString("node").equals("product4"));
+    assert(tuple0.getLong("docFreq") == 2);
+    assert(tuple0.getLong("count(*)") == 2);
+
+    Tuple tuple1 = tuples.get(1);
+    assert(tuple1.getString("node").equals("product1"));
+    assert(tuple1.getLong("docFreq") == 8);
+    assert(tuple1.getLong("count(*)") == 3);
+
+    Tuple tuple2 = tuples.get(2);
+    assert(tuple2.getString("node").equals("product5"));
+    assert(tuple2.getLong("docFreq") == 1);
+    assert(tuple2.getLong("count(*)") == 1);
+
+    cache.close();
+  }
+
+
   @Test
   public void testGatherNodesFriendsStream() throws Exception {
 
@@ -707,6 +797,11 @@ public class GraphExpressionTest extends SolrCloudTestCase {
     client.close();
   }
 
+
+
+
+
+
   private String readString(InputStreamReader reader) throws Exception{
     StringBuilder builder = new StringBuilder();
     int c = 0;


[2/6] lucene-solr:branch_6x: SOLR-9193: Fix conflict between parameters of TermsComponent and json facet API

Posted by jb...@apache.org.
SOLR-9193: Fix conflict between parameters of TermsComponent and json facet API


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/bc0eac8b
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/bc0eac8b
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/bc0eac8b

Branch: refs/heads/branch_6x
Commit: bc0eac8b6b95bfc4d6cfa612b494fc184cee1a8c
Parents: ed86e01
Author: jbernste <jb...@apache.org>
Authored: Tue Jul 5 00:48:16 2016 -0400
Committer: jbernste <jb...@apache.org>
Committed: Wed Jul 6 16:58:40 2016 -0400

----------------------------------------------------------------------
 .../org/apache/solr/handler/component/TermsComponent.java    | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/bc0eac8b/solr/core/src/java/org/apache/solr/handler/component/TermsComponent.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/handler/component/TermsComponent.java b/solr/core/src/java/org/apache/solr/handler/component/TermsComponent.java
index daf5b5b..076c4eb 100644
--- a/solr/core/src/java/org/apache/solr/handler/component/TermsComponent.java
+++ b/solr/core/src/java/org/apache/solr/handler/component/TermsComponent.java
@@ -65,7 +65,9 @@ public class TermsComponent extends SearchComponent {
   @Override
   public void prepare(ResponseBuilder rb) throws IOException {
     SolrParams params = rb.req.getParams();
-    if (params.getBool(TermsParams.TERMS, false)) {
+
+    //the terms parameter is also used by json facet API. So we will get errors if we try to parse as boolean
+    if (params.get(TermsParams.TERMS, "false").equals("true")) {
       rb.doTerms = true;
     } else {
       return;
@@ -86,7 +88,9 @@ public class TermsComponent extends SearchComponent {
   @Override
   public void process(ResponseBuilder rb) throws IOException {
     SolrParams params = rb.req.getParams();
-    if (!params.getBool(TermsParams.TERMS, false)) return;
+    if (!params.get(TermsParams.TERMS, "false").equals("true")) {
+      return;
+    }
 
     String[] fields = params.getParams(TermsParams.TERMS_FIELD);
 


[5/6] lucene-solr:branch_6x: SOLR-9193: Added test using the termFreq param and basic error handling

Posted by jb...@apache.org.
SOLR-9193: Added test using the termFreq param and basic error handling


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/7a5e6a5f
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/7a5e6a5f
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/7a5e6a5f

Branch: refs/heads/branch_6x
Commit: 7a5e6a5f7e479b0950cf0d26484f8789c5aa5fcf
Parents: bc0eac8
Author: jbernste <jb...@apache.org>
Authored: Tue Jul 5 13:30:52 2016 -0400
Committer: jbernste <jb...@apache.org>
Committed: Wed Jul 6 16:58:40 2016 -0400

----------------------------------------------------------------------
 .../solrj/io/stream/ScoreNodesStream.java       |  9 +++
 .../solrj/io/graph/GraphExpressionTest.java     | 70 +++++++++++++++-----
 2 files changed, 62 insertions(+), 17 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/7a5e6a5f/solr/solrj/src/java/org/apache/solr/client/solrj/io/stream/ScoreNodesStream.java
----------------------------------------------------------------------
diff --git a/solr/solrj/src/java/org/apache/solr/client/solrj/io/stream/ScoreNodesStream.java b/solr/solrj/src/java/org/apache/solr/client/solrj/io/stream/ScoreNodesStream.java
index 0d305fd..814b69c 100644
--- a/solr/solrj/src/java/org/apache/solr/client/solrj/io/stream/ScoreNodesStream.java
+++ b/solr/solrj/src/java/org/apache/solr/client/solrj/io/stream/ScoreNodesStream.java
@@ -165,7 +165,13 @@ public class ScoreNodesStream extends TupleStream implements Expressible
         break;
       }
 
+      if(!node.fields.containsKey("node")) {
+        throw new IOException("node field not present in the Tuple");
+      }
+
       String nodeId = node.getString("node");
+
+
       nodes.put(nodeId, node);
       if(builder.length() > 0) {
         builder.append(",");
@@ -202,6 +208,9 @@ public class ScoreNodesStream extends TupleStream implements Expressible
           String term = terms.getName(t);
           Number docFreq = terms.get(term);
           Tuple tuple = nodes.get(term);
+          if(!tuple.fields.containsKey(termFreq)) {
+            throw new Exception("termFreq field not present in the Tuple");
+          }
           Number termFreqValue = (Number)tuple.get(termFreq);
           float score = termFreqValue.floatValue() * (float) (Math.log((numDocs + 1) / (docFreq.doubleValue() + 1)) + 1.0);
           tuple.put("nodeScore", score);

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/7a5e6a5f/solr/solrj/src/test/org/apache/solr/client/solrj/io/graph/GraphExpressionTest.java
----------------------------------------------------------------------
diff --git a/solr/solrj/src/test/org/apache/solr/client/solrj/io/graph/GraphExpressionTest.java b/solr/solrj/src/test/org/apache/solr/client/solrj/io/graph/GraphExpressionTest.java
index 9dbd706..a141b73 100644
--- a/solr/solrj/src/test/org/apache/solr/client/solrj/io/graph/GraphExpressionTest.java
+++ b/solr/solrj/src/test/org/apache/solr/client/solrj/io/graph/GraphExpressionTest.java
@@ -392,26 +392,25 @@ public class GraphExpressionTest extends SolrCloudTestCase {
 
 
     new UpdateRequest()
-        .add(id, "0", "basket_s", "basket1", "product_s", "product1", "price_f", "20")
-        .add(id, "1", "basket_s", "basket1", "product_s", "product3", "price_f", "30")
-        .add(id, "2", "basket_s", "basket1", "product_s", "product5", "price_f", "1")
-        .add(id, "3", "basket_s", "basket2", "product_s", "product1", "price_f", "2")
-        .add(id, "4", "basket_s", "basket2", "product_s", "product6", "price_f", "5")
-        .add(id, "5", "basket_s", "basket2", "product_s", "product7", "price_f", "10")
-        .add(id, "6", "basket_s", "basket3", "product_s", "product4", "price_f", "20")
-        .add(id, "7", "basket_s", "basket3", "product_s", "product3", "price_f", "10")
-        .add(id, "8", "basket_s", "basket3", "product_s", "product1", "price_f", "10")
-        .add(id, "9", "basket_s", "basket4", "product_s", "product4", "price_f", "40")
-        .add(id, "10", "basket_s", "basket4", "product_s", "product3", "price_f", "10")
-        .add(id, "11", "basket_s", "basket4", "product_s", "product1", "price_f", "10")
-        .add(id, "12", "basket_s", "basket5", "product_s", "product1", "price_f", "10")
-        .add(id, "13", "basket_s", "basket6", "product_s", "product1", "price_f", "10")
-        .add(id, "14", "basket_s", "basket7", "product_s", "product1", "price_f", "10")
-        .add(id, "15", "basket_s", "basket4", "product_s", "product1", "price_f", "10")
+        .add(id, "0", "basket_s", "basket1", "product_s", "product1", "price_f", "1")
+        .add(id, "1", "basket_s", "basket1", "product_s", "product3", "price_f", "1")
+        .add(id, "2", "basket_s", "basket1", "product_s", "product5", "price_f", "100")
+        .add(id, "3", "basket_s", "basket2", "product_s", "product1", "price_f", "1")
+        .add(id, "4", "basket_s", "basket2", "product_s", "product6", "price_f", "1")
+        .add(id, "5", "basket_s", "basket2", "product_s", "product7", "price_f", "1")
+        .add(id, "6", "basket_s", "basket3", "product_s", "product4", "price_f", "1")
+        .add(id, "7", "basket_s", "basket3", "product_s", "product3", "price_f", "1")
+        .add(id, "8", "basket_s", "basket3", "product_s", "product1", "price_f", "1")
+        .add(id, "9", "basket_s", "basket4", "product_s", "product4", "price_f", "1")
+        .add(id, "10", "basket_s", "basket4", "product_s", "product3", "price_f", "1")
+        .add(id, "11", "basket_s", "basket4", "product_s", "product1", "price_f", "1")
+        .add(id, "12", "basket_s", "basket5", "product_s", "product1", "price_f", "1")
+        .add(id, "13", "basket_s", "basket6", "product_s", "product1", "price_f", "1")
+        .add(id, "14", "basket_s", "basket7", "product_s", "product1", "price_f", "1")
+        .add(id, "15", "basket_s", "basket4", "product_s", "product1", "price_f", "1")
         .commit(cluster.getSolrClient(), COLLECTION);
 
     List<Tuple> tuples = null;
-    Set<String> paths = null;
     TupleStream stream = null;
     StreamContext context = new StreamContext();
     SolrClientCache cache = new SolrClientCache();
@@ -470,6 +469,43 @@ public class GraphExpressionTest extends SolrCloudTestCase {
     assert(tuple2.getLong("docFreq") == 1);
     assert(tuple2.getLong("count(*)") == 1);
 
+
+    //Test using a different termFreq field than the default count(*)
+    expr2 = "sort(by=\"nodeScore desc\", " +
+                 "scoreNodes(termFreq=\"avg(price_f)\",gatherNodes(collection1, " +
+                                                                   expr+","+
+                                                                   "walk=\"node->basket_s\"," +
+                                                                   "gather=\"product_s\", " +
+                                                                   "count(*), " +
+                                                                   "avg(price_f), " +
+                                                                   "sum(price_f), " +
+                                                                   "min(price_f), " +
+                                                                   "max(price_f))))";
+
+    stream = factory.constructStream(expr2);
+
+    context = new StreamContext();
+    context.setSolrClientCache(cache);
+
+    stream.setStreamContext(context);
+
+    tuples = getTuples(stream);
+
+    tuple0 = tuples.get(0);
+    assert(tuple0.getString("node").equals("product5"));
+    assert(tuple0.getLong("docFreq") == 1);
+    assert(tuple0.getDouble("avg(price_f)") == 100);
+
+    tuple1 = tuples.get(1);
+    assert(tuple1.getString("node").equals("product4"));
+    assert(tuple1.getLong("docFreq") == 2);
+    assert(tuple1.getDouble("avg(price_f)") == 1);
+
+    tuple2 = tuples.get(2);
+    assert(tuple2.getString("node").equals("product1"));
+    assert(tuple2.getLong("docFreq") == 8);
+    assert(tuple2.getDouble("avg(price_f)") == 1);
+
     cache.close();
   }