You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by kr...@apache.org on 2016/10/27 20:09:52 UTC

[48/50] [abbrv] lucene-solr:jira/solr-8593: SOLR-1085: Add support for MoreLikeThis queries and responses in SolrJ client

SOLR-1085: Add support for MoreLikeThis queries and responses in SolrJ client


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/2172f3e0
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/2172f3e0
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/2172f3e0

Branch: refs/heads/jira/solr-8593
Commit: 2172f3e0081e3e59ce8b02c2bb5654a592f79f74
Parents: 0feca1a
Author: Shalin Shekhar Mangar <sh...@apache.org>
Authored: Thu Oct 27 17:41:25 2016 +0530
Committer: Shalin Shekhar Mangar <sh...@apache.org>
Committed: Thu Oct 27 17:41:25 2016 +0530

----------------------------------------------------------------------
 solr/CHANGES.txt                                |   3 +
 .../component/MoreLikeThisComponent.java        |   2 +-
 .../org/apache/solr/client/solrj/SolrQuery.java | 248 +++++++++++++++++++
 .../client/solrj/response/QueryResponse.java    |   8 +
 .../solr/common/params/MoreLikeThisParams.java  |   3 +
 .../solr/client/solrj/SolrExampleTests.java     |  21 +-
 .../apache/solr/client/solrj/SolrQueryTest.java |  25 ++
 7 files changed, 299 insertions(+), 11 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/2172f3e0/solr/CHANGES.txt
----------------------------------------------------------------------
diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index ae1d709..4ef1061 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -170,6 +170,9 @@ New Features
 
 * SOLR-9559: Add ExecutorStream to execute stored Streaming Expressions (Joel Bernstein)
 
+* SOLR-1085: Add support for MoreLikeThis queries and responses in SolrJ client.
+  (Maurice Jumelet, Bill Mitchell, Cao Manh Dat via shalin)
+
 Bug Fixes
 ----------------------
 

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/2172f3e0/solr/core/src/java/org/apache/solr/handler/component/MoreLikeThisComponent.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/handler/component/MoreLikeThisComponent.java b/solr/core/src/java/org/apache/solr/handler/component/MoreLikeThisComponent.java
index 6ccdd12..7cf6d39 100644
--- a/solr/core/src/java/org/apache/solr/handler/component/MoreLikeThisComponent.java
+++ b/solr/core/src/java/org/apache/solr/handler/component/MoreLikeThisComponent.java
@@ -173,7 +173,7 @@ public class MoreLikeThisComponent extends SearchComponent {
         && rb.req.getParams().getBool(COMPONENT_NAME, false)) {
       Map<Object,SolrDocumentList> tempResults = new LinkedHashMap<>();
       
-      int mltcount = rb.req.getParams().getInt(MoreLikeThisParams.DOC_COUNT, 5);
+      int mltcount = rb.req.getParams().getInt(MoreLikeThisParams.DOC_COUNT, MoreLikeThisParams.DEFAULT_DOC_COUNT);
       String keyName = rb.req.getSchema().getUniqueKeyField().getName();
       
       for (ShardRequest sreq : rb.finished) {

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/2172f3e0/solr/solrj/src/java/org/apache/solr/client/solrj/SolrQuery.java
----------------------------------------------------------------------
diff --git a/solr/solrj/src/java/org/apache/solr/client/solrj/SolrQuery.java b/solr/solrj/src/java/org/apache/solr/client/solrj/SolrQuery.java
index 7eee7be..e6d3d69 100644
--- a/solr/solrj/src/java/org/apache/solr/client/solrj/SolrQuery.java
+++ b/solr/solrj/src/java/org/apache/solr/client/solrj/SolrQuery.java
@@ -27,6 +27,7 @@ import org.apache.solr.common.params.CommonParams;
 import org.apache.solr.common.params.FacetParams;
 import org.apache.solr.common.params.HighlightParams;
 import org.apache.solr.common.params.ModifiableSolrParams;
+import org.apache.solr.common.params.MoreLikeThisParams;
 import org.apache.solr.common.params.StatsParams;
 import org.apache.solr.common.params.TermsParams;
 
@@ -801,6 +802,253 @@ public class SolrQuery extends ModifiableSolrParams
     return this;
   }
 
+
+  /**
+   * Add field for MoreLikeThis. Automatically
+   * enables MoreLikeThis.
+   *
+   * @param field the name of the field to be added
+   * @return this
+   */
+  public SolrQuery addMoreLikeThisField(String field) {
+    this.setMoreLikeThis(true);
+    return addValueToParam(MoreLikeThisParams.SIMILARITY_FIELDS, field);
+  }
+
+  public SolrQuery setMoreLikeThisFields(String... fields) {
+    if( fields == null || fields.length == 0 ) {
+      this.remove( MoreLikeThisParams.SIMILARITY_FIELDS );
+      this.setMoreLikeThis(false);
+      return this;
+    }
+
+    StringBuilder sb = new StringBuilder();
+    sb.append(fields[0]);
+    for (int i = 1; i < fields.length; i++) {
+      sb.append(',');
+      sb.append(fields[i]);
+    }
+    this.set(MoreLikeThisParams.SIMILARITY_FIELDS, sb.toString());
+    this.setMoreLikeThis(true);
+    return this;
+  }
+
+  /**
+   * @return an array with the fields used to compute similarity.
+   */
+  public String[] getMoreLikeThisFields() {
+    String fl = this.get(MoreLikeThisParams.SIMILARITY_FIELDS);
+    if(fl==null || fl.length()==0) {
+      return null;
+    }
+    return fl.split(",");
+  }
+
+  /**
+   * Sets the frequency below which terms will be ignored in the source doc
+   *
+   * @param mintf the minimum term frequency
+   * @return this
+   */
+  public SolrQuery setMoreLikeThisMinTermFreq(int mintf) {
+    this.set(MoreLikeThisParams.MIN_TERM_FREQ, mintf);
+    return this;
+  }
+
+  /**
+   * Gets the frequency below which terms will be ignored in the source doc
+   */
+  public int getMoreLikeThisMinTermFreq() {
+    return this.getInt(MoreLikeThisParams.MIN_TERM_FREQ, 2);
+  }
+
+  /**
+   * Sets the minimum document frequency: words which do not occur in at
+   * least this many docs will be ignored.
+   *
+   * @param mindf the minimum document frequency
+   * @return this
+   */
+  public SolrQuery setMoreLikeThisMinDocFreq(int mindf) {
+    this.set(MoreLikeThisParams.MIN_DOC_FREQ, mindf);
+    return this;
+  }
+
+  /**
+   * Gets the minimum document frequency: words which do not occur in at
+   * least this many docs will be ignored.
+   */
+  public int getMoreLikeThisMinDocFreq() {
+    return this.getInt(MoreLikeThisParams.MIN_DOC_FREQ, 5);
+  }
+
+  /**
+   * Sets the minimum word length below which words will be ignored.
+   *
+   * @param minwl the minimum word length
+   * @return this
+   */
+  public SolrQuery setMoreLikeThisMinWordLen(int minwl) {
+    this.set(MoreLikeThisParams.MIN_WORD_LEN, minwl);
+    return this;
+  }
+
+  /**
+   * Gets the minimum word length below which words will be ignored.
+   */
+  public int getMoreLikeThisMinWordLen() {
+    return this.getInt(MoreLikeThisParams.MIN_WORD_LEN, 0);
+  }
+
+  /**
+   * Sets the maximum word length above which words will be ignored.
+   *
+   * @param maxwl the maximum word length
+   * @return this
+   */
+  public SolrQuery setMoreLikeThisMaxWordLen(int maxwl) {
+    this.set(MoreLikeThisParams.MAX_WORD_LEN, maxwl);
+    return this;
+  }
+
+  /**
+   * Gets the maximum word length above which words will be ignored.
+   */
+  public int getMoreLikeThisMaxWordLen() {
+    return this.getInt(MoreLikeThisParams.MAX_WORD_LEN, 0);
+  }
+
+  /**
+   * Sets the maximum number of query terms that will be included in any
+   * generated query.
+   *
+   * @param maxqt the maximum number of query terms
+   * @return this
+   */
+  public SolrQuery setMoreLikeThisMaxQueryTerms(int maxqt) {
+    this.set(MoreLikeThisParams.MAX_QUERY_TERMS, maxqt);
+    return this;
+  }
+
+  /**
+   * Gets the maximum number of query terms that will be included in any
+   * generated query.
+   */
+  public int getMoreLikeThisMaxQueryTerms() {
+    return this.getInt(MoreLikeThisParams.MAX_QUERY_TERMS, 25);
+  }
+
+  /**
+   * Sets the maximum number of tokens to parse in each example doc field
+   * that is not stored with TermVector support.
+   *
+   * @param maxntp the maximum number of tokens to parse
+   * @return this
+   */
+  public SolrQuery setMoreLikeThisMaxTokensParsed(int maxntp) {
+    this.set(MoreLikeThisParams.MAX_NUM_TOKENS_PARSED, maxntp);
+    return this;
+  }
+
+  /**
+   * Gets the maximum number of tokens to parse in each example doc field
+   * that is not stored with TermVector support.
+   */
+  public int getMoreLikeThisMaxTokensParsed() {
+    return this.getInt(MoreLikeThisParams.MAX_NUM_TOKENS_PARSED, 5000);
+  }
+
+  /**
+   * Sets if the query will be boosted by the interesting term relevance.
+   *
+   * @param b set to true to boost the query with the interesting term relevance
+   * @return this
+   */
+  public SolrQuery setMoreLikeThisBoost(boolean b) {
+    this.set(MoreLikeThisParams.BOOST, b);
+    return this;
+  }
+
+  /**
+   * Gets if the query will be boosted by the interesting term relevance.
+   */
+  public boolean getMoreLikeThisBoost() {
+    return this.getBool(MoreLikeThisParams.BOOST, false);
+  }
+
+  /**
+   * Sets the query fields and their boosts using the same format as that
+   * used in DisMaxQParserPlugin. These fields must also be added
+   * using {@link #addMoreLikeThisField(String)}.
+   *
+   * @param qf the query fields
+   * @return this
+   */
+  public SolrQuery setMoreLikeThisQF(String qf) {
+    this.set(MoreLikeThisParams.QF, qf);
+    return this;
+  }
+
+  /**
+   * Gets the query fields and their boosts.
+   */
+  public String getMoreLikeThisQF() {
+    return this.get(MoreLikeThisParams.QF);
+  }
+
+  /**
+   * Sets the number of similar documents to return for each result.
+   *
+   * @param count the number of similar documents to return for each result
+   * @return this
+   */
+  public SolrQuery setMoreLikeThisCount(int count) {
+    this.set(MoreLikeThisParams.DOC_COUNT, count);
+    return this;
+  }
+
+  /**
+   * Gets the number of similar documents to return for each result.
+   */
+  public int getMoreLikeThisCount() {
+    return this.getInt(MoreLikeThisParams.DOC_COUNT, MoreLikeThisParams.DEFAULT_DOC_COUNT);
+  }
+
+  /**
+   * Enable/Disable MoreLikeThis. After enabling MoreLikeThis, the fields
+   * used for computing similarity must be specified by calling
+   * {@link #addMoreLikeThisField(String)}.
+   *
+   * @param b flag to indicate if MoreLikeThis should be enabled. If b==false,
+   * the MoreLikeThis parameters set via the methods of this class are removed
+   * @return this
+   */
+  public SolrQuery setMoreLikeThis(boolean b) {
+    if(b) {
+      this.set(MoreLikeThisParams.MLT, true);
+    } else {
+      this.remove(MoreLikeThisParams.MLT);
+      this.remove(MoreLikeThisParams.SIMILARITY_FIELDS);
+      this.remove(MoreLikeThisParams.MIN_TERM_FREQ);
+      this.remove(MoreLikeThisParams.MIN_DOC_FREQ);
+      this.remove(MoreLikeThisParams.MIN_WORD_LEN);
+      this.remove(MoreLikeThisParams.MAX_WORD_LEN);
+      this.remove(MoreLikeThisParams.MAX_QUERY_TERMS);
+      this.remove(MoreLikeThisParams.MAX_NUM_TOKENS_PARSED);
+      this.remove(MoreLikeThisParams.BOOST);
+      this.remove(MoreLikeThisParams.QF);
+      this.remove(MoreLikeThisParams.DOC_COUNT);
+    }
+    return this;
+  }
+
+  /**
+   * @return true if MoreLikeThis is enabled, false otherwise
+   */
+  public boolean getMoreLikeThis() {
+    return this.getBool(MoreLikeThisParams.MLT, false);
+  }
+
   public SolrQuery setFields(String ... fields) {
     if( fields == null || fields.length == 0 ) {
       this.remove( CommonParams.FL );

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/2172f3e0/solr/solrj/src/java/org/apache/solr/client/solrj/response/QueryResponse.java
----------------------------------------------------------------------
diff --git a/solr/solrj/src/java/org/apache/solr/client/solrj/response/QueryResponse.java b/solr/solrj/src/java/org/apache/solr/client/solrj/response/QueryResponse.java
index debb079..eb595aa 100644
--- a/solr/solrj/src/java/org/apache/solr/client/solrj/response/QueryResponse.java
+++ b/solr/solrj/src/java/org/apache/solr/client/solrj/response/QueryResponse.java
@@ -51,6 +51,7 @@ public class QueryResponse extends SolrResponseBase
   private Map<String,NamedList<Object>> _suggestInfo = null;
   private NamedList<Object> _statsInfo = null;
   private NamedList<NamedList<Number>> _termsInfo = null;
+  private NamedList<SolrDocumentList> _moreLikeThisInfo = null;
   private String _cursorMarkNext = null;
 
   // Grouping response
@@ -168,6 +169,9 @@ public class QueryResponse extends SolrResponseBase
         _termsInfo = (NamedList<NamedList<Number>>) res.getVal( i );
         extractTermsInfo( _termsInfo );
       }
+      else if ( "moreLikeThis".equals( n ) ) {
+        _moreLikeThisInfo = (NamedList<SolrDocumentList>) res.getVal( i );
+      }
       else if ( CursorMarkParams.CURSOR_MARK_NEXT.equals( n ) ) {
         _cursorMarkNext = (String) res.getVal( i );
       }
@@ -547,6 +551,10 @@ public class QueryResponse extends SolrResponseBase
   public TermsResponse getTermsResponse() {
     return _termsResponse;
   }
+
+  public NamedList<SolrDocumentList> getMoreLikeThis() {
+    return _moreLikeThisInfo;
+  }
   
   /**
    * See also: {@link #getLimitingFacets()}

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/2172f3e0/solr/solrj/src/java/org/apache/solr/common/params/MoreLikeThisParams.java
----------------------------------------------------------------------
diff --git a/solr/solrj/src/java/org/apache/solr/common/params/MoreLikeThisParams.java b/solr/solrj/src/java/org/apache/solr/common/params/MoreLikeThisParams.java
index b41cbfd..c898fdb 100644
--- a/solr/solrj/src/java/org/apache/solr/common/params/MoreLikeThisParams.java
+++ b/solr/solrj/src/java/org/apache/solr/common/params/MoreLikeThisParams.java
@@ -50,6 +50,9 @@ public interface MoreLikeThisParams
 
   // Do you want to include the original document in the results or not
   public final static String INTERESTING_TERMS = PREFIX + "interestingTerms";  // false,details,(list or true)
+
+  // the default doc count
+  public final static int DEFAULT_DOC_COUNT = 5;
   
   public enum TermStyle {
     NONE,

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/2172f3e0/solr/solrj/src/test/org/apache/solr/client/solrj/SolrExampleTests.java
----------------------------------------------------------------------
diff --git a/solr/solrj/src/test/org/apache/solr/client/solrj/SolrExampleTests.java b/solr/solrj/src/test/org/apache/solr/client/solrj/SolrExampleTests.java
index 0f91adf..88227ba 100644
--- a/solr/solrj/src/test/org/apache/solr/client/solrj/SolrExampleTests.java
+++ b/solr/solrj/src/test/org/apache/solr/client/solrj/SolrExampleTests.java
@@ -1996,37 +1996,38 @@ abstract public class SolrExampleTests extends SolrExampleTestsBase
     // test with mlt.fl having comma separated values
     SolrQuery q = new SolrQuery("*:*");
     q.setRows(20);
-    q.setParam("mlt", "true");
-    q.setParam("mlt.mintf", "0");
-    q.setParam("mlt.count", "2");
-    q.setParam("mlt.fl", "x_s,y_s,z_s");
+    q.setMoreLikeThisFields("x_s", "y_s", "z_s");
+    q.setMoreLikeThisMinTermFreq(0);
+    q.setMoreLikeThisCount(2);
     QueryResponse response = client.query(q);
     assertEquals(20, response.getResults().getNumFound());
-    NamedList<Object> moreLikeThis = (NamedList<Object>) response.getResponse().get("moreLikeThis");
+    NamedList<SolrDocumentList> moreLikeThis = response.getMoreLikeThis();
     assertNotNull("MoreLikeThis response should not have been null", moreLikeThis);
     for (int i=0; i<20; i++)  {
       String id = "testMoreLikeThis" + i;
-      SolrDocumentList mltResp = (SolrDocumentList) moreLikeThis.get(id);
+      SolrDocumentList mltResp = moreLikeThis.get(id);
       assertNotNull("MoreLikeThis response for id=" + id + " should not be null", mltResp);
       assertTrue("MoreLikeThis response for id=" + id + " had numFound=0", mltResp.getNumFound() > 0);
+      assertTrue("MoreLikeThis response for id=" + id + " had not returned exactly 2 documents", mltResp.size() == 2);
     }
 
     // now test with multiple mlt.fl parameters
     q = new SolrQuery("*:*");
     q.setRows(20);
     q.setParam("mlt", "true");
-    q.setParam("mlt.mintf", "0");
-    q.setParam("mlt.count", "2");
     q.setParam("mlt.fl", "x_s", "y_s", "z_s");
+    q.setMoreLikeThisMinTermFreq(0);
+    q.setMoreLikeThisCount(2);
     response = client.query(q);
     assertEquals(20, response.getResults().getNumFound());
-    moreLikeThis = (NamedList<Object>) response.getResponse().get("moreLikeThis");
+    moreLikeThis = response.getMoreLikeThis();
     assertNotNull("MoreLikeThis response should not have been null", moreLikeThis);
     for (int i=0; i<20; i++)  {
       String id = "testMoreLikeThis" + i;
-      SolrDocumentList mltResp = (SolrDocumentList) moreLikeThis.get(id);
+      SolrDocumentList mltResp = moreLikeThis.get(id);
       assertNotNull("MoreLikeThis response for id=" + id + " should not be null", mltResp);
       assertTrue("MoreLikeThis response for id=" + id + " had numFound=0", mltResp.getNumFound() > 0);
+      assertTrue("MoreLikeThis response for id=" + id + " had not returned exactly 2 documents", mltResp.size() == 2);
     }
   }
 

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/2172f3e0/solr/solrj/src/test/org/apache/solr/client/solrj/SolrQueryTest.java
----------------------------------------------------------------------
diff --git a/solr/solrj/src/test/org/apache/solr/client/solrj/SolrQueryTest.java b/solr/solrj/src/test/org/apache/solr/client/solrj/SolrQueryTest.java
index 816a2cc..d27847f 100644
--- a/solr/solrj/src/test/org/apache/solr/client/solrj/SolrQueryTest.java
+++ b/solr/solrj/src/test/org/apache/solr/client/solrj/SolrQueryTest.java
@@ -431,4 +431,29 @@ public class SolrQueryTest extends LuceneTestCase {
     assertNull(solrQuery.getParams("f.field3.facet.interval.set"));
     
   }
+
+  public void testMoreLikeThis() {
+    SolrQuery solrQuery = new SolrQuery();
+    solrQuery.addMoreLikeThisField("mlt1");
+    assertTrue(solrQuery.getMoreLikeThis());
+
+    solrQuery.addMoreLikeThisField("mlt2");
+    solrQuery.addMoreLikeThisField("mlt3");
+    solrQuery.addMoreLikeThisField("mlt4");
+    assertEquals(4, solrQuery.getMoreLikeThisFields().length);
+    solrQuery.setMoreLikeThisFields(null);
+    assertTrue(null == solrQuery.getMoreLikeThisFields());
+    assertFalse(solrQuery.getMoreLikeThis());
+
+    assertEquals(true, solrQuery.setMoreLikeThisBoost(true).getMoreLikeThisBoost());
+    assertEquals("qf", solrQuery.setMoreLikeThisQF("qf").getMoreLikeThisQF());
+    assertEquals(10, solrQuery.setMoreLikeThisMaxTokensParsed(10).getMoreLikeThisMaxTokensParsed());
+    assertEquals(11, solrQuery.setMoreLikeThisMinTermFreq(11).getMoreLikeThisMinTermFreq());
+    assertEquals(12, solrQuery.setMoreLikeThisMinDocFreq(12).getMoreLikeThisMinDocFreq());
+    assertEquals(13, solrQuery.setMoreLikeThisMaxWordLen(13).getMoreLikeThisMaxWordLen());
+    assertEquals(14, solrQuery.setMoreLikeThisMinWordLen(14).getMoreLikeThisMinWordLen());
+    assertEquals(15, solrQuery.setMoreLikeThisMaxQueryTerms(15).getMoreLikeThisMaxQueryTerms());
+    assertEquals(16, solrQuery.setMoreLikeThisCount(16).getMoreLikeThisCount());
+
+  }
 }