You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by an...@apache.org on 2015/11/23 20:41:08 UTC

svn commit: r1715931 - in /lucene/dev/trunk/solr: ./ core/src/java/org/apache/solr/search/mlt/ core/src/test/org/apache/solr/search/mlt/

Author: anshum
Date: Mon Nov 23 19:41:08 2015
New Revision: 1715931

URL: http://svn.apache.org/viewvc?rev=1715931&view=rev
Log:
SOLR-7912: Add boost support, and also exclude the queried document in MoreLikeThis QParser

Modified:
    lucene/dev/trunk/solr/CHANGES.txt
    lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/mlt/CloudMLTQParser.java
    lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/mlt/SimpleMLTQParser.java
    lucene/dev/trunk/solr/core/src/test/org/apache/solr/search/mlt/CloudMLTQParserTest.java
    lucene/dev/trunk/solr/core/src/test/org/apache/solr/search/mlt/SimpleMLTQParserTest.java

Modified: lucene/dev/trunk/solr/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/CHANGES.txt?rev=1715931&r1=1715930&r2=1715931&view=diff
==============================================================================
--- lucene/dev/trunk/solr/CHANGES.txt (original)
+++ lucene/dev/trunk/solr/CHANGES.txt Mon Nov 23 19:41:08 2015
@@ -245,6 +245,9 @@ New Features
 * SOLR-8329: SchemaSimilarityFactory now supports a 'defaultSimFromFieldType' init option for using
   a fieldType name to identify which Similarity to use as a default. (hossman)
 
+* SOLR-7912: Add boost support, and also exclude the queried document in MoreLikeThis QParser
+  (Jens Wille via Anshum Gupta)
+
 Bug Fixes
 ----------------------
 

Modified: lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/mlt/CloudMLTQParser.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/mlt/CloudMLTQParser.java?rev=1715931&r1=1715930&r2=1715931&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/mlt/CloudMLTQParser.java (original)
+++ lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/mlt/CloudMLTQParser.java Mon Nov 23 19:41:08 2015
@@ -16,8 +16,15 @@ package org.apache.solr.search.mlt;
  * limitations under the License.
  */
 
+import org.apache.lucene.index.Term;
 import org.apache.lucene.queries.mlt.MoreLikeThis;
+import org.apache.lucene.search.BooleanClause;
+import org.apache.lucene.search.BooleanQuery;
+import org.apache.lucene.search.BoostQuery;
 import org.apache.lucene.search.Query;
+import org.apache.lucene.search.TermQuery;
+import org.apache.lucene.util.BytesRefBuilder;
+import org.apache.lucene.util.NumericUtils;
 import org.apache.solr.common.SolrDocument;
 import org.apache.solr.common.SolrException;
 import org.apache.solr.common.StringUtils;
@@ -31,6 +38,7 @@ import org.apache.solr.response.SolrQuer
 import org.apache.solr.schema.SchemaField;
 import org.apache.solr.search.QParser;
 import org.apache.solr.search.QueryParsing;
+import org.apache.solr.util.SolrPluginUtils;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -62,6 +70,8 @@ public class CloudMLTQParser extends QPa
           "document with id [" + id + "]");
     }
     
+    String[] qf = localParams.getParams("qf");
+    Map<String,Float> boostFields = new HashMap<>();
     MoreLikeThis mlt = new MoreLikeThis(req.getSearcher().getIndexReader());
     
     if(localParams.getInt("mintf") != null)
@@ -85,11 +95,14 @@ public class CloudMLTQParser extends QPa
       mlt.setMaxDocFreq(localParams.getInt("maxdf"));
     }
 
+    if(localParams.get("boost") != null) {
+      mlt.setBoost(localParams.getBool("boost"));
+      boostFields = SolrPluginUtils.parseFieldBoosts(qf);
+    }
+
     mlt.setAnalyzer(req.getSchema().getIndexAnalyzer());
 
-    String[] qf = localParams.getParams("qf");
     Map<String, Collection<Object>> filteredDocument = new HashMap();
-
     ArrayList<String> fieldNames = new ArrayList();
 
     if (qf != null) {
@@ -127,7 +140,35 @@ public class CloudMLTQParser extends QPa
     }
 
     try {
-      return mlt.like(filteredDocument);
+      Query rawMLTQuery = mlt.like(filteredDocument);
+      BooleanQuery boostedMLTQuery = (BooleanQuery) rawMLTQuery;
+
+      if (boostFields.size() > 0) {
+        BooleanQuery.Builder newQ = new BooleanQuery.Builder();
+        newQ.setDisableCoord(boostedMLTQuery.isCoordDisabled());
+        newQ.setMinimumNumberShouldMatch(boostedMLTQuery.getMinimumNumberShouldMatch());
+
+        for (BooleanClause clause : boostedMLTQuery) {
+          Query q = clause.getQuery();
+          Float b = boostFields.get(((TermQuery) q).getTerm().field());
+
+          if (b != null) {
+            q = new BoostQuery(q, b);
+          }
+
+          newQ.add(q, clause.getOccur());
+        }
+
+        boostedMLTQuery = newQ.build();
+      }
+
+      // exclude current document from results
+      BooleanQuery.Builder realMLTQuery = new BooleanQuery.Builder();
+      realMLTQuery.setDisableCoord(true);
+      realMLTQuery.add(boostedMLTQuery, BooleanClause.Occur.MUST);
+      realMLTQuery.add(createIdQuery("id", id), BooleanClause.Occur.MUST_NOT);
+
+      return realMLTQuery.build();
     } catch (IOException e) {
       e.printStackTrace();
       throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Bad Request");
@@ -150,4 +191,17 @@ public class CloudMLTQParser extends QPa
     return (SolrDocument) response.get("doc");
   }
 
+  private Query createIdQuery(String defaultField, String uniqueValue) {
+    return new TermQuery(req.getSchema().getField(defaultField).getType().getNumericType() != null
+        ? createNumericTerm(defaultField, uniqueValue)
+        : new Term(defaultField, uniqueValue));
+  }
+
+  private Term createNumericTerm(String field, String uniqueValue) {
+    BytesRefBuilder bytesRefBuilder = new BytesRefBuilder();
+    bytesRefBuilder.grow(NumericUtils.BUF_SIZE_INT);
+    NumericUtils.intToPrefixCoded(Integer.parseInt(uniqueValue), 0, bytesRefBuilder);
+    return new Term(field, bytesRefBuilder.toBytesRef());
+  }
+
 }

Modified: lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/mlt/SimpleMLTQParser.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/mlt/SimpleMLTQParser.java?rev=1715931&r1=1715930&r2=1715931&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/mlt/SimpleMLTQParser.java (original)
+++ lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/mlt/SimpleMLTQParser.java Mon Nov 23 19:41:08 2015
@@ -18,6 +18,9 @@ package org.apache.solr.search.mlt;
 
 import org.apache.lucene.index.Term;
 import org.apache.lucene.queries.mlt.MoreLikeThis;
+import org.apache.lucene.search.BooleanClause;
+import org.apache.lucene.search.BooleanQuery;
+import org.apache.lucene.search.BoostQuery;
 import org.apache.lucene.search.Query;
 import org.apache.lucene.search.ScoreDoc;
 import org.apache.lucene.search.TermQuery;
@@ -32,9 +35,11 @@ import org.apache.solr.schema.SchemaFiel
 import org.apache.solr.search.QParser;
 import org.apache.solr.search.QueryParsing;
 import org.apache.solr.search.SolrIndexSearcher;
+import org.apache.solr.util.SolrPluginUtils;
 
 import java.io.IOException;
 import java.util.ArrayList;
+import java.util.HashMap;
 import java.util.Map;
 import java.util.regex.Pattern;
 
@@ -55,6 +60,7 @@ public class SimpleMLTQParser extends QP
 
     SolrIndexSearcher searcher = req.getSearcher();
     Query docIdQuery = createIdQuery(defaultField, uniqueValue);
+    Map<String,Float> boostFields = new HashMap<>();
 
     try {
       TopDocs td = searcher.search(docIdQuery, 1);
@@ -85,6 +91,11 @@ public class SimpleMLTQParser extends QP
       if(localParams.get("maxdf") != null) {
         mlt.setMaxDocFreq(localParams.getInt("maxdf"));
       }
+
+      if(localParams.get("boost") != null) {
+        mlt.setBoost(localParams.getBool("boost"));
+        boostFields = SolrPluginUtils.parseFieldBoosts(qf);
+      }
       
       ArrayList<String> fields = new ArrayList();
 
@@ -115,8 +126,35 @@ public class SimpleMLTQParser extends QP
       mlt.setFieldNames(fields.toArray(new String[fields.size()]));
       mlt.setAnalyzer(req.getSchema().getIndexAnalyzer());
 
-      return mlt.like(scoreDocs[0].doc);
+      Query rawMLTQuery = mlt.like(scoreDocs[0].doc);
+      BooleanQuery boostedMLTQuery = (BooleanQuery) rawMLTQuery;
+
+      if (boostFields.size() > 0) {
+        BooleanQuery.Builder newQ = new BooleanQuery.Builder();
+        newQ.setDisableCoord(boostedMLTQuery.isCoordDisabled());
+        newQ.setMinimumNumberShouldMatch(boostedMLTQuery.getMinimumNumberShouldMatch());
+
+        for (BooleanClause clause : boostedMLTQuery) {
+          Query q = clause.getQuery();
+          Float b = boostFields.get(((TermQuery) q).getTerm().field());
+
+          if (b != null) {
+            q = new BoostQuery(q, b);
+          }
+
+          newQ.add(q, clause.getOccur());
+        }
+
+        boostedMLTQuery = newQ.build();
+      }
+
+      // exclude current document from results
+      BooleanQuery.Builder realMLTQuery = new BooleanQuery.Builder();
+      realMLTQuery.setDisableCoord(true);
+      realMLTQuery.add(boostedMLTQuery, BooleanClause.Occur.MUST);
+      realMLTQuery.add(docIdQuery, BooleanClause.Occur.MUST_NOT);
 
+      return realMLTQuery.build();
     } catch (IOException e) {
       throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
           "Error completing MLT request" + e.getMessage());

Modified: lucene/dev/trunk/solr/core/src/test/org/apache/solr/search/mlt/CloudMLTQParserTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/test/org/apache/solr/search/mlt/CloudMLTQParserTest.java?rev=1715931&r1=1715930&r2=1715931&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/test/org/apache/solr/search/mlt/CloudMLTQParserTest.java (original)
+++ lucene/dev/trunk/solr/core/src/test/org/apache/solr/search/mlt/CloudMLTQParserTest.java Mon Nov 23 19:41:08 2015
@@ -109,7 +109,7 @@ public class CloudMLTQParserTest extends
     params.set(CommonParams.Q, "{!mlt qf=lowerfilt}17");
     QueryResponse queryResponse = cloudClient.query(params);
     SolrDocumentList solrDocuments = queryResponse.getResults();
-    int[] expectedIds = new int[]{17, 7, 13, 14, 15, 16, 20, 22, 24, 32};
+    int[] expectedIds = new int[]{7, 13, 14, 15, 16, 20, 22, 24, 32, 9};
     int[] actualIds = new int[10];
     int i = 0;
     for (SolrDocument solrDocument : solrDocuments) {
@@ -122,7 +122,7 @@ public class CloudMLTQParserTest extends
     params.set(CommonParams.DEBUG, "true");
     queryResponse = queryServer(params);
     solrDocuments = queryResponse.getResults();
-    expectedIds = new int[]{3, 29, 27, 26, 28};
+    expectedIds = new int[]{29, 27, 26, 28};
     actualIds = new int[solrDocuments.size()];
     i = 0;
     for (SolrDocument solrDocument : solrDocuments) {
@@ -130,30 +130,27 @@ public class CloudMLTQParserTest extends
     }
     assertArrayEquals(expectedIds, actualIds);
 
-    String expectedQueryString = "lowerfilt:bmw lowerfilt:usa";
+    String[] expectedQueryStrings = new String[]{
+      "(+(lowerfilt:bmw lowerfilt:usa) -id:3)/no_coord",
+      "(+(lowerfilt:usa lowerfilt:bmw) -id:3)/no_coord"};
 
-    ArrayList<String> actualParsedQueries;
+    String[] actualParsedQueries;
     
     if(queryResponse.getDebugMap().get("parsedquery") instanceof  String) {
-      actualParsedQueries = new ArrayList();
-      actualParsedQueries.add((String) queryResponse.getDebugMap().get("parsedquery"));
+      actualParsedQueries = new String[]{(String) queryResponse.getDebugMap().get("parsedquery")};
     } else {
-      actualParsedQueries = (ArrayList<String>) queryResponse
-          .getDebugMap().get("parsedquery");
-    }
-      
-    for (int counter = 0; counter < actualParsedQueries.size(); counter++) {
-      assertTrue("Parsed queries aren't equal",
-          compareParsedQueryStrings(expectedQueryString,
-              actualParsedQueries.get(counter)));
+      actualParsedQueries = ((ArrayList<String>) queryResponse
+          .getDebugMap().get("parsedquery")).toArray(new String[0]);
+      Arrays.sort(actualParsedQueries);
     }
+    assertArrayEquals(expectedQueryStrings, actualParsedQueries);
 
     params = new ModifiableSolrParams();
     params.set(CommonParams.Q, "{!mlt qf=lowerfilt,lowerfilt1 mindf=0 mintf=1}26");
     params.set(CommonParams.DEBUG, "true");
     queryResponse = queryServer(params);
     solrDocuments = queryResponse.getResults();
-    expectedIds = new int[]{26, 27, 3, 29, 28};
+    expectedIds = new int[]{27, 3, 29, 28};
     actualIds = new int[solrDocuments.size()];
     i = 0;
     for (SolrDocument solrDocument : solrDocuments) {
@@ -162,21 +159,18 @@ public class CloudMLTQParserTest extends
     
     assertArrayEquals(expectedIds, actualIds);
 
-    expectedQueryString = "lowerfilt:bmw lowerfilt:usa lowerfilt:328i";
+    expectedQueryStrings = new String[]{
+      "(+(lowerfilt:bmw lowerfilt:usa) -id:26)/no_coord",
+      "(+(lowerfilt:usa lowerfilt:bmw lowerfilt:328i) -id:26)/no_coord"};
 
     if(queryResponse.getDebugMap().get("parsedquery") instanceof  String) {
-      actualParsedQueries = new ArrayList();
-      actualParsedQueries.add((String) queryResponse.getDebugMap().get("parsedquery"));
+      actualParsedQueries = new String[]{(String) queryResponse.getDebugMap().get("parsedquery")};
     } else {
-      actualParsedQueries = (ArrayList<String>) queryResponse
-          .getDebugMap().get("parsedquery");
-    }
-      
-    for (int counter = 0; counter < actualParsedQueries.size(); counter++) {
-      assertTrue("Parsed queries aren't equal",
-          compareParsedQueryStrings(expectedQueryString,
-              actualParsedQueries.get(counter)));
+      actualParsedQueries = ((ArrayList<String>) queryResponse
+          .getDebugMap().get("parsedquery")).toArray(new String[0]);
+      Arrays.sort(actualParsedQueries);
     }
+    assertArrayEquals(expectedQueryStrings, actualParsedQueries);
 
     params = new ModifiableSolrParams();
     // Test out a high value of df and make sure nothing matches.
@@ -200,7 +194,7 @@ public class CloudMLTQParserTest extends
     params.set(CommonParams.DEBUG, "true");
     queryResponse = queryServer(params);
     solrDocuments = queryResponse.getResults();
-    assertEquals("Expected to match 4 documents with a minwl of 3 but found more", 5, solrDocuments.size());
+    assertEquals("Expected to match 4 documents with a minwl of 3 but found more", 4, solrDocuments.size());
 
     // Assert that {!mlt}id does not throw an exception i.e. implicitly, only fields that are stored + have explicit
     // analyzer are used for MLT Query construction.
@@ -210,7 +204,7 @@ public class CloudMLTQParserTest extends
     queryResponse = queryServer(params);
     solrDocuments = queryResponse.getResults();
     actualIds = new int[solrDocuments.size()];
-    expectedIds = new int[]{13, 14, 15, 16, 20, 22, 24, 32, 18, 19};
+    expectedIds = new int[]{13, 14, 15, 16, 22, 24, 32, 18, 19, 21};
     i = 0;
     StringBuilder sb = new StringBuilder();
     for (SolrDocument solrDocument : solrDocuments) {

Modified: lucene/dev/trunk/solr/core/src/test/org/apache/solr/search/mlt/SimpleMLTQParserTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/test/org/apache/solr/search/mlt/SimpleMLTQParserTest.java?rev=1715931&r1=1715930&r2=1715931&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/test/org/apache/solr/search/mlt/SimpleMLTQParserTest.java (original)
+++ lucene/dev/trunk/solr/core/src/test/org/apache/solr/search/mlt/SimpleMLTQParserTest.java Mon Nov 23 19:41:08 2015
@@ -101,10 +101,9 @@ public class SimpleMLTQParserTest extend
     params.set(CommonParams.Q, "{!mlt qf=lowerfilt mindf=0 mintf=1}26");
     params.set(CommonParams.DEBUG, "true");
     assertQ(req(params),
-        "//result/doc[1]/int[@name='id'][.='26']",
-        "//result/doc[2]/int[@name='id'][.='29']",
-        "//result/doc[3]/int[@name='id'][.='27']",
-        "//result/doc[4]/int[@name='id'][.='28']"
+        "//result/doc[1]/int[@name='id'][.='29']",
+        "//result/doc[2]/int[@name='id'][.='27']",
+        "//result/doc[3]/int[@name='id'][.='28']"
     );
 
     params = new ModifiableSolrParams();
@@ -118,14 +117,14 @@ public class SimpleMLTQParserTest extend
     params.set(CommonParams.Q, "{!mlt qf=lowerfilt minwl=3 mintf=1 mindf=1}26");
     params.set(CommonParams.DEBUG, "true");
     assertQ(req(params),
-        "//result[@numFound='4']"
+        "//result[@numFound='3']"
     );
 
     params = new ModifiableSolrParams();
     params.set(CommonParams.Q, "{!mlt qf=lowerfilt minwl=4 mintf=1 mindf=1}26");
     params.set(CommonParams.DEBUG, "true");
     assertQ(req(params),
-        "//result[@numFound='1']"
+        "//result[@numFound='0']"
     );
   }