You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by an...@apache.org on 2017/01/09 21:26:23 UTC

lucene-solr:master: SOLR-9644: Fixed SimpleMLTQParser and CloudMLTQParser to handle boosts properly and CloudMLTQParser to only extract actual values from IndexableField type fields to the filtered document.

Repository: lucene-solr
Updated Branches:
  refs/heads/master b8383db06 -> 2b4e3dd94


SOLR-9644: Fixed SimpleMLTQParser and CloudMLTQParser to handle boosts properly and CloudMLTQParser to only extract actual values from IndexableField type fields to the filtered document.


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/2b4e3dd9
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/2b4e3dd9
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/2b4e3dd9

Branch: refs/heads/master
Commit: 2b4e3dd941a7a88274f2a86f18ea57a9d95e4364
Parents: b8383db
Author: anshum <an...@apache.org>
Authored: Mon Jan 9 13:05:21 2017 -0800
Committer: anshum <an...@apache.org>
Committed: Mon Jan 9 13:06:24 2017 -0800

----------------------------------------------------------------------
 solr/CHANGES.txt                                |  4 ++
 .../apache/solr/search/mlt/CloudMLTQParser.java | 49 ++++++++++++--------
 .../solr/search/mlt/SimpleMLTQParser.java       | 30 ++++++------
 .../solr/search/mlt/CloudMLTQParserTest.java    | 23 ++++++++-
 .../solr/search/mlt/SimpleMLTQParserTest.java   | 33 +++++++++++--
 5 files changed, 102 insertions(+), 37 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/2b4e3dd9/solr/CHANGES.txt
----------------------------------------------------------------------
diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index c79b3c6..2b79f04 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -345,6 +345,10 @@ Bug Fixes
 * SOLR-9883: Example schemaless solr config files can lead to invalid tlog replays: when updates are buffered,
   update processors ordered before DistributedUpdateProcessor, e.g. field normalization, are never run. (Steve Rowe)
 
+* SOLR-9644: SimpleMLTQParser and CloudMLTQParser did not handle field boosts properly
+  and CloudMLTQParser included extra strings from the field definitions in the query.
+  (Ere Maijala via Anshum Gupta)
+
 Other Changes
 ----------------------
 

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/2b4e3dd9/solr/core/src/java/org/apache/solr/search/mlt/CloudMLTQParser.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/search/mlt/CloudMLTQParser.java b/solr/core/src/java/org/apache/solr/search/mlt/CloudMLTQParser.java
index 0f46725..945047b 100644
--- a/solr/core/src/java/org/apache/solr/search/mlt/CloudMLTQParser.java
+++ b/solr/core/src/java/org/apache/solr/search/mlt/CloudMLTQParser.java
@@ -22,6 +22,7 @@ import java.util.HashMap;
 import java.util.Map;
 import java.util.regex.Pattern;
 
+import org.apache.lucene.index.IndexableField;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.legacy.LegacyNumericUtils;
 import org.apache.lucene.queries.mlt.MoreLikeThis;
@@ -64,73 +65,83 @@ public class CloudMLTQParser extends QParser {
           SolrException.ErrorCode.BAD_REQUEST, "Error completing MLT request. Could not fetch " +
           "document with id [" + id + "]");
     }
-    
+
     String[] qf = localParams.getParams("qf");
     Map<String,Float> boostFields = new HashMap<>();
     MoreLikeThis mlt = new MoreLikeThis(req.getSearcher().getIndexReader());
-    
-    mlt.setMinTermFreq(localParams.getInt("mintf", MoreLikeThis.DEFAULT_MIN_TERM_FREQ));
 
+    mlt.setMinTermFreq(localParams.getInt("mintf", MoreLikeThis.DEFAULT_MIN_TERM_FREQ));
     mlt.setMinDocFreq(localParams.getInt("mindf", 0));
-
     mlt.setMinWordLen(localParams.getInt("minwl", MoreLikeThis.DEFAULT_MIN_WORD_LENGTH));
-
     mlt.setMaxWordLen(localParams.getInt("maxwl", MoreLikeThis.DEFAULT_MAX_WORD_LENGTH));
-
     mlt.setMaxQueryTerms(localParams.getInt("maxqt", MoreLikeThis.DEFAULT_MAX_QUERY_TERMS));
-
     mlt.setMaxNumTokensParsed(localParams.getInt("maxntp", MoreLikeThis.DEFAULT_MAX_NUM_TOKENS_PARSED));
-    
     mlt.setMaxDocFreq(localParams.getInt("maxdf", MoreLikeThis.DEFAULT_MAX_DOC_FREQ));
 
-    if(localParams.get("boost") != null) {
-      mlt.setBoost(localParams.getBool("boost"));
-      boostFields = SolrPluginUtils.parseFieldBoosts(qf);
-    }
+    Boolean boost = localParams.getBool("boost", MoreLikeThis.DEFAULT_BOOST);
+    mlt.setBoost(boost);
 
     mlt.setAnalyzer(req.getSchema().getIndexAnalyzer());
 
     Map<String, Collection<Object>> filteredDocument = new HashMap<>();
-    ArrayList<String> fieldNames = new ArrayList<>();
+    String[] fieldNames;
 
     if (qf != null) {
+      ArrayList<String> fields = new ArrayList();
       for (String fieldName : qf) {
         if (!StringUtils.isEmpty(fieldName))  {
           String[] strings = splitList.split(fieldName);
           for (String string : strings) {
             if (!StringUtils.isEmpty(string)) {
-              fieldNames.add(string);
+              fields.add(string);
             }
           }
         }
       }
+      // Parse field names and boosts from the fields
+      boostFields = SolrPluginUtils.parseFieldBoosts(fields.toArray(new String[0]));
+      fieldNames = boostFields.keySet().toArray(new String[0]);
     } else {
+      ArrayList<String> fields = new ArrayList();
       for (String field : doc.getFieldNames()) {
         // Only use fields that are stored and have an explicit analyzer.
         // This makes sense as the query uses tf/idf/.. for query construction.
         // We might want to relook and change this in the future though.
         SchemaField f = req.getSchema().getFieldOrNull(field);
         if (f != null && f.stored() && f.getType().isExplicitAnalyzer()) {
-          fieldNames.add(field);
+          fields.add(field);
         }
       }
+      fieldNames = fields.toArray(new String[0]);
     }
 
-    if( fieldNames.size() < 1 ) {
+    if (fieldNames.length < 1) {
       throw new SolrException( SolrException.ErrorCode.BAD_REQUEST,
           "MoreLikeThis requires at least one similarity field: qf" );
     }
 
-    mlt.setFieldNames(fieldNames.toArray(new String[fieldNames.size()]));
+    mlt.setFieldNames(fieldNames);
     for (String field : fieldNames) {
-      filteredDocument.put(field, doc.getFieldValues(field));
+      Collection<Object> fieldValues = doc.getFieldValues(field);
+      if (fieldValues != null) {
+        Collection<Object> values = new ArrayList();
+        for (Object val : fieldValues) {
+          if (val instanceof IndexableField) {
+            values.add(((IndexableField)val).stringValue());
+          }
+          else {
+            values.add(val);
+          }
+        }
+        filteredDocument.put(field, values);
+      }
     }
 
     try {
       Query rawMLTQuery = mlt.like(filteredDocument);
       BooleanQuery boostedMLTQuery = (BooleanQuery) rawMLTQuery;
 
-      if (boostFields.size() > 0) {
+      if (boost && boostFields.size() > 0) {
         BooleanQuery.Builder newQ = new BooleanQuery.Builder();
         newQ.setMinimumNumberShouldMatch(boostedMLTQuery.getMinimumNumberShouldMatch());
 

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/2b4e3dd9/solr/core/src/java/org/apache/solr/search/mlt/SimpleMLTQParser.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/search/mlt/SimpleMLTQParser.java b/solr/core/src/java/org/apache/solr/search/mlt/SimpleMLTQParser.java
index 50803df..de6eb58 100644
--- a/solr/core/src/java/org/apache/solr/search/mlt/SimpleMLTQParser.java
+++ b/solr/core/src/java/org/apache/solr/search/mlt/SimpleMLTQParser.java
@@ -76,16 +76,13 @@ public class SimpleMLTQParser extends QParser {
       mlt.setMaxQueryTerms(localParams.getInt("maxqt", MoreLikeThis.DEFAULT_MAX_QUERY_TERMS));
       mlt.setMaxNumTokensParsed(localParams.getInt("maxntp", MoreLikeThis.DEFAULT_MAX_NUM_TOKENS_PARSED));
       mlt.setMaxDocFreq(localParams.getInt("maxdf", MoreLikeThis.DEFAULT_MAX_DOC_FREQ));
+      Boolean boost = localParams.getBool("boost", false);
+      mlt.setBoost(boost);
 
-      // what happens if value is explicitly set to false?
-      if(localParams.get("boost") != null) {
-        mlt.setBoost(localParams.getBool("boost", false));
-        boostFields = SolrPluginUtils.parseFieldBoosts(qf);
-      }
+      String[] fieldNames;
       
-      ArrayList<String> fields = new ArrayList<>();
-
       if (qf != null) {
+        ArrayList<String> fields = new ArrayList<>();
         for (String fieldName : qf) {
           if (!StringUtils.isEmpty(fieldName))  {
             String[] strings = splitList.split(fieldName);
@@ -96,26 +93,31 @@ public class SimpleMLTQParser extends QParser {
             }
           }
         }
+        // Parse field names and boosts from the fields
+        boostFields = SolrPluginUtils.parseFieldBoosts(fields.toArray(new String[0]));
+        fieldNames = boostFields.keySet().toArray(new String[0]);
       } else {
-        Map<String, SchemaField> fieldNames = req.getSearcher().getSchema().getFields();
-        for (String fieldName : fieldNames.keySet()) {
-          if (fieldNames.get(fieldName).indexed() && fieldNames.get(fieldName).stored())
-            if (fieldNames.get(fieldName).getType().getNumericType() == null)
+        Map<String, SchemaField> fieldDefinitions = req.getSearcher().getSchema().getFields();
+        ArrayList<String> fields = new ArrayList();
+        for (String fieldName : fieldDefinitions.keySet()) {
+          if (fieldDefinitions.get(fieldName).indexed() && fieldDefinitions.get(fieldName).stored())
+            if (fieldDefinitions.get(fieldName).getType().getNumericType() == null)
               fields.add(fieldName);
         }
+        fieldNames = fields.toArray(new String[0]);
       }
-      if( fields.size() < 1 ) {
+      if (fieldNames.length < 1) {
         throw new SolrException( SolrException.ErrorCode.BAD_REQUEST,
             "MoreLikeThis requires at least one similarity field: qf" );
       }
 
-      mlt.setFieldNames(fields.toArray(new String[fields.size()]));
+      mlt.setFieldNames(fieldNames);
       mlt.setAnalyzer(req.getSchema().getIndexAnalyzer());
 
       Query rawMLTQuery = mlt.like(scoreDocs[0].doc);
       BooleanQuery boostedMLTQuery = (BooleanQuery) rawMLTQuery;
 
-      if (boostFields.size() > 0) {
+      if (boost && boostFields.size() > 0) {
         BooleanQuery.Builder newQ = new BooleanQuery.Builder();
         newQ.setMinimumNumberShouldMatch(boostedMLTQuery.getMinimumNumberShouldMatch());
 

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/2b4e3dd9/solr/core/src/test/org/apache/solr/search/mlt/CloudMLTQParserTest.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/search/mlt/CloudMLTQParserTest.java b/solr/core/src/test/org/apache/solr/search/mlt/CloudMLTQParserTest.java
index ffcde2f..e3a8d7b 100644
--- a/solr/core/src/test/org/apache/solr/search/mlt/CloudMLTQParserTest.java
+++ b/solr/core/src/test/org/apache/solr/search/mlt/CloudMLTQParserTest.java
@@ -121,6 +121,27 @@ public class CloudMLTQParserTest extends SolrCloudTestCase {
     }
     assertArrayEquals(expectedIds, actualIds);
 
+    queryResponse = cluster.getSolrClient().query(COLLECTION, new SolrQuery("{!mlt qf=lowerfilt_u^10,lowerfilt1_u^1000 boost=false mintf=0 mindf=0}30"));
+    solrDocuments = queryResponse.getResults();
+    expectedIds = new int[]{31, 18, 23, 13, 14, 20, 22, 32, 19, 21};
+    actualIds = new int[solrDocuments.size()];
+    i = 0;
+    for (SolrDocument solrDocument : solrDocuments) {
+      actualIds[i++] = Integer.valueOf(String.valueOf(solrDocument.getFieldValue("id")));
+    }
+    System.out.println("DEBUG ACTUAL IDS 1: " + Arrays.toString(actualIds));
+    assertArrayEquals(expectedIds, actualIds);
+
+    queryResponse = cluster.getSolrClient().query(COLLECTION, new SolrQuery("{!mlt qf=lowerfilt_u^10,lowerfilt1_u^1000 boost=true mintf=0 mindf=0}30"));
+    solrDocuments = queryResponse.getResults();
+    expectedIds = new int[]{29, 31, 32, 18, 23, 13, 14, 20, 22, 19};
+    actualIds = new int[solrDocuments.size()];
+    i = 0;
+    for (SolrDocument solrDocument : solrDocuments) {
+      actualIds[i++] = Integer.valueOf(String.valueOf(solrDocument.getFieldValue("id")));
+    }
+    System.out.println("DEBUG ACTUAL IDS 2: " + Arrays.toString(actualIds));
+    assertArrayEquals(expectedIds, actualIds);
   }
 
   @Test
@@ -220,7 +241,7 @@ public class CloudMLTQParserTest extends SolrCloudTestCase {
     }
     assertArrayEquals(expectedIds, actualIds);
   }
-  
+
   public void testInvalidSourceDocument() throws IOException {
     SolrException e = expectThrows(SolrException.class, () -> {
       cluster.getSolrClient().query(COLLECTION, new SolrQuery("{!mlt qf=lowerfilt_u}999999"));

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/2b4e3dd9/solr/core/src/test/org/apache/solr/search/mlt/SimpleMLTQParserTest.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/search/mlt/SimpleMLTQParserTest.java b/solr/core/src/test/org/apache/solr/search/mlt/SimpleMLTQParserTest.java
index 6f3570f..026c594 100644
--- a/solr/core/src/test/org/apache/solr/search/mlt/SimpleMLTQParserTest.java
+++ b/solr/core/src/test/org/apache/solr/search/mlt/SimpleMLTQParserTest.java
@@ -108,8 +108,37 @@ public class SimpleMLTQParserTest extends SolrTestCaseJ4 {
     );
 
     params = new ModifiableSolrParams();
+    params.set(CommonParams.Q, "{!mlt qf=lowerfilt,lowerfilt1^1000 boost=false mintf=0 mindf=0}30");
+    assertQ(req(params),
+        "//result/doc[1]/int[@name='id'][.='31']",
+        "//result/doc[2]/int[@name='id'][.='13']",
+        "//result/doc[3]/int[@name='id'][.='14']",
+        "//result/doc[4]/int[@name='id'][.='18']",
+        "//result/doc[5]/int[@name='id'][.='20']",
+        "//result/doc[6]/int[@name='id'][.='22']",
+        "//result/doc[7]/int[@name='id'][.='23']",
+        "//result/doc[8]/int[@name='id'][.='32']",
+        "//result/doc[9]/int[@name='id'][.='15']",
+        "//result/doc[10]/int[@name='id'][.='16']"
+    );
+
+    params = new ModifiableSolrParams();
+    params.set(CommonParams.Q, "{!mlt qf=lowerfilt,lowerfilt1^1000 boost=true mintf=0 mindf=0}30");
+    assertQ(req(params),
+        "//result/doc[1]/int[@name='id'][.='29']",
+        "//result/doc[2]/int[@name='id'][.='31']",
+        "//result/doc[3]/int[@name='id'][.='32']",
+        "//result/doc[4]/int[@name='id'][.='13']",
+        "//result/doc[5]/int[@name='id'][.='14']",
+        "//result/doc[6]/int[@name='id'][.='18']",
+        "//result/doc[7]/int[@name='id'][.='20']",
+        "//result/doc[8]/int[@name='id'][.='22']",
+        "//result/doc[9]/int[@name='id'][.='23']",
+        "//result/doc[10]/int[@name='id'][.='15']"
+    );
+
+    params = new ModifiableSolrParams();
     params.set(CommonParams.Q, "{!mlt qf=lowerfilt mindf=0 mintf=1}26");
-    params.set(CommonParams.DEBUG, "true");
     assertQ(req(params),
         "//result/doc[1]/int[@name='id'][.='29']",
         "//result/doc[2]/int[@name='id'][.='27']",
@@ -118,14 +147,12 @@ public class SimpleMLTQParserTest extends SolrTestCaseJ4 {
 
     params = new ModifiableSolrParams();
     params.set(CommonParams.Q, "{!mlt qf=lowerfilt mindf=10 mintf=1}26");
-    params.set(CommonParams.DEBUG, "true");
     assertQ(req(params),
         "//result[@numFound='0']"
     );
 
     params = new ModifiableSolrParams();
     params.set(CommonParams.Q, "{!mlt qf=lowerfilt minwl=3 mintf=1 mindf=1}26");
-    params.set(CommonParams.DEBUG, "true");
     assertQ(req(params),
         "//result[@numFound='3']"
     );