You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by an...@apache.org on 2015/07/07 02:05:23 UTC

svn commit: r1689531 - in /lucene/dev/trunk: lucene/queries/src/java/org/apache/lucene/queries/mlt/ solr/ solr/core/src/java/org/apache/solr/search/mlt/ solr/core/src/test/org/apache/solr/search/mlt/

Author: anshum
Date: Tue Jul  7 00:05:23 2015
New Revision: 1689531

URL: http://svn.apache.org/r1689531
Log:
SOLR-7143: MoreLikeThis Query parser now handles multiple field names

Modified:
    lucene/dev/trunk/lucene/queries/src/java/org/apache/lucene/queries/mlt/MoreLikeThis.java
    lucene/dev/trunk/solr/CHANGES.txt
    lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/mlt/CloudMLTQParser.java
    lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/mlt/SimpleMLTQParser.java
    lucene/dev/trunk/solr/core/src/test/org/apache/solr/search/mlt/CloudMLTQParserTest.java
    lucene/dev/trunk/solr/core/src/test/org/apache/solr/search/mlt/SimpleMLTQParserTest.java

Modified: lucene/dev/trunk/lucene/queries/src/java/org/apache/lucene/queries/mlt/MoreLikeThis.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/queries/src/java/org/apache/lucene/queries/mlt/MoreLikeThis.java?rev=1689531&r1=1689530&r2=1689531&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/queries/src/java/org/apache/lucene/queries/mlt/MoreLikeThis.java (original)
+++ lucene/dev/trunk/lucene/queries/src/java/org/apache/lucene/queries/mlt/MoreLikeThis.java Tue Jul  7 00:05:23 2015
@@ -753,9 +753,10 @@ public final class MoreLikeThis {
       IOException {
     HashMap<String,Int> termFreqMap = new HashMap();
     for (String fieldName : fieldNames) {
-
       for (String field : fields.keySet()) {
         Collection<Object> fieldValues = fields.get(field);
+        if(fieldValues == null)
+          continue;
         for(Object fieldValue:fieldValues) {
           if (fieldValue != null) {
             addTermFrequencies(new StringReader(String.valueOf(fieldValue)), termFreqMap,

Modified: lucene/dev/trunk/solr/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/CHANGES.txt?rev=1689531&r1=1689530&r2=1689531&view=diff
==============================================================================
--- lucene/dev/trunk/solr/CHANGES.txt (original)
+++ lucene/dev/trunk/solr/CHANGES.txt Tue Jul  7 00:05:23 2015
@@ -209,6 +209,9 @@ Bug Fixes
 * SOLR-7741: Add missing fields to SolrIndexerConfig.toMap
   (Mike Drob, Christine Poerschke via Ramkumar Aiyengar)
 
+* SOLR-7143: MoreLikeThis Query parser should handle multiple field names
+  (Jens Wille, Anshum Gupta)
+
 Optimizations
 ----------------------
 

Modified: lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/mlt/CloudMLTQParser.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/mlt/CloudMLTQParser.java?rev=1689531&r1=1689530&r2=1689531&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/mlt/CloudMLTQParser.java (original)
+++ lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/mlt/CloudMLTQParser.java Tue Jul  7 00:05:23 2015
@@ -20,6 +20,7 @@ import org.apache.lucene.queries.mlt.Mor
 import org.apache.lucene.search.Query;
 import org.apache.solr.common.SolrDocument;
 import org.apache.solr.common.SolrException;
+import org.apache.solr.common.StringUtils;
 import org.apache.solr.common.params.ModifiableSolrParams;
 import org.apache.solr.common.params.SolrParams;
 import org.apache.solr.common.util.NamedList;
@@ -38,8 +39,11 @@ import java.util.ArrayList;
 import java.util.Collection;
 import java.util.HashMap;
 import java.util.Map;
+import java.util.regex.Pattern;
 
 public class CloudMLTQParser extends QParser {
+  // Pattern is thread safe -- TODO? share this with general 'fl' param
+  private static final Pattern splitList = Pattern.compile(",| ");
 
   public CloudMLTQParser(String qstr, SolrParams localParams,
                          SolrParams params, SolrQueryRequest req) {
@@ -86,14 +90,21 @@ public class CloudMLTQParser extends QPa
     String[] qf = localParams.getParams("qf");
     Map<String, Collection<Object>> filteredDocument = new HashMap();
 
+    ArrayList<String> fieldNames = new ArrayList();
+
     if (qf != null) {
-      mlt.setFieldNames(qf);
-      for (String field : qf) {
-        filteredDocument.put(field, doc.getFieldValues(field));
+      for (String fieldName : qf) {
+        if (!StringUtils.isEmpty(fieldName))  {
+          String[] strings = splitList.split(fieldName);
+          for (String string : strings) {
+            if (!StringUtils.isEmpty(string)) {
+              fieldNames.add(string);
+            }
+          }
+        }
       }
     } else {
       Map<String, SchemaField> fields = req.getSchema().getFields();
-      ArrayList<String> fieldNames = new ArrayList();
       for (String field : doc.getFieldNames()) {
         // Only use fields that are stored and have an explicit analyzer.
         // This makes sense as the query uses tf/idf/.. for query construction.
@@ -101,10 +112,18 @@ public class CloudMLTQParser extends QPa
         if(fields.get(field).stored() 
             && fields.get(field).getType().isExplicitAnalyzer()) {
           fieldNames.add(field);
-          filteredDocument.put(field, doc.getFieldValues(field));
         }
       }
-      mlt.setFieldNames(fieldNames.toArray(new String[fieldNames.size()]));
+    }
+
+    if( fieldNames.size() < 1 ) {
+      throw new SolrException( SolrException.ErrorCode.BAD_REQUEST,
+          "MoreLikeThis requires at least one similarity field: qf" );
+    }
+
+    mlt.setFieldNames(fieldNames.toArray(new String[fieldNames.size()]));
+    for (String field : fieldNames) {
+      filteredDocument.put(field, doc.getFieldValues(field));
     }
 
     try {

Modified: lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/mlt/SimpleMLTQParser.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/mlt/SimpleMLTQParser.java?rev=1689531&r1=1689530&r2=1689531&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/mlt/SimpleMLTQParser.java (original)
+++ lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/mlt/SimpleMLTQParser.java Tue Jul  7 00:05:23 2015
@@ -25,6 +25,7 @@ import org.apache.lucene.search.TopDocs;
 import org.apache.lucene.util.BytesRefBuilder;
 import org.apache.lucene.util.NumericUtils;
 import org.apache.solr.common.SolrException;
+import org.apache.solr.common.StringUtils;
 import org.apache.solr.common.params.SolrParams;
 import org.apache.solr.request.SolrQueryRequest;
 import org.apache.solr.schema.SchemaField;
@@ -35,8 +36,11 @@ import org.apache.solr.search.SolrIndexS
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Map;
+import java.util.regex.Pattern;
 
 public class SimpleMLTQParser extends QParser {
+  // Pattern is thread safe -- TODO? share this with general 'fl' param
+  private static final Pattern splitList = Pattern.compile(",| ");
 
   public SimpleMLTQParser(String qstr, SolrParams localParams,
                           SolrParams params, SolrQueryRequest req) {
@@ -85,18 +89,30 @@ public class SimpleMLTQParser extends QP
       ArrayList<String> fields = new ArrayList();
 
       if (qf != null) {
-        mlt.setFieldNames(qf);
+        for (String fieldName : qf) {
+          if (!StringUtils.isEmpty(fieldName))  {
+            String[] strings = splitList.split(fieldName);
+            for (String string : strings) {
+              if (!StringUtils.isEmpty(string)) {
+                fields.add(string);
+              }
+            }
+          }
+        }
       } else {
-
         Map<String, SchemaField> fieldNames = req.getSearcher().getSchema().getFields();
         for (String fieldName : fieldNames.keySet()) {
           if (fieldNames.get(fieldName).indexed() && fieldNames.get(fieldName).stored())
             if (fieldNames.get(fieldName).getType().getNumericType() == null)
               fields.add(fieldName);
         }
-        mlt.setFieldNames(fields.toArray(new String[fields.size()]));
+      }
+      if( fields.size() < 1 ) {
+        throw new SolrException( SolrException.ErrorCode.BAD_REQUEST,
+            "MoreLikeThis requires at least one similarity field: qf" );
       }
 
+      mlt.setFieldNames(fields.toArray(new String[fields.size()]));
       mlt.setAnalyzer(req.getSchema().getIndexAnalyzer());
 
       return mlt.like(scoreDocs[0].doc);

Modified: lucene/dev/trunk/solr/core/src/test/org/apache/solr/search/mlt/CloudMLTQParserTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/test/org/apache/solr/search/mlt/CloudMLTQParserTest.java?rev=1689531&r1=1689530&r2=1689531&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/test/org/apache/solr/search/mlt/CloudMLTQParserTest.java (original)
+++ lucene/dev/trunk/solr/core/src/test/org/apache/solr/search/mlt/CloudMLTQParserTest.java Tue Jul  7 00:05:23 2015
@@ -59,6 +59,7 @@ public class CloudMLTQParserTest extends
     String id = "id";
     delQ("*:*");
     String FIELD1 = "lowerfilt" ;
+    String FIELD2 = "lowerfilt1" ;
     
     indexDoc(sdoc(id, "1", FIELD1, "toyota"));
     indexDoc(sdoc(id, "2", FIELD1, "chevrolet"));
@@ -87,6 +88,14 @@ public class CloudMLTQParserTest extends
     indexDoc(sdoc(id, "26", FIELD1, "bmw usa 328i"));
     indexDoc(sdoc(id, "27", FIELD1, "bmw usa 535i"));
     indexDoc(sdoc(id, "28", FIELD1, "bmw 750Li"));
+    indexDoc(sdoc(id, "29", FIELD1, "bmw usa",
+        FIELD2, "red green blue"));
+    indexDoc(sdoc(id, "30", FIELD1, "The quote red fox jumped over the lazy brown dogs.",
+        FIELD2, "red green yellow"));
+    indexDoc(sdoc(id, "31", FIELD1, "The fat red fox jumped over the lazy brown dogs.",
+        FIELD2, "green blue yellow"));
+    indexDoc(sdoc(id, "32", FIELD1, "The slim red fox jumped over the lazy brown dogs.",
+        FIELD2, "yellow white black"));
 
     commit();
 
@@ -100,7 +109,7 @@ public class CloudMLTQParserTest extends
     params.set(CommonParams.Q, "{!mlt qf=lowerfilt}17");
     QueryResponse queryResponse = cloudClient.query(params);
     SolrDocumentList solrDocuments = queryResponse.getResults();
-    int[] expectedIds = new int[]{17, 7, 13, 14, 15, 16, 20, 22, 24, 9};
+    int[] expectedIds = new int[]{17, 7, 13, 14, 15, 16, 20, 22, 24, 32};
     int[] actualIds = new int[10];
     int i = 0;
     for (SolrDocument solrDocument : solrDocuments) {
@@ -113,7 +122,7 @@ public class CloudMLTQParserTest extends
     params.set(CommonParams.DEBUG, "true");
     queryResponse = queryServer(params);
     solrDocuments = queryResponse.getResults();
-    expectedIds = new int[]{3, 27, 26, 28};
+    expectedIds = new int[]{3, 29, 27, 26, 28};
     actualIds = new int[solrDocuments.size()];
     i = 0;
     for (SolrDocument solrDocument : solrDocuments) {
@@ -140,6 +149,36 @@ public class CloudMLTQParserTest extends
     }
 
     params = new ModifiableSolrParams();
+    params.set(CommonParams.Q, "{!mlt qf=lowerfilt,lowerfilt1 mindf=0 mintf=1}26");
+    params.set(CommonParams.DEBUG, "true");
+    queryResponse = queryServer(params);
+    solrDocuments = queryResponse.getResults();
+    expectedIds = new int[]{26, 27, 3, 29, 28};
+    actualIds = new int[solrDocuments.size()];
+    i = 0;
+    for (SolrDocument solrDocument : solrDocuments) {
+      actualIds[i++] =  Integer.valueOf(String.valueOf(solrDocument.getFieldValue("id")));
+    }
+    
+    assertArrayEquals(expectedIds, actualIds);
+
+    expectedQueryString = "lowerfilt:bmw lowerfilt:usa lowerfilt:328i";
+
+    if(queryResponse.getDebugMap().get("parsedquery") instanceof  String) {
+      actualParsedQueries = new ArrayList();
+      actualParsedQueries.add((String) queryResponse.getDebugMap().get("parsedquery"));
+    } else {
+      actualParsedQueries = (ArrayList<String>) queryResponse
+          .getDebugMap().get("parsedquery");
+    }
+      
+    for (int counter = 0; counter < actualParsedQueries.size(); counter++) {
+      assertTrue("Parsed queries aren't equal",
+          compareParsedQueryStrings(expectedQueryString,
+              actualParsedQueries.get(counter)));
+    }
+
+    params = new ModifiableSolrParams();
     // Test out a high value of df and make sure nothing matches.
     params.set(CommonParams.Q, "{!mlt qf=lowerfilt mindf=20 mintf=1}3");
     params.set(CommonParams.DEBUG, "true");
@@ -161,7 +200,7 @@ public class CloudMLTQParserTest extends
     params.set(CommonParams.DEBUG, "true");
     queryResponse = queryServer(params);
     solrDocuments = queryResponse.getResults();
-    assertEquals("Expected to match 4 documents with a minwl of 3 but found more", solrDocuments.size(), 4);
+    assertEquals("Expected to match 4 documents with a minwl of 3 but found more", 5, solrDocuments.size());
 
     // Assert that {!mlt}id does not throw an exception i.e. implicitly, only fields that are stored + have explicit
     // analyzer are used for MLT Query construction.
@@ -171,10 +210,12 @@ public class CloudMLTQParserTest extends
     queryResponse = queryServer(params);
     solrDocuments = queryResponse.getResults();
     actualIds = new int[solrDocuments.size()];
-    expectedIds = new int[]{13, 14, 15, 16, 20, 22, 24, 18, 19, 21};
+    expectedIds = new int[]{13, 14, 15, 16, 20, 22, 24, 32, 18, 19};
     i = 0;
+    StringBuilder sb = new StringBuilder();
     for (SolrDocument solrDocument : solrDocuments) {
       actualIds[i++] =  Integer.valueOf(String.valueOf(solrDocument.getFieldValue("id")));
+      sb.append(actualIds[i-1]).append(", ");
     }
     assertArrayEquals(expectedIds, actualIds);
   }

Modified: lucene/dev/trunk/solr/core/src/test/org/apache/solr/search/mlt/SimpleMLTQParserTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/test/org/apache/solr/search/mlt/SimpleMLTQParserTest.java?rev=1689531&r1=1689530&r2=1689531&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/test/org/apache/solr/search/mlt/SimpleMLTQParserTest.java (original)
+++ lucene/dev/trunk/solr/core/src/test/org/apache/solr/search/mlt/SimpleMLTQParserTest.java Tue Jul  7 00:05:23 2015
@@ -17,9 +17,13 @@ package org.apache.solr.search.mlt;
  * limitations under the License.
  */
 
+import java.util.ArrayList;
+
 import org.apache.solr.SolrTestCaseJ4;
+import org.apache.solr.common.SolrDocument;
 import org.apache.solr.common.params.CommonParams;
 import org.apache.solr.common.params.ModifiableSolrParams;
+import org.apache.solr.response.SolrQueryResponse;
 import org.junit.BeforeClass;
 import org.junit.Test;
 
@@ -34,38 +38,50 @@ public class SimpleMLTQParserTest extend
   @Test
   public void doTest() throws Exception {
     String id = "id";
+    String FIELD1 = "lowerfilt" ;
+    String FIELD2 = "lowerfilt1" ;
     delQ("*:*");
-    assertU(adoc(id, "1", "lowerfilt", "toyota"));
-    assertU(adoc(id, "2", "lowerfilt", "chevrolet"));
-    assertU(adoc(id, "3", "lowerfilt", "suzuki"));
-    assertU(adoc(id, "4", "lowerfilt", "ford"));
-    assertU(adoc(id, "5", "lowerfilt", "ferrari"));
-    assertU(adoc(id, "6", "lowerfilt", "jaguar"));
-    assertU(adoc(id, "7", "lowerfilt", "mclaren moon or the moon and moon moon shine " +
+    assertU(adoc(id, "1", FIELD1, "toyota"));
+    assertU(adoc(id, "2", FIELD1, "chevrolet"));
+    assertU(adoc(id, "3", FIELD1, "suzuki"));
+    assertU(adoc(id, "4", FIELD1, "ford"));
+    assertU(adoc(id, "5", FIELD1, "ferrari"));
+    assertU(adoc(id, "6", FIELD1, "jaguar"));
+    assertU(adoc(id, "7", FIELD1, "mclaren moon or the moon and moon moon shine " +
         "and the moon but moon was good foxes too"));
-    assertU(adoc(id, "8", "lowerfilt", "sonata"));
-    assertU(adoc(id, "9", "lowerfilt", "The quick red fox jumped over the lazy big " +
+    assertU(adoc(id, "8", FIELD1, "sonata"));
+    assertU(adoc(id, "9", FIELD1, "The quick red fox jumped over the lazy big " +
         "and large brown dogs."));
-    assertU(adoc(id, "10", "lowerfilt", "blue"));
-    assertU(adoc(id, "12", "lowerfilt", "glue"));
-    assertU(adoc(id, "13", "lowerfilt", "The quote red fox jumped over the lazy brown dogs."));
-    assertU(adoc(id, "14", "lowerfilt", "The quote red fox jumped over the lazy brown dogs."));
-    assertU(adoc(id, "15", "lowerfilt", "The fat red fox jumped over the lazy brown dogs."));
-    assertU(adoc(id, "16", "lowerfilt", "The slim red fox jumped over the lazy brown dogs."));
-    assertU(adoc(id, "17", "lowerfilt", "The quote red fox jumped moon over the lazy " +
+    assertU(adoc(id, "10", FIELD1, "blue"));
+    assertU(adoc(id, "12", FIELD1, "glue"));
+    assertU(adoc(id, "13", FIELD1, "The quote red fox jumped over the lazy brown dogs."));
+    assertU(adoc(id, "14", FIELD1, "The quote red fox jumped over the lazy brown dogs."));
+    assertU(adoc(id, "15", FIELD1, "The fat red fox jumped over the lazy brown dogs."));
+    assertU(adoc(id, "16", FIELD1, "The slim red fox jumped over the lazy brown dogs."));
+    assertU(adoc(id, "17", FIELD1, "The quote red fox jumped moon over the lazy " +
         "brown dogs moon. Of course moon. Foxes and moon come back to the foxes and moon"));
-    assertU(adoc(id, "18", "lowerfilt", "The quote red fox jumped over the lazy brown dogs."));
-    assertU(adoc(id, "19", "lowerfilt", "The hose red fox jumped over the lazy brown dogs."));
-    assertU(adoc(id, "20", "lowerfilt", "The quote red fox jumped over the lazy brown dogs."));
-    assertU(adoc(id, "21", "lowerfilt", "The court red fox jumped over the lazy brown dogs."));
-    assertU(adoc(id, "22", "lowerfilt", "The quote red fox jumped over the lazy brown dogs."));
-    assertU(adoc(id, "23", "lowerfilt", "The quote red fox jumped over the lazy brown dogs."));
-    assertU(adoc(id, "24", "lowerfilt", "The file red fox jumped over the lazy brown dogs."));
-    assertU(adoc(id, "25", "lowerfilt", "rod fix"));
+    assertU(adoc(id, "18", FIELD1, "The quote red fox jumped over the lazy brown dogs."));
+    assertU(adoc(id, "19", FIELD1, "The hose red fox jumped over the lazy brown dogs."));
+    assertU(adoc(id, "20", FIELD1, "The quote red fox jumped over the lazy brown dogs."));
+    assertU(adoc(id, "21", FIELD1, "The court red fox jumped over the lazy brown dogs."));
+    assertU(adoc(id, "22", FIELD1, "The quote red fox jumped over the lazy brown dogs."));
+    assertU(adoc(id, "23", FIELD1, "The quote red fox jumped over the lazy brown dogs."));
+    assertU(adoc(id, "24", FIELD1, "The file red fox jumped over the lazy brown dogs."));
+    assertU(adoc(id, "25", FIELD1, "rod fix"));
+    assertU(adoc(id, "26", FIELD1, "bmw usa 328i"));
+    assertU(adoc(id, "27", FIELD1, "bmw usa 535i"));
+    assertU(adoc(id, "28", FIELD1, "bmw 750Li"));
+    assertU(adoc(id, "29", FIELD1, "bmw usa",
+        FIELD2, "red green blue"));
+    assertU(adoc(id, "30", FIELD1, "The quote red fox jumped over the lazy brown dogs.",
+        FIELD2, "red green yellow"));
+    assertU(adoc(id, "31", FIELD1, "The fat red fox jumped over the lazy brown dogs.",
+        FIELD2, "green blue yellow"));
+    assertU(adoc(id, "32", FIELD1, "The slim red fox jumped over the lazy brown dogs.",
+        FIELD2, "yellow white black"));
 
     assertU(commit());
 
-
     ModifiableSolrParams params = new ModifiableSolrParams();
     params.set(CommonParams.Q, "{!mlt qf=lowerfilt}17");
     assertQ(req(params),
@@ -79,7 +95,38 @@ public class SimpleMLTQParserTest extend
         "//result/doc[8]/int[@name='id'][.='21']",
         "//result/doc[9]/int[@name='id'][.='22']",
         "//result/doc[10]/int[@name='id'][.='23']"
-        );
+    );
+
+    params = new ModifiableSolrParams();
+    params.set(CommonParams.Q, "{!mlt qf=lowerfilt mindf=0 mintf=1}26");
+    params.set(CommonParams.DEBUG, "true");
+    assertQ(req(params),
+        "//result/doc[1]/int[@name='id'][.='26']",
+        "//result/doc[2]/int[@name='id'][.='29']",
+        "//result/doc[3]/int[@name='id'][.='27']",
+        "//result/doc[4]/int[@name='id'][.='28']"
+    );
+
+    params = new ModifiableSolrParams();
+    params.set(CommonParams.Q, "{!mlt qf=lowerfilt mindf=10 mintf=1}26");
+    params.set(CommonParams.DEBUG, "true");
+    assertQ(req(params),
+        "//result[@numFound='0']"
+    );
+
+    params = new ModifiableSolrParams();
+    params.set(CommonParams.Q, "{!mlt qf=lowerfilt minwl=3 mintf=1 mindf=1}26");
+    params.set(CommonParams.DEBUG, "true");
+    assertQ(req(params),
+        "//result[@numFound='4']"
+    );
+
+    params = new ModifiableSolrParams();
+    params.set(CommonParams.Q, "{!mlt qf=lowerfilt minwl=4 mintf=1 mindf=1}26");
+    params.set(CommonParams.DEBUG, "true");
+    assertQ(req(params),
+        "//result[@numFound='1']"
+    );
   }
 
 }