You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by an...@apache.org on 2015/07/07 02:05:23 UTC
svn commit: r1689531 - in /lucene/dev/trunk:
lucene/queries/src/java/org/apache/lucene/queries/mlt/ solr/
solr/core/src/java/org/apache/solr/search/mlt/
solr/core/src/test/org/apache/solr/search/mlt/
Author: anshum
Date: Tue Jul 7 00:05:23 2015
New Revision: 1689531
URL: http://svn.apache.org/r1689531
Log:
SOLR-7143: MoreLikeThis Query parser now handles multiple field names
Modified:
lucene/dev/trunk/lucene/queries/src/java/org/apache/lucene/queries/mlt/MoreLikeThis.java
lucene/dev/trunk/solr/CHANGES.txt
lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/mlt/CloudMLTQParser.java
lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/mlt/SimpleMLTQParser.java
lucene/dev/trunk/solr/core/src/test/org/apache/solr/search/mlt/CloudMLTQParserTest.java
lucene/dev/trunk/solr/core/src/test/org/apache/solr/search/mlt/SimpleMLTQParserTest.java
Modified: lucene/dev/trunk/lucene/queries/src/java/org/apache/lucene/queries/mlt/MoreLikeThis.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/queries/src/java/org/apache/lucene/queries/mlt/MoreLikeThis.java?rev=1689531&r1=1689530&r2=1689531&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/queries/src/java/org/apache/lucene/queries/mlt/MoreLikeThis.java (original)
+++ lucene/dev/trunk/lucene/queries/src/java/org/apache/lucene/queries/mlt/MoreLikeThis.java Tue Jul 7 00:05:23 2015
@@ -753,9 +753,10 @@ public final class MoreLikeThis {
IOException {
HashMap<String,Int> termFreqMap = new HashMap();
for (String fieldName : fieldNames) {
-
for (String field : fields.keySet()) {
Collection<Object> fieldValues = fields.get(field);
+ if(fieldValues == null)
+ continue;
for(Object fieldValue:fieldValues) {
if (fieldValue != null) {
addTermFrequencies(new StringReader(String.valueOf(fieldValue)), termFreqMap,
Modified: lucene/dev/trunk/solr/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/CHANGES.txt?rev=1689531&r1=1689530&r2=1689531&view=diff
==============================================================================
--- lucene/dev/trunk/solr/CHANGES.txt (original)
+++ lucene/dev/trunk/solr/CHANGES.txt Tue Jul 7 00:05:23 2015
@@ -209,6 +209,9 @@ Bug Fixes
* SOLR-7741: Add missing fields to SolrIndexerConfig.toMap
(Mike Drob, Christine Poerschke via Ramkumar Aiyengar)
+* SOLR-7143: MoreLikeThis Query parser should handle multiple field names
+ (Jens Wille, Anshum Gupta)
+
Optimizations
----------------------
Modified: lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/mlt/CloudMLTQParser.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/mlt/CloudMLTQParser.java?rev=1689531&r1=1689530&r2=1689531&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/mlt/CloudMLTQParser.java (original)
+++ lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/mlt/CloudMLTQParser.java Tue Jul 7 00:05:23 2015
@@ -20,6 +20,7 @@ import org.apache.lucene.queries.mlt.Mor
import org.apache.lucene.search.Query;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrException;
+import org.apache.solr.common.StringUtils;
import org.apache.solr.common.params.ModifiableSolrParams;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.util.NamedList;
@@ -38,8 +39,11 @@ import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.Map;
+import java.util.regex.Pattern;
public class CloudMLTQParser extends QParser {
+ // Pattern is thread safe -- TODO? share this with general 'fl' param
+ private static final Pattern splitList = Pattern.compile(",| ");
public CloudMLTQParser(String qstr, SolrParams localParams,
SolrParams params, SolrQueryRequest req) {
@@ -86,14 +90,21 @@ public class CloudMLTQParser extends QPa
String[] qf = localParams.getParams("qf");
Map<String, Collection<Object>> filteredDocument = new HashMap();
+ ArrayList<String> fieldNames = new ArrayList();
+
if (qf != null) {
- mlt.setFieldNames(qf);
- for (String field : qf) {
- filteredDocument.put(field, doc.getFieldValues(field));
+ for (String fieldName : qf) {
+ if (!StringUtils.isEmpty(fieldName)) {
+ String[] strings = splitList.split(fieldName);
+ for (String string : strings) {
+ if (!StringUtils.isEmpty(string)) {
+ fieldNames.add(string);
+ }
+ }
+ }
}
} else {
Map<String, SchemaField> fields = req.getSchema().getFields();
- ArrayList<String> fieldNames = new ArrayList();
for (String field : doc.getFieldNames()) {
// Only use fields that are stored and have an explicit analyzer.
// This makes sense as the query uses tf/idf/.. for query construction.
@@ -101,10 +112,18 @@ public class CloudMLTQParser extends QPa
if(fields.get(field).stored()
&& fields.get(field).getType().isExplicitAnalyzer()) {
fieldNames.add(field);
- filteredDocument.put(field, doc.getFieldValues(field));
}
}
- mlt.setFieldNames(fieldNames.toArray(new String[fieldNames.size()]));
+ }
+
+ if( fieldNames.size() < 1 ) {
+ throw new SolrException( SolrException.ErrorCode.BAD_REQUEST,
+ "MoreLikeThis requires at least one similarity field: qf" );
+ }
+
+ mlt.setFieldNames(fieldNames.toArray(new String[fieldNames.size()]));
+ for (String field : fieldNames) {
+ filteredDocument.put(field, doc.getFieldValues(field));
}
try {
Modified: lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/mlt/SimpleMLTQParser.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/mlt/SimpleMLTQParser.java?rev=1689531&r1=1689530&r2=1689531&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/mlt/SimpleMLTQParser.java (original)
+++ lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/mlt/SimpleMLTQParser.java Tue Jul 7 00:05:23 2015
@@ -25,6 +25,7 @@ import org.apache.lucene.search.TopDocs;
import org.apache.lucene.util.BytesRefBuilder;
import org.apache.lucene.util.NumericUtils;
import org.apache.solr.common.SolrException;
+import org.apache.solr.common.StringUtils;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.schema.SchemaField;
@@ -35,8 +36,11 @@ import org.apache.solr.search.SolrIndexS
import java.io.IOException;
import java.util.ArrayList;
import java.util.Map;
+import java.util.regex.Pattern;
public class SimpleMLTQParser extends QParser {
+ // Pattern is thread safe -- TODO? share this with general 'fl' param
+ private static final Pattern splitList = Pattern.compile(",| ");
public SimpleMLTQParser(String qstr, SolrParams localParams,
SolrParams params, SolrQueryRequest req) {
@@ -85,18 +89,30 @@ public class SimpleMLTQParser extends QP
ArrayList<String> fields = new ArrayList();
if (qf != null) {
- mlt.setFieldNames(qf);
+ for (String fieldName : qf) {
+ if (!StringUtils.isEmpty(fieldName)) {
+ String[] strings = splitList.split(fieldName);
+ for (String string : strings) {
+ if (!StringUtils.isEmpty(string)) {
+ fields.add(string);
+ }
+ }
+ }
+ }
} else {
-
Map<String, SchemaField> fieldNames = req.getSearcher().getSchema().getFields();
for (String fieldName : fieldNames.keySet()) {
if (fieldNames.get(fieldName).indexed() && fieldNames.get(fieldName).stored())
if (fieldNames.get(fieldName).getType().getNumericType() == null)
fields.add(fieldName);
}
- mlt.setFieldNames(fields.toArray(new String[fields.size()]));
+ }
+ if( fields.size() < 1 ) {
+ throw new SolrException( SolrException.ErrorCode.BAD_REQUEST,
+ "MoreLikeThis requires at least one similarity field: qf" );
}
+ mlt.setFieldNames(fields.toArray(new String[fields.size()]));
mlt.setAnalyzer(req.getSchema().getIndexAnalyzer());
return mlt.like(scoreDocs[0].doc);
Modified: lucene/dev/trunk/solr/core/src/test/org/apache/solr/search/mlt/CloudMLTQParserTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/test/org/apache/solr/search/mlt/CloudMLTQParserTest.java?rev=1689531&r1=1689530&r2=1689531&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/test/org/apache/solr/search/mlt/CloudMLTQParserTest.java (original)
+++ lucene/dev/trunk/solr/core/src/test/org/apache/solr/search/mlt/CloudMLTQParserTest.java Tue Jul 7 00:05:23 2015
@@ -59,6 +59,7 @@ public class CloudMLTQParserTest extends
String id = "id";
delQ("*:*");
String FIELD1 = "lowerfilt" ;
+ String FIELD2 = "lowerfilt1" ;
indexDoc(sdoc(id, "1", FIELD1, "toyota"));
indexDoc(sdoc(id, "2", FIELD1, "chevrolet"));
@@ -87,6 +88,14 @@ public class CloudMLTQParserTest extends
indexDoc(sdoc(id, "26", FIELD1, "bmw usa 328i"));
indexDoc(sdoc(id, "27", FIELD1, "bmw usa 535i"));
indexDoc(sdoc(id, "28", FIELD1, "bmw 750Li"));
+ indexDoc(sdoc(id, "29", FIELD1, "bmw usa",
+ FIELD2, "red green blue"));
+ indexDoc(sdoc(id, "30", FIELD1, "The quote red fox jumped over the lazy brown dogs.",
+ FIELD2, "red green yellow"));
+ indexDoc(sdoc(id, "31", FIELD1, "The fat red fox jumped over the lazy brown dogs.",
+ FIELD2, "green blue yellow"));
+ indexDoc(sdoc(id, "32", FIELD1, "The slim red fox jumped over the lazy brown dogs.",
+ FIELD2, "yellow white black"));
commit();
@@ -100,7 +109,7 @@ public class CloudMLTQParserTest extends
params.set(CommonParams.Q, "{!mlt qf=lowerfilt}17");
QueryResponse queryResponse = cloudClient.query(params);
SolrDocumentList solrDocuments = queryResponse.getResults();
- int[] expectedIds = new int[]{17, 7, 13, 14, 15, 16, 20, 22, 24, 9};
+ int[] expectedIds = new int[]{17, 7, 13, 14, 15, 16, 20, 22, 24, 32};
int[] actualIds = new int[10];
int i = 0;
for (SolrDocument solrDocument : solrDocuments) {
@@ -113,7 +122,7 @@ public class CloudMLTQParserTest extends
params.set(CommonParams.DEBUG, "true");
queryResponse = queryServer(params);
solrDocuments = queryResponse.getResults();
- expectedIds = new int[]{3, 27, 26, 28};
+ expectedIds = new int[]{3, 29, 27, 26, 28};
actualIds = new int[solrDocuments.size()];
i = 0;
for (SolrDocument solrDocument : solrDocuments) {
@@ -140,6 +149,36 @@ public class CloudMLTQParserTest extends
}
params = new ModifiableSolrParams();
+ params.set(CommonParams.Q, "{!mlt qf=lowerfilt,lowerfilt1 mindf=0 mintf=1}26");
+ params.set(CommonParams.DEBUG, "true");
+ queryResponse = queryServer(params);
+ solrDocuments = queryResponse.getResults();
+ expectedIds = new int[]{26, 27, 3, 29, 28};
+ actualIds = new int[solrDocuments.size()];
+ i = 0;
+ for (SolrDocument solrDocument : solrDocuments) {
+ actualIds[i++] = Integer.valueOf(String.valueOf(solrDocument.getFieldValue("id")));
+ }
+
+ assertArrayEquals(expectedIds, actualIds);
+
+ expectedQueryString = "lowerfilt:bmw lowerfilt:usa lowerfilt:328i";
+
+ if(queryResponse.getDebugMap().get("parsedquery") instanceof String) {
+ actualParsedQueries = new ArrayList();
+ actualParsedQueries.add((String) queryResponse.getDebugMap().get("parsedquery"));
+ } else {
+ actualParsedQueries = (ArrayList<String>) queryResponse
+ .getDebugMap().get("parsedquery");
+ }
+
+ for (int counter = 0; counter < actualParsedQueries.size(); counter++) {
+ assertTrue("Parsed queries aren't equal",
+ compareParsedQueryStrings(expectedQueryString,
+ actualParsedQueries.get(counter)));
+ }
+
+ params = new ModifiableSolrParams();
// Test out a high value of df and make sure nothing matches.
params.set(CommonParams.Q, "{!mlt qf=lowerfilt mindf=20 mintf=1}3");
params.set(CommonParams.DEBUG, "true");
@@ -161,7 +200,7 @@ public class CloudMLTQParserTest extends
params.set(CommonParams.DEBUG, "true");
queryResponse = queryServer(params);
solrDocuments = queryResponse.getResults();
- assertEquals("Expected to match 4 documents with a minwl of 3 but found more", solrDocuments.size(), 4);
+ assertEquals("Expected to match 4 documents with a minwl of 3 but found more", 5, solrDocuments.size());
// Assert that {!mlt}id does not throw an exception i.e. implicitly, only fields that are stored + have explicit
// analyzer are used for MLT Query construction.
@@ -171,10 +210,12 @@ public class CloudMLTQParserTest extends
queryResponse = queryServer(params);
solrDocuments = queryResponse.getResults();
actualIds = new int[solrDocuments.size()];
- expectedIds = new int[]{13, 14, 15, 16, 20, 22, 24, 18, 19, 21};
+ expectedIds = new int[]{13, 14, 15, 16, 20, 22, 24, 32, 18, 19};
i = 0;
+ StringBuilder sb = new StringBuilder();
for (SolrDocument solrDocument : solrDocuments) {
actualIds[i++] = Integer.valueOf(String.valueOf(solrDocument.getFieldValue("id")));
+ sb.append(actualIds[i-1]).append(", ");
}
assertArrayEquals(expectedIds, actualIds);
}
Modified: lucene/dev/trunk/solr/core/src/test/org/apache/solr/search/mlt/SimpleMLTQParserTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/test/org/apache/solr/search/mlt/SimpleMLTQParserTest.java?rev=1689531&r1=1689530&r2=1689531&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/test/org/apache/solr/search/mlt/SimpleMLTQParserTest.java (original)
+++ lucene/dev/trunk/solr/core/src/test/org/apache/solr/search/mlt/SimpleMLTQParserTest.java Tue Jul 7 00:05:23 2015
@@ -17,9 +17,13 @@ package org.apache.solr.search.mlt;
* limitations under the License.
*/
+import java.util.ArrayList;
+
import org.apache.solr.SolrTestCaseJ4;
+import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.params.CommonParams;
import org.apache.solr.common.params.ModifiableSolrParams;
+import org.apache.solr.response.SolrQueryResponse;
import org.junit.BeforeClass;
import org.junit.Test;
@@ -34,38 +38,50 @@ public class SimpleMLTQParserTest extend
@Test
public void doTest() throws Exception {
String id = "id";
+ String FIELD1 = "lowerfilt" ;
+ String FIELD2 = "lowerfilt1" ;
delQ("*:*");
- assertU(adoc(id, "1", "lowerfilt", "toyota"));
- assertU(adoc(id, "2", "lowerfilt", "chevrolet"));
- assertU(adoc(id, "3", "lowerfilt", "suzuki"));
- assertU(adoc(id, "4", "lowerfilt", "ford"));
- assertU(adoc(id, "5", "lowerfilt", "ferrari"));
- assertU(adoc(id, "6", "lowerfilt", "jaguar"));
- assertU(adoc(id, "7", "lowerfilt", "mclaren moon or the moon and moon moon shine " +
+ assertU(adoc(id, "1", FIELD1, "toyota"));
+ assertU(adoc(id, "2", FIELD1, "chevrolet"));
+ assertU(adoc(id, "3", FIELD1, "suzuki"));
+ assertU(adoc(id, "4", FIELD1, "ford"));
+ assertU(adoc(id, "5", FIELD1, "ferrari"));
+ assertU(adoc(id, "6", FIELD1, "jaguar"));
+ assertU(adoc(id, "7", FIELD1, "mclaren moon or the moon and moon moon shine " +
"and the moon but moon was good foxes too"));
- assertU(adoc(id, "8", "lowerfilt", "sonata"));
- assertU(adoc(id, "9", "lowerfilt", "The quick red fox jumped over the lazy big " +
+ assertU(adoc(id, "8", FIELD1, "sonata"));
+ assertU(adoc(id, "9", FIELD1, "The quick red fox jumped over the lazy big " +
"and large brown dogs."));
- assertU(adoc(id, "10", "lowerfilt", "blue"));
- assertU(adoc(id, "12", "lowerfilt", "glue"));
- assertU(adoc(id, "13", "lowerfilt", "The quote red fox jumped over the lazy brown dogs."));
- assertU(adoc(id, "14", "lowerfilt", "The quote red fox jumped over the lazy brown dogs."));
- assertU(adoc(id, "15", "lowerfilt", "The fat red fox jumped over the lazy brown dogs."));
- assertU(adoc(id, "16", "lowerfilt", "The slim red fox jumped over the lazy brown dogs."));
- assertU(adoc(id, "17", "lowerfilt", "The quote red fox jumped moon over the lazy " +
+ assertU(adoc(id, "10", FIELD1, "blue"));
+ assertU(adoc(id, "12", FIELD1, "glue"));
+ assertU(adoc(id, "13", FIELD1, "The quote red fox jumped over the lazy brown dogs."));
+ assertU(adoc(id, "14", FIELD1, "The quote red fox jumped over the lazy brown dogs."));
+ assertU(adoc(id, "15", FIELD1, "The fat red fox jumped over the lazy brown dogs."));
+ assertU(adoc(id, "16", FIELD1, "The slim red fox jumped over the lazy brown dogs."));
+ assertU(adoc(id, "17", FIELD1, "The quote red fox jumped moon over the lazy " +
"brown dogs moon. Of course moon. Foxes and moon come back to the foxes and moon"));
- assertU(adoc(id, "18", "lowerfilt", "The quote red fox jumped over the lazy brown dogs."));
- assertU(adoc(id, "19", "lowerfilt", "The hose red fox jumped over the lazy brown dogs."));
- assertU(adoc(id, "20", "lowerfilt", "The quote red fox jumped over the lazy brown dogs."));
- assertU(adoc(id, "21", "lowerfilt", "The court red fox jumped over the lazy brown dogs."));
- assertU(adoc(id, "22", "lowerfilt", "The quote red fox jumped over the lazy brown dogs."));
- assertU(adoc(id, "23", "lowerfilt", "The quote red fox jumped over the lazy brown dogs."));
- assertU(adoc(id, "24", "lowerfilt", "The file red fox jumped over the lazy brown dogs."));
- assertU(adoc(id, "25", "lowerfilt", "rod fix"));
+ assertU(adoc(id, "18", FIELD1, "The quote red fox jumped over the lazy brown dogs."));
+ assertU(adoc(id, "19", FIELD1, "The hose red fox jumped over the lazy brown dogs."));
+ assertU(adoc(id, "20", FIELD1, "The quote red fox jumped over the lazy brown dogs."));
+ assertU(adoc(id, "21", FIELD1, "The court red fox jumped over the lazy brown dogs."));
+ assertU(adoc(id, "22", FIELD1, "The quote red fox jumped over the lazy brown dogs."));
+ assertU(adoc(id, "23", FIELD1, "The quote red fox jumped over the lazy brown dogs."));
+ assertU(adoc(id, "24", FIELD1, "The file red fox jumped over the lazy brown dogs."));
+ assertU(adoc(id, "25", FIELD1, "rod fix"));
+ assertU(adoc(id, "26", FIELD1, "bmw usa 328i"));
+ assertU(adoc(id, "27", FIELD1, "bmw usa 535i"));
+ assertU(adoc(id, "28", FIELD1, "bmw 750Li"));
+ assertU(adoc(id, "29", FIELD1, "bmw usa",
+ FIELD2, "red green blue"));
+ assertU(adoc(id, "30", FIELD1, "The quote red fox jumped over the lazy brown dogs.",
+ FIELD2, "red green yellow"));
+ assertU(adoc(id, "31", FIELD1, "The fat red fox jumped over the lazy brown dogs.",
+ FIELD2, "green blue yellow"));
+ assertU(adoc(id, "32", FIELD1, "The slim red fox jumped over the lazy brown dogs.",
+ FIELD2, "yellow white black"));
assertU(commit());
-
ModifiableSolrParams params = new ModifiableSolrParams();
params.set(CommonParams.Q, "{!mlt qf=lowerfilt}17");
assertQ(req(params),
@@ -79,7 +95,38 @@ public class SimpleMLTQParserTest extend
"//result/doc[8]/int[@name='id'][.='21']",
"//result/doc[9]/int[@name='id'][.='22']",
"//result/doc[10]/int[@name='id'][.='23']"
- );
+ );
+
+ params = new ModifiableSolrParams();
+ params.set(CommonParams.Q, "{!mlt qf=lowerfilt mindf=0 mintf=1}26");
+ params.set(CommonParams.DEBUG, "true");
+ assertQ(req(params),
+ "//result/doc[1]/int[@name='id'][.='26']",
+ "//result/doc[2]/int[@name='id'][.='29']",
+ "//result/doc[3]/int[@name='id'][.='27']",
+ "//result/doc[4]/int[@name='id'][.='28']"
+ );
+
+ params = new ModifiableSolrParams();
+ params.set(CommonParams.Q, "{!mlt qf=lowerfilt mindf=10 mintf=1}26");
+ params.set(CommonParams.DEBUG, "true");
+ assertQ(req(params),
+ "//result[@numFound='0']"
+ );
+
+ params = new ModifiableSolrParams();
+ params.set(CommonParams.Q, "{!mlt qf=lowerfilt minwl=3 mintf=1 mindf=1}26");
+ params.set(CommonParams.DEBUG, "true");
+ assertQ(req(params),
+ "//result[@numFound='4']"
+ );
+
+ params = new ModifiableSolrParams();
+ params.set(CommonParams.Q, "{!mlt qf=lowerfilt minwl=4 mintf=1 mindf=1}26");
+ params.set(CommonParams.DEBUG, "true");
+ assertQ(req(params),
+ "//result[@numFound='1']"
+ );
}
}