You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by an...@apache.org on 2017/01/09 21:26:23 UTC
lucene-solr:master: SOLR-9644: Fixed SimpleMLTQParser and
CloudMLTQParser to handle boosts properly and CloudMLTQParser to only extract
actual values from IndexableField type fields to the filtered document.
Repository: lucene-solr
Updated Branches:
refs/heads/master b8383db06 -> 2b4e3dd94
SOLR-9644: Fixed SimpleMLTQParser and CloudMLTQParser to handle boosts properly and CloudMLTQParser to only extract actual values from IndexableField type fields to the filtered document.
Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/2b4e3dd9
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/2b4e3dd9
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/2b4e3dd9
Branch: refs/heads/master
Commit: 2b4e3dd941a7a88274f2a86f18ea57a9d95e4364
Parents: b8383db
Author: anshum <an...@apache.org>
Authored: Mon Jan 9 13:05:21 2017 -0800
Committer: anshum <an...@apache.org>
Committed: Mon Jan 9 13:06:24 2017 -0800
----------------------------------------------------------------------
solr/CHANGES.txt | 4 ++
.../apache/solr/search/mlt/CloudMLTQParser.java | 49 ++++++++++++--------
.../solr/search/mlt/SimpleMLTQParser.java | 30 ++++++------
.../solr/search/mlt/CloudMLTQParserTest.java | 23 ++++++++-
.../solr/search/mlt/SimpleMLTQParserTest.java | 33 +++++++++++--
5 files changed, 102 insertions(+), 37 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/2b4e3dd9/solr/CHANGES.txt
----------------------------------------------------------------------
diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index c79b3c6..2b79f04 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -345,6 +345,10 @@ Bug Fixes
* SOLR-9883: Example schemaless solr config files can lead to invalid tlog replays: when updates are buffered,
update processors ordered before DistributedUpdateProcessor, e.g. field normalization, are never run. (Steve Rowe)
+* SOLR-9644: SimpleMLTQParser and CloudMLTQParser did not handle field boosts properly
+ and CloudMLTQParser included extra strings from the field definitions in the query.
+ (Ere Maijala via Anshum Gupta)
+
Other Changes
----------------------
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/2b4e3dd9/solr/core/src/java/org/apache/solr/search/mlt/CloudMLTQParser.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/search/mlt/CloudMLTQParser.java b/solr/core/src/java/org/apache/solr/search/mlt/CloudMLTQParser.java
index 0f46725..945047b 100644
--- a/solr/core/src/java/org/apache/solr/search/mlt/CloudMLTQParser.java
+++ b/solr/core/src/java/org/apache/solr/search/mlt/CloudMLTQParser.java
@@ -22,6 +22,7 @@ import java.util.HashMap;
import java.util.Map;
import java.util.regex.Pattern;
+import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.Term;
import org.apache.lucene.legacy.LegacyNumericUtils;
import org.apache.lucene.queries.mlt.MoreLikeThis;
@@ -64,73 +65,83 @@ public class CloudMLTQParser extends QParser {
SolrException.ErrorCode.BAD_REQUEST, "Error completing MLT request. Could not fetch " +
"document with id [" + id + "]");
}
-
+
String[] qf = localParams.getParams("qf");
Map<String,Float> boostFields = new HashMap<>();
MoreLikeThis mlt = new MoreLikeThis(req.getSearcher().getIndexReader());
-
- mlt.setMinTermFreq(localParams.getInt("mintf", MoreLikeThis.DEFAULT_MIN_TERM_FREQ));
+ mlt.setMinTermFreq(localParams.getInt("mintf", MoreLikeThis.DEFAULT_MIN_TERM_FREQ));
mlt.setMinDocFreq(localParams.getInt("mindf", 0));
-
mlt.setMinWordLen(localParams.getInt("minwl", MoreLikeThis.DEFAULT_MIN_WORD_LENGTH));
-
mlt.setMaxWordLen(localParams.getInt("maxwl", MoreLikeThis.DEFAULT_MAX_WORD_LENGTH));
-
mlt.setMaxQueryTerms(localParams.getInt("maxqt", MoreLikeThis.DEFAULT_MAX_QUERY_TERMS));
-
mlt.setMaxNumTokensParsed(localParams.getInt("maxntp", MoreLikeThis.DEFAULT_MAX_NUM_TOKENS_PARSED));
-
mlt.setMaxDocFreq(localParams.getInt("maxdf", MoreLikeThis.DEFAULT_MAX_DOC_FREQ));
- if(localParams.get("boost") != null) {
- mlt.setBoost(localParams.getBool("boost"));
- boostFields = SolrPluginUtils.parseFieldBoosts(qf);
- }
+ Boolean boost = localParams.getBool("boost", MoreLikeThis.DEFAULT_BOOST);
+ mlt.setBoost(boost);
mlt.setAnalyzer(req.getSchema().getIndexAnalyzer());
Map<String, Collection<Object>> filteredDocument = new HashMap<>();
- ArrayList<String> fieldNames = new ArrayList<>();
+ String[] fieldNames;
if (qf != null) {
+ ArrayList<String> fields = new ArrayList();
for (String fieldName : qf) {
if (!StringUtils.isEmpty(fieldName)) {
String[] strings = splitList.split(fieldName);
for (String string : strings) {
if (!StringUtils.isEmpty(string)) {
- fieldNames.add(string);
+ fields.add(string);
}
}
}
}
+ // Parse field names and boosts from the fields
+ boostFields = SolrPluginUtils.parseFieldBoosts(fields.toArray(new String[0]));
+ fieldNames = boostFields.keySet().toArray(new String[0]);
} else {
+ ArrayList<String> fields = new ArrayList();
for (String field : doc.getFieldNames()) {
// Only use fields that are stored and have an explicit analyzer.
// This makes sense as the query uses tf/idf/.. for query construction.
// We might want to relook and change this in the future though.
SchemaField f = req.getSchema().getFieldOrNull(field);
if (f != null && f.stored() && f.getType().isExplicitAnalyzer()) {
- fieldNames.add(field);
+ fields.add(field);
}
}
+ fieldNames = fields.toArray(new String[0]);
}
- if( fieldNames.size() < 1 ) {
+ if (fieldNames.length < 1) {
throw new SolrException( SolrException.ErrorCode.BAD_REQUEST,
"MoreLikeThis requires at least one similarity field: qf" );
}
- mlt.setFieldNames(fieldNames.toArray(new String[fieldNames.size()]));
+ mlt.setFieldNames(fieldNames);
for (String field : fieldNames) {
- filteredDocument.put(field, doc.getFieldValues(field));
+ Collection<Object> fieldValues = doc.getFieldValues(field);
+ if (fieldValues != null) {
+ Collection<Object> values = new ArrayList();
+ for (Object val : fieldValues) {
+ if (val instanceof IndexableField) {
+ values.add(((IndexableField)val).stringValue());
+ }
+ else {
+ values.add(val);
+ }
+ }
+ filteredDocument.put(field, values);
+ }
}
try {
Query rawMLTQuery = mlt.like(filteredDocument);
BooleanQuery boostedMLTQuery = (BooleanQuery) rawMLTQuery;
- if (boostFields.size() > 0) {
+ if (boost && boostFields.size() > 0) {
BooleanQuery.Builder newQ = new BooleanQuery.Builder();
newQ.setMinimumNumberShouldMatch(boostedMLTQuery.getMinimumNumberShouldMatch());
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/2b4e3dd9/solr/core/src/java/org/apache/solr/search/mlt/SimpleMLTQParser.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/search/mlt/SimpleMLTQParser.java b/solr/core/src/java/org/apache/solr/search/mlt/SimpleMLTQParser.java
index 50803df..de6eb58 100644
--- a/solr/core/src/java/org/apache/solr/search/mlt/SimpleMLTQParser.java
+++ b/solr/core/src/java/org/apache/solr/search/mlt/SimpleMLTQParser.java
@@ -76,16 +76,13 @@ public class SimpleMLTQParser extends QParser {
mlt.setMaxQueryTerms(localParams.getInt("maxqt", MoreLikeThis.DEFAULT_MAX_QUERY_TERMS));
mlt.setMaxNumTokensParsed(localParams.getInt("maxntp", MoreLikeThis.DEFAULT_MAX_NUM_TOKENS_PARSED));
mlt.setMaxDocFreq(localParams.getInt("maxdf", MoreLikeThis.DEFAULT_MAX_DOC_FREQ));
+ Boolean boost = localParams.getBool("boost", false);
+ mlt.setBoost(boost);
- // what happens if value is explicitly set to false?
- if(localParams.get("boost") != null) {
- mlt.setBoost(localParams.getBool("boost", false));
- boostFields = SolrPluginUtils.parseFieldBoosts(qf);
- }
+ String[] fieldNames;
- ArrayList<String> fields = new ArrayList<>();
-
if (qf != null) {
+ ArrayList<String> fields = new ArrayList<>();
for (String fieldName : qf) {
if (!StringUtils.isEmpty(fieldName)) {
String[] strings = splitList.split(fieldName);
@@ -96,26 +93,31 @@ public class SimpleMLTQParser extends QParser {
}
}
}
+ // Parse field names and boosts from the fields
+ boostFields = SolrPluginUtils.parseFieldBoosts(fields.toArray(new String[0]));
+ fieldNames = boostFields.keySet().toArray(new String[0]);
} else {
- Map<String, SchemaField> fieldNames = req.getSearcher().getSchema().getFields();
- for (String fieldName : fieldNames.keySet()) {
- if (fieldNames.get(fieldName).indexed() && fieldNames.get(fieldName).stored())
- if (fieldNames.get(fieldName).getType().getNumericType() == null)
+ Map<String, SchemaField> fieldDefinitions = req.getSearcher().getSchema().getFields();
+ ArrayList<String> fields = new ArrayList();
+ for (String fieldName : fieldDefinitions.keySet()) {
+ if (fieldDefinitions.get(fieldName).indexed() && fieldDefinitions.get(fieldName).stored())
+ if (fieldDefinitions.get(fieldName).getType().getNumericType() == null)
fields.add(fieldName);
}
+ fieldNames = fields.toArray(new String[0]);
}
- if( fields.size() < 1 ) {
+ if (fieldNames.length < 1) {
throw new SolrException( SolrException.ErrorCode.BAD_REQUEST,
"MoreLikeThis requires at least one similarity field: qf" );
}
- mlt.setFieldNames(fields.toArray(new String[fields.size()]));
+ mlt.setFieldNames(fieldNames);
mlt.setAnalyzer(req.getSchema().getIndexAnalyzer());
Query rawMLTQuery = mlt.like(scoreDocs[0].doc);
BooleanQuery boostedMLTQuery = (BooleanQuery) rawMLTQuery;
- if (boostFields.size() > 0) {
+ if (boost && boostFields.size() > 0) {
BooleanQuery.Builder newQ = new BooleanQuery.Builder();
newQ.setMinimumNumberShouldMatch(boostedMLTQuery.getMinimumNumberShouldMatch());
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/2b4e3dd9/solr/core/src/test/org/apache/solr/search/mlt/CloudMLTQParserTest.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/search/mlt/CloudMLTQParserTest.java b/solr/core/src/test/org/apache/solr/search/mlt/CloudMLTQParserTest.java
index ffcde2f..e3a8d7b 100644
--- a/solr/core/src/test/org/apache/solr/search/mlt/CloudMLTQParserTest.java
+++ b/solr/core/src/test/org/apache/solr/search/mlt/CloudMLTQParserTest.java
@@ -121,6 +121,27 @@ public class CloudMLTQParserTest extends SolrCloudTestCase {
}
assertArrayEquals(expectedIds, actualIds);
+ queryResponse = cluster.getSolrClient().query(COLLECTION, new SolrQuery("{!mlt qf=lowerfilt_u^10,lowerfilt1_u^1000 boost=false mintf=0 mindf=0}30"));
+ solrDocuments = queryResponse.getResults();
+ expectedIds = new int[]{31, 18, 23, 13, 14, 20, 22, 32, 19, 21};
+ actualIds = new int[solrDocuments.size()];
+ i = 0;
+ for (SolrDocument solrDocument : solrDocuments) {
+ actualIds[i++] = Integer.valueOf(String.valueOf(solrDocument.getFieldValue("id")));
+ }
+ System.out.println("DEBUG ACTUAL IDS 1: " + Arrays.toString(actualIds));
+ assertArrayEquals(expectedIds, actualIds);
+
+ queryResponse = cluster.getSolrClient().query(COLLECTION, new SolrQuery("{!mlt qf=lowerfilt_u^10,lowerfilt1_u^1000 boost=true mintf=0 mindf=0}30"));
+ solrDocuments = queryResponse.getResults();
+ expectedIds = new int[]{29, 31, 32, 18, 23, 13, 14, 20, 22, 19};
+ actualIds = new int[solrDocuments.size()];
+ i = 0;
+ for (SolrDocument solrDocument : solrDocuments) {
+ actualIds[i++] = Integer.valueOf(String.valueOf(solrDocument.getFieldValue("id")));
+ }
+ System.out.println("DEBUG ACTUAL IDS 2: " + Arrays.toString(actualIds));
+ assertArrayEquals(expectedIds, actualIds);
}
@Test
@@ -220,7 +241,7 @@ public class CloudMLTQParserTest extends SolrCloudTestCase {
}
assertArrayEquals(expectedIds, actualIds);
}
-
+
public void testInvalidSourceDocument() throws IOException {
SolrException e = expectThrows(SolrException.class, () -> {
cluster.getSolrClient().query(COLLECTION, new SolrQuery("{!mlt qf=lowerfilt_u}999999"));
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/2b4e3dd9/solr/core/src/test/org/apache/solr/search/mlt/SimpleMLTQParserTest.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/search/mlt/SimpleMLTQParserTest.java b/solr/core/src/test/org/apache/solr/search/mlt/SimpleMLTQParserTest.java
index 6f3570f..026c594 100644
--- a/solr/core/src/test/org/apache/solr/search/mlt/SimpleMLTQParserTest.java
+++ b/solr/core/src/test/org/apache/solr/search/mlt/SimpleMLTQParserTest.java
@@ -108,8 +108,37 @@ public class SimpleMLTQParserTest extends SolrTestCaseJ4 {
);
params = new ModifiableSolrParams();
+ params.set(CommonParams.Q, "{!mlt qf=lowerfilt,lowerfilt1^1000 boost=false mintf=0 mindf=0}30");
+ assertQ(req(params),
+ "//result/doc[1]/int[@name='id'][.='31']",
+ "//result/doc[2]/int[@name='id'][.='13']",
+ "//result/doc[3]/int[@name='id'][.='14']",
+ "//result/doc[4]/int[@name='id'][.='18']",
+ "//result/doc[5]/int[@name='id'][.='20']",
+ "//result/doc[6]/int[@name='id'][.='22']",
+ "//result/doc[7]/int[@name='id'][.='23']",
+ "//result/doc[8]/int[@name='id'][.='32']",
+ "//result/doc[9]/int[@name='id'][.='15']",
+ "//result/doc[10]/int[@name='id'][.='16']"
+ );
+
+ params = new ModifiableSolrParams();
+ params.set(CommonParams.Q, "{!mlt qf=lowerfilt,lowerfilt1^1000 boost=true mintf=0 mindf=0}30");
+ assertQ(req(params),
+ "//result/doc[1]/int[@name='id'][.='29']",
+ "//result/doc[2]/int[@name='id'][.='31']",
+ "//result/doc[3]/int[@name='id'][.='32']",
+ "//result/doc[4]/int[@name='id'][.='13']",
+ "//result/doc[5]/int[@name='id'][.='14']",
+ "//result/doc[6]/int[@name='id'][.='18']",
+ "//result/doc[7]/int[@name='id'][.='20']",
+ "//result/doc[8]/int[@name='id'][.='22']",
+ "//result/doc[9]/int[@name='id'][.='23']",
+ "//result/doc[10]/int[@name='id'][.='15']"
+ );
+
+ params = new ModifiableSolrParams();
params.set(CommonParams.Q, "{!mlt qf=lowerfilt mindf=0 mintf=1}26");
- params.set(CommonParams.DEBUG, "true");
assertQ(req(params),
"//result/doc[1]/int[@name='id'][.='29']",
"//result/doc[2]/int[@name='id'][.='27']",
@@ -118,14 +147,12 @@ public class SimpleMLTQParserTest extends SolrTestCaseJ4 {
params = new ModifiableSolrParams();
params.set(CommonParams.Q, "{!mlt qf=lowerfilt mindf=10 mintf=1}26");
- params.set(CommonParams.DEBUG, "true");
assertQ(req(params),
"//result[@numFound='0']"
);
params = new ModifiableSolrParams();
params.set(CommonParams.Q, "{!mlt qf=lowerfilt minwl=3 mintf=1 mindf=1}26");
- params.set(CommonParams.DEBUG, "true");
assertQ(req(params),
"//result[@numFound='3']"
);