You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by si...@apache.org on 2014/06/03 09:51:56 UTC
svn commit: r1599442 - in /lucene/dev/trunk: lucene/
lucene/classification/src/java/org/apache/lucene/classification/
lucene/queries/src/java/org/apache/lucene/queries/mlt/
lucene/queries/src/test/org/apache/lucene/queries/mlt/
solr/core/src/java/org/a...
Author: simonw
Date: Tue Jun 3 07:51:55 2014
New Revision: 1599442
URL: http://svn.apache.org/r1599442
Log:
LUCENE-5725: MoreLikeThis#like now accepts multiple values per field
Modified:
lucene/dev/trunk/lucene/CHANGES.txt
lucene/dev/trunk/lucene/classification/src/java/org/apache/lucene/classification/KNearestNeighborClassifier.java
lucene/dev/trunk/lucene/queries/src/java/org/apache/lucene/queries/mlt/MoreLikeThis.java
lucene/dev/trunk/lucene/queries/src/java/org/apache/lucene/queries/mlt/MoreLikeThisQuery.java
lucene/dev/trunk/lucene/queries/src/test/org/apache/lucene/queries/mlt/TestMoreLikeThis.java
lucene/dev/trunk/solr/core/src/java/org/apache/solr/handler/MoreLikeThisHandler.java
Modified: lucene/dev/trunk/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/CHANGES.txt?rev=1599442&r1=1599441&r2=1599442&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/CHANGES.txt (original)
+++ lucene/dev/trunk/lucene/CHANGES.txt Tue Jun 3 07:51:55 2014
@@ -138,6 +138,10 @@ Changes in Backwards Compatibility Polic
API Changes
+* LUCENE-5725: MoreLikeThis#like now accepts multiple values per field.
+ The pre-existing method has been deprecated in favor of a variable-arguments
+ method for the like text. (Alex Ksikes via Simon Willnauer)
+
* LUCENE-5711: MergePolicy accepts an IndexWriter instance
on each method rather than holding state against a single
IndexWriter instance. (Simon Willnauer)
Modified: lucene/dev/trunk/lucene/classification/src/java/org/apache/lucene/classification/KNearestNeighborClassifier.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/classification/src/java/org/apache/lucene/classification/KNearestNeighborClassifier.java?rev=1599442&r1=1599441&r2=1599442&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/classification/src/java/org/apache/lucene/classification/KNearestNeighborClassifier.java (original)
+++ lucene/dev/trunk/lucene/classification/src/java/org/apache/lucene/classification/KNearestNeighborClassifier.java Tue Jun 3 07:51:55 2014
@@ -84,7 +84,7 @@ public class KNearestNeighborClassifier
}
BooleanQuery mltQuery = new BooleanQuery();
for (String textFieldName : textFieldNames) {
- mltQuery.add(new BooleanClause(mlt.like(new StringReader(text), textFieldName), BooleanClause.Occur.SHOULD));
+ mltQuery.add(new BooleanClause(mlt.like(textFieldName, new StringReader(text)), BooleanClause.Occur.SHOULD));
}
Query classFieldQuery = new WildcardQuery(new Term(classFieldName, "*"));
mltQuery.add(new BooleanClause(classFieldQuery, BooleanClause.Occur.MUST));
Modified: lucene/dev/trunk/lucene/queries/src/java/org/apache/lucene/queries/mlt/MoreLikeThis.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/queries/src/java/org/apache/lucene/queries/mlt/MoreLikeThis.java?rev=1599442&r1=1599441&r2=1599442&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/queries/src/java/org/apache/lucene/queries/mlt/MoreLikeThis.java (original)
+++ lucene/dev/trunk/lucene/queries/src/java/org/apache/lucene/queries/mlt/MoreLikeThis.java Tue Jun 3 07:51:55 2014
@@ -15,23 +15,21 @@
*/
package org.apache.lucene.queries.mlt;
-import java.io.*;
-import java.util.*;
-
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
-import org.apache.lucene.document.Document;
import org.apache.lucene.index.Fields;
import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.index.StorableField;
import org.apache.lucene.index.StoredDocument;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
-import org.apache.lucene.search.*;
+import org.apache.lucene.search.BooleanClause;
+import org.apache.lucene.search.BooleanQuery;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.similarities.DefaultSimilarity;
import org.apache.lucene.search.similarities.TFIDFSimilarity;
import org.apache.lucene.util.BytesRef;
@@ -39,6 +37,15 @@ import org.apache.lucene.util.CharsRef;
import org.apache.lucene.util.PriorityQueue;
import org.apache.lucene.util.UnicodeUtil;
+import java.io.IOException;
+import java.io.Reader;
+import java.io.StringReader;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.Set;
+
/**
* Generate "more like this" similarity queries.
@@ -581,12 +588,17 @@ public final class MoreLikeThis {
}
/**
- * Return a query that will return docs like the passed Reader.
+ * Return a query that will return docs like the passed Readers.
+ * This was added in order to treat multi-value fields.
*
- * @return a query that will return docs like the passed Reader.
+ * @return a query that will return docs like the passed Readers.
*/
- public Query like(Reader r, String fieldName) throws IOException {
- return createQuery(retrieveTerms(r, fieldName));
+ public Query like(String fieldName, Reader... readers) throws IOException {
+ Map<String, Int> words = new HashMap<>();
+ for (Reader r : readers) {
+ addTermFrequencies(r, words, fieldName);
+ }
+ return createQuery(createQueue(words));
}
/**
Modified: lucene/dev/trunk/lucene/queries/src/java/org/apache/lucene/queries/mlt/MoreLikeThisQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/queries/src/java/org/apache/lucene/queries/mlt/MoreLikeThisQuery.java?rev=1599442&r1=1599441&r2=1599442&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/queries/src/java/org/apache/lucene/queries/mlt/MoreLikeThisQuery.java (original)
+++ lucene/dev/trunk/lucene/queries/src/java/org/apache/lucene/queries/mlt/MoreLikeThisQuery.java Tue Jun 3 07:51:55 2014
@@ -26,7 +26,6 @@ import org.apache.lucene.search.BooleanC
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.Query;
-import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.StringReader;
import java.util.Arrays;
@@ -71,7 +70,7 @@ public class MoreLikeThisQuery extends Q
}
mlt.setMaxQueryTerms(maxQueryTerms);
mlt.setStopWords(stopWords);
- BooleanQuery bq = (BooleanQuery) mlt.like(new StringReader(likeText), fieldName);
+ BooleanQuery bq = (BooleanQuery) mlt.like(fieldName, new StringReader(likeText));
BooleanClause[] clauses = bq.getClauses();
//make at least half the terms match
bq.setMinimumNumberShouldMatch((int) (clauses.length * percentTermsToMatch));
Modified: lucene/dev/trunk/lucene/queries/src/test/org/apache/lucene/queries/mlt/TestMoreLikeThis.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/queries/src/test/org/apache/lucene/queries/mlt/TestMoreLikeThis.java?rev=1599442&r1=1599441&r2=1599442&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/queries/src/test/org/apache/lucene/queries/mlt/TestMoreLikeThis.java (original)
+++ lucene/dev/trunk/lucene/queries/src/test/org/apache/lucene/queries/mlt/TestMoreLikeThis.java Tue Jun 3 07:51:55 2014
@@ -19,17 +19,18 @@ package org.apache.lucene.queries.mlt;
import java.io.IOException;
import java.io.StringReader;
+import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
-import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.RandomIndexWriter;
+import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.IndexSearcher;
@@ -53,6 +54,8 @@ public class TestMoreLikeThis extends Lu
// Add series of docs with specific information for MoreLikeThis
addDoc(writer, "lucene");
addDoc(writer, "lucene release");
+ addDoc(writer, "apache");
+ addDoc(writer, "apache lucene");
reader = writer.getReader();
writer.shutdown();
@@ -88,8 +91,8 @@ public class TestMoreLikeThis extends Lu
float boostFactor = 5;
mlt.setBoostFactor(boostFactor);
- BooleanQuery query = (BooleanQuery) mlt.like(new StringReader(
- "lucene release"), "text");
+ BooleanQuery query = (BooleanQuery) mlt.like("text", new StringReader(
+ "lucene release"));
List<BooleanClause> clauses = query.clauses();
assertEquals("Expected " + originalValues.size() + " clauses.",
@@ -116,8 +119,8 @@ public class TestMoreLikeThis extends Lu
mlt.setMinWordLen(1);
mlt.setFieldNames(new String[] {"text"});
mlt.setBoost(true);
- BooleanQuery query = (BooleanQuery) mlt.like(new StringReader(
- "lucene release"), "text");
+ BooleanQuery query = (BooleanQuery) mlt.like("text", new StringReader(
+ "lucene release"));
List<BooleanClause> clauses = query.clauses();
for (BooleanClause clause : clauses) {
@@ -135,9 +138,29 @@ public class TestMoreLikeThis extends Lu
mlt.setMinTermFreq(1);
mlt.setMinWordLen(1);
mlt.setFieldNames(new String[] {"text", "foobar"});
- mlt.like(new StringReader("this is a test"), "foobar");
+ mlt.like("foobar", new StringReader("this is a test"));
}
-
+
+ // LUCENE-5725
+ public void testMultiValues() throws Exception {
+ MoreLikeThis mlt = new MoreLikeThis(reader);
+ mlt.setAnalyzer(new MockAnalyzer(random(), MockTokenizer.KEYWORD, false));
+ mlt.setMinDocFreq(1);
+ mlt.setMinTermFreq(1);
+ mlt.setMinWordLen(1);
+ mlt.setFieldNames(new String[] {"text"});
+
+ BooleanQuery query = (BooleanQuery) mlt.like("text",
+ new StringReader("lucene"), new StringReader("lucene release"),
+ new StringReader("apache"), new StringReader("apache lucene"));
+ List<BooleanClause> clauses = query.clauses();
+ assertEquals("Expected 2 clauses only!", 2, clauses.size());
+ for (BooleanClause clause : clauses) {
+ Term term = ((TermQuery) clause.getQuery()).getTerm();
+ assertTrue(Arrays.asList(new Term("text", "lucene"), new Term("text", "apache")).contains(term));
+ }
+ }
+
// just basic equals/hashcode etc
public void testMoreLikeThisQuery() throws Exception {
Query query = new MoreLikeThisQuery("this is a test", new String[] { "text" }, new MockAnalyzer(random()), "text");
Modified: lucene/dev/trunk/solr/core/src/java/org/apache/solr/handler/MoreLikeThisHandler.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/java/org/apache/solr/handler/MoreLikeThisHandler.java?rev=1599442&r1=1599441&r2=1599442&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/java/org/apache/solr/handler/MoreLikeThisHandler.java (original)
+++ lucene/dev/trunk/solr/core/src/java/org/apache/solr/handler/MoreLikeThisHandler.java Tue Jun 3 07:51:55 2014
@@ -370,7 +370,7 @@ public class MoreLikeThisHandler extends
public DocListAndSet getMoreLikeThis( Reader reader, int start, int rows, List<Query> filters, List<InterestingTerm> terms, int flags ) throws IOException
{
// analyzing with the first field: previous (stupid) behavior
- rawMLTQuery = mlt.like(reader, mlt.getFieldNames()[0]);
+ rawMLTQuery = mlt.like(mlt.getFieldNames()[0], reader);
boostedMLTQuery = getBoostedQuery( rawMLTQuery );
if( terms != null ) {
fillInterestingTermsFromMLTQuery( boostedMLTQuery, terms );