You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by si...@apache.org on 2014/06/03 09:51:56 UTC

svn commit: r1599442 - in /lucene/dev/trunk: lucene/ lucene/classification/src/java/org/apache/lucene/classification/ lucene/queries/src/java/org/apache/lucene/queries/mlt/ lucene/queries/src/test/org/apache/lucene/queries/mlt/ solr/core/src/java/org/a...

Author: simonw
Date: Tue Jun  3 07:51:55 2014
New Revision: 1599442

URL: http://svn.apache.org/r1599442
Log:
LUCENE-5725: MoreLikeThis#like now accepts multiple values per field

Modified:
    lucene/dev/trunk/lucene/CHANGES.txt
    lucene/dev/trunk/lucene/classification/src/java/org/apache/lucene/classification/KNearestNeighborClassifier.java
    lucene/dev/trunk/lucene/queries/src/java/org/apache/lucene/queries/mlt/MoreLikeThis.java
    lucene/dev/trunk/lucene/queries/src/java/org/apache/lucene/queries/mlt/MoreLikeThisQuery.java
    lucene/dev/trunk/lucene/queries/src/test/org/apache/lucene/queries/mlt/TestMoreLikeThis.java
    lucene/dev/trunk/solr/core/src/java/org/apache/solr/handler/MoreLikeThisHandler.java

Modified: lucene/dev/trunk/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/CHANGES.txt?rev=1599442&r1=1599441&r2=1599442&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/CHANGES.txt (original)
+++ lucene/dev/trunk/lucene/CHANGES.txt Tue Jun  3 07:51:55 2014
@@ -138,6 +138,10 @@ Changes in Backwards Compatibility Polic
 
 API Changes
 
+* LUCENE-5725: MoreLikeThis#like now accepts multiple values per field.
+  The pre-existing method has been deprecated in favor of a variable-arguments
+  parameter for the like text. (Alex Ksikes via Simon Willnauer)
+
 * LUCENE-5711: MergePolicy accepts an IndexWriter instance
   on each method rather than holding state against a single
   IndexWriter instance. (Simon Willnauer)

Modified: lucene/dev/trunk/lucene/classification/src/java/org/apache/lucene/classification/KNearestNeighborClassifier.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/classification/src/java/org/apache/lucene/classification/KNearestNeighborClassifier.java?rev=1599442&r1=1599441&r2=1599442&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/classification/src/java/org/apache/lucene/classification/KNearestNeighborClassifier.java (original)
+++ lucene/dev/trunk/lucene/classification/src/java/org/apache/lucene/classification/KNearestNeighborClassifier.java Tue Jun  3 07:51:55 2014
@@ -84,7 +84,7 @@ public class KNearestNeighborClassifier 
     }
     BooleanQuery mltQuery = new BooleanQuery();
     for (String textFieldName : textFieldNames) {
-      mltQuery.add(new BooleanClause(mlt.like(new StringReader(text), textFieldName), BooleanClause.Occur.SHOULD));
+      mltQuery.add(new BooleanClause(mlt.like(textFieldName, new StringReader(text)), BooleanClause.Occur.SHOULD));
     }
     Query classFieldQuery = new WildcardQuery(new Term(classFieldName, "*"));
     mltQuery.add(new BooleanClause(classFieldQuery, BooleanClause.Occur.MUST));

Modified: lucene/dev/trunk/lucene/queries/src/java/org/apache/lucene/queries/mlt/MoreLikeThis.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/queries/src/java/org/apache/lucene/queries/mlt/MoreLikeThis.java?rev=1599442&r1=1599441&r2=1599442&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/queries/src/java/org/apache/lucene/queries/mlt/MoreLikeThis.java (original)
+++ lucene/dev/trunk/lucene/queries/src/java/org/apache/lucene/queries/mlt/MoreLikeThis.java Tue Jun  3 07:51:55 2014
@@ -15,23 +15,21 @@
  */
 package org.apache.lucene.queries.mlt;
 
-import java.io.*;
-import java.util.*;
-
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
-import org.apache.lucene.document.Document;
 import org.apache.lucene.index.Fields;
 import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.index.IndexableField;
 import org.apache.lucene.index.MultiFields;
 import org.apache.lucene.index.StorableField;
 import org.apache.lucene.index.StoredDocument;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.index.Terms;
 import org.apache.lucene.index.TermsEnum;
-import org.apache.lucene.search.*;
+import org.apache.lucene.search.BooleanClause;
+import org.apache.lucene.search.BooleanQuery;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.TermQuery;
 import org.apache.lucene.search.similarities.DefaultSimilarity;
 import org.apache.lucene.search.similarities.TFIDFSimilarity;
 import org.apache.lucene.util.BytesRef;
@@ -39,6 +37,15 @@ import org.apache.lucene.util.CharsRef;
 import org.apache.lucene.util.PriorityQueue;
 import org.apache.lucene.util.UnicodeUtil;
 
+import java.io.IOException;
+import java.io.Reader;
+import java.io.StringReader;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.Set;
+
 
 /**
  * Generate "more like this" similarity queries.
@@ -581,12 +588,17 @@ public final class MoreLikeThis {
   }
 
   /**
-   * Return a query that will return docs like the passed Reader.
+   * Return a query that will return docs like the passed Readers.
+   * This was added in order to treat multi-value fields.
    *
-   * @return a query that will return docs like the passed Reader.
+   * @return a query that will return docs like the passed Readers.
    */
-  public Query like(Reader r, String fieldName) throws IOException {
-    return createQuery(retrieveTerms(r, fieldName));
+  public Query like(String fieldName, Reader... readers) throws IOException {
+    Map<String, Int> words = new HashMap<>();
+    for (Reader r : readers) {
+      addTermFrequencies(r, words, fieldName);
+    }
+    return createQuery(createQueue(words));
   }
 
   /**

Modified: lucene/dev/trunk/lucene/queries/src/java/org/apache/lucene/queries/mlt/MoreLikeThisQuery.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/queries/src/java/org/apache/lucene/queries/mlt/MoreLikeThisQuery.java?rev=1599442&r1=1599441&r2=1599442&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/queries/src/java/org/apache/lucene/queries/mlt/MoreLikeThisQuery.java (original)
+++ lucene/dev/trunk/lucene/queries/src/java/org/apache/lucene/queries/mlt/MoreLikeThisQuery.java Tue Jun  3 07:51:55 2014
@@ -26,7 +26,6 @@ import org.apache.lucene.search.BooleanC
 import org.apache.lucene.search.BooleanQuery;
 import org.apache.lucene.search.Query;
 
-import java.io.ByteArrayInputStream;
 import java.io.IOException;
 import java.io.StringReader;
 import java.util.Arrays;
@@ -71,7 +70,7 @@ public class MoreLikeThisQuery extends Q
     }
     mlt.setMaxQueryTerms(maxQueryTerms);
     mlt.setStopWords(stopWords);
-    BooleanQuery bq = (BooleanQuery) mlt.like(new StringReader(likeText), fieldName);
+    BooleanQuery bq = (BooleanQuery) mlt.like(fieldName, new StringReader(likeText));
     BooleanClause[] clauses = bq.getClauses();
     //make at least half the terms match
     bq.setMinimumNumberShouldMatch((int) (clauses.length * percentTermsToMatch));

Modified: lucene/dev/trunk/lucene/queries/src/test/org/apache/lucene/queries/mlt/TestMoreLikeThis.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/queries/src/test/org/apache/lucene/queries/mlt/TestMoreLikeThis.java?rev=1599442&r1=1599441&r2=1599442&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/queries/src/test/org/apache/lucene/queries/mlt/TestMoreLikeThis.java (original)
+++ lucene/dev/trunk/lucene/queries/src/test/org/apache/lucene/queries/mlt/TestMoreLikeThis.java Tue Jun  3 07:51:55 2014
@@ -19,17 +19,18 @@ package org.apache.lucene.queries.mlt;
 
 import java.io.IOException;
 import java.io.StringReader;
+import java.util.Arrays;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
 
-import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.MockAnalyzer;
 import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.RandomIndexWriter;
+import org.apache.lucene.index.Term;
 import org.apache.lucene.search.BooleanClause;
 import org.apache.lucene.search.BooleanQuery;
 import org.apache.lucene.search.IndexSearcher;
@@ -53,6 +54,8 @@ public class TestMoreLikeThis extends Lu
     // Add series of docs with specific information for MoreLikeThis
     addDoc(writer, "lucene");
     addDoc(writer, "lucene release");
+    addDoc(writer, "apache");
+    addDoc(writer, "apache lucene");
 
     reader = writer.getReader();
     writer.shutdown();
@@ -88,8 +91,8 @@ public class TestMoreLikeThis extends Lu
     float boostFactor = 5;
     mlt.setBoostFactor(boostFactor);
     
-    BooleanQuery query = (BooleanQuery) mlt.like(new StringReader(
-        "lucene release"), "text");
+    BooleanQuery query = (BooleanQuery) mlt.like("text", new StringReader(
+        "lucene release"));
     List<BooleanClause> clauses = query.clauses();
     
     assertEquals("Expected " + originalValues.size() + " clauses.",
@@ -116,8 +119,8 @@ public class TestMoreLikeThis extends Lu
     mlt.setMinWordLen(1);
     mlt.setFieldNames(new String[] {"text"});
     mlt.setBoost(true);
-    BooleanQuery query = (BooleanQuery) mlt.like(new StringReader(
-        "lucene release"), "text");
+    BooleanQuery query = (BooleanQuery) mlt.like("text", new StringReader(
+        "lucene release"));
     List<BooleanClause> clauses = query.clauses();
 
     for (BooleanClause clause : clauses) {
@@ -135,9 +138,29 @@ public class TestMoreLikeThis extends Lu
     mlt.setMinTermFreq(1);
     mlt.setMinWordLen(1);
     mlt.setFieldNames(new String[] {"text", "foobar"});
-    mlt.like(new StringReader("this is a test"), "foobar");
+    mlt.like("foobar", new StringReader("this is a test"));
   }
-  
+
+  // LUCENE-5725
+  public void testMultiValues() throws Exception {
+    MoreLikeThis mlt = new MoreLikeThis(reader);
+    mlt.setAnalyzer(new MockAnalyzer(random(), MockTokenizer.KEYWORD, false));
+    mlt.setMinDocFreq(1);
+    mlt.setMinTermFreq(1);
+    mlt.setMinWordLen(1);
+    mlt.setFieldNames(new String[] {"text"});
+
+    BooleanQuery query = (BooleanQuery) mlt.like("text",
+        new StringReader("lucene"), new StringReader("lucene release"),
+        new StringReader("apache"), new StringReader("apache lucene"));
+    List<BooleanClause> clauses = query.clauses();
+    assertEquals("Expected 2 clauses only!", 2, clauses.size());
+    for (BooleanClause clause : clauses) {
+      Term term = ((TermQuery) clause.getQuery()).getTerm();
+      assertTrue(Arrays.asList(new Term("text", "lucene"), new Term("text", "apache")).contains(term));
+    }
+  }
+
   // just basic equals/hashcode etc
   public void testMoreLikeThisQuery() throws Exception {
     Query query = new MoreLikeThisQuery("this is a test", new String[] { "text" }, new MockAnalyzer(random()), "text");

Modified: lucene/dev/trunk/solr/core/src/java/org/apache/solr/handler/MoreLikeThisHandler.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/core/src/java/org/apache/solr/handler/MoreLikeThisHandler.java?rev=1599442&r1=1599441&r2=1599442&view=diff
==============================================================================
--- lucene/dev/trunk/solr/core/src/java/org/apache/solr/handler/MoreLikeThisHandler.java (original)
+++ lucene/dev/trunk/solr/core/src/java/org/apache/solr/handler/MoreLikeThisHandler.java Tue Jun  3 07:51:55 2014
@@ -370,7 +370,7 @@ public class MoreLikeThisHandler extends
     public DocListAndSet getMoreLikeThis( Reader reader, int start, int rows, List<Query> filters, List<InterestingTerm> terms, int flags ) throws IOException
     {
       // analyzing with the first field: previous (stupid) behavior
-      rawMLTQuery = mlt.like(reader, mlt.getFieldNames()[0]);
+      rawMLTQuery = mlt.like(mlt.getFieldNames()[0], reader);
       boostedMLTQuery = getBoostedQuery( rawMLTQuery );
       if( terms != null ) {
         fillInterestingTermsFromMLTQuery( boostedMLTQuery, terms );