You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by an...@apache.org on 2014/10/28 19:23:56 UTC

svn commit: r1634939 - in /lucene/dev/branches/branch_5x: ./ lucene/ lucene/queries/ lucene/queries/src/java/org/apache/lucene/queries/mlt/ solr/ solr/core/ solr/core/src/java/org/apache/solr/search/ solr/core/src/java/org/apache/solr/search/mlt/ solr/...

Author: anshum
Date: Tue Oct 28 18:23:55 2014
New Revision: 1634939

URL: http://svn.apache.org/r1634939
Log:
SOLR-6248: MoreLikeThis QParser that works in standalone/cloud mode (merge from trunk)

Added:
    lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/search/mlt/
      - copied from r1634937, lucene/dev/trunk/solr/core/src/java/org/apache/solr/search/mlt/
    lucene/dev/branches/branch_5x/solr/core/src/test/org/apache/solr/search/mlt/
      - copied from r1634937, lucene/dev/trunk/solr/core/src/test/org/apache/solr/search/mlt/
Modified:
    lucene/dev/branches/branch_5x/   (props changed)
    lucene/dev/branches/branch_5x/lucene/   (props changed)
    lucene/dev/branches/branch_5x/lucene/queries/   (props changed)
    lucene/dev/branches/branch_5x/lucene/queries/src/java/org/apache/lucene/queries/mlt/MoreLikeThis.java
    lucene/dev/branches/branch_5x/solr/   (props changed)
    lucene/dev/branches/branch_5x/solr/CHANGES.txt   (contents, props changed)
    lucene/dev/branches/branch_5x/solr/core/   (props changed)
    lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/search/QParserPlugin.java
    lucene/dev/branches/branch_5x/solr/core/src/test/org/apache/solr/search/QueryEqualityTest.java

Modified: lucene/dev/branches/branch_5x/lucene/queries/src/java/org/apache/lucene/queries/mlt/MoreLikeThis.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/queries/src/java/org/apache/lucene/queries/mlt/MoreLikeThis.java?rev=1634939&r1=1634938&r2=1634939&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/queries/src/java/org/apache/lucene/queries/mlt/MoreLikeThis.java (original)
+++ lucene/dev/branches/branch_5x/lucene/queries/src/java/org/apache/lucene/queries/mlt/MoreLikeThis.java Tue Oct 28 18:23:55 2014
@@ -33,10 +33,8 @@ import org.apache.lucene.search.TermQuer
 import org.apache.lucene.search.similarities.DefaultSimilarity;
 import org.apache.lucene.search.similarities.TFIDFSimilarity;
 import org.apache.lucene.util.BytesRef;
-import org.apache.lucene.util.CharsRef;
 import org.apache.lucene.util.CharsRefBuilder;
 import org.apache.lucene.util.PriorityQueue;
-import org.apache.lucene.util.UnicodeUtil;
 
 import java.io.IOException;
 import java.io.Reader;
@@ -589,6 +587,20 @@ public final class MoreLikeThis {
   }
 
   /**
+   * Return a query that will return docs like the passed field values.
+   * @param filteredDocument Document with field values extracted for selected fields.
+   * @return More Like This query for the passed document.
+   */
+  public Query like(Map<String, ArrayList<String>> filteredDocument) throws IOException {
+    if (fieldNames == null) {
+      // no field names configured yet: take the indexed fields reported for the reader
+      final Collection<String> indexedFields = MultiFields.getIndexedFields(ir);
+      fieldNames = indexedFields.toArray(new String[indexedFields.size()]);
+    }
+    return createQuery(retrieveTerms(filteredDocument));
+  }
+
+  /**
    * Return a query that will return docs like the passed Readers.
    * This was added in order to treat multi-value fields.
    *
@@ -741,6 +753,24 @@ public final class MoreLikeThis {
     return createQueue(termFreqMap);
   }
 
+
+  /** Collects term frequencies per configured field, using only that field's own values. */
+  private PriorityQueue<ScoreTerm> retrieveTerms(Map<String, ArrayList<String>> fields) throws
+      IOException {
+    Map<String,Int> termFreqMap = new HashMap<>();
+    for (String fieldName : fieldNames) {
+      ArrayList<String> fieldValues = fields.get(fieldName);
+      if (fieldValues == null) {
+        continue; // the document carries no values for this field
+      }
+      for (String fieldValue : fieldValues) {
+        if (fieldValue != null) {
+          addTermFrequencies(new StringReader(fieldValue), termFreqMap, fieldName);
+        }
+      }
+    }
+    return createQueue(termFreqMap);
+  }
   /**
    * Adds terms and frequencies found in vector into the Map termFreqMap
    *

Modified: lucene/dev/branches/branch_5x/solr/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/solr/CHANGES.txt?rev=1634939&r1=1634938&r2=1634939&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/solr/CHANGES.txt (original)
+++ lucene/dev/branches/branch_5x/solr/CHANGES.txt Tue Oct 28 18:23:55 2014
@@ -167,6 +167,10 @@ New Features
   properties to allow easier overriding of just the right piece of the Solr URL. 
   (ehatcher)
 
+* SOLR-6248: MoreLikeThis QParser that accepts a document id and returns documents that
+  have similar content. It works in standalone/cloud mode and shares logic with the
+  Lucene MoreLikeThis class (Anshum Gupta).
+
 Bug Fixes
 ----------------------
 

Modified: lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/search/QParserPlugin.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/search/QParserPlugin.java?rev=1634939&r1=1634938&r2=1634939&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/search/QParserPlugin.java (original)
+++ lucene/dev/branches/branch_5x/solr/core/src/java/org/apache/solr/search/QParserPlugin.java Tue Oct 28 18:23:55 2014
@@ -22,6 +22,7 @@ import org.apache.solr.core.SolrInfoMBea
 import org.apache.solr.request.SolrQueryRequest;
 import org.apache.solr.search.join.BlockJoinChildQParserPlugin;
 import org.apache.solr.search.join.BlockJoinParentQParserPlugin;
+import org.apache.solr.search.mlt.MLTQParserPlugin;
 import org.apache.solr.util.plugin.NamedListInitializedPlugin;
 
 import java.net.URL;
@@ -63,7 +64,8 @@ public abstract class QParserPlugin impl
     SimpleQParserPlugin.NAME, SimpleQParserPlugin.class,
     ComplexPhraseQParserPlugin.NAME, ComplexPhraseQParserPlugin.class,
     ReRankQParserPlugin.NAME, ReRankQParserPlugin.class,
-    ExportQParserPlugin.NAME, ExportQParserPlugin.class
+    ExportQParserPlugin.NAME, ExportQParserPlugin.class,
+    MLTQParserPlugin.NAME, MLTQParserPlugin.class
   };
 
   /** return a {@link QParser} */

Modified: lucene/dev/branches/branch_5x/solr/core/src/test/org/apache/solr/search/QueryEqualityTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/solr/core/src/test/org/apache/solr/search/QueryEqualityTest.java?rev=1634939&r1=1634938&r2=1634939&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/solr/core/src/test/org/apache/solr/search/QueryEqualityTest.java (original)
+++ lucene/dev/branches/branch_5x/solr/core/src/test/org/apache/solr/search/QueryEqualityTest.java Tue Oct 28 18:23:55 2014
@@ -19,6 +19,7 @@ package org.apache.solr.search;
 import org.apache.lucene.search.Query;
 import org.apache.lucene.search.QueryUtils;
 import org.apache.solr.SolrTestCaseJ4;
+import org.apache.solr.common.SolrInputDocument;
 import org.apache.solr.request.SolrQueryRequest;
 import org.apache.solr.request.SolrRequestInfo;
 import org.apache.solr.response.SolrQueryResponse;
@@ -838,6 +839,21 @@ public class QueryEqualityTest extends S
     }
   }
 
+  /** Verifies mlt query equality; delQ only builds XML, so cleanup runs it through assertU. */
+  public void testQueryMLT() throws Exception {
+    assertU(adoc("id", "1", "lowerfilt", "sample data"));
+    assertU(commit());
+    SolrQueryRequest req = req("qf","lowerfilt","id","1");
+    try {
+      assertQueryEquals("mlt", req, "{!mlt qf=lowerfilt id=1}");
+    } finally {
+      assertU(delQ("*:*"));
+      assertU(commit());
+      req.close();
+    }
+  }
+
+
   /**
    * NOTE: defType is not only used to pick the parser, but also to record 
    * the parser being tested for coverage sanity checking