You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ko...@apache.org on 2010/12/22 02:13:40 UTC
svn commit: r1051725 - in /lucene/dev/branches/branch_3x: ./ lucene/ solr/
solr/contrib/clustering/
solr/contrib/clustering/src/main/java/org/apache/solr/handler/clustering/
solr/contrib/clustering/src/main/java/org/apache/solr/handler/clustering/carrot2/ ...
Author: koji
Date: Wed Dec 22 01:13:39 2010
New Revision: 1051725
URL: http://svn.apache.org/viewvc?rev=1051725&view=rev
Log:
SOLR-2282: add distributed support to search results clustering
Added:
lucene/dev/branches/branch_3x/solr/contrib/clustering/src/test/java/org/apache/solr/handler/clustering/DistributedClusteringComponentTest.java
- copied unchanged from r1051715, lucene/dev/trunk/solr/contrib/clustering/src/test/java/org/apache/solr/handler/clustering/DistributedClusteringComponentTest.java
Modified:
lucene/dev/branches/branch_3x/ (props changed)
lucene/dev/branches/branch_3x/lucene/ (props changed)
lucene/dev/branches/branch_3x/solr/ (props changed)
lucene/dev/branches/branch_3x/solr/contrib/clustering/CHANGES.txt
lucene/dev/branches/branch_3x/solr/contrib/clustering/build.xml
lucene/dev/branches/branch_3x/solr/contrib/clustering/src/main/java/org/apache/solr/handler/clustering/ClusteringComponent.java
lucene/dev/branches/branch_3x/solr/contrib/clustering/src/main/java/org/apache/solr/handler/clustering/SearchClusteringEngine.java
lucene/dev/branches/branch_3x/solr/contrib/clustering/src/main/java/org/apache/solr/handler/clustering/carrot2/CarrotClusteringEngine.java
lucene/dev/branches/branch_3x/solr/contrib/clustering/src/test/java/org/apache/solr/handler/clustering/carrot2/CarrotClusteringEngineTest.java
Modified: lucene/dev/branches/branch_3x/solr/contrib/clustering/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/solr/contrib/clustering/CHANGES.txt?rev=1051725&r1=1051724&r2=1051725&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/solr/contrib/clustering/CHANGES.txt (original)
+++ lucene/dev/branches/branch_3x/solr/contrib/clustering/CHANGES.txt Wed Dec 22 01:13:39 2010
@@ -6,7 +6,7 @@ See http://wiki.apache.org/solr/Clusteri
CHANGES
-$Id:$
+$Id$
================== Release XXXX ==================
@@ -17,6 +17,9 @@ $Id:$
* SOLR-1804: Re-enabled clustering on trunk, updated to latest version of Carrot2. No more LGPL run-time dependencies.
This release of C2 also does not have a specific Lucene dependency. (Stanislaw Osinski, gsingers)
+* SOLR-2282: Add distributed search support for search result clustering.
+ (Brad Giaccio, koji)
+
================== Release 1.4.0 ==================
Solr Clustering will be released for the first time in Solr 1.4. See http://wiki.apache.org/solr/ClusteringComponent
Modified: lucene/dev/branches/branch_3x/solr/contrib/clustering/build.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/solr/contrib/clustering/build.xml?rev=1051725&r1=1051724&r2=1051725&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/solr/contrib/clustering/build.xml (original)
+++ lucene/dev/branches/branch_3x/solr/contrib/clustering/build.xml Wed Dec 22 01:13:39 2010
@@ -44,6 +44,10 @@
<pathelement location="${common-solr.dir}/build/tests"/> <!-- include solr test code -->
<pathelement location="${common-solr.dir}/../lucene/build/classes/test" /> <!-- include some lucene test code -->
<path refid="common.classpath"/>
+ <!-- DistributedClusteringComponentTest uses Jetty -->
+ <fileset dir="${solr-path}/example/lib">
+ <include name="**/*.jar" />
+ </fileset>
</path>
<target name="clean">
Modified: lucene/dev/branches/branch_3x/solr/contrib/clustering/src/main/java/org/apache/solr/handler/clustering/ClusteringComponent.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/solr/contrib/clustering/src/main/java/org/apache/solr/handler/clustering/ClusteringComponent.java?rev=1051725&r1=1051724&r2=1051725&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/solr/contrib/clustering/src/main/java/org/apache/solr/handler/clustering/ClusteringComponent.java (original)
+++ lucene/dev/branches/branch_3x/solr/contrib/clustering/src/main/java/org/apache/solr/handler/clustering/ClusteringComponent.java Wed Dec 22 01:13:39 2010
@@ -16,14 +16,22 @@ package org.apache.solr.handler.clusteri
* limitations under the License.
*/
+import org.apache.solr.common.SolrDocument;
+import org.apache.solr.common.SolrDocumentList;
+import org.apache.solr.common.params.CommonParams;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.core.SolrCore;
import org.apache.solr.core.SolrResourceLoader;
import org.apache.solr.handler.clustering.carrot2.CarrotClusteringEngine;
+import org.apache.solr.handler.clustering.carrot2.CarrotParams;
import org.apache.solr.handler.component.ResponseBuilder;
import org.apache.solr.handler.component.SearchComponent;
+import org.apache.solr.handler.component.ShardRequest;
+import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.search.DocListAndSet;
+import org.apache.solr.search.SolrIndexSearcher;
+import org.apache.solr.util.SolrPluginUtils;
import org.apache.solr.util.plugin.SolrCoreAware;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -31,7 +39,9 @@ import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.util.Collections;
import java.util.HashMap;
+import java.util.HashSet;
import java.util.Map;
+import java.util.Set;
/**
@@ -53,7 +63,7 @@ public class ClusteringComponent extends
public static final String COMPONENT_NAME = "clustering";
private NamedList initParams;
-
+ @Override
public void prepare(ResponseBuilder rb) throws IOException {
SolrParams params = rb.req.getParams();
if (!params.getBool(COMPONENT_NAME, false)) {
@@ -61,18 +71,21 @@ public class ClusteringComponent extends
}
}
+ @Override
public void process(ResponseBuilder rb) throws IOException {
SolrParams params = rb.req.getParams();
if (!params.getBool(COMPONENT_NAME, false)) {
return;
}
- String name = params.get(ClusteringParams.ENGINE_NAME, ClusteringEngine.DEFAULT_ENGINE_NAME);
+ String name = getClusteringEngineName(rb);
boolean useResults = params.getBool(ClusteringParams.USE_SEARCH_RESULTS, false);
if (useResults == true) {
- SearchClusteringEngine engine = searchClusteringEngines.get(name);
+ SearchClusteringEngine engine = getSearchClusteringEngine(rb);
if (engine != null) {
DocListAndSet results = rb.getResults();
- Object clusters = engine.cluster(rb.getQuery(), results.docList, rb.req);
+ Map<SolrDocument,Integer> docIds = new HashMap<SolrDocument, Integer>(results.docList.size());
+ SolrDocumentList solrDocList = engine.getSolrDocumentList(results.docList, rb.req, docIds);
+ Object clusters = engine.cluster(rb.getQuery(), solrDocList, docIds, rb.req);
rb.rsp.add("clusters", clusters);
} else {
log.warn("No engine for: " + name);
@@ -97,6 +110,72 @@ public class ClusteringComponent extends
}
}
}
+
+ private SearchClusteringEngine getSearchClusteringEngine(ResponseBuilder rb){
+ return searchClusteringEngines.get(getClusteringEngineName(rb));
+ }
+
+ private String getClusteringEngineName(ResponseBuilder rb){
+ return rb.req.getParams().get(ClusteringParams.ENGINE_NAME, ClusteringEngine.DEFAULT_ENGINE_NAME);
+ }
+
+ @Override
+ public void modifyRequest(ResponseBuilder rb, SearchComponent who, ShardRequest sreq) {
+ SolrParams params = rb.req.getParams();
+ if (!params.getBool(COMPONENT_NAME, false) || !params.getBool(ClusteringParams.USE_SEARCH_RESULTS, false)) {
+ return;
+ }
+ sreq.params.remove(COMPONENT_NAME);
+ if( ( sreq.purpose & ShardRequest.PURPOSE_GET_FIELDS ) != 0 ){
+ String fl = sreq.params.get(CommonParams.FL,"*");
+ // if fl contains '*' then we don't need to check
+ if( fl.indexOf( '*' ) >= 0 ) return;
+ Set<String> fields = getSearchClusteringEngine(rb).getFieldsToLoad(rb.req);
+ if( fields == null || fields.size() == 0 ) return;
+ StringBuilder sb = new StringBuilder();
+ String[] flparams = fl.split( "[,\\s]+" );
+ Set<String> flParamSet = new HashSet<String>(flparams.length);
+ for( String flparam : flparams ){
+ // no need to trim() because split() already consumes whitespace (\s)
+ flParamSet.add(flparam);
+ }
+ for( String aFieldToLoad : fields ){
+ if( !flParamSet.contains( aFieldToLoad ) ){
+ sb.append( ',' ).append( aFieldToLoad );
+ }
+ }
+ if( sb.length() > 0 ){
+ sreq.params.set( CommonParams.FL, fl + sb.toString() );
+ }
+ }
+ }
+
+ @Override
+ public void finishStage(ResponseBuilder rb) {
+ SolrParams params = rb.req.getParams();
+ if (!params.getBool(COMPONENT_NAME, false) || !params.getBool(ClusteringParams.USE_SEARCH_RESULTS, false)) {
+ return;
+ }
+ if (rb.stage == ResponseBuilder.STAGE_GET_FIELDS) {
+ SearchClusteringEngine engine = getSearchClusteringEngine(rb);
+ if (engine != null) {
+ SolrDocumentList solrDocList = (SolrDocumentList)rb.rsp.getValues().get("response");
+ // TODO: Currently, docIds is set to null in a distributed environment.
+ // As a result, CarrotParams.PRODUCE_SUMMARY does not work.
+ // To make CarrotParams.PRODUCE_SUMMARY work in distributed mode, we can choose one of:
+ // (a) In each shard, ClusteringComponent produces the summary and finishStage()
+ // merges these summaries.
+ // (b) Add a doHighlighting(SolrDocumentList, ...) method to SolrHighlighter and
+ // make SolrHighlighter use "external text" rather than stored values to produce snippets.
+ Map<SolrDocument,Integer> docIds = null;
+ Object clusters = engine.cluster(rb.getQuery(), solrDocList, docIds, rb.req);
+ rb.rsp.add("clusters", clusters);
+ } else {
+ String name = getClusteringEngineName(rb);
+ log.warn("No engine for: " + name);
+ }
+ }
+ }
@Override
@SuppressWarnings("unchecked")
@@ -174,17 +253,17 @@ public class ClusteringComponent extends
@Override
public String getVersion() {
- return "$Revision:$";
+ return "$Revision$";
}
@Override
public String getSourceId() {
- return "$Id:$";
+ return "$Id$";
}
@Override
public String getSource() {
- return "$URL:$";
+ return "$URL$";
}
}
Modified: lucene/dev/branches/branch_3x/solr/contrib/clustering/src/main/java/org/apache/solr/handler/clustering/SearchClusteringEngine.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/solr/contrib/clustering/src/main/java/org/apache/solr/handler/clustering/SearchClusteringEngine.java?rev=1051725&r1=1051724&r2=1051725&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/solr/contrib/clustering/src/main/java/org/apache/solr/handler/clustering/SearchClusteringEngine.java (original)
+++ lucene/dev/branches/branch_3x/solr/contrib/clustering/src/main/java/org/apache/solr/handler/clustering/SearchClusteringEngine.java Wed Dec 22 01:13:39 2010
@@ -16,12 +16,16 @@ package org.apache.solr.handler.clusteri
* limitations under the License.
*/
-import org.apache.solr.common.util.NamedList;
-import org.apache.solr.common.params.SolrParams;
-import org.apache.solr.core.SolrCore;
-import org.apache.solr.search.DocList;
-import org.apache.solr.request.SolrQueryRequest;
+import java.io.IOException;
+import java.util.Map;
+import java.util.Set;
+
import org.apache.lucene.search.Query;
+import org.apache.solr.common.SolrDocument;
+import org.apache.solr.common.SolrDocumentList;
+import org.apache.solr.request.SolrQueryRequest;
+import org.apache.solr.search.DocList;
+import org.apache.solr.util.SolrPluginUtils;
/**
@@ -30,8 +34,27 @@ import org.apache.lucene.search.Query;
**/
public abstract class SearchClusteringEngine extends ClusteringEngine {
-
+ @Deprecated
public abstract Object cluster(Query query, DocList docList, SolrQueryRequest sreq);
-
+ // TODO: need DocList, too?
+ public abstract Object cluster(Query query, SolrDocumentList solrDocumentList,
+ Map<SolrDocument,Integer> docIds, SolrQueryRequest sreq);
+
+ /**
+ * Returns the set of field names to load.
+ * Concrete classes can override this method if needed.
+ * Default implementation returns null, that is, all stored fields are loaded.
+ * @param sreq
+ * @return set of field names to load
+ */
+ protected Set<String> getFieldsToLoad(SolrQueryRequest sreq){
+ return null;
+ }
+
+ public SolrDocumentList getSolrDocumentList(DocList docList, SolrQueryRequest sreq,
+ Map<SolrDocument, Integer> docIds) throws IOException{
+ return SolrPluginUtils.docListToSolrDocumentList(
+ docList, sreq.getSearcher(), getFieldsToLoad(sreq), docIds);
+ }
}
Modified: lucene/dev/branches/branch_3x/solr/contrib/clustering/src/main/java/org/apache/solr/handler/clustering/carrot2/CarrotClusteringEngine.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/solr/contrib/clustering/src/main/java/org/apache/solr/handler/clustering/carrot2/CarrotClusteringEngine.java?rev=1051725&r1=1051724&r2=1051725&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/solr/contrib/clustering/src/main/java/org/apache/solr/handler/clustering/carrot2/CarrotClusteringEngine.java (original)
+++ lucene/dev/branches/branch_3x/solr/contrib/clustering/src/main/java/org/apache/solr/handler/clustering/carrot2/CarrotClusteringEngine.java Wed Dec 22 01:13:39 2010
@@ -18,25 +18,38 @@ package org.apache.solr.handler.clusteri
*/
import java.io.IOException;
-import java.util.*;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
import org.apache.commons.lang.StringUtils;
-import org.apache.lucene.document.FieldSelector;
-import org.apache.lucene.document.SetBasedFieldSelector;
import org.apache.lucene.search.Query;
+import org.apache.solr.common.SolrDocument;
+import org.apache.solr.common.SolrDocumentList;
+import org.apache.solr.common.SolrException;
+import org.apache.solr.common.SolrException.ErrorCode;
import org.apache.solr.common.params.HighlightParams;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.common.util.SimpleOrderedMap;
-import org.apache.solr.common.SolrException;
import org.apache.solr.core.SolrCore;
import org.apache.solr.handler.clustering.SearchClusteringEngine;
import org.apache.solr.highlight.SolrHighlighter;
import org.apache.solr.request.LocalSolrQueryRequest;
import org.apache.solr.request.SolrQueryRequest;
-import org.apache.solr.search.*;
-import org.apache.solr.util.RefCounted;
-import org.carrot2.core.*;
+import org.apache.solr.search.DocList;
+import org.apache.solr.search.DocSlice;
+import org.apache.solr.search.SolrIndexSearcher;
+import org.apache.solr.util.SolrPluginUtils;
+import org.carrot2.core.Cluster;
+import org.carrot2.core.Controller;
+import org.carrot2.core.ControllerFactory;
+import org.carrot2.core.Document;
+import org.carrot2.core.IClusteringAlgorithm;
import org.carrot2.core.attribute.AttributeNames;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -63,11 +76,25 @@ public class CarrotClusteringEngine exte
private String idFieldName;
+ @Deprecated
public Object cluster(Query query, DocList docList, SolrQueryRequest sreq) {
+ SolrIndexSearcher searcher = sreq.getSearcher();
+ SolrDocumentList solrDocList;
+ try {
+ Map<SolrDocument,Integer> docIds = new HashMap<SolrDocument, Integer>(docList.size());
+ solrDocList = SolrPluginUtils.docListToSolrDocumentList( docList, searcher, getFieldsToLoad(sreq), docIds );
+ return cluster(query, solrDocList, docIds, sreq);
+ } catch (IOException e) {
+ throw new SolrException(ErrorCode.SERVER_ERROR, e);
+ }
+ }
+
+ public Object cluster(Query query, SolrDocumentList solrDocList,
+ Map<SolrDocument, Integer> docIds, SolrQueryRequest sreq) {
try {
// Prepare attributes for Carrot2 clustering call
Map<String, Object> attributes = new HashMap<String, Object>();
- List<Document> documents = getDocuments(docList, query, sreq);
+ List<Document> documents = getDocuments(solrDocList, docIds, query, sreq);
attributes.put(AttributeNames.DOCUMENTS, documents);
attributes.put(AttributeNames.QUERY, query.toString());
@@ -79,7 +106,7 @@ public class CarrotClusteringEngine exte
clusteringAlgorithmClass).getClusters(), sreq.getParams());
} catch (Exception e) {
log.error("Carrot2 clustering failed", e);
- throw new RuntimeException(e);
+ throw new SolrException(ErrorCode.SERVER_ERROR, "Carrot2 clustering failed", e);
}
}
@@ -114,31 +141,36 @@ public class CarrotClusteringEngine exte
return result;
}
- /**
- * Prepares Carrot2 documents for clustering.
- */
- private List<Document> getDocuments(DocList docList,
- Query query, final SolrQueryRequest sreq) throws IOException {
- SolrHighlighter highlighter = null;
+ @Override
+ protected Set<String> getFieldsToLoad(SolrQueryRequest sreq){
SolrParams solrParams = sreq.getParams();
- SolrCore core = sreq.getCore();
// Names of fields to deliver content for clustering
String urlField = solrParams.get(CarrotParams.URL_FIELD_NAME, "url");
String titleField = solrParams.get(CarrotParams.TITLE_FIELD_NAME, "title");
- String snippetField = solrParams.get(CarrotParams.SNIPPET_FIELD_NAME,
- titleField);
+ String snippetField = solrParams.get(CarrotParams.SNIPPET_FIELD_NAME, titleField);
if (StringUtils.isBlank(snippetField)) {
throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, CarrotParams.SNIPPET_FIELD_NAME
+ " must not be blank.");
}
- Set<String> fieldsToLoad = Sets.newHashSet(urlField, titleField,
- snippetField, idFieldName);
+ return Sets.newHashSet(urlField, titleField, snippetField, idFieldName);
+ }
+
+ /**
+ * Prepares Carrot2 documents for clustering.
+ */
+ private List<Document> getDocuments(SolrDocumentList solrDocList, Map<SolrDocument, Integer> docIds,
+ Query query, final SolrQueryRequest sreq) throws IOException {
+ SolrHighlighter highlighter = null;
+ SolrParams solrParams = sreq.getParams();
+ SolrCore core = sreq.getCore();
+ String urlField = solrParams.get(CarrotParams.URL_FIELD_NAME, "url");
+ String titleField = solrParams.get(CarrotParams.TITLE_FIELD_NAME, "title");
+ String snippetField = solrParams.get(CarrotParams.SNIPPET_FIELD_NAME, titleField);
+
// Get the documents
- DocIterator docsIter = docList.iterator();
- boolean produceSummary = solrParams.getBool(CarrotParams.PRODUCE_SUMMARY,
- false);
+ boolean produceSummary = solrParams.getBool(CarrotParams.PRODUCE_SUMMARY, false);
SolrQueryRequest req = null;
String[] snippetFieldAry = null;
@@ -164,20 +196,20 @@ public class CarrotClusteringEngine exte
}
}
- SolrIndexSearcher searcher = sreq.getSearcher();
- List<Document> result = new ArrayList<Document>(docList.size());
+ Iterator<SolrDocument> docsIter = solrDocList.iterator();
+ List<Document> result = new ArrayList<Document>(solrDocList.size());
float[] scores = {1.0f};
int[] docsHolder = new int[1];
Query theQuery = query;
while (docsIter.hasNext()) {
- Integer id = docsIter.next();
- org.apache.lucene.document.Document doc = searcher.doc(id,
- fieldsToLoad);
- String snippet = getValue(doc, snippetField);
- if (produceSummary == true) {
- docsHolder[0] = id.intValue();
+ SolrDocument sdoc = docsIter.next();
+ String snippet = getValue(sdoc, snippetField);
+ // TODO: docIds will be null when running distributed search.
+ // See comment in ClusteringComponent#finishStage().
+ if (produceSummary && docIds != null) {
+ docsHolder[0] = docIds.get(sdoc).intValue();
DocList docAsList = new DocSlice(0, 1, docsHolder, scores, 1, 1.0f);
NamedList highlights = highlighter.doHighlighting(docAsList, theQuery, req, snippetFieldAry);
if (highlights != null && highlights.size() == 1) {//should only be one value given our setup
@@ -189,15 +221,16 @@ public class CarrotClusteringEngine exte
}
}
}
- Document carrotDocument = new Document(getValue(doc, titleField),
- snippet, doc.get(urlField));
- carrotDocument.setField("solrId", doc.get(idFieldName));
+ Document carrotDocument = new Document(getValue(sdoc, titleField),
+ snippet, (String)sdoc.getFieldValue(urlField));
+ carrotDocument.setField("solrId", sdoc.getFieldValue(idFieldName));
result.add(carrotDocument);
}
return result;
}
+ @Deprecated
protected String getValue(org.apache.lucene.document.Document doc,
String field) {
StringBuilder result = new StringBuilder();
@@ -211,6 +244,20 @@ public class CarrotClusteringEngine exte
return result.toString().trim();
}
+ protected String getValue(SolrDocument sdoc, String field) {
+ StringBuilder result = new StringBuilder();
+ Collection<Object> vals = sdoc.getFieldValues(field);
+ if(vals == null) return "";
+ Iterator<Object> ite = vals.iterator();
+ while(ite.hasNext()){
+ // Join multiple values with a period so that Carrot2 does not pick up
+ // phrases that cross field value boundaries (in most cases it would
+ // create useless phrases).
+ result.append((String)ite.next()).append(" . ");
+ }
+ return result.toString().trim();
+ }
+
private List clustersToNamedList(List<Cluster> carrotClusters,
SolrParams solrParams) {
List result = new ArrayList();
Modified: lucene/dev/branches/branch_3x/solr/contrib/clustering/src/test/java/org/apache/solr/handler/clustering/carrot2/CarrotClusteringEngineTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/solr/contrib/clustering/src/test/java/org/apache/solr/handler/clustering/carrot2/CarrotClusteringEngineTest.java?rev=1051725&r1=1051724&r2=1051725&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/solr/contrib/clustering/src/test/java/org/apache/solr/handler/clustering/carrot2/CarrotClusteringEngineTest.java (original)
+++ lucene/dev/branches/branch_3x/solr/contrib/clustering/src/test/java/org/apache/solr/handler/clustering/carrot2/CarrotClusteringEngineTest.java Wed Dec 22 01:13:39 2010
@@ -22,6 +22,8 @@ import org.apache.lucene.search.MatchAll
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.TermQuery;
+import org.apache.solr.common.SolrDocument;
+import org.apache.solr.common.SolrDocumentList;
import org.apache.solr.common.params.ModifiableSolrParams;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.util.NamedList;
@@ -31,11 +33,14 @@ import org.apache.solr.request.LocalSolr
import org.apache.solr.search.DocList;
import org.apache.solr.search.SolrIndexSearcher;
import org.apache.solr.util.RefCounted;
+import org.apache.solr.util.SolrPluginUtils;
import org.carrot2.util.attribute.AttributeUtils;
import org.junit.Test;
import java.io.IOException;
+import java.util.HashMap;
import java.util.List;
+import java.util.Map;
import static org.junit.Assert.*;
@@ -133,21 +138,23 @@ public class CarrotClusteringEngineTest
docList = searcher.getDocList(query, (Query) null, new Sort(), 0,
numberOfDocs);
assertEquals("docList size", expectedNumDocs, docList.matches());
+
+ ModifiableSolrParams solrParams = new ModifiableSolrParams();
+ solrParams.add(CarrotParams.PRODUCE_SUMMARY, "true");
+ solrParams.add(clusteringParams);
+
+ // Perform clustering
+ LocalSolrQueryRequest req = new LocalSolrQueryRequest(h.getCore(), solrParams);
+ Map<SolrDocument,Integer> docIds = new HashMap<SolrDocument, Integer>(docList.size());
+ SolrDocumentList solrDocList = SolrPluginUtils.docListToSolrDocumentList( docList, searcher, engine.getFieldsToLoad(req), docIds );
+ List results = (List)engine.cluster(query, solrDocList, docIds, req);
+ req.close();
+ assertEquals("number of clusters: " + results, expectedNumClusters, results.size());
+ checkClusters(results, false);
+ return results;
} finally {
ref.decref();
}
-
- ModifiableSolrParams solrParams = new ModifiableSolrParams();
- solrParams.add(CarrotParams.PRODUCE_SUMMARY, "true");
- solrParams.add(clusteringParams);
-
- // Perform clustering
- LocalSolrQueryRequest req = new LocalSolrQueryRequest(h.getCore(), solrParams);
- List results = (List) engine.cluster(query, docList, req);
- req.close();
- assertEquals("number of clusters: " + results, expectedNumClusters, results.size());
- checkClusters(results, false);
- return results;
}
private void checkClusters(List results, int expectedDocCount,