You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ko...@apache.org on 2010/12/22 02:13:40 UTC

svn commit: r1051725 - in /lucene/dev/branches/branch_3x: ./ lucene/ solr/ solr/contrib/clustering/ solr/contrib/clustering/src/main/java/org/apache/solr/handler/clustering/ solr/contrib/clustering/src/main/java/org/apache/solr/handler/clustering/carro...

Author: koji
Date: Wed Dec 22 01:13:39 2010
New Revision: 1051725

URL: http://svn.apache.org/viewvc?rev=1051725&view=rev
Log:
SOLR-2282: add distributed support to search results clustering

Added:
    lucene/dev/branches/branch_3x/solr/contrib/clustering/src/test/java/org/apache/solr/handler/clustering/DistributedClusteringComponentTest.java
      - copied unchanged from r1051715, lucene/dev/trunk/solr/contrib/clustering/src/test/java/org/apache/solr/handler/clustering/DistributedClusteringComponentTest.java
Modified:
    lucene/dev/branches/branch_3x/   (props changed)
    lucene/dev/branches/branch_3x/lucene/   (props changed)
    lucene/dev/branches/branch_3x/solr/   (props changed)
    lucene/dev/branches/branch_3x/solr/contrib/clustering/CHANGES.txt
    lucene/dev/branches/branch_3x/solr/contrib/clustering/build.xml
    lucene/dev/branches/branch_3x/solr/contrib/clustering/src/main/java/org/apache/solr/handler/clustering/ClusteringComponent.java
    lucene/dev/branches/branch_3x/solr/contrib/clustering/src/main/java/org/apache/solr/handler/clustering/SearchClusteringEngine.java
    lucene/dev/branches/branch_3x/solr/contrib/clustering/src/main/java/org/apache/solr/handler/clustering/carrot2/CarrotClusteringEngine.java
    lucene/dev/branches/branch_3x/solr/contrib/clustering/src/test/java/org/apache/solr/handler/clustering/carrot2/CarrotClusteringEngineTest.java

Modified: lucene/dev/branches/branch_3x/solr/contrib/clustering/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/solr/contrib/clustering/CHANGES.txt?rev=1051725&r1=1051724&r2=1051725&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/solr/contrib/clustering/CHANGES.txt (original)
+++ lucene/dev/branches/branch_3x/solr/contrib/clustering/CHANGES.txt Wed Dec 22 01:13:39 2010
@@ -6,7 +6,7 @@ See http://wiki.apache.org/solr/Clusteri
 
 CHANGES
 
-$Id:$
+$Id$
 
 ================== Release XXXX ==================
 
@@ -17,6 +17,9 @@ $Id:$
 * SOLR-1804: Re-enabled clustering on trunk, updated to latest version of Carrot2.  No more LGPL run-time dependencies.
   This release of C2 also does not have a specific Lucene dependency.  (Stanislaw Osinski, gsingers)
 
+* SOLR-2282: Add distributed search support for search result clustering.
+  (Brad Giaccio, koji)
+
 ================== Release 1.4.0 ==================
 
 Solr Clustering will be released for the first time in Solr 1.4.  See http://wiki.apache.org/solr/ClusteringComponent

Modified: lucene/dev/branches/branch_3x/solr/contrib/clustering/build.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/solr/contrib/clustering/build.xml?rev=1051725&r1=1051724&r2=1051725&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/solr/contrib/clustering/build.xml (original)
+++ lucene/dev/branches/branch_3x/solr/contrib/clustering/build.xml Wed Dec 22 01:13:39 2010
@@ -44,6 +44,10 @@
     <pathelement location="${common-solr.dir}/build/tests"/> <!-- include solr test code -->
     <pathelement location="${common-solr.dir}/../lucene/build/classes/test" />  <!-- include some lucene test code -->
     <path refid="common.classpath"/>
+    <!-- DistributedClusteringComponentTest uses Jetty -->
+    <fileset dir="${solr-path}/example/lib">
+      <include name="**/*.jar" />
+    </fileset>
   </path>
 
   <target name="clean">

Modified: lucene/dev/branches/branch_3x/solr/contrib/clustering/src/main/java/org/apache/solr/handler/clustering/ClusteringComponent.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/solr/contrib/clustering/src/main/java/org/apache/solr/handler/clustering/ClusteringComponent.java?rev=1051725&r1=1051724&r2=1051725&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/solr/contrib/clustering/src/main/java/org/apache/solr/handler/clustering/ClusteringComponent.java (original)
+++ lucene/dev/branches/branch_3x/solr/contrib/clustering/src/main/java/org/apache/solr/handler/clustering/ClusteringComponent.java Wed Dec 22 01:13:39 2010
@@ -16,14 +16,22 @@ package org.apache.solr.handler.clusteri
  * limitations under the License.
  */
 
+import org.apache.solr.common.SolrDocument;
+import org.apache.solr.common.SolrDocumentList;
+import org.apache.solr.common.params.CommonParams;
 import org.apache.solr.common.params.SolrParams;
 import org.apache.solr.common.util.NamedList;
 import org.apache.solr.core.SolrCore;
 import org.apache.solr.core.SolrResourceLoader;
 import org.apache.solr.handler.clustering.carrot2.CarrotClusteringEngine;
+import org.apache.solr.handler.clustering.carrot2.CarrotParams;
 import org.apache.solr.handler.component.ResponseBuilder;
 import org.apache.solr.handler.component.SearchComponent;
+import org.apache.solr.handler.component.ShardRequest;
+import org.apache.solr.request.SolrQueryRequest;
 import org.apache.solr.search.DocListAndSet;
+import org.apache.solr.search.SolrIndexSearcher;
+import org.apache.solr.util.SolrPluginUtils;
 import org.apache.solr.util.plugin.SolrCoreAware;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -31,7 +39,9 @@ import org.slf4j.LoggerFactory;
 import java.io.IOException;
 import java.util.Collections;
 import java.util.HashMap;
+import java.util.HashSet;
 import java.util.Map;
+import java.util.Set;
 
 
 /**
@@ -53,7 +63,7 @@ public class ClusteringComponent extends
   public static final String COMPONENT_NAME = "clustering";
   private NamedList initParams;
 
-
+  @Override
   public void prepare(ResponseBuilder rb) throws IOException {
     SolrParams params = rb.req.getParams();
     if (!params.getBool(COMPONENT_NAME, false)) {
@@ -61,18 +71,21 @@ public class ClusteringComponent extends
     }
   }
 
+  @Override
   public void process(ResponseBuilder rb) throws IOException {
     SolrParams params = rb.req.getParams();
     if (!params.getBool(COMPONENT_NAME, false)) {
       return;
     }
-    String name = params.get(ClusteringParams.ENGINE_NAME, ClusteringEngine.DEFAULT_ENGINE_NAME);
+    String name = getClusteringEngineName(rb);
     boolean useResults = params.getBool(ClusteringParams.USE_SEARCH_RESULTS, false);
     if (useResults == true) {
-      SearchClusteringEngine engine = searchClusteringEngines.get(name);
+      SearchClusteringEngine engine = getSearchClusteringEngine(rb);
       if (engine != null) {
         DocListAndSet results = rb.getResults();
-        Object clusters = engine.cluster(rb.getQuery(), results.docList, rb.req);
+        Map<SolrDocument,Integer> docIds = new HashMap<SolrDocument, Integer>(results.docList.size());
+        SolrDocumentList solrDocList = engine.getSolrDocumentList(results.docList, rb.req, docIds);
+        Object clusters = engine.cluster(rb.getQuery(), solrDocList, docIds, rb.req);
         rb.rsp.add("clusters", clusters);
       } else {
         log.warn("No engine for: " + name);
@@ -97,6 +110,72 @@ public class ClusteringComponent extends
       }
     }
   }
+  
+  private SearchClusteringEngine getSearchClusteringEngine(ResponseBuilder rb){
+    return searchClusteringEngines.get(getClusteringEngineName(rb));
+  }
+  
+  private String getClusteringEngineName(ResponseBuilder rb){
+    return rb.req.getParams().get(ClusteringParams.ENGINE_NAME, ClusteringEngine.DEFAULT_ENGINE_NAME);
+  }
+
+  @Override
+  public void modifyRequest(ResponseBuilder rb, SearchComponent who, ShardRequest sreq) {
+    SolrParams params = rb.req.getParams();
+    if (!params.getBool(COMPONENT_NAME, false) || !params.getBool(ClusteringParams.USE_SEARCH_RESULTS, false)) {
+      return;
+    }
+    sreq.params.remove(COMPONENT_NAME);
+    if( ( sreq.purpose & ShardRequest.PURPOSE_GET_FIELDS ) != 0 ){
+      String fl = sreq.params.get(CommonParams.FL,"*");
+      // if fl=* then we don't need check
+      if( fl.indexOf( '*' ) >= 0 ) return;
+      Set<String> fields = getSearchClusteringEngine(rb).getFieldsToLoad(rb.req);
+      if( fields == null || fields.size() == 0 ) return;
+      StringBuilder sb = new StringBuilder();
+      String[] flparams = fl.split( "[,\\s]+" );
+      Set<String> flParamSet = new HashSet<String>(flparams.length);
+      for( String flparam : flparams ){
+        // no need trim() because of split() by \s+
+        flParamSet.add(flparam);
+      }
+      for( String aFieldToLoad : fields ){
+        if( !flParamSet.contains( aFieldToLoad ) ){
+          sb.append( ',' ).append( aFieldToLoad );
+        }
+      }
+      if( sb.length() > 0 ){
+        sreq.params.set( CommonParams.FL, fl + sb.toString() );
+      }
+    }
+  }
+
+  @Override
+  public void finishStage(ResponseBuilder rb) {
+    SolrParams params = rb.req.getParams();
+    if (!params.getBool(COMPONENT_NAME, false) || !params.getBool(ClusteringParams.USE_SEARCH_RESULTS, false)) {
+      return;
+    }
+    if (rb.stage == ResponseBuilder.STAGE_GET_FIELDS) {
+      SearchClusteringEngine engine = getSearchClusteringEngine(rb);
+      if (engine != null) {
+        SolrDocumentList solrDocList = (SolrDocumentList)rb.rsp.getValues().get("response");
+        // TODO: Currently, docIds is set to null in distributed environment.
+        // This causes CarrotParams.PRODUCE_SUMMARY doesn't work.
+        // To work CarrotParams.PRODUCE_SUMMARY under distributed mode, we can choose either one of:
+        // (a) In each shard, ClusteringComponent produces summary and finishStage()
+        //     merges these summaries.
+        // (b) Adding doHighlighting(SolrDocumentList, ...) method to SolrHighlighter and
+        //     making SolrHighlighter uses "external text" rather than stored values to produce snippets.
+        Map<SolrDocument,Integer> docIds = null;
+        Object clusters = engine.cluster(rb.getQuery(), solrDocList, docIds, rb.req);
+        rb.rsp.add("clusters", clusters);
+      } else {
+        String name = getClusteringEngineName(rb);
+        log.warn("No engine for: " + name);
+      }
+    }
+  }
 
   @Override
   @SuppressWarnings("unchecked")
@@ -174,17 +253,17 @@ public class ClusteringComponent extends
 
   @Override
   public String getVersion() {
-    return "$Revision:$";
+    return "$Revision$";
   }
 
   @Override
   public String getSourceId() {
-    return "$Id:$";
+    return "$Id$";
   }
 
   @Override
   public String getSource() {
-    return "$URL:$";
+    return "$URL$";
   }
 
 }

Modified: lucene/dev/branches/branch_3x/solr/contrib/clustering/src/main/java/org/apache/solr/handler/clustering/SearchClusteringEngine.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/solr/contrib/clustering/src/main/java/org/apache/solr/handler/clustering/SearchClusteringEngine.java?rev=1051725&r1=1051724&r2=1051725&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/solr/contrib/clustering/src/main/java/org/apache/solr/handler/clustering/SearchClusteringEngine.java (original)
+++ lucene/dev/branches/branch_3x/solr/contrib/clustering/src/main/java/org/apache/solr/handler/clustering/SearchClusteringEngine.java Wed Dec 22 01:13:39 2010
@@ -16,12 +16,16 @@ package org.apache.solr.handler.clusteri
  * limitations under the License.
  */
 
-import org.apache.solr.common.util.NamedList;
-import org.apache.solr.common.params.SolrParams;
-import org.apache.solr.core.SolrCore;
-import org.apache.solr.search.DocList;
-import org.apache.solr.request.SolrQueryRequest;
+import java.io.IOException;
+import java.util.Map;
+import java.util.Set;
+
 import org.apache.lucene.search.Query;
+import org.apache.solr.common.SolrDocument;
+import org.apache.solr.common.SolrDocumentList;
+import org.apache.solr.request.SolrQueryRequest;
+import org.apache.solr.search.DocList;
+import org.apache.solr.util.SolrPluginUtils;
 
 
 /**
@@ -30,8 +34,27 @@ import org.apache.lucene.search.Query;
  **/
 public abstract class SearchClusteringEngine extends ClusteringEngine {
 
-
+  @Deprecated
   public abstract Object cluster(Query query, DocList docList, SolrQueryRequest sreq);
 
-
+  // TODO: need DocList, too?
+  public abstract Object cluster(Query query, SolrDocumentList solrDocumentList,
+      Map<SolrDocument,Integer> docIds, SolrQueryRequest sreq);
+
+  /**
+   * Returns the set of field names to load.
+   * Concrete classes can override this method if needed.
+   * Default implementation returns null, that is, all stored fields are loaded.
+   * @param sreq
+   * @return set of field names to load
+   */
+  protected Set<String> getFieldsToLoad(SolrQueryRequest sreq){
+    return null;
+  }
+
+  public SolrDocumentList getSolrDocumentList(DocList docList, SolrQueryRequest sreq,
+      Map<SolrDocument, Integer> docIds) throws IOException{
+    return SolrPluginUtils.docListToSolrDocumentList(
+        docList, sreq.getSearcher(), getFieldsToLoad(sreq), docIds);
+  }
 }

Modified: lucene/dev/branches/branch_3x/solr/contrib/clustering/src/main/java/org/apache/solr/handler/clustering/carrot2/CarrotClusteringEngine.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/solr/contrib/clustering/src/main/java/org/apache/solr/handler/clustering/carrot2/CarrotClusteringEngine.java?rev=1051725&r1=1051724&r2=1051725&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/solr/contrib/clustering/src/main/java/org/apache/solr/handler/clustering/carrot2/CarrotClusteringEngine.java (original)
+++ lucene/dev/branches/branch_3x/solr/contrib/clustering/src/main/java/org/apache/solr/handler/clustering/carrot2/CarrotClusteringEngine.java Wed Dec 22 01:13:39 2010
@@ -18,25 +18,38 @@ package org.apache.solr.handler.clusteri
  */
 
 import java.io.IOException;
-import java.util.*;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
 
 import org.apache.commons.lang.StringUtils;
-import org.apache.lucene.document.FieldSelector;
-import org.apache.lucene.document.SetBasedFieldSelector;
 import org.apache.lucene.search.Query;
+import org.apache.solr.common.SolrDocument;
+import org.apache.solr.common.SolrDocumentList;
+import org.apache.solr.common.SolrException;
+import org.apache.solr.common.SolrException.ErrorCode;
 import org.apache.solr.common.params.HighlightParams;
 import org.apache.solr.common.params.SolrParams;
 import org.apache.solr.common.util.NamedList;
 import org.apache.solr.common.util.SimpleOrderedMap;
-import org.apache.solr.common.SolrException;
 import org.apache.solr.core.SolrCore;
 import org.apache.solr.handler.clustering.SearchClusteringEngine;
 import org.apache.solr.highlight.SolrHighlighter;
 import org.apache.solr.request.LocalSolrQueryRequest;
 import org.apache.solr.request.SolrQueryRequest;
-import org.apache.solr.search.*;
-import org.apache.solr.util.RefCounted;
-import org.carrot2.core.*;
+import org.apache.solr.search.DocList;
+import org.apache.solr.search.DocSlice;
+import org.apache.solr.search.SolrIndexSearcher;
+import org.apache.solr.util.SolrPluginUtils;
+import org.carrot2.core.Cluster;
+import org.carrot2.core.Controller;
+import org.carrot2.core.ControllerFactory;
+import org.carrot2.core.Document;
+import org.carrot2.core.IClusteringAlgorithm;
 import org.carrot2.core.attribute.AttributeNames;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -63,11 +76,25 @@ public class CarrotClusteringEngine exte
 
   private String idFieldName;
 
+  @Deprecated
   public Object cluster(Query query, DocList docList, SolrQueryRequest sreq) {
+    SolrIndexSearcher searcher = sreq.getSearcher();
+    SolrDocumentList solrDocList;
+    try {
+      Map<SolrDocument,Integer> docIds = new HashMap<SolrDocument, Integer>(docList.size());
+      solrDocList = SolrPluginUtils.docListToSolrDocumentList( docList, searcher, getFieldsToLoad(sreq), docIds );
+      return cluster(query, solrDocList, docIds, sreq);
+    } catch (IOException e) {
+      throw new SolrException(ErrorCode.SERVER_ERROR, e);
+    }
+  }
+
+  public Object cluster(Query query, SolrDocumentList solrDocList,
+      Map<SolrDocument, Integer> docIds, SolrQueryRequest sreq) {
     try {
       // Prepare attributes for Carrot2 clustering call
       Map<String, Object> attributes = new HashMap<String, Object>();
-      List<Document> documents = getDocuments(docList, query, sreq);
+      List<Document> documents = getDocuments(solrDocList, docIds, query, sreq);
       attributes.put(AttributeNames.DOCUMENTS, documents);
       attributes.put(AttributeNames.QUERY, query.toString());
 
@@ -79,7 +106,7 @@ public class CarrotClusteringEngine exte
               clusteringAlgorithmClass).getClusters(), sreq.getParams());
     } catch (Exception e) {
       log.error("Carrot2 clustering failed", e);
-      throw new RuntimeException(e);
+      throw new SolrException(ErrorCode.SERVER_ERROR, "Carrot2 clustering failed", e);
     }
   }
 
@@ -114,31 +141,36 @@ public class CarrotClusteringEngine exte
     return result;
   }
 
-  /**
-   * Prepares Carrot2 documents for clustering.
-   */
-  private List<Document> getDocuments(DocList docList,
-                                      Query query, final SolrQueryRequest sreq) throws IOException {
-    SolrHighlighter highlighter = null;
+  @Override
+  protected Set<String> getFieldsToLoad(SolrQueryRequest sreq){
     SolrParams solrParams = sreq.getParams();
-    SolrCore core = sreq.getCore();
 
     // Names of fields to deliver content for clustering
     String urlField = solrParams.get(CarrotParams.URL_FIELD_NAME, "url");
     String titleField = solrParams.get(CarrotParams.TITLE_FIELD_NAME, "title");
-    String snippetField = solrParams.get(CarrotParams.SNIPPET_FIELD_NAME,
-            titleField);
+    String snippetField = solrParams.get(CarrotParams.SNIPPET_FIELD_NAME, titleField);
     if (StringUtils.isBlank(snippetField)) {
       throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, CarrotParams.SNIPPET_FIELD_NAME
               + " must not be blank.");
     }
-    Set<String> fieldsToLoad = Sets.newHashSet(urlField, titleField,
-            snippetField, idFieldName);
+    return Sets.newHashSet(urlField, titleField, snippetField, idFieldName);
+  }
+  
+  /**
+   * Prepares Carrot2 documents for clustering.
+   */
+  private List<Document> getDocuments(SolrDocumentList solrDocList, Map<SolrDocument, Integer> docIds,
+                                      Query query, final SolrQueryRequest sreq) throws IOException {
+    SolrHighlighter highlighter = null;
+    SolrParams solrParams = sreq.getParams();
+    SolrCore core = sreq.getCore();
 
+    String urlField = solrParams.get(CarrotParams.URL_FIELD_NAME, "url");
+    String titleField = solrParams.get(CarrotParams.TITLE_FIELD_NAME, "title");
+    String snippetField = solrParams.get(CarrotParams.SNIPPET_FIELD_NAME, titleField);
+    
     // Get the documents
-    DocIterator docsIter = docList.iterator();
-    boolean produceSummary = solrParams.getBool(CarrotParams.PRODUCE_SUMMARY,
-            false);
+    boolean produceSummary = solrParams.getBool(CarrotParams.PRODUCE_SUMMARY, false);
 
     SolrQueryRequest req = null;
     String[] snippetFieldAry = null;
@@ -164,20 +196,20 @@ public class CarrotClusteringEngine exte
       }
     }
 
-    SolrIndexSearcher searcher = sreq.getSearcher();
-    List<Document> result = new ArrayList<Document>(docList.size());
+    Iterator<SolrDocument> docsIter = solrDocList.iterator();
+    List<Document> result = new ArrayList<Document>(solrDocList.size());
 
     float[] scores = {1.0f};
     int[] docsHolder = new int[1];
     Query theQuery = query;
 
     while (docsIter.hasNext()) {
-      Integer id = docsIter.next();
-      org.apache.lucene.document.Document doc = searcher.doc(id,
-              fieldsToLoad);
-      String snippet = getValue(doc, snippetField);
-      if (produceSummary == true) {
-        docsHolder[0] = id.intValue();
+      SolrDocument sdoc = docsIter.next();
+      String snippet = getValue(sdoc, snippetField);
+      // TODO: docIds will be null when running distributed search.
+      // See comment in ClusteringComponent#finishStage().
+      if (produceSummary && docIds != null) {
+        docsHolder[0] = docIds.get(sdoc).intValue();
         DocList docAsList = new DocSlice(0, 1, docsHolder, scores, 1, 1.0f);
         NamedList highlights = highlighter.doHighlighting(docAsList, theQuery, req, snippetFieldAry);
         if (highlights != null && highlights.size() == 1) {//should only be one value given our setup
@@ -189,15 +221,16 @@ public class CarrotClusteringEngine exte
           }
         }
       }
-      Document carrotDocument = new Document(getValue(doc, titleField),
-              snippet, doc.get(urlField));
-      carrotDocument.setField("solrId", doc.get(idFieldName));
+      Document carrotDocument = new Document(getValue(sdoc, titleField),
+              snippet, (String)sdoc.getFieldValue(urlField));
+      carrotDocument.setField("solrId", sdoc.getFieldValue(idFieldName));
       result.add(carrotDocument);
     }
 
     return result;
   }
 
+  @Deprecated
   protected String getValue(org.apache.lucene.document.Document doc,
                             String field) {
     StringBuilder result = new StringBuilder();
@@ -211,6 +244,20 @@ public class CarrotClusteringEngine exte
     return result.toString().trim();
   }
 
+  protected String getValue(SolrDocument sdoc, String field) {
+    StringBuilder result = new StringBuilder();
+    Collection<Object> vals = sdoc.getFieldValues(field);
+    if(vals == null) return "";
+    Iterator<Object> ite = vals.iterator();
+    while(ite.hasNext()){
+      // Join multiple values with a period so that Carrot2 does not pick up
+      // phrases that cross field value boundaries (in most cases it would
+      // create useless phrases).
+      result.append((String)ite.next()).append(" . ");
+    }
+    return result.toString().trim();
+  }
+
   private List clustersToNamedList(List<Cluster> carrotClusters,
                                    SolrParams solrParams) {
     List result = new ArrayList();

Modified: lucene/dev/branches/branch_3x/solr/contrib/clustering/src/test/java/org/apache/solr/handler/clustering/carrot2/CarrotClusteringEngineTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/solr/contrib/clustering/src/test/java/org/apache/solr/handler/clustering/carrot2/CarrotClusteringEngineTest.java?rev=1051725&r1=1051724&r2=1051725&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/solr/contrib/clustering/src/test/java/org/apache/solr/handler/clustering/carrot2/CarrotClusteringEngineTest.java (original)
+++ lucene/dev/branches/branch_3x/solr/contrib/clustering/src/test/java/org/apache/solr/handler/clustering/carrot2/CarrotClusteringEngineTest.java Wed Dec 22 01:13:39 2010
@@ -22,6 +22,8 @@ import org.apache.lucene.search.MatchAll
 import org.apache.lucene.search.Query;
 import org.apache.lucene.search.Sort;
 import org.apache.lucene.search.TermQuery;
+import org.apache.solr.common.SolrDocument;
+import org.apache.solr.common.SolrDocumentList;
 import org.apache.solr.common.params.ModifiableSolrParams;
 import org.apache.solr.common.params.SolrParams;
 import org.apache.solr.common.util.NamedList;
@@ -31,11 +33,14 @@ import org.apache.solr.request.LocalSolr
 import org.apache.solr.search.DocList;
 import org.apache.solr.search.SolrIndexSearcher;
 import org.apache.solr.util.RefCounted;
+import org.apache.solr.util.SolrPluginUtils;
 import org.carrot2.util.attribute.AttributeUtils;
 import org.junit.Test;
 
 import java.io.IOException;
+import java.util.HashMap;
 import java.util.List;
+import java.util.Map;
 
 import static org.junit.Assert.*;
 
@@ -133,21 +138,23 @@ public class CarrotClusteringEngineTest 
       docList = searcher.getDocList(query, (Query) null, new Sort(), 0,
               numberOfDocs);
       assertEquals("docList size", expectedNumDocs, docList.matches());
+
+      ModifiableSolrParams solrParams = new ModifiableSolrParams();
+      solrParams.add(CarrotParams.PRODUCE_SUMMARY, "true");
+      solrParams.add(clusteringParams);
+
+      // Perform clustering
+      LocalSolrQueryRequest req = new LocalSolrQueryRequest(h.getCore(), solrParams);
+      Map<SolrDocument,Integer> docIds = new HashMap<SolrDocument, Integer>(docList.size());
+      SolrDocumentList solrDocList = SolrPluginUtils.docListToSolrDocumentList( docList, searcher, engine.getFieldsToLoad(req), docIds );
+      List results = (List)engine.cluster(query, solrDocList, docIds, req);
+      req.close();
+      assertEquals("number of clusters: " + results, expectedNumClusters, results.size());
+      checkClusters(results, false);
+      return results;
     } finally {
       ref.decref();
     }
-
-    ModifiableSolrParams solrParams = new ModifiableSolrParams();
-    solrParams.add(CarrotParams.PRODUCE_SUMMARY, "true");
-    solrParams.add(clusteringParams);
-
-    // Perform clustering
-    LocalSolrQueryRequest req = new LocalSolrQueryRequest(h.getCore(), solrParams);
-    List results = (List) engine.cluster(query, docList, req);
-    req.close();
-    assertEquals("number of clusters: " + results, expectedNumClusters, results.size());
-    checkClusters(results, false);
-    return results;
   }
 
   private void checkClusters(List results, int expectedDocCount,