You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by dw...@apache.org on 2013/09/11 20:45:28 UTC
svn commit: r1521978 - in /lucene/dev/trunk/solr: ./
contrib/clustering/src/java/org/apache/solr/handler/clustering/
contrib/clustering/src/java/org/apache/solr/handler/clustering/carrot2/
contrib/clustering/src/test-files/clustering/solr/collection1/c...
Author: dweiss
Date: Wed Sep 11 18:45:27 2013
New Revision: 1521978
URL: http://svn.apache.org/r1521978
Log:
SOLR-5219: Rewritten selection of the default search and document clustering algorithms.
Added:
lucene/dev/trunk/solr/example/solr/collection1/conf/clustering/carrot2/lingo-attributes.xml
Removed:
lucene/dev/trunk/solr/example/solr/collection1/conf/clustering/carrot2/default-attributes.xml
Modified:
lucene/dev/trunk/solr/CHANGES.txt
lucene/dev/trunk/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/ClusteringComponent.java
lucene/dev/trunk/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/ClusteringEngine.java
lucene/dev/trunk/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/DocumentClusteringEngine.java
lucene/dev/trunk/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/carrot2/CarrotClusteringEngine.java
lucene/dev/trunk/solr/contrib/clustering/src/test-files/clustering/solr/collection1/conf/solrconfig.xml
lucene/dev/trunk/solr/contrib/clustering/src/test/org/apache/solr/handler/clustering/AbstractClusteringTestCase.java
lucene/dev/trunk/solr/contrib/clustering/src/test/org/apache/solr/handler/clustering/ClusteringComponentTest.java
lucene/dev/trunk/solr/contrib/clustering/src/test/org/apache/solr/handler/clustering/MockDocumentClusteringEngine.java
lucene/dev/trunk/solr/contrib/clustering/src/test/org/apache/solr/handler/clustering/carrot2/CarrotClusteringEngineTest.java
lucene/dev/trunk/solr/example/solr/collection1/conf/solrconfig.xml
Modified: lucene/dev/trunk/solr/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/CHANGES.txt?rev=1521978&r1=1521977&r2=1521978&view=diff
==============================================================================
--- lucene/dev/trunk/solr/CHANGES.txt (original)
+++ lucene/dev/trunk/solr/CHANGES.txt Wed Sep 11 18:45:27 2013
@@ -104,6 +104,9 @@ Detailed Change List
New Features
----------------------
+* SOLR-5219: Rewritten selection of the default search and document clustering
+ algorithms. (Dawid Weiss)
+
* SOLR-5202: Support easier overrides of Carrot2 clustering attributes via
XML data sets exported from the Workbench. (Dawid Weiss)
Modified: lucene/dev/trunk/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/ClusteringComponent.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/ClusteringComponent.java?rev=1521978&r1=1521977&r2=1521978&view=diff
==============================================================================
--- lucene/dev/trunk/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/ClusteringComponent.java (original)
+++ lucene/dev/trunk/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/ClusteringComponent.java Wed Sep 11 18:45:27 2013
@@ -16,6 +16,16 @@ package org.apache.solr.handler.clusteri
* limitations under the License.
*/
+import java.io.IOException;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.LinkedHashMap;
+import java.util.Map;
+import java.util.Map.Entry;
+import java.util.Set;
+
+import org.apache.commons.lang.StringUtils;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrDocumentList;
import org.apache.solr.common.params.CommonParams;
@@ -32,41 +42,93 @@ import org.apache.solr.util.plugin.SolrC
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
-import java.io.IOException;
-import java.util.Collections;
-import java.util.HashMap;
-import java.util.HashSet;
-import java.util.Map;
-import java.util.Set;
+import com.google.common.collect.Maps;
/**
- * Provide a plugin for clustering results. Can either be for search results (i.e. via Carrot2) or for
- * clustering documents (i.e. via Mahout)
- * <p/>
- * This engine is experimental. Output from this engine is subject to change in future releases.
- *
- * <pre class="prettyprint" >
- * <searchComponent class="org.apache.solr.handler.clustering.ClusteringComponent" name="clustering">
- * <lst name="engine">
- * <str name="name">default</str>
- * <str name="carrot.algorithm">org.carrot2.clustering.lingo.LingoClusteringAlgorithm</str>
- * </lst>
- * </searchComponent></pre>
+ * Provide a plugin for performing cluster analysis. This can either be applied to
+ * search results (e.g., via <a href="http://project.carrot2.org">Carrot<sup>2</sup></a>) or for
+ * clustering documents (e.g., via <a href="http://mahout.apache.org/">Mahout</a>).
+ * <p>
+ * This engine is experimental. Output from this engine is subject to change in future releases.</p>
+ * <p>
+ * See Solr example for configuration examples.</p>
*/
public class ClusteringComponent extends SearchComponent implements SolrCoreAware {
private transient static Logger log = LoggerFactory.getLogger(ClusteringComponent.class);
- private Map<String, SearchClusteringEngine> searchClusteringEngines = new HashMap<String, SearchClusteringEngine>();
- private Map<String, DocumentClusteringEngine> documentClusteringEngines = new HashMap<String, DocumentClusteringEngine>();
-
/**
* Base name for all component parameters. This name is also used to
* register this component with SearchHandler.
*/
public static final String COMPONENT_NAME = "clustering";
+
+ /**
+ * Declaration-order list of search clustering engines.
+ */
+ private final LinkedHashMap<String, SearchClusteringEngine> searchClusteringEngines = Maps.newLinkedHashMap();
- private NamedList initParams;
+ /**
+ * Declaration-order list of document clustering engines.
+ */
+ private final LinkedHashMap<String, DocumentClusteringEngine> documentClusteringEngines = Maps.newLinkedHashMap();
+
+ /**
+ * An unmodifiable view of {@link #searchClusteringEngines}.
+ */
+ private final Map<String, SearchClusteringEngine> searchClusteringEnginesView = Collections.unmodifiableMap(searchClusteringEngines);
+
+ private NamedList<Object> initParams;
+
+ @Override
+ @SuppressWarnings({"rawtypes", "unchecked"})
+ public void init(NamedList args) {
+ this.initParams = args;
+ super.init(args);
+ }
+
+ @SuppressWarnings("unchecked")
+ @Override
+ public void inform(SolrCore core) {
+ if (initParams != null) {
+ log.info("Initializing Clustering Engines");
+
+ // Our target list of engines, split into search-results and document clustering.
+ SolrResourceLoader loader = core.getResourceLoader();
+
+ for (Map.Entry<String,Object> entry : initParams) {
+ if ("engine".equals(entry.getKey())) {
+ NamedList<Object> engineInitParams = (NamedList<Object>) entry.getValue();
+
+ String engineClassName = StringUtils.defaultIfBlank(
+ (String) engineInitParams.get("classname"),
+ CarrotClusteringEngine.class.getName());
+
+ // Instantiate the clustering engine and split to appropriate map.
+ final ClusteringEngine engine = loader.newInstance(engineClassName, ClusteringEngine.class);
+ final String name = StringUtils.defaultIfBlank(engine.init(engineInitParams, core), "");
+ final ClusteringEngine previousEntry;
+ if (engine instanceof SearchClusteringEngine) {
+ previousEntry = searchClusteringEngines.put(name, (SearchClusteringEngine) engine);
+ } else if (engine instanceof DocumentClusteringEngine) {
+ previousEntry = documentClusteringEngines.put(name, (DocumentClusteringEngine) engine);
+ } else {
+ log.warn("Unknown type of a clustering engine for class: " + engineClassName);
+ continue;
+ }
+ if (previousEntry != null) {
+ log.warn("Duplicate clustering engine component named '" + name + "'.");
+ }
+ }
+ }
+
+ // Set up the default engine key for both types of engines.
+ setupDefaultEngine("search results clustering", searchClusteringEngines);
+ setupDefaultEngine("document clustering", documentClusteringEngines);
+
+ log.info("Finished Initializing Clustering Engines");
+ }
+ }
@Override
public void prepare(ResponseBuilder rb) throws IOException {
@@ -101,9 +163,9 @@ public class ClusteringComponent extends
DocumentClusteringEngine engine = documentClusteringEngines.get(name);
if (engine != null) {
boolean useDocSet = params.getBool(ClusteringParams.USE_DOC_SET, false);
- NamedList nl = null;
+ NamedList<?> nl = null;
- //TODO: This likely needs to be made into a background task that runs in an executor
+ // TODO: This likely needs to be made into a background task that runs in an executor
if (useDocSet == true) {
nl = engine.cluster(rb.getResults().docSet, params);
} else {
@@ -182,76 +244,13 @@ public class ClusteringComponent extends
}
}
- @Override
- @SuppressWarnings("unchecked")
- public void init(NamedList args) {
- super.init(args);
- this.initParams = args;
- }
-
- @Override
- public void inform(SolrCore core) {
- if (initParams != null) {
- log.info("Initializing Clustering Engines");
- boolean searchHasDefault = false;
- boolean documentHasDefault = false;
- for (int i = 0; i < initParams.size(); i++) {
- if (initParams.getName(i).equals("engine")) {
- NamedList engineNL = (NamedList) initParams.getVal(i);
- String className = (String) engineNL.get("classname");
- if (className == null) {
- className = CarrotClusteringEngine.class.getName();
- }
- SolrResourceLoader loader = core.getResourceLoader();
- ClusteringEngine clusterer = loader.newInstance(className, ClusteringEngine.class);
- if (clusterer != null) {
- String name = clusterer.init(engineNL, core);
- if (name != null) {
- boolean isDefault = name.equals(ClusteringEngine.DEFAULT_ENGINE_NAME);
- if (clusterer instanceof SearchClusteringEngine) {
- if (isDefault == true && searchHasDefault == false) {
- searchHasDefault = true;
- } else if (isDefault == true && searchHasDefault == true) {
- throw new RuntimeException("More than one engine is missing name: " + engineNL);
- }
- searchClusteringEngines.put(name, (SearchClusteringEngine) clusterer);
- } else if (clusterer instanceof DocumentClusteringEngine) {
- if (isDefault == true && documentHasDefault == false) {
- searchHasDefault = true;
- } else if (isDefault == true && documentHasDefault == true) {
- throw new RuntimeException("More than one engine is missing name: " + engineNL);
- }
- documentClusteringEngines.put(name, (DocumentClusteringEngine) clusterer);
- }
- } else {
- if (clusterer instanceof SearchClusteringEngine && searchHasDefault == false) {
- searchClusteringEngines.put(ClusteringEngine.DEFAULT_ENGINE_NAME, (SearchClusteringEngine) clusterer);
- searchHasDefault = true;
- } else if (clusterer instanceof DocumentClusteringEngine && documentHasDefault == false) {
- documentClusteringEngines.put(ClusteringEngine.DEFAULT_ENGINE_NAME, (DocumentClusteringEngine) clusterer);
- documentHasDefault = true;
- } else {
- throw new RuntimeException("More than one engine is missing name: " + engineNL);
- }
- }
- }
- }
- }
- log.info("Finished Initializing Clustering Engines");
- }
- }
-
- /*
- * @return Unmodifiable Map of the engines, key is the name from the config, value is the engine
- * */
- public Map<String, SearchClusteringEngine> getSearchClusteringEngines() {
- return Collections.unmodifiableMap(searchClusteringEngines);
+ /**
+ * @return An unmodifiable view of the search clustering engines; exposed for tests.
+ */
+ Map<String, SearchClusteringEngine> getSearchClusteringEngines() {
+ return searchClusteringEnginesView;
}
- // ///////////////////////////////////////////
- // / SolrInfoMBean
- // //////////////////////////////////////////
-
@Override
public String getDescription() {
return "A Clustering component";
@@ -261,4 +260,23 @@ public class ClusteringComponent extends
public String getSource() {
return "$URL$";
}
+
+ /**
+ * Setup the default clustering engine.
+ * @see "https://issues.apache.org/jira/browse/SOLR-5219"
+ */
+ private static <T extends ClusteringEngine> void setupDefaultEngine(String type, LinkedHashMap<String,T> map) {
+ // If there's already a default algorithm, leave it as is.
+ if (map.containsKey(ClusteringEngine.DEFAULT_ENGINE_NAME)) {
+ return;
+ }
+
+ // If there's no default algorithm, and there are any algorithms available,
+ // the first definition becomes the default algorithm.
+ if (!map.isEmpty()) {
+ Entry<String,T> first = map.entrySet().iterator().next();
+ map.put(ClusteringEngine.DEFAULT_ENGINE_NAME, first.getValue());
+ log.info("Default engine for " + type + ": " + first.getKey());
+ }
+ }
}
Modified: lucene/dev/trunk/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/ClusteringEngine.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/ClusteringEngine.java?rev=1521978&r1=1521977&r2=1521978&view=diff
==============================================================================
--- lucene/dev/trunk/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/ClusteringEngine.java (original)
+++ lucene/dev/trunk/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/ClusteringEngine.java Wed Sep 11 18:45:27 2013
@@ -28,7 +28,7 @@ public class ClusteringEngine {
public static final String ENGINE_NAME = "name";
public static final String DEFAULT_ENGINE_NAME = "default";
- public String init(NamedList config, SolrCore core) {
+ public String init(NamedList<?> config, SolrCore core) {
name = (String) config.get(ENGINE_NAME);
return name;
}
Modified: lucene/dev/trunk/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/DocumentClusteringEngine.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/DocumentClusteringEngine.java?rev=1521978&r1=1521977&r2=1521978&view=diff
==============================================================================
--- lucene/dev/trunk/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/DocumentClusteringEngine.java (original)
+++ lucene/dev/trunk/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/DocumentClusteringEngine.java Wed Sep 11 18:45:27 2013
@@ -34,7 +34,7 @@ public abstract class DocumentClustering
* @param solrParams The params controlling clustering
* @return The clustering results
*/
- public abstract NamedList cluster(SolrParams solrParams);
+ public abstract NamedList<?> cluster(SolrParams solrParams);
/**
* Experimental. Subject to change before the next release
@@ -44,7 +44,7 @@ public abstract class DocumentClustering
* @param solrParams The params controlling the clustering
* @return The results.
*/
- public abstract NamedList cluster(DocSet docs, SolrParams solrParams);
+ public abstract NamedList<?> cluster(DocSet docs, SolrParams solrParams);
}
Modified: lucene/dev/trunk/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/carrot2/CarrotClusteringEngine.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/carrot2/CarrotClusteringEngine.java?rev=1521978&r1=1521977&r2=1521978&view=diff
==============================================================================
--- lucene/dev/trunk/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/carrot2/CarrotClusteringEngine.java (original)
+++ lucene/dev/trunk/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/carrot2/CarrotClusteringEngine.java Wed Sep 11 18:45:27 2013
@@ -276,7 +276,7 @@ public class CarrotClusteringEngine exte
}
@Override
- @SuppressWarnings({ "unchecked", "rawtypes" })
+ @SuppressWarnings("rawtypes")
public String init(NamedList config, final SolrCore core) {
this.core = core;
@@ -569,6 +569,13 @@ public class CarrotClusteringEngine exte
}
/**
+ * Expose clustering algorithm class for tests.
+ */
+ Class<? extends IClusteringAlgorithm> getClusteringAlgorithmClass() {
+ return clusteringAlgorithmClass;
+ }
+
+ /**
* Prepares a map of Solr field names (keys) to the corresponding Carrot2
* custom field names.
*/
Modified: lucene/dev/trunk/solr/contrib/clustering/src/test-files/clustering/solr/collection1/conf/solrconfig.xml
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/contrib/clustering/src/test-files/clustering/solr/collection1/conf/solrconfig.xml?rev=1521978&r1=1521977&r2=1521978&view=diff
==============================================================================
--- lucene/dev/trunk/solr/contrib/clustering/src/test-files/clustering/solr/collection1/conf/solrconfig.xml (original)
+++ lucene/dev/trunk/solr/contrib/clustering/src/test-files/clustering/solr/collection1/conf/solrconfig.xml Wed Sep 11 18:45:27 2013
@@ -373,7 +373,47 @@
</searchComponent>
+ <searchComponent class="org.apache.solr.handler.clustering.ClusteringComponent" name="clustering-name-default">
+ <lst name="engine">
+ <str name="name">stc</str>
+ <str name="carrot.algorithm">org.carrot2.clustering.stc.STCClusteringAlgorithm</str>
+ </lst>
+ <lst name="engine">
+ <str name="name">default</str>
+ <str name="carrot.algorithm">org.carrot2.clustering.lingo.LingoClusteringAlgorithm</str>
+ </lst>
+ <lst name="engine">
+ <str name="name">mock</str>
+ <str name="carrot.algorithm">org.apache.solr.handler.clustering.carrot2.MockClusteringAlgorithm</str>
+ </lst>
+ </searchComponent>
+
+ <searchComponent class="org.apache.solr.handler.clustering.ClusteringComponent" name="clustering-name-decl-order">
+ <lst name="engine">
+ <str name="name">lingo</str>
+ <str name="carrot.algorithm">org.carrot2.clustering.lingo.LingoClusteringAlgorithm</str>
+ </lst>
+ <lst name="engine">
+ <str name="name">stc</str>
+ <str name="carrot.algorithm">org.carrot2.clustering.stc.STCClusteringAlgorithm</str>
+ </lst>
+ <lst name="engine">
+ <str name="name">mock</str>
+ <str name="carrot.algorithm">org.apache.solr.handler.clustering.carrot2.MockClusteringAlgorithm</str>
+ </lst>
+ </searchComponent>
+ <searchComponent class="org.apache.solr.handler.clustering.ClusteringComponent" name="clustering-name-dups">
+ <lst name="engine">
+ <str name="carrot.algorithm">org.carrot2.clustering.lingo.LingoClusteringAlgorithm</str>
+ </lst>
+ <lst name="engine">
+ <str name="carrot.algorithm">org.carrot2.clustering.stc.STCClusteringAlgorithm</str>
+ </lst>
+ <lst name="engine">
+ <str name="carrot.algorithm">org.apache.solr.handler.clustering.carrot2.MockClusteringAlgorithm</str>
+ </lst>
+ </searchComponent>
<!-- Update request handler.
Modified: lucene/dev/trunk/solr/contrib/clustering/src/test/org/apache/solr/handler/clustering/AbstractClusteringTestCase.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/contrib/clustering/src/test/org/apache/solr/handler/clustering/AbstractClusteringTestCase.java?rev=1521978&r1=1521977&r2=1521978&view=diff
==============================================================================
--- lucene/dev/trunk/solr/contrib/clustering/src/test/org/apache/solr/handler/clustering/AbstractClusteringTestCase.java (original)
+++ lucene/dev/trunk/solr/contrib/clustering/src/test/org/apache/solr/handler/clustering/AbstractClusteringTestCase.java Wed Sep 11 18:45:27 2013
@@ -16,6 +16,8 @@ package org.apache.solr.handler.clusteri
* limitations under the License.
*/
+import java.util.Map;
+
import org.apache.solr.SolrTestCaseJ4;
import org.apache.solr.common.SolrInputDocument;
import org.junit.BeforeClass;
@@ -82,10 +84,16 @@ public abstract class AbstractClustering
docWithCustomFields.addField("heading", "first");
docWithCustomFields.addField("heading", "second");
assertNull(h.validateUpdate(adoc(docWithCustomFields)));
-
assertNull(h.validateUpdate(commit()));
}
+ /**
+ * Expose package-scope methods from {@link ClusteringComponent} to tests.
+ */
+ protected final Map<String,SearchClusteringEngine> getSearchClusteringEngines(ClusteringComponent comp) {
+ return comp.getSearchClusteringEngines();
+ }
+
final static String[][] DOCUMENTS = new String[][]{
{"http://en.wikipedia.org/wiki/Data_mining",
"Data Mining - Wikipedia",
Modified: lucene/dev/trunk/solr/contrib/clustering/src/test/org/apache/solr/handler/clustering/ClusteringComponentTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/contrib/clustering/src/test/org/apache/solr/handler/clustering/ClusteringComponentTest.java?rev=1521978&r1=1521977&r2=1521978&view=diff
==============================================================================
--- lucene/dev/trunk/solr/contrib/clustering/src/test/org/apache/solr/handler/clustering/ClusteringComponentTest.java (original)
+++ lucene/dev/trunk/solr/contrib/clustering/src/test/org/apache/solr/handler/clustering/ClusteringComponentTest.java Wed Sep 11 18:45:27 2013
@@ -55,7 +55,7 @@ public class ClusteringComponentTest ext
rsp.add("responseHeader", new SimpleOrderedMap<Object>());
SolrQueryRequest req = new LocalSolrQueryRequest(core, params);
handler.handleRequest(req, rsp);
- NamedList values = rsp.getValues();
+ NamedList<?> values = rsp.getValues();
Object clusters = values.get("clusters");
//System.out.println("Clusters: " + clusters);
assertTrue("clusters is null and it shouldn't be", clusters != null);
Modified: lucene/dev/trunk/solr/contrib/clustering/src/test/org/apache/solr/handler/clustering/MockDocumentClusteringEngine.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/contrib/clustering/src/test/org/apache/solr/handler/clustering/MockDocumentClusteringEngine.java?rev=1521978&r1=1521977&r2=1521978&view=diff
==============================================================================
--- lucene/dev/trunk/solr/contrib/clustering/src/test/org/apache/solr/handler/clustering/MockDocumentClusteringEngine.java (original)
+++ lucene/dev/trunk/solr/contrib/clustering/src/test/org/apache/solr/handler/clustering/MockDocumentClusteringEngine.java Wed Sep 11 18:45:27 2013
@@ -26,14 +26,12 @@ import org.apache.solr.search.DocSet;
**/
public class MockDocumentClusteringEngine extends DocumentClusteringEngine {
@Override
- public NamedList cluster(DocSet docs, SolrParams solrParams) {
- NamedList result = new NamedList();
- return result;
+ public NamedList<?> cluster(DocSet docs, SolrParams solrParams) {
+ return new NamedList<Object>();
}
@Override
- public NamedList cluster(SolrParams solrParams) {
- NamedList result = new NamedList();
- return result;
+ public NamedList<?> cluster(SolrParams solrParams) {
+ return new NamedList<Object>();
}
}
Modified: lucene/dev/trunk/solr/contrib/clustering/src/test/org/apache/solr/handler/clustering/carrot2/CarrotClusteringEngineTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/contrib/clustering/src/test/org/apache/solr/handler/clustering/carrot2/CarrotClusteringEngineTest.java?rev=1521978&r1=1521977&r2=1521978&view=diff
==============================================================================
--- lucene/dev/trunk/solr/contrib/clustering/src/test/org/apache/solr/handler/clustering/carrot2/CarrotClusteringEngineTest.java (original)
+++ lucene/dev/trunk/solr/contrib/clustering/src/test/org/apache/solr/handler/clustering/carrot2/CarrotClusteringEngineTest.java Wed Sep 11 18:45:27 2013
@@ -34,16 +34,20 @@ import org.apache.solr.common.params.Sol
import org.apache.solr.common.util.NamedList;
import org.apache.solr.handler.clustering.AbstractClusteringTestCase;
import org.apache.solr.handler.clustering.ClusteringComponent;
+import org.apache.solr.handler.clustering.ClusteringEngine;
+import org.apache.solr.handler.clustering.SearchClusteringEngine;
import org.apache.solr.request.LocalSolrQueryRequest;
import org.apache.solr.search.DocList;
import org.apache.solr.search.SolrIndexSearcher;
import org.apache.solr.util.RefCounted;
import org.apache.solr.util.SolrPluginUtils;
+import org.carrot2.clustering.lingo.LingoClusteringAlgorithm;
import org.carrot2.core.LanguageCode;
import org.carrot2.util.attribute.AttributeUtils;
import org.junit.Test;
import com.google.common.collect.ImmutableList;
+import com.google.common.collect.Lists;
/**
*
@@ -393,12 +397,48 @@ public class CarrotClusteringEngineTest
assertTrue("First token", labels.get(0).contains("titletitle"));
}
+ @Test
+ public void testDefaultEngineOrder() throws Exception {
+ ClusteringComponent comp = (ClusteringComponent) h.getCore().getSearchComponent("clustering-name-default");
+ Map<String,SearchClusteringEngine> engines = getSearchClusteringEngines(comp);
+ assertEquals(
+ Lists.newArrayList("stc", "default", "mock"),
+ Lists.newArrayList(engines.keySet()));
+ assertEquals(
+ LingoClusteringAlgorithm.class,
+ ((CarrotClusteringEngine) engines.get(ClusteringEngine.DEFAULT_ENGINE_NAME)).getClusteringAlgorithmClass());
+ }
+
+ @Test
+ public void testDeclarationEngineOrder() throws Exception {
+ ClusteringComponent comp = (ClusteringComponent) h.getCore().getSearchComponent("clustering-name-decl-order");
+ Map<String,SearchClusteringEngine> engines = getSearchClusteringEngines(comp);
+ assertEquals(
+ Lists.newArrayList("lingo", "stc", "mock", "default"),
+ Lists.newArrayList(engines.keySet()));
+ assertEquals(
+ LingoClusteringAlgorithm.class,
+ ((CarrotClusteringEngine) engines.get(ClusteringEngine.DEFAULT_ENGINE_NAME)).getClusteringAlgorithmClass());
+ }
+
+ @Test
+ public void testDeclarationNameDuplicates() throws Exception {
+ ClusteringComponent comp = (ClusteringComponent) h.getCore().getSearchComponent("clustering-name-dups");
+ Map<String,SearchClusteringEngine> engines = getSearchClusteringEngines(comp);
+ assertEquals(
+ Lists.newArrayList("", "default"),
+ Lists.newArrayList(engines.keySet()));
+ assertEquals(
+ MockClusteringAlgorithm.class,
+ ((CarrotClusteringEngine) engines.get(ClusteringEngine.DEFAULT_ENGINE_NAME)).getClusteringAlgorithmClass());
+ }
+
private CarrotClusteringEngine getClusteringEngine(String engineName) {
ClusteringComponent comp = (ClusteringComponent) h.getCore()
.getSearchComponent("clustering");
assertNotNull("clustering component should not be null", comp);
- CarrotClusteringEngine engine = (CarrotClusteringEngine) comp
- .getSearchClusteringEngines().get(engineName);
+ CarrotClusteringEngine engine =
+ (CarrotClusteringEngine) getSearchClusteringEngines(comp).get(engineName);
assertNotNull("clustering engine for name: " + engineName
+ " should not be null", engine);
return engine;
Added: lucene/dev/trunk/solr/example/solr/collection1/conf/clustering/carrot2/lingo-attributes.xml
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/example/solr/collection1/conf/clustering/carrot2/lingo-attributes.xml?rev=1521978&view=auto
==============================================================================
--- lucene/dev/trunk/solr/example/solr/collection1/conf/clustering/carrot2/lingo-attributes.xml (added)
+++ lucene/dev/trunk/solr/example/solr/collection1/conf/clustering/carrot2/lingo-attributes.xml Wed Sep 11 18:45:27 2013
@@ -0,0 +1,24 @@
+<!--
+ Default configuration for the Lingo clustering algorithm.
+
+ This file can be loaded (and saved) by Carrot2 Workbench.
+ http://project.carrot2.org/download.html
+-->
+<attribute-sets default="attributes">
+ <attribute-set id="attributes">
+ <value-set>
+ <label>attributes</label>
+ <!--
+ The language to assume for clustered documents.
+ For a list of allowed values, see:
+ http://download.carrot2.org/stable/manual/#section.attribute.lingo.MultilingualClustering.defaultLanguage
+ -->
+ <attribute key="MultilingualClustering.defaultLanguage">
+ <value type="org.carrot2.core.LanguageCode" value="ENGLISH"/>
+ </attribute>
+ <attribute key="LingoClusteringAlgorithm.desiredClusterCountBase">
+ <value type="java.lang.Integer" value="20"/>
+ </attribute>
+ </value-set>
+ </attribute-set>
+</attribute-sets>
\ No newline at end of file
Modified: lucene/dev/trunk/solr/example/solr/collection1/conf/solrconfig.xml
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/example/solr/collection1/conf/solrconfig.xml?rev=1521978&r1=1521977&r2=1521978&view=diff
==============================================================================
--- lucene/dev/trunk/solr/example/solr/collection1/conf/solrconfig.xml (original)
+++ lucene/dev/trunk/solr/example/solr/collection1/conf/solrconfig.xml Wed Sep 11 18:45:27 2013
@@ -1390,7 +1390,7 @@
"default" (and it becomes the default one for the search component).
-->
<lst name="engine">
- <str name="name">default</str>
+ <str name="name">lingo</str>
<!-- Class name of a clustering algorithm compatible with the Carrot2 framework.
@@ -1447,7 +1447,6 @@
class="solr.SearchHandler">
<lst name="defaults">
<bool name="clustering">true</bool>
- <str name="clustering.engine">default</str>
<bool name="clustering.results">true</bool>
<!-- Field name with the logical "title" of each document (optional) -->
<str name="carrot.title">name</str>