You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by dw...@apache.org on 2013/09/06 22:02:58 UTC

svn commit: r1520677 - in /lucene/dev/trunk/solr: contrib/clustering/src/java/org/apache/solr/handler/clustering/ contrib/clustering/src/java/org/apache/solr/handler/clustering/carrot2/ contrib/clustering/src/test-files/clustering/solr/collection1/conf...

Author: dweiss
Date: Fri Sep  6 20:02:57 2013
New Revision: 1520677

URL: http://svn.apache.org/r1520677
Log:
SOLR-5202: Support easier overrides of Carrot2 clustering attributes via XML data sets exported from the Workbench. Polished clustering configuration examples.

Added:
    lucene/dev/trunk/solr/contrib/clustering/src/test-files/clustering/solr/collection1/conf/clustering/carrot2/mock-external-attrs-attributes.xml
    lucene/dev/trunk/solr/example/solr/collection1/conf/clustering/
    lucene/dev/trunk/solr/example/solr/collection1/conf/clustering/carrot2/
    lucene/dev/trunk/solr/example/solr/collection1/conf/clustering/carrot2/default-attributes.xml
    lucene/dev/trunk/solr/example/solr/collection1/conf/clustering/carrot2/kmeans-attributes.xml
    lucene/dev/trunk/solr/example/solr/collection1/conf/clustering/carrot2/stc-attributes.xml
Modified:
    lucene/dev/trunk/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/ClusteringComponent.java
    lucene/dev/trunk/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/ClusteringEngine.java
    lucene/dev/trunk/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/carrot2/CarrotClusteringEngine.java
    lucene/dev/trunk/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/carrot2/CarrotParams.java
    lucene/dev/trunk/solr/contrib/clustering/src/test-files/clustering/solr/collection1/conf/solrconfig.xml
    lucene/dev/trunk/solr/contrib/clustering/src/test/org/apache/solr/handler/clustering/carrot2/CarrotClusteringEngineTest.java
    lucene/dev/trunk/solr/contrib/clustering/src/test/org/apache/solr/handler/clustering/carrot2/MockClusteringAlgorithm.java
    lucene/dev/trunk/solr/example/example-schemaless/solr/collection1/conf/solrconfig.xml
    lucene/dev/trunk/solr/example/solr/collection1/conf/solrconfig.xml

Modified: lucene/dev/trunk/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/ClusteringComponent.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/ClusteringComponent.java?rev=1520677&r1=1520676&r2=1520677&view=diff
==============================================================================
--- lucene/dev/trunk/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/ClusteringComponent.java (original)
+++ lucene/dev/trunk/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/ClusteringComponent.java Fri Sep  6 20:02:57 2013
@@ -59,11 +59,13 @@ public class ClusteringComponent extends
 
   private Map<String, SearchClusteringEngine> searchClusteringEngines = new HashMap<String, SearchClusteringEngine>();
   private Map<String, DocumentClusteringEngine> documentClusteringEngines = new HashMap<String, DocumentClusteringEngine>();
+
   /**
-   * Base name for all spell checker query parameters. This name is also used to
+   * Base name for all component parameters. This name is also used to
    * register this component with SearchHandler.
    */
   public static final String COMPONENT_NAME = "clustering";
+  
   private NamedList initParams;
 
   @Override

Modified: lucene/dev/trunk/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/ClusteringEngine.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/ClusteringEngine.java?rev=1520677&r1=1520676&r2=1520677&view=diff
==============================================================================
--- lucene/dev/trunk/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/ClusteringEngine.java (original)
+++ lucene/dev/trunk/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/ClusteringEngine.java Fri Sep  6 20:02:57 2013
@@ -30,7 +30,6 @@ public class ClusteringEngine {
 
   public String init(NamedList config, SolrCore core) {
     name = (String) config.get(ENGINE_NAME);
-
     return name;
   }
 

Modified: lucene/dev/trunk/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/carrot2/CarrotClusteringEngine.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/carrot2/CarrotClusteringEngine.java?rev=1520677&r1=1520676&r2=1520677&view=diff
==============================================================================
--- lucene/dev/trunk/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/carrot2/CarrotClusteringEngine.java (original)
+++ lucene/dev/trunk/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/carrot2/CarrotClusteringEngine.java Fri Sep  6 20:02:57 2013
@@ -46,6 +46,7 @@ import org.apache.solr.common.util.Named
 import org.apache.solr.common.util.SimpleOrderedMap;
 import org.apache.solr.core.SolrCore;
 import org.apache.solr.core.SolrResourceLoader;
+import org.apache.solr.handler.clustering.ClusteringEngine;
 import org.apache.solr.handler.clustering.SearchClusteringEngine;
 import org.apache.solr.handler.component.HighlightComponent;
 import org.apache.solr.highlight.SolrHighlighter;
@@ -66,6 +67,8 @@ import org.carrot2.core.attribute.Attrib
 import org.carrot2.text.linguistic.DefaultLexicalDataFactoryDescriptor;
 import org.carrot2.text.preprocessing.pipeline.BasicPreprocessingPipelineDescriptor;
 import org.carrot2.text.preprocessing.pipeline.BasicPreprocessingPipelineDescriptor.AttributeBuilder;
+import org.carrot2.util.attribute.AttributeValueSet;
+import org.carrot2.util.attribute.AttributeValueSets;
 import org.carrot2.util.resource.ClassLoaderLocator;
 import org.carrot2.util.resource.IResource;
 import org.carrot2.util.resource.IResourceLocator;
@@ -73,11 +76,11 @@ import org.carrot2.util.resource.Resourc
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
+import com.google.common.base.Objects;
+import com.google.common.base.Strings;
 import com.google.common.collect.Lists;
 import com.google.common.collect.Maps;
 import com.google.common.collect.Sets;
-import com.google.common.io.Closeables;
-import com.google.common.io.Closer;
 
 /**
  * Search results clustering engine based on Carrot2 clustering algorithms.
@@ -122,8 +125,19 @@ public class CarrotClusteringEngine exte
 
     public SolrResourceLocator(SolrCore core, SolrParams initParams) {
       resourceLoader = core.getResourceLoader();
-      carrot2ResourcesDir = initParams.get(
-          CarrotParams.LEXICAL_RESOURCES_DIR, CARROT_RESOURCES_PREFIX);
+      
+      @SuppressWarnings("deprecation")
+      String lexicalResourcesDir = initParams.get(CarrotParams.LEXICAL_RESOURCES_DIR);
+      String resourcesDir = initParams.get(CarrotParams.RESOURCES_DIR);
+      carrot2ResourcesDir = firstNonNull(resourcesDir, lexicalResourcesDir, CARROT_RESOURCES_PREFIX);
+    }
+
+    @SuppressWarnings("unchecked")
+    public static <T> T firstNonNull(T... args) {
+      for (T t : args) {
+        if (t != null) return t;
+      }
+      throw new NullPointerException("At least one element has to be non-null.");
     }
 
     @Override
@@ -269,8 +283,52 @@ public class CarrotClusteringEngine exte
     String result = super.init(config, core);
     final SolrParams initParams = SolrParams.toSolrParams(config);
 
-    // Initialize Carrot2 controller. Pass initialization attributes, if any.
+    // Initialization attributes for Carrot2 controller.
     HashMap<String, Object> initAttributes = new HashMap<String, Object>();
+
+    // Customize Carrot2's resource lookup to first look for resources
+    // using Solr's resource loader. If that fails, try loading from the classpath.
+    ResourceLookup resourceLookup = new ResourceLookup(
+      // Solr-specific resource loading.
+      new SolrResourceLocator(core, initParams),
+      // Using the class loader directly because this time we want to omit the prefix
+      new ClassLoaderLocator(core.getResourceLoader().getClassLoader()));
+
+    DefaultLexicalDataFactoryDescriptor.attributeBuilder(initAttributes)
+      .resourceLookup(resourceLookup);
+
+    // Load Carrot2-Workbench exported attribute XMLs based on the 'name' attribute
+    // of this component. This by-name convention lookup is used to simplify configuring algorithms.
+    String componentName = initParams.get(ClusteringEngine.ENGINE_NAME);
+    log.info("Initializing Clustering Engine '" + Objects.firstNonNull(componentName, "<no 'name' attribute>") + "'");
+
+    if (!Strings.isNullOrEmpty(componentName)) {
+      IResource[] attributeXmls = resourceLookup.getAll(componentName + "-attributes.xml");
+      if (attributeXmls.length > 0) {
+        if (attributeXmls.length > 1) {
+          log.warn("More than one attribute file found, first one will be used: " 
+              + Arrays.toString(attributeXmls));
+        }
+
+        Thread ct = Thread.currentThread();
+        ClassLoader prev = ct.getContextClassLoader();
+        try {
+          ct.setContextClassLoader(core.getResourceLoader().getClassLoader());
+
+          AttributeValueSets avs = AttributeValueSets.deserialize(attributeXmls[0].open());
+          AttributeValueSet defaultSet = avs.getDefaultAttributeValueSet();
+          initAttributes.putAll(defaultSet.getAttributeValues());
+        } catch (Exception e) {
+          throw new SolrException(ErrorCode.SERVER_ERROR, 
+              "Could not read attributes XML for clustering component: " 
+                  + componentName, e);
+        } finally {
+          ct.setContextClassLoader(prev);
+        }
+      }
+    }
+
+    // Extract solrconfig attributes; they take precedence over values loaded from the attributes XML.
     extractCarrotAttributes(initParams, initAttributes);
 
     // Customize the stemmer and tokenizer factories. The implementations we provide here
@@ -291,15 +349,6 @@ public class CarrotClusteringEngine exte
     // Pass the schema (via the core) to SolrStopwordsCarrot2LexicalDataFactory.
     initAttributes.put("solrCore", core);
 
-    // Customize Carrot2's resource lookup to first look for resources
-    // using Solr's resource loader. If that fails, try loading from the classpath.
-    DefaultLexicalDataFactoryDescriptor.attributeBuilder(initAttributes).resourceLookup(
-      new ResourceLookup(
-        // Solr-specific resource loading.
-        new SolrResourceLocator(core, initParams),
-        // Using the class loader directly because this time we want to omit the prefix
-        new ClassLoaderLocator(core.getResourceLoader().getClassLoader())));
-
     // Carrot2 uses current thread's context class loader to get
     // certain classes (e.g. custom tokenizer/stemmer) at initialization time.
     // To make sure classes from contrib JARs are available,
@@ -322,7 +371,9 @@ public class CarrotClusteringEngine exte
 
     // Make sure the requested Carrot2 clustering algorithm class is available
     String carrotAlgorithmClassName = initParams.get(CarrotParams.ALGORITHM);
-    this.clusteringAlgorithmClass = core.getResourceLoader().findClass(carrotAlgorithmClassName, IClusteringAlgorithm.class);
+    this.clusteringAlgorithmClass = core.getResourceLoader().findClass(
+        carrotAlgorithmClassName, IClusteringAlgorithm.class);
+
     return result;
   }
 
@@ -440,8 +491,9 @@ public class CarrotClusteringEngine exte
         docsHolder[0] = docIds.get(sdoc).intValue();
         DocList docAsList = new DocSlice(0, 1, docsHolder, scores, 1, 1.0f);
         NamedList<Object> highlights = highlighter.doHighlighting(docAsList, theQuery, req, snippetFieldAry);
-        if (highlights != null && highlights.size() == 1) {//should only be one value given our setup
-          //should only be one document
+        if (highlights != null && highlights.size() == 1) {
+          // should only be one value given our setup
+          // should only be one document
           @SuppressWarnings("unchecked")
           NamedList<String []> tmp = (NamedList<String[]>) highlights.getVal(0);
           

Modified: lucene/dev/trunk/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/carrot2/CarrotParams.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/carrot2/CarrotParams.java?rev=1520677&r1=1520676&r2=1520677&view=diff
==============================================================================
--- lucene/dev/trunk/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/carrot2/CarrotParams.java (original)
+++ lucene/dev/trunk/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/carrot2/CarrotParams.java Fri Sep  6 20:02:57 2013
@@ -43,9 +43,21 @@ public final class CarrotParams {
 
   public static String NUM_DESCRIPTIONS = CARROT_PREFIX + "numDescriptions";
   public static String OUTPUT_SUB_CLUSTERS = CARROT_PREFIX + "outputSubClusters";
-  public static String LEXICAL_RESOURCES_DIR = CARROT_PREFIX + "lexicalResourcesDir";
+
   public static String LANGUAGE_CODE_MAP = CARROT_PREFIX + "lcmap";
 
+  /**
+   * @deprecated Use {@link #RESOURCES_DIR} instead.
+   */
+  @Deprecated
+  public static String LEXICAL_RESOURCES_DIR = CARROT_PREFIX + "lexicalResourcesDir";
+  
+  /**
+   * A replacement property pointing to Carrot<sup>2</sup> resources
+   * (a more generic version of the deprecated {@link #LEXICAL_RESOURCES_DIR}).
+   */
+  public static String RESOURCES_DIR = CARROT_PREFIX + "resourcesDir";
+
   static final Set<String> CARROT_PARAM_NAMES = ImmutableSet.of(
           ALGORITHM, 
           
@@ -62,6 +74,7 @@ public final class CarrotParams {
           NUM_DESCRIPTIONS, 
           OUTPUT_SUB_CLUSTERS, 
           LEXICAL_RESOURCES_DIR,
+          RESOURCES_DIR,
           LANGUAGE_CODE_MAP);
   
   /** No instances. */

Added: lucene/dev/trunk/solr/contrib/clustering/src/test-files/clustering/solr/collection1/conf/clustering/carrot2/mock-external-attrs-attributes.xml
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/contrib/clustering/src/test-files/clustering/solr/collection1/conf/clustering/carrot2/mock-external-attrs-attributes.xml?rev=1520677&view=auto
==============================================================================
--- lucene/dev/trunk/solr/contrib/clustering/src/test-files/clustering/solr/collection1/conf/clustering/carrot2/mock-external-attrs-attributes.xml (added)
+++ lucene/dev/trunk/solr/contrib/clustering/src/test-files/clustering/solr/collection1/conf/clustering/carrot2/mock-external-attrs-attributes.xml Fri Sep  6 20:02:57 2013
@@ -0,0 +1,10 @@
+<attribute-sets default="overridden-attributes">
+  <attribute-set id="overridden-attributes">
+    <value-set>
+      <label>defaults</label>
+      <attribute key="MockClusteringAlgorithm.depth"><value value="1" /></attribute>
+      <attribute key="MockClusteringAlgorithm.labels"><value value="3" /></attribute>
+      <attribute key="MockClusteringAlgorithm.maxClusters"><value value="13" /></attribute>
+    </value-set>
+  </attribute-set>
+</attribute-sets>
\ No newline at end of file

Modified: lucene/dev/trunk/solr/contrib/clustering/src/test-files/clustering/solr/collection1/conf/solrconfig.xml
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/contrib/clustering/src/test-files/clustering/solr/collection1/conf/solrconfig.xml?rev=1520677&r1=1520676&r2=1520677&view=diff
==============================================================================
--- lucene/dev/trunk/solr/contrib/clustering/src/test-files/clustering/solr/collection1/conf/solrconfig.xml (original)
+++ lucene/dev/trunk/solr/contrib/clustering/src/test-files/clustering/solr/collection1/conf/solrconfig.xml Fri Sep  6 20:02:57 2013
@@ -328,6 +328,12 @@
       <str name="carrot.algorithm">org.apache.solr.handler.clustering.carrot2.MockClusteringAlgorithm</str>
     </lst>
     <lst name="engine">
+      <str name="name">mock-external-attrs</str>
+      <str name="carrot.algorithm">org.apache.solr.handler.clustering.carrot2.MockClusteringAlgorithm</str>
+      <!-- takes precedence over external XML -->
+      <int name="MockClusteringAlgorithm.labels">4</int>
+    </lst>
+    <lst name="engine">
       <str name="name">echo</str>
       <str name="carrot.algorithm">org.apache.solr.handler.clustering.carrot2.EchoClusteringAlgorithm</str>
     </lst>
@@ -338,6 +344,11 @@
     <lst name="engine">
       <str name="name">lexical-resource-check-custom-resource-dir</str>
       <str name="carrot.algorithm">org.apache.solr.handler.clustering.carrot2.LexicalResourcesCheckClusteringAlgorithm</str>
+      <str name="carrot.resourcesDir">clustering/custom</str>
+    </lst>
+    <lst name="engine">
+      <str name="name">lexical-resource-check-custom-resource-dir-deprecated</str>
+      <str name="carrot.algorithm">org.apache.solr.handler.clustering.carrot2.LexicalResourcesCheckClusteringAlgorithm</str>
       <str name="carrot.lexicalResourcesDir">clustering/custom</str>
     </lst>
     <lst name="engine">

Modified: lucene/dev/trunk/solr/contrib/clustering/src/test/org/apache/solr/handler/clustering/carrot2/CarrotClusteringEngineTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/contrib/clustering/src/test/org/apache/solr/handler/clustering/carrot2/CarrotClusteringEngineTest.java?rev=1520677&r1=1520676&r2=1520677&view=diff
==============================================================================
--- lucene/dev/trunk/solr/contrib/clustering/src/test/org/apache/solr/handler/clustering/carrot2/CarrotClusteringEngineTest.java (original)
+++ lucene/dev/trunk/solr/contrib/clustering/src/test/org/apache/solr/handler/clustering/carrot2/CarrotClusteringEngineTest.java Fri Sep  6 20:02:57 2013
@@ -122,7 +122,14 @@ public class CarrotClusteringEngineTest 
   @Test
   public void testWithoutSubclusters() throws Exception {
     checkClusters(checkEngine(getClusteringEngine("mock"), AbstractClusteringTestCase.numberOfDocs),
-            1, 1, 0);
+        1, 1, 0);
+  }
+
+  @Test
+  public void testExternalXmlAttributesFile() throws Exception {
+    checkClusters(
+        checkEngine(getClusteringEngine("mock-external-attrs"), 13),
+        1, 4, 0);
   }
 
   @Test
@@ -189,6 +196,12 @@ public class CarrotClusteringEngineTest 
         "online,customsolrstopwordcustomdir,customsolrstoplabelcustomdir");
   }
 
+  @Test
+  public void testLexicalResourcesFromSolrConfigCustomDirDeprecated() throws Exception {
+    checkLexicalResourcesFromSolrConfig("lexical-resource-check-custom-resource-dir-deprecated",
+        "online,customsolrstopwordcustomdir,customsolrstoplabelcustomdir");
+  }
+
   private void checkLexicalResourcesFromSolrConfig(String engineName, String wordsToCheck)
       throws IOException {
     ModifiableSolrParams params = new ModifiableSolrParams();

Modified: lucene/dev/trunk/solr/contrib/clustering/src/test/org/apache/solr/handler/clustering/carrot2/MockClusteringAlgorithm.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/contrib/clustering/src/test/org/apache/solr/handler/clustering/carrot2/MockClusteringAlgorithm.java?rev=1520677&r1=1520676&r2=1520677&view=diff
==============================================================================
--- lucene/dev/trunk/solr/contrib/clustering/src/test/org/apache/solr/handler/clustering/carrot2/MockClusteringAlgorithm.java (original)
+++ lucene/dev/trunk/solr/contrib/clustering/src/test/org/apache/solr/handler/clustering/carrot2/MockClusteringAlgorithm.java Fri Sep  6 20:02:57 2013
@@ -52,6 +52,12 @@ public class MockClusteringAlgorithm ext
   @Input
   @Processing
   @Attribute
+  @IntRange(min = 0)
+  private int maxClusters = 0;
+
+  @Input
+  @Processing
+  @Attribute
   private int otherTopicsModulo = 0;
 
   @Override
@@ -61,6 +67,10 @@ public class MockClusteringAlgorithm ext
       return;
     }
 
+    if (maxClusters > 0) {
+      documents = documents.subList(0, maxClusters);
+    }
+
     int documentIndex = 1;
     for (Document document : documents) {
       StringBuilder label = new StringBuilder("Cluster " + documentIndex);

Modified: lucene/dev/trunk/solr/example/example-schemaless/solr/collection1/conf/solrconfig.xml
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/example/example-schemaless/solr/collection1/conf/solrconfig.xml?rev=1520677&r1=1520676&r2=1520677&view=diff
==============================================================================
--- lucene/dev/trunk/solr/example/example-schemaless/solr/collection1/conf/solrconfig.xml (original)
+++ lucene/dev/trunk/solr/example/example-schemaless/solr/collection1/conf/solrconfig.xml Fri Sep  6 20:02:57 2013
@@ -1369,113 +1369,7 @@
     </arr>
   </requestHandler>
 
-  <!-- Clustering Component
-
-       http://wiki.apache.org/solr/ClusteringComponent
-
-       You'll need to set the solr.clustering.enabled system property
-       when running solr to run with clustering enabled:
-
-            java -Dsolr.clustering.enabled=true -jar start.jar
-
-    -->
-  <searchComponent name="clustering"
-                   enable="${solr.clustering.enabled:false}"
-                   class="solr.clustering.ClusteringComponent" >
-    <!-- Declare an engine -->
-    <lst name="engine">
-      <!-- The name, only one can be named "default" -->
-      <str name="name">default</str>
-
-      <!-- Class name of Carrot2 clustering algorithm.
-
-           Currently available algorithms are:
-           
-           * org.carrot2.clustering.lingo.LingoClusteringAlgorithm
-           * org.carrot2.clustering.stc.STCClusteringAlgorithm
-           * org.carrot2.clustering.kmeans.BisectingKMeansClusteringAlgorithm
-           
-           See http://project.carrot2.org/algorithms.html for the
-           algorithm's characteristics.
-        -->
-      <str name="carrot.algorithm">org.carrot2.clustering.lingo.LingoClusteringAlgorithm</str>
-
-      <!-- Overriding values for Carrot2 default algorithm attributes.
-
-           For a description of all available attributes, see:
-           http://download.carrot2.org/stable/manual/#chapter.components.
-           Use attribute key as name attribute of str elements
-           below. These can be further overridden for individual
-           requests by specifying attribute key as request parameter
-           name and attribute value as parameter value.
-        -->
-      <str name="LingoClusteringAlgorithm.desiredClusterCountBase">20</str>
-
-      <!-- Location of Carrot2 lexical resources.
-
-           A directory from which to load Carrot2-specific stop words
-           and stop labels. Absolute or relative to Solr config directory.
-           If a specific resource (e.g. stopwords.en) is present in the
-           specified dir, it will completely override the corresponding
-           default one that ships with Carrot2.
-
-           For an overview of Carrot2 lexical resources, see:
-           http://download.carrot2.org/head/manual/#chapter.lexical-resources
-        -->
-      <str name="carrot.lexicalResourcesDir">clustering/carrot2</str>
-
-      <!-- The language to assume for the documents.
-
-           For a list of allowed values, see:
-           http://download.carrot2.org/stable/manual/#section.attribute.lingo.MultilingualClustering.defaultLanguage
-       -->
-      <str name="MultilingualClustering.defaultLanguage">ENGLISH</str>
-    </lst>
-    <lst name="engine">
-      <str name="name">stc</str>
-      <str name="carrot.algorithm">org.carrot2.clustering.stc.STCClusteringAlgorithm</str>
-    </lst>
-  </searchComponent>
-
-  <!-- A request handler for demonstrating the clustering component
-
-       This is purely as an example.
-
-       In reality you will likely want to add the component to your 
-       already specified request handlers. 
-    -->
-  <requestHandler name="/clustering"
-                  startup="lazy"
-                  enable="${solr.clustering.enabled:false}"
-                  class="solr.SearchHandler">
-    <lst name="defaults">
-      <bool name="clustering">true</bool>
-      <str name="clustering.engine">default</str>
-      <bool name="clustering.results">true</bool>
-      <!-- The title field -->
-      <str name="carrot.title">name</str>
-      <str name="carrot.url">id</str>
-      <!-- The field to cluster on -->
-      <str name="carrot.snippet">features</str>
-      <!-- produce summaries -->
-      <bool name="carrot.produceSummary">true</bool>
-      <!-- the maximum number of labels per cluster -->
-      <!--<int name="carrot.numDescriptions">5</int>-->
-      <!-- produce sub clusters -->
-      <bool name="carrot.outputSubClusters">false</bool>
-
-      <str name="defType">edismax</str>
-      <str name="qf">
-        text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 manu^1.1 cat^1.4
-      </str>
-      <str name="q.alt">*:*</str>
-      <str name="rows">10</str>
-      <str name="fl">*,score</str>
-    </lst>
-    <arr name="last-components">
-      <str>clustering</str>
-    </arr>
-  </requestHandler>
+  <!-- Clustering Component. (Omitted here. See the default Solr example for a typical configuration.) -->
 
   <!-- Terms Component
 

Added: lucene/dev/trunk/solr/example/solr/collection1/conf/clustering/carrot2/default-attributes.xml
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/example/solr/collection1/conf/clustering/carrot2/default-attributes.xml?rev=1520677&view=auto
==============================================================================
--- lucene/dev/trunk/solr/example/solr/collection1/conf/clustering/carrot2/default-attributes.xml (added)
+++ lucene/dev/trunk/solr/example/solr/collection1/conf/clustering/carrot2/default-attributes.xml Fri Sep  6 20:02:57 2013
@@ -0,0 +1,24 @@
+<!-- 
+  Default configuration for the Lingo clustering algorithm.
+
+  This file can be loaded (and saved) by Carrot2 Workbench.
+  http://project.carrot2.org/download.html
+-->
+<attribute-sets default="attributes">
+    <attribute-set id="attributes">
+      <value-set>
+        <label>attributes</label>
+          <!-- 
+          The language to assume for clustered documents.
+          For a list of allowed values, see: 
+          http://download.carrot2.org/stable/manual/#section.attribute.lingo.MultilingualClustering.defaultLanguage
+          -->
+          <attribute key="MultilingualClustering.defaultLanguage">
+            <value type="org.carrot2.core.LanguageCode" value="ENGLISH"/>
+          </attribute>
+          <attribute key="LingoClusteringAlgorithm.desiredClusterCountBase">
+            <value type="java.lang.Integer" value="20"/>
+          </attribute>
+      </value-set>
+  </attribute-set>
+</attribute-sets>
\ No newline at end of file

Added: lucene/dev/trunk/solr/example/solr/collection1/conf/clustering/carrot2/kmeans-attributes.xml
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/example/solr/collection1/conf/clustering/carrot2/kmeans-attributes.xml?rev=1520677&view=auto
==============================================================================
--- lucene/dev/trunk/solr/example/solr/collection1/conf/clustering/carrot2/kmeans-attributes.xml (added)
+++ lucene/dev/trunk/solr/example/solr/collection1/conf/clustering/carrot2/kmeans-attributes.xml Fri Sep  6 20:02:57 2013
@@ -0,0 +1,19 @@
+<!-- 
+  Default configuration for the bisecting k-means clustering algorithm.
+  
+  This file can be loaded (and saved) by Carrot2 Workbench.
+  http://project.carrot2.org/download.html
+-->
+<attribute-sets default="attributes">
+    <attribute-set id="attributes">
+      <value-set>
+        <label>attributes</label>
+          <attribute key="MultilingualClustering.defaultLanguage">
+            <value type="org.carrot2.core.LanguageCode" value="ENGLISH"/>
+          </attribute>
+          <attribute key="MultilingualClustering.languageAggregationStrategy">
+            <value type="org.carrot2.text.clustering.MultilingualClustering$LanguageAggregationStrategy" value="FLATTEN_MAJOR_LANGUAGE"/>
+          </attribute>
+      </value-set>
+  </attribute-set>
+</attribute-sets>

Added: lucene/dev/trunk/solr/example/solr/collection1/conf/clustering/carrot2/stc-attributes.xml
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/example/solr/collection1/conf/clustering/carrot2/stc-attributes.xml?rev=1520677&view=auto
==============================================================================
--- lucene/dev/trunk/solr/example/solr/collection1/conf/clustering/carrot2/stc-attributes.xml (added)
+++ lucene/dev/trunk/solr/example/solr/collection1/conf/clustering/carrot2/stc-attributes.xml Fri Sep  6 20:02:57 2013
@@ -0,0 +1,19 @@
+<!-- 
+  Default configuration for the STC clustering algorithm.
+
+  This file can be loaded (and saved) by Carrot2 Workbench.
+  http://project.carrot2.org/download.html
+-->
+<attribute-sets default="attributes">
+    <attribute-set id="attributes">
+      <value-set>
+        <label>attributes</label>
+          <attribute key="MultilingualClustering.defaultLanguage">
+            <value type="org.carrot2.core.LanguageCode" value="ENGLISH"/>
+          </attribute>
+          <attribute key="MultilingualClustering.languageAggregationStrategy">
+            <value type="org.carrot2.text.clustering.MultilingualClustering$LanguageAggregationStrategy" value="FLATTEN_MAJOR_LANGUAGE"/>
+          </attribute>
+      </value-set>
+  </attribute-set>
+</attribute-sets>

Modified: lucene/dev/trunk/solr/example/solr/collection1/conf/solrconfig.xml
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/example/solr/collection1/conf/solrconfig.xml?rev=1520677&r1=1520676&r2=1520677&view=diff
==============================================================================
--- lucene/dev/trunk/solr/example/solr/collection1/conf/solrconfig.xml (original)
+++ lucene/dev/trunk/solr/example/solr/collection1/conf/solrconfig.xml Fri Sep  6 20:02:57 2013
@@ -1382,59 +1382,57 @@
   <searchComponent name="clustering"
                    enable="${solr.clustering.enabled:true}"
                    class="solr.clustering.ClusteringComponent" >
-    <!-- Declare an engine -->
+    <!-- Declare a named clustering engine. Only one engine can be named 
+         "default" (and it becomes the default one for the search component). 
+      -->
     <lst name="engine">
-      <!-- The name, only one can be named "default" -->
       <str name="name">default</str>
 
-      <!-- Class name of Carrot2 clustering algorithm.
+      <!-- Class name of a clustering algorithm compatible with the Carrot2
+           framework.
 
-           Currently available algorithms are:
-           
+           Currently available open source algorithms are:
            * org.carrot2.clustering.lingo.LingoClusteringAlgorithm
            * org.carrot2.clustering.stc.STCClusteringAlgorithm
            * org.carrot2.clustering.kmeans.BisectingKMeansClusteringAlgorithm
-           
-           See http://project.carrot2.org/algorithms.html for the
-           algorithm's characteristics.
-        -->
-      <str name="carrot.algorithm">org.carrot2.clustering.lingo.LingoClusteringAlgorithm</str>
 
-      <!-- Overriding values for Carrot2 default algorithm attributes.
+           See http://project.carrot2.org/algorithms.html for more information.
 
-           For a description of all available attributes, see:
-           http://download.carrot2.org/stable/manual/#chapter.components.
-           Use attribute key as name attribute of str elements
-           below. These can be further overridden for individual
-           requests by specifying attribute key as request parameter
-           name and attribute value as parameter value.
+           The commercial Lingo3G algorithm (which must be installed separately) is defined as:
+           * com.carrotsearch.lingo3g.Lingo3GClusteringAlgorithm
         -->
-      <str name="LingoClusteringAlgorithm.desiredClusterCountBase">20</str>
+      <str name="carrot.algorithm">org.carrot2.clustering.lingo.LingoClusteringAlgorithm</str>
 
-      <!-- Location of Carrot2 lexical resources.
+      <!-- Override location of the clustering algorithm's resources 
+           (attribute definitions and lexical resources).
 
-           A directory from which to load Carrot2-specific stop words
-           and stop labels. Absolute or relative to Solr config directory.
+           A directory from which to load algorithm-specific stop words,
+           stop labels and attribute definition XMLs. 
+           Absolute or relative to Solr config directory.
            If a specific resource (e.g. stopwords.en) is present in the
            specified dir, it will completely override the corresponding
-           default one that ships with Carrot2.
+           default one that typically ships with each algorithm.
 
            For an overview of Carrot2 lexical resources, see:
            http://download.carrot2.org/head/manual/#chapter.lexical-resources
-        -->
-      <str name="carrot.lexicalResourcesDir">clustering/carrot2</str>
-
-      <!-- The language to assume for the documents.
-
-           For a list of allowed values, see:
-           http://download.carrot2.org/stable/manual/#section.attribute.lingo.MultilingualClustering.defaultLanguage
+           
+           For an overview of Lingo3G lexical resources, see:
+           http://download.carrotsearch.com/lingo3g/manual/#chapter.lexical-resources
        -->
-      <str name="MultilingualClustering.defaultLanguage">ENGLISH</str>
+      <!-- <str name="carrot.resourcesDir">clustering/carrot2</str> -->
     </lst>
+
+    <!-- An example definition for the STC clustering algorithm. -->
     <lst name="engine">
       <str name="name">stc</str>
       <str name="carrot.algorithm">org.carrot2.clustering.stc.STCClusteringAlgorithm</str>
     </lst>
+
+    <!-- An example definition for the bisecting kmeans clustering algorithm. -->
+    <lst name="engine">
+      <str name="name">kmeans</str>
+      <str name="carrot.algorithm">org.carrot2.clustering.kmeans.BisectingKMeansClusteringAlgorithm</str>
+    </lst>
   </searchComponent>
 
   <!-- A request handler for demonstrating the clustering component