You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by st...@apache.org on 2011/12/09 16:57:36 UTC
svn commit: r1212488 - in /lucene/dev/trunk/solr/contrib/clustering: ./
src/java/org/apache/solr/handler/clustering/carrot2/
src/test/org/apache/solr/handler/clustering/
src/test/org/apache/solr/handler/clustering/carrot2/
Author: stanislaw
Date: Fri Dec 9 15:57:36 2011
New Revision: 1212488
URL: http://svn.apache.org/viewvc?rev=1212488&view=rev
Log:
SOLR-2937: Configuring the number of contextual snippets used for search results clustering
Modified:
lucene/dev/trunk/solr/contrib/clustering/CHANGES.txt
lucene/dev/trunk/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/carrot2/CarrotClusteringEngine.java
lucene/dev/trunk/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/carrot2/CarrotParams.java
lucene/dev/trunk/solr/contrib/clustering/src/test/org/apache/solr/handler/clustering/AbstractClusteringTestCase.java
lucene/dev/trunk/solr/contrib/clustering/src/test/org/apache/solr/handler/clustering/carrot2/CarrotClusteringEngineTest.java
Modified: lucene/dev/trunk/solr/contrib/clustering/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/contrib/clustering/CHANGES.txt?rev=1212488&r1=1212487&r2=1212488&view=diff
==============================================================================
--- lucene/dev/trunk/solr/contrib/clustering/CHANGES.txt (original)
+++ lucene/dev/trunk/solr/contrib/clustering/CHANGES.txt Fri Dec 9 15:57:36 2011
@@ -13,7 +13,10 @@ $Id$
================== Release 3.6.0 ==================
-(No Changes)
+* SOLR-2937: Configuring the number of contextual snippets used for
+ search results clustering. The hl.snippets parameter is now respected
+ by the clustering plugin and can be overridden by carrot.summarySnippets
+ if needed (Stanislaw Osinski).
================== Release 3.5.0 ==================
@@ -21,10 +24,10 @@ $Id$
================== Release 3.4.0 ==================
-SOLR-2706: The carrot.lexicalResourcesDir parameter now works
- with absolute directories (Stanislaw Osinski)
+* SOLR-2706: The carrot.lexicalResourcesDir parameter now works
+ with absolute directories (Stanislaw Osinski)
-SOLR-2692: Typo in param name fixed: "carrot.fragzise" changed to
+* SOLR-2692: Typo in param name fixed: "carrot.fragzise" changed to
"carrot.fragSize" (Stanislaw Osinski).
================== Release 3.3.0 ==================
Modified: lucene/dev/trunk/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/carrot2/CarrotClusteringEngine.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/carrot2/CarrotClusteringEngine.java?rev=1212488&r1=1212487&r2=1212488&view=diff
==============================================================================
--- lucene/dev/trunk/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/carrot2/CarrotClusteringEngine.java (original)
+++ lucene/dev/trunk/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/carrot2/CarrotClusteringEngine.java Fri Dec 9 15:57:36 2011
@@ -331,6 +331,7 @@ public class CarrotClusteringEngine exte
args.put(HighlightParams.SIMPLE_PRE, ""); //we don't care about actually highlighting the area
args.put(HighlightParams.SIMPLE_POST, "");
args.put(HighlightParams.FRAGSIZE, solrParams.getInt(CarrotParams.SUMMARY_FRAGSIZE, solrParams.getInt(HighlightParams.FRAGSIZE, 100)));
+ args.put(HighlightParams.SNIPPETS, solrParams.getInt(CarrotParams.SUMMARY_SNIPPETS, solrParams.getInt(HighlightParams.SNIPPETS, 1)));
req = new LocalSolrQueryRequest(core, query.toString(), "", 0, 1, args) {
@Override
public SolrIndexSearcher getSearcher() {
@@ -364,8 +365,16 @@ public class CarrotClusteringEngine exte
@SuppressWarnings("unchecked")
NamedList<String []> tmp = (NamedList<String[]>) highlights.getVal(0);
String [] highlt = tmp.get(snippetField);
- if (highlt != null && highlt.length == 1) {
- snippet = highlt[0];
+
+ // Join fragments with a period, so that Carrot2 does not create
+ // cross-fragment phrases; such phrases rarely make sense.
+ if (highlt != null && highlt.length > 0) {
+ final StringBuilder sb = new StringBuilder(highlt[0]);
+ for (int i = 1; i < highlt.length; i++) {
+ sb.append(" . ");
+ sb.append(highlt[i]);
+ }
+ snippet = sb.toString();
}
}
}
Modified: lucene/dev/trunk/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/carrot2/CarrotParams.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/carrot2/CarrotParams.java?rev=1212488&r1=1212487&r2=1212488&view=diff
==============================================================================
--- lucene/dev/trunk/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/carrot2/CarrotParams.java (original)
+++ lucene/dev/trunk/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/carrot2/CarrotParams.java Fri Dec 9 15:57:36 2011
@@ -34,6 +34,7 @@ public interface CarrotParams {
String NUM_DESCRIPTIONS = CARROT_PREFIX + "numDescriptions";
String OUTPUT_SUB_CLUSTERS = CARROT_PREFIX + "outputSubClusters";
String SUMMARY_FRAGSIZE = CARROT_PREFIX + "fragSize";
+ String SUMMARY_SNIPPETS = CARROT_PREFIX + "summarySnippets";
String LEXICAL_RESOURCES_DIR = CARROT_PREFIX + "lexicalResourcesDir";
Modified: lucene/dev/trunk/solr/contrib/clustering/src/test/org/apache/solr/handler/clustering/AbstractClusteringTestCase.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/contrib/clustering/src/test/org/apache/solr/handler/clustering/AbstractClusteringTestCase.java?rev=1212488&r1=1212487&r2=1212488&view=diff
==============================================================================
--- lucene/dev/trunk/solr/contrib/clustering/src/test/org/apache/solr/handler/clustering/AbstractClusteringTestCase.java (original)
+++ lucene/dev/trunk/solr/contrib/clustering/src/test/org/apache/solr/handler/clustering/AbstractClusteringTestCase.java Fri Dec 9 15:57:36 2011
@@ -17,6 +17,7 @@ package org.apache.solr.handler.clusteri
*/
import org.apache.solr.SolrTestCaseJ4;
+import org.apache.solr.common.SolrInputDocument;
import org.junit.BeforeClass;
@@ -34,6 +35,17 @@ public abstract class AbstractClustering
assertNull(h.validateUpdate(adoc("id", Integer.toString(numberOfDocs), "url", doc[0], "title", doc[1], "snippet", doc[2])));
numberOfDocs++;
}
+
+ // Add a multi-valued snippet
+ final SolrInputDocument multiValuedSnippet = new SolrInputDocument();
+ multiValuedSnippet.addField("id", numberOfDocs++);
+ multiValuedSnippet.addField("title", "Title");
+ multiValuedSnippet.addField("url", "URL");
+ multiValuedSnippet.addField("snippet", "First value of multi field. Some more text. And still more.");
+ multiValuedSnippet.addField("snippet", "Second value of multi field. Some more text. And still more.");
+ multiValuedSnippet.addField("snippet", "Third value of multi field. Some more text. And still more.");
+ assertNull(h.validateUpdate(adoc(multiValuedSnippet)));
+
assertNull(h.validateUpdate(commit()));
}
Modified: lucene/dev/trunk/solr/contrib/clustering/src/test/org/apache/solr/handler/clustering/carrot2/CarrotClusteringEngineTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/contrib/clustering/src/test/org/apache/solr/handler/clustering/carrot2/CarrotClusteringEngineTest.java?rev=1212488&r1=1212487&r2=1212488&view=diff
==============================================================================
--- lucene/dev/trunk/solr/contrib/clustering/src/test/org/apache/solr/handler/clustering/carrot2/CarrotClusteringEngineTest.java (original)
+++ lucene/dev/trunk/solr/contrib/clustering/src/test/org/apache/solr/handler/clustering/carrot2/CarrotClusteringEngineTest.java Fri Dec 9 15:57:36 2011
@@ -88,10 +88,15 @@ public class CarrotClusteringEngineTest
private List<NamedList<Object>> clusterWithHighlighting(
boolean enableHighlighting, int fragSize) throws IOException {
-
- final TermQuery query = new TermQuery(new Term("snippet", "mine"));
// Two documents don't have mining in the snippet
- int expectedNumDocuments = numberOfDocs - 2;
+ return clusterWithHighlighting(enableHighlighting, fragSize, 1, "mine", numberOfDocs - 2);
+ }
+
+ private List<NamedList<Object>> clusterWithHighlighting(
+ boolean enableHighlighting, int fragSize, int summarySnippets,
+ String term, int expectedNumDocuments) throws IOException {
+
+ final TermQuery query = new TermQuery(new Term("snippet", term));
final ModifiableSolrParams summaryParams = new ModifiableSolrParams();
summaryParams.add(CarrotParams.SNIPPET_FIELD_NAME, "snippet");
@@ -99,6 +104,8 @@ public class CarrotClusteringEngineTest
Boolean.toString(enableHighlighting));
summaryParams
.add(CarrotParams.SUMMARY_FRAGSIZE, Integer.toString(fragSize));
+ summaryParams
+ .add(CarrotParams.SUMMARY_SNIPPETS, Integer.toString(summarySnippets));
final List<NamedList<Object>> summaryClusters = checkEngine(
getClusteringEngine("echo"), expectedNumDocuments,
expectedNumDocuments, query, summaryParams);
@@ -229,6 +236,23 @@ public class CarrotClusteringEngineTest
assertEquals(ImmutableList.of("solrownstopword"),
getLabels(clusters.get(1)));
}
+
+ @Test
+ public void highlightingOfMultiValueField() throws Exception {
+ final String snippetWithoutSummary = getLabels(clusterWithHighlighting(
+ false, 30, 3, "multi", 1).get(0)).get(1);
+ assertTrue("Snippet contains first value", snippetWithoutSummary.contains("First"));
+ assertTrue("Snippet contains second value", snippetWithoutSummary.contains("Second"));
+ assertTrue("Snippet contains third value", snippetWithoutSummary.contains("Third"));
+
+ final String snippetWithSummary = getLabels(clusterWithHighlighting(
+ true, 30, 3, "multi", 1).get(0)).get(1);
+ assertTrue("Snippet with summary shorter than full snippet",
+ snippetWithoutSummary.length() > snippetWithSummary.length());
+ assertTrue("Summary covers first value", snippetWithSummary.contains("First"));
+ assertTrue("Summary covers second value", snippetWithSummary.contains("Second"));
+ assertTrue("Summary covers third value", snippetWithSummary.contains("Third"));
+ }
private CarrotClusteringEngine getClusteringEngine(String engineName) {
ClusteringComponent comp = (ClusteringComponent) h.getCore()