You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by sa...@apache.org on 2016/11/07 16:50:32 UTC
[18/19] lucene-solr:apiv2: SOLR-9293: Solrj client support for hierarchical clusters and other topics marker.
SOLR-9293: Solrj client support for hierarchical clusters and other topics marker.
Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/7fb72bfe
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/7fb72bfe
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/7fb72bfe
Branch: refs/heads/apiv2
Commit: 7fb72bfe10d84d3419b07a8782418f86ab075a56
Parents: cc99815
Author: Dawid Weiss <dw...@apache.org>
Authored: Mon Nov 7 14:34:17 2016 +0100
Committer: Dawid Weiss <dw...@apache.org>
Committed: Mon Nov 7 15:27:27 2016 +0100
----------------------------------------------------------------------
solr/CHANGES.txt | 3 +
.../solr/client/solrj/response/Cluster.java | 48 +++++++++------
.../solrj/response/ClusteringResponse.java | 48 +++++++++++----
.../solrj/sampleClusteringResponse.xml | 19 ++++++
.../solrj/response/TestClusteringResponse.java | 62 +++++---------------
5 files changed, 107 insertions(+), 73 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/7fb72bfe/solr/CHANGES.txt
----------------------------------------------------------------------
diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index 2e6487d..ad022fb 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -81,6 +81,9 @@ Detailed Change List
New Features
----------------------
+* SOLR-9293: Solrj client support for hierarchical clusters and other topics
+ marker. (Dawid Weiss)
+
* SOLR-9681: FacetModule / JSON Facet API added the ability to add filters directly to
any facet command. The filters are applied after any domain change operations.
Example: { type:terms, field:category, filter:"user:yonik" }
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/7fb72bfe/solr/solrj/src/java/org/apache/solr/client/solrj/response/Cluster.java
----------------------------------------------------------------------
diff --git a/solr/solrj/src/java/org/apache/solr/client/solrj/response/Cluster.java b/solr/solrj/src/java/org/apache/solr/client/solrj/response/Cluster.java
index ae3e529..378e1a7 100644
--- a/solr/solrj/src/java/org/apache/solr/client/solrj/response/Cluster.java
+++ b/solr/solrj/src/java/org/apache/solr/client/solrj/response/Cluster.java
@@ -16,7 +16,9 @@
*/
package org.apache.solr.client.solrj.response;
+import java.util.Collections;
import java.util.List;
+import java.util.Objects;
/**
* This class represents a cluster of Solr Docs .
@@ -28,41 +30,43 @@ public class Cluster {
private List<String> labels;
private double score;
private List<String> docIds;
+ private List<Cluster> subclusters;
+ private boolean otherTopics;
+
+ public Cluster(List<String> labels, double score, List<String> docIds) {
+ this(labels, score, docIds, Collections.emptyList(), false);
+ }
/**
* @param labels the list of human readable labels associated to the cluster
* @param score the score produced by the clustering algorithm for the current cluster
* @param docIds the list of document Ids belonging to the cluster
*/
- public Cluster(List<String> labels, double score, List<String> docIds) {
+ public Cluster(List<String> labels, double score, List<String> docIds, List<Cluster> subclusters, boolean otherTopics) {
this.labels = labels;
this.score = score;
this.docIds = docIds;
+ this.subclusters = subclusters;
+ this.otherTopics = otherTopics;
}
@Override
public boolean equals(Object o) {
- if (this == o) return true;
- if (!(o instanceof Cluster)) return false;
-
- Cluster cluster = (Cluster) o;
-
- if (Double.compare(cluster.score, score) != 0) return false;
- if (!docIds.equals(cluster.docIds)) return false;
- if (!labels.equals(cluster.labels)) return false;
+ return o != null &&
+ this.getClass().isInstance(o) &&
+ equalsTo((Cluster) o);
+ }
- return true;
+ private boolean equalsTo(Cluster o) {
+ return Double.compare(o.score, score) == 0 &&
+ Objects.equals(o.docIds, docIds) &&
+ Objects.equals(o.labels, labels) &&
+ Objects.equals(o.subclusters, subclusters);
}
@Override
public int hashCode() {
- int result;
- long temp;
- result = labels.hashCode();
- temp = Double.doubleToLongBits(score);
- result = 31 * result + (int) (temp ^ (temp >>> 32));
- result = 31 * result + docIds.hashCode();
- return result;
+ return Objects.hash(subclusters, docIds, labels, score);
}
public List<String> getLabels() {
@@ -89,5 +93,15 @@ public class Cluster {
this.docIds = docIds;
}
+ public List<Cluster> getSubclusters() {
+ return subclusters;
+ }
+ /**
+ * @return If <code>true</code>, the cluster contains references to documents that are not semantically associated
+ * and form a group of documents not related to any other cluster (or themselves).
+ */
+ public boolean isOtherTopics() {
+ return otherTopics;
+ }
}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/7fb72bfe/solr/solrj/src/java/org/apache/solr/client/solrj/response/ClusteringResponse.java
----------------------------------------------------------------------
diff --git a/solr/solrj/src/java/org/apache/solr/client/solrj/response/ClusteringResponse.java b/solr/solrj/src/java/org/apache/solr/client/solrj/response/ClusteringResponse.java
index ad6e048..73afb6b 100644
--- a/solr/solrj/src/java/org/apache/solr/client/solrj/response/ClusteringResponse.java
+++ b/solr/solrj/src/java/org/apache/solr/client/solrj/response/ClusteringResponse.java
@@ -15,8 +15,10 @@
* limitations under the License.
*/
package org.apache.solr.client.solrj.response;
-import java.util.LinkedList;
+import java.util.ArrayList;
+import java.util.Collections;
import java.util.List;
+import java.util.Map;
import org.apache.solr.common.util.NamedList;
@@ -24,21 +26,47 @@ import org.apache.solr.common.util.NamedList;
* Encapsulates responses from ClusteringComponent
*/
public class ClusteringResponse {
-
+ private static final String CLUSTERS_NODE = "clusters";
private static final String LABELS_NODE = "labels";
private static final String DOCS_NODE = "docs";
private static final String SCORE_NODE = "score";
- private List<Cluster> clusters = new LinkedList<Cluster>();
+ private static final String IS_OTHER_TOPICS = "other-topics";
+ private List<Cluster> clusters;
+ @SuppressWarnings("unchecked")
public ClusteringResponse(List<NamedList<Object>> clusterInfo) {
+ clusters = new ArrayList<Cluster>();
for (NamedList<Object> clusterNode : clusterInfo) {
- List<String> labelList;
- List<String> docIdList;
- labelList = (List<String>) clusterNode.get(LABELS_NODE);
- double score = (double) clusterNode.get(SCORE_NODE);
- docIdList = (List<String>) clusterNode.get(DOCS_NODE);
- Cluster currentCluster = new Cluster(labelList, score, docIdList);
- clusters.add(currentCluster);
+ List<String> labelList, docIdList;
+ List<Cluster> subclusters = Collections.emptyList();
+ labelList = docIdList = Collections.emptyList();
+ Double score = 0d;
+ boolean otherTopics = false;
+ for (Map.Entry<String, ?> e : clusterNode) {
+ switch (e.getKey()) {
+ case LABELS_NODE:
+ labelList = (List<String>) e.getValue();
+ break;
+
+ case DOCS_NODE:
+ docIdList = (List<String>) e.getValue();
+ break;
+
+ case SCORE_NODE:
+ score = (Double) e.getValue();
+ break;
+
+ case CLUSTERS_NODE:
+ subclusters = new ClusteringResponse((List<NamedList<Object>>) e.getValue()).getClusters();
+ break;
+
+ case IS_OTHER_TOPICS:
+ otherTopics = (Boolean) e.getValue();
+ break;
+ }
+ }
+
+ clusters.add(new Cluster(labelList, score, docIdList, subclusters, otherTopics));
}
}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/7fb72bfe/solr/solrj/src/test-files/solrj/sampleClusteringResponse.xml
----------------------------------------------------------------------
diff --git a/solr/solrj/src/test-files/solrj/sampleClusteringResponse.xml b/solr/solrj/src/test-files/solrj/sampleClusteringResponse.xml
index 16d6e4a..ea042c9 100644
--- a/solr/solrj/src/test-files/solrj/sampleClusteringResponse.xml
+++ b/solr/solrj/src/test-files/solrj/sampleClusteringResponse.xml
@@ -58,6 +58,25 @@
<str>id2</str>
<str>id3</str>
</arr>
+ <arr name="clusters">
+ <lst>
+ <arr name="labels">
+ <str>label1.sub1</str>
+ </arr>
+ <arr name="docs">
+ <str>id1</str>
+ <str>id2</str>
+ </arr>
+ </lst>
+ <lst>
+ <arr name="labels">
+ <str>label1.sub2</str>
+ </arr>
+ <arr name="docs">
+ <str>id2</str>
+ </arr>
+ </lst>
+ </arr>
</lst>
<lst>
<arr name="labels">
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/7fb72bfe/solr/solrj/src/test/org/apache/solr/client/solrj/response/TestClusteringResponse.java
----------------------------------------------------------------------
diff --git a/solr/solrj/src/test/org/apache/solr/client/solrj/response/TestClusteringResponse.java b/solr/solrj/src/test/org/apache/solr/client/solrj/response/TestClusteringResponse.java
index 5bc20e1..7e789d1 100644
--- a/solr/solrj/src/test/org/apache/solr/client/solrj/response/TestClusteringResponse.java
+++ b/solr/solrj/src/test/org/apache/solr/client/solrj/response/TestClusteringResponse.java
@@ -19,7 +19,7 @@ import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.nio.charset.StandardCharsets;
-import java.util.LinkedList;
+import java.util.Arrays;
import java.util.List;
import org.apache.solr.SolrJettyTestBase;
@@ -49,51 +49,21 @@ public class TestClusteringResponse extends SolrJettyTestBase {
List<Cluster> clusters = clusteringResponse.getClusters();
Assert.assertEquals(4, clusters.size());
- //First Cluster
- Cluster cluster1 = clusters.get(0);
- List<String> expectedLabel1 = new LinkedList<String>();
- expectedLabel1.add("label1");
- List<String> expectedDocs1 = new LinkedList<String>();
- expectedDocs1.add("id1");
- expectedDocs1.add("id2");
- expectedDocs1.add("id3");
- Assert.assertEquals(expectedLabel1, cluster1.getLabels());
- Assert.assertEquals(expectedDocs1, cluster1.getDocs());
- Assert.assertEquals(expectedLabel1, cluster1.getLabels());
- Assert.assertEquals(0.6, cluster1.getScore(), 0);
- //Second Cluster
- Cluster cluster2 = clusters.get(1);
- List<String> expectedLabel2 = new LinkedList<String>();
- expectedLabel2.add("label2");
- List<String> expectedDocs2 = new LinkedList<String>();
- expectedDocs2.add("id5");
- expectedDocs2.add("id6");
- Assert.assertEquals(expectedLabel2, cluster2.getLabels());
- Assert.assertEquals(expectedDocs2, cluster2.getDocs());
- Assert.assertEquals(expectedLabel2, cluster2.getLabels());
- Assert.assertEquals(0.93, cluster2.getScore(), 0);
- //Third Cluster
- Cluster cluster3 = clusters.get(2);
- List<String> expectedLabel3 = new LinkedList<String>();
- expectedLabel3.add("label3");
- List<String> expectedDocs3 = new LinkedList<String>();
- expectedDocs3.add("id7");
- expectedDocs3.add("id8");
- Assert.assertEquals(expectedLabel3, cluster3.getLabels());
- Assert.assertEquals(expectedDocs3, cluster3.getDocs());
- Assert.assertEquals(expectedLabel3, cluster3.getLabels());
- Assert.assertEquals(1.26, cluster3.getScore(), 0);
- //Fourth Cluster
- Cluster cluster4 = clusters.get(3);
- List<String> expectedLabel4 = new LinkedList<String>();
- expectedLabel4.add("label4");
- List<String> expectedDocs4 = new LinkedList<String>();
- expectedDocs4.add("id9");
- Assert.assertEquals(expectedLabel4, cluster4.getLabels());
- Assert.assertEquals(expectedDocs4, cluster4.getDocs());
- Assert.assertEquals(expectedLabel4, cluster4.getLabels());
- Assert.assertEquals(0.0, cluster4.getScore(), 0);
-
+ checkCluster(clusters.get(0), Arrays.asList("label1"), Arrays.asList("id1", "id2", "id3"), 0.6d, false);
+ checkCluster(clusters.get(1), Arrays.asList("label2"), Arrays.asList("id5", "id6"), 0.93d, false);
+ checkCluster(clusters.get(2), Arrays.asList("label3"), Arrays.asList("id7", "id8"), 1.26d, false);
+ checkCluster(clusters.get(3), Arrays.asList("label4"), Arrays.asList("id9"), 0d, true);
+
+ List<Cluster> sub = clusters.get(0).getSubclusters();
+ checkCluster(sub.get(0), Arrays.asList("label1.sub1"), Arrays.asList("id1", "id2"), 0.0d, false);
+ checkCluster(sub.get(1), Arrays.asList("label1.sub2"), Arrays.asList("id2"), 0.0d, false);
+ assertEquals(sub.size(), 2);
}
+ private void checkCluster(Cluster cluster, List<String> labels, List<String> docRefs, double score, boolean otherTopics) {
+ Assert.assertEquals(cluster.getLabels(), labels);
+ Assert.assertEquals(cluster.getDocs(), docRefs);
+ Assert.assertTrue(Double.compare(cluster.getScore(), score) == 0);
+ Assert.assertEquals(otherTopics, cluster.isOtherTopics());
+ }
}