You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by mi...@apache.org on 2011/05/14 12:48:30 UTC
svn commit: r1103024 - in /lucene/dev/trunk:
lucene/src/java/org/apache/lucene/util/ modules/ modules/grouping/
modules/grouping/src/ modules/grouping/src/java/
modules/grouping/src/java/org/ modules/grouping/src/java/org/apache/
modules/grouping/src/j...
Author: mikemccand
Date: Sat May 14 10:48:29 2011
New Revision: 1103024
URL: http://svn.apache.org/viewvc?rev=1103024&view=rev
Log:
LUCENE-1421: factor out shared grouping module
Added:
lucene/dev/trunk/modules/grouping/
lucene/dev/trunk/modules/grouping/CHANGES.txt (with props)
lucene/dev/trunk/modules/grouping/build.xml (with props)
lucene/dev/trunk/modules/grouping/src/
lucene/dev/trunk/modules/grouping/src/java/
lucene/dev/trunk/modules/grouping/src/java/org/
lucene/dev/trunk/modules/grouping/src/java/org/apache/
lucene/dev/trunk/modules/grouping/src/java/org/apache/lucene/
lucene/dev/trunk/modules/grouping/src/java/org/apache/lucene/search/
lucene/dev/trunk/modules/grouping/src/java/org/apache/lucene/search/grouping/
lucene/dev/trunk/modules/grouping/src/java/org/apache/lucene/search/grouping/CachingCollector.java (with props)
lucene/dev/trunk/modules/grouping/src/java/org/apache/lucene/search/grouping/FirstPassGroupingCollector.java (with props)
lucene/dev/trunk/modules/grouping/src/java/org/apache/lucene/search/grouping/GroupDocs.java (with props)
lucene/dev/trunk/modules/grouping/src/java/org/apache/lucene/search/grouping/SearchGroup.java (with props)
lucene/dev/trunk/modules/grouping/src/java/org/apache/lucene/search/grouping/SecondPassGroupingCollector.java (with props)
lucene/dev/trunk/modules/grouping/src/java/org/apache/lucene/search/grouping/SentinelIntSet.java (with props)
lucene/dev/trunk/modules/grouping/src/java/org/apache/lucene/search/grouping/TopGroups.java (with props)
lucene/dev/trunk/modules/grouping/src/java/overview.html (with props)
lucene/dev/trunk/modules/grouping/src/test/
lucene/dev/trunk/modules/grouping/src/test/org/
lucene/dev/trunk/modules/grouping/src/test/org/apache/
lucene/dev/trunk/modules/grouping/src/test/org/apache/lucene/
lucene/dev/trunk/modules/grouping/src/test/org/apache/lucene/search/
lucene/dev/trunk/modules/grouping/src/test/org/apache/lucene/search/grouping/
lucene/dev/trunk/modules/grouping/src/test/org/apache/lucene/search/grouping/TestGrouping.java (with props)
Modified:
lucene/dev/trunk/lucene/src/java/org/apache/lucene/util/BytesRef.java
lucene/dev/trunk/modules/build.xml
Modified: lucene/dev/trunk/lucene/src/java/org/apache/lucene/util/BytesRef.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/src/java/org/apache/lucene/util/BytesRef.java?rev=1103024&r1=1103023&r2=1103024&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/util/BytesRef.java (original)
+++ lucene/dev/trunk/lucene/src/java/org/apache/lucene/util/BytesRef.java Sat May 14 10:48:29 2011
@@ -19,9 +19,6 @@ package org.apache.lucene.util;
import java.util.Comparator;
import java.io.UnsupportedEncodingException;
-import java.io.ObjectInput;
-import java.io.ObjectOutput;
-import java.io.IOException;
/** Represents byte[], as a slice (offset + length) into an
* existing byte[].
@@ -192,6 +189,9 @@ public final class BytesRef implements C
@Override
public boolean equals(Object other) {
+ if (other == null) {
+ return false;
+ }
return this.bytesEquals((BytesRef) other);
}
Modified: lucene/dev/trunk/modules/build.xml
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/build.xml?rev=1103024&r1=1103023&r2=1103024&view=diff
==============================================================================
--- lucene/dev/trunk/modules/build.xml (original)
+++ lucene/dev/trunk/modules/build.xml Sat May 14 10:48:29 2011
@@ -24,6 +24,7 @@
<subant target="test" inheritall="false" failonerror="true">
<fileset dir="analysis" includes="build.xml" />
<fileset dir="benchmark" includes="build.xml" />
+ <fileset dir="grouping" includes="build.xml" />
</subant>
</sequential>
</target>
@@ -33,6 +34,7 @@
<subant target="compile" inheritall="false" failonerror="true">
<fileset dir="analysis" includes="build.xml" />
<fileset dir="benchmark" includes="build.xml" />
+ <fileset dir="grouping" includes="build.xml" />
</subant>
</sequential>
</target>
@@ -42,6 +44,7 @@
<subant target="compile-test" inheritall="false" failonerror="true">
<fileset dir="analysis" includes="build.xml" />
<fileset dir="benchmark" includes="build.xml" />
+ <fileset dir="grouping" includes="build.xml" />
</subant>
</sequential>
</target>
@@ -51,6 +54,7 @@
<subant target="javadocs" inheritall="false" failonerror="true">
<fileset dir="analysis" includes="build.xml" />
<fileset dir="benchmark" includes="build.xml" />
+ <fileset dir="grouping" includes="build.xml" />
</subant>
</sequential>
</target>
@@ -61,6 +65,7 @@
<subant target="dist-maven" inheritall="false" failonerror="true">
<fileset dir="analysis" includes="build.xml" />
<fileset dir="benchmark" includes="build.xml" />
+ <fileset dir="grouping" includes="build.xml" />
</subant>
</sequential>
</target>
@@ -90,6 +95,7 @@
<subant target="clean" inheritall="false" failonerror="true">
<fileset dir="analysis" includes="build.xml" />
<fileset dir="benchmark" includes="build.xml" />
+ <fileset dir="grouping" includes="build.xml" />
</subant>
</sequential>
</target>
Added: lucene/dev/trunk/modules/grouping/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/grouping/CHANGES.txt?rev=1103024&view=auto
==============================================================================
--- lucene/dev/trunk/modules/grouping/CHANGES.txt (added)
+++ lucene/dev/trunk/modules/grouping/CHANGES.txt Sat May 14 10:48:29 2011
@@ -0,0 +1,8 @@
+Grouping Module Change Log
+
+======================= Trunk (not yet released) =======================
+
+LUCENE-1421: create new grouping module, enabling search results to be
+grouped by a single-valued indexed field. This module was factored
+out of Solr's grouping implementation, except it cannot group by
+function queries nor arbitrary queries. (Mike McCandless)
Added: lucene/dev/trunk/modules/grouping/build.xml
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/grouping/build.xml?rev=1103024&view=auto
==============================================================================
--- lucene/dev/trunk/modules/grouping/build.xml (added)
+++ lucene/dev/trunk/modules/grouping/build.xml Sat May 14 10:48:29 2011
@@ -0,0 +1,13 @@
+<?xml version="1.0"?>
+<project name="grouping" default="default">
+ <description>
+ Collectors for grouping search results
+ </description>
+
+ <property name="build.dir" location="build/" />
+ <property name="dist.dir" location="dist/" />
+ <property name="maven.dist.dir" location="../dist/maven" />
+
+ <import file="../../lucene/contrib/contrib-build.xml"/>
+ <property name="working.dir" location="work"/>
+</project>
Added: lucene/dev/trunk/modules/grouping/src/java/org/apache/lucene/search/grouping/CachingCollector.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/grouping/src/java/org/apache/lucene/search/grouping/CachingCollector.java?rev=1103024&view=auto
==============================================================================
--- lucene/dev/trunk/modules/grouping/src/java/org/apache/lucene/search/grouping/CachingCollector.java (added)
+++ lucene/dev/trunk/modules/grouping/src/java/org/apache/lucene/search/grouping/CachingCollector.java Sat May 14 10:48:29 2011
@@ -0,0 +1,256 @@
+package org.apache.lucene.search.grouping;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.lucene.index.IndexReader.AtomicReaderContext;
+import org.apache.lucene.search.Collector;
+import org.apache.lucene.search.Scorer;
+import org.apache.lucene.util.RamUsageEstimator;
+
+/**
+ * Caches all docs, and optionally also scores, coming from
+ * a search, and is then able to replay them to another
+ * collector. You specify the max RAM this class may use.
+ * Once the collection is done, call {@link #isCached}. If
+ * this returns true, you can use {@link #replay} against a
+ * new collector. If it returns false, this means too much
+ * RAM was required and you must instead re-run the original
+ * search.
+ *
+ * <p><b>NOTE</b>: this class consumes 4 (or 8 bytes, if
+ * scoring is cached) per collected document. If the result
+ * set is large this can easily be a very substantial amount
+ * of RAM!
+ *
+ * @lucene.experimental
+ */
+public class CachingCollector extends Collector {
+
+ private static class SegStart {
+ public final AtomicReaderContext readerContext;
+ public final int end;
+
+ public SegStart(AtomicReaderContext readerContext, int end) {
+ this.readerContext = readerContext;
+ this.end = end;
+ }
+ }
+
+ // TODO: would be nice if a collector defined a
+ // needsScores() method so we can specialize / do checks
+ // up front:
+ private final Collector other;
+ private final int maxDocsToCache;
+
+ private final Scorer cachedScorer;
+ private final List<int[]> cachedDocs;
+ private final List<float[]> cachedScores;
+ private final List<SegStart> cachedSegs = new ArrayList<SegStart>();
+
+ private Scorer scorer;
+ private int[] curDocs;
+ private float[] curScores;
+ private int upto;
+ private AtomicReaderContext lastReaderContext;
+ private float score;
+ private int base;
+ private int doc;
+
+ public CachingCollector(Collector other, boolean cacheScores, double maxRAMMB) {
+ this.other = other;
+ if (cacheScores) {
+ cachedScorer = new Scorer(null) {
+ @Override
+ public float score() {
+ return score;
+ }
+
+ @Override
+ public int advance(int target) {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public int docID() {
+ return doc;
+ }
+
+ @Override
+ public float freq() {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public int nextDoc() {
+ throw new UnsupportedOperationException();
+ }
+ };
+ cachedScores = new ArrayList<float[]>();
+ curScores = new float[128];
+ cachedScores.add(curScores);
+ } else {
+ cachedScorer = null;
+ cachedScores = null;
+ }
+ cachedDocs = new ArrayList<int[]>();
+ curDocs = new int[128];
+ cachedDocs.add(curDocs);
+
+ final int bytesPerDoc;
+ if (curScores != null) {
+ bytesPerDoc = RamUsageEstimator.NUM_BYTES_INT + RamUsageEstimator.NUM_BYTES_FLOAT;
+ } else {
+ bytesPerDoc = RamUsageEstimator.NUM_BYTES_INT;
+ }
+ maxDocsToCache = (int) ((maxRAMMB * 1024 * 1024)/bytesPerDoc);
+ }
+
+ @Override
+ public void setScorer(Scorer scorer) throws IOException {
+ this.scorer = scorer;
+ other.setScorer(cachedScorer);
+ }
+
+ @Override
+ public boolean acceptsDocsOutOfOrder() {
+ return other.acceptsDocsOutOfOrder();
+ }
+
+ @Override
+ public void collect(int doc) throws IOException {
+
+ if (curDocs == null) {
+ // Cache was too large
+ if (curScores != null) {
+ score = scorer.score();
+ }
+ this.doc = doc;
+ other.collect(doc);
+ return;
+ }
+
+ if (upto == curDocs.length) {
+ base += upto;
+ final int nextLength;
+ // Max out at 512K arrays:
+ if (curDocs.length < 524288) {
+ nextLength = 8*curDocs.length;
+ } else {
+ nextLength = curDocs.length;
+ }
+
+ if (base + nextLength > maxDocsToCache) {
+ // Too many docs to collect -- clear cache
+ curDocs = null;
+ if (curScores != null) {
+ score = scorer.score();
+ }
+ this.doc = doc;
+ other.collect(doc);
+ cachedDocs.clear();
+ cachedScores.clear();
+ return;
+ }
+ curDocs = new int[nextLength];
+ cachedDocs.add(curDocs);
+ if (curScores != null) {
+ curScores = new float[nextLength];
+ cachedScores.add(curScores);
+ }
+ upto = 0;
+ }
+ curDocs[upto] = doc;
+ // TODO: maybe specialize private subclass so we don't
+ // null check per collect...
+ if (curScores != null) {
+ score = curScores[upto] = scorer.score();
+ }
+ upto++;
+ this.doc = doc;
+ other.collect(doc);
+ }
+
+ public boolean isCached() {
+ return curDocs != null;
+ }
+
+ @Override
+ public void setNextReader(AtomicReaderContext context) throws IOException {
+ other.setNextReader(context);
+ if (lastReaderContext != null) {
+ cachedSegs.add(new SegStart(lastReaderContext, base+upto));
+ }
+ lastReaderContext = context;
+ }
+
+ private final static int[] EMPTY_INT_ARRAY = new int[0];
+
+ @Override
+ public String toString() {
+ if (isCached()) {
+ return "CachingCollector (" + (base+upto) + " docs " + (curScores != null ? " & scores" : "") + " cached)";
+ } else {
+ return "CachingCollector (cache was cleared)";
+ }
+ }
+
+ public void replay(Collector other) throws IOException {
+ if (!isCached()) {
+ throw new IllegalStateException("cannot replay: cache was cleared because too much RAM was required");
+ }
+ //System.out.println("CC: replay totHits=" + (upto + base));
+ if (lastReaderContext != null) {
+ cachedSegs.add(new SegStart(lastReaderContext, base+upto));
+ lastReaderContext = null;
+ }
+ final int uptoSav = upto;
+ final int baseSav = base;
+ try {
+ upto = 0;
+ base = 0;
+ int chunkUpto = 0;
+ other.setScorer(cachedScorer);
+ curDocs = EMPTY_INT_ARRAY;
+ for(SegStart seg : cachedSegs) {
+ other.setNextReader(seg.readerContext);
+ while(base+upto < seg.end) {
+ if (upto == curDocs.length) {
+ base += curDocs.length;
+ curDocs = cachedDocs.get(chunkUpto);
+ if (curScores != null) {
+ curScores = cachedScores.get(chunkUpto);
+ }
+ chunkUpto++;
+ upto = 0;
+ }
+ if (curScores != null) {
+ score = curScores[upto];
+ }
+ other.collect(curDocs[upto++]);
+ }
+ }
+ } finally {
+ upto = uptoSav;
+ base = baseSav;
+ }
+ }
+}
Added: lucene/dev/trunk/modules/grouping/src/java/org/apache/lucene/search/grouping/FirstPassGroupingCollector.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/grouping/src/java/org/apache/lucene/search/grouping/FirstPassGroupingCollector.java?rev=1103024&view=auto
==============================================================================
--- lucene/dev/trunk/modules/grouping/src/java/org/apache/lucene/search/grouping/FirstPassGroupingCollector.java (added)
+++ lucene/dev/trunk/modules/grouping/src/java/org/apache/lucene/search/grouping/FirstPassGroupingCollector.java Sat May 14 10:48:29 2011
@@ -0,0 +1,362 @@
+package org.apache.lucene.search.grouping;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Comparator;
+import java.util.HashMap;
+import java.util.TreeSet;
+
+import org.apache.lucene.index.IndexReader.AtomicReaderContext;
+import org.apache.lucene.search.Collector;
+import org.apache.lucene.search.FieldCache;
+import org.apache.lucene.search.FieldComparator;
+import org.apache.lucene.search.Scorer;
+import org.apache.lucene.search.Sort;
+import org.apache.lucene.search.SortField;
+import org.apache.lucene.util.BytesRef;
+
+/** FirstPassGroupingCollector is the first of two passes necessary
+ * to collected grouped hits. This pass gathers the top N sorted
+ * groups.
+ *
+ * @lucene.experimental
+ */
+
+public class FirstPassGroupingCollector extends Collector {
+
+ private final String groupField;
+ private final Sort groupSort;
+ private final FieldComparator[] comparators;
+ private final int[] reversed;
+ private final int topNGroups;
+ private final HashMap<BytesRef, CollectedSearchGroup> groupMap;
+ private final BytesRef scratchBytesRef = new BytesRef();
+ private final int compIDXEnd;
+
+ // Set once we reach topNGroups unique groups:
+ private TreeSet<CollectedSearchGroup> orderedGroups;
+ private int docBase;
+ private int spareSlot;
+ private FieldCache.DocTermsIndex index;
+
+ /**
+ * Create the first pass collector.
+ *
+ * @param groupField The field used to group
+ * documents. This field must be single-valued and
+ * indexed (FieldCache is used to access its value
+ * per-document).
+ * @param groupSort The {@link Sort} used to sort the
+ * groups. The top sorted document within each group
+ * according to groupSort, determines how that group
+ * sorts against other groups. This must be non-null,
+ * ie, if you want to groupSort by relevance use
+ * Sort.RELEVANCE.
+ * @param topNGroups How many top groups to keep.
+ */
+ public FirstPassGroupingCollector(String groupField, Sort groupSort, int topNGroups) throws IOException {
+ if (topNGroups < 1) {
+ throw new IllegalArgumentException("topNGroups must be >= 1 (got " + topNGroups + ")");
+ }
+
+ this.groupField = groupField;
+ // TODO: allow null groupSort to mean "by relevance",
+ // and specialize it?
+ this.groupSort = groupSort;
+
+ this.topNGroups = topNGroups;
+
+ final SortField[] sortFields = groupSort.getSort();
+ comparators = new FieldComparator[sortFields.length];
+ compIDXEnd = comparators.length - 1;
+ reversed = new int[sortFields.length];
+ for (int i = 0; i < sortFields.length; i++) {
+ final SortField sortField = sortFields[i];
+
+ // use topNGroups + 1 so we have a spare slot to use for comparing (tracked by this.spareSlot):
+ comparators[i] = sortField.getComparator(topNGroups + 1, i);
+ reversed[i] = sortField.getReverse() ? -1 : 1;
+ }
+
+ spareSlot = topNGroups;
+ groupMap = new HashMap<BytesRef, CollectedSearchGroup>(topNGroups);
+ }
+
+ /** Returns top groups, starting from offset. This may
+ * return null, if no groups were collected, or if the
+ * number of unique groups collected is <= offset. */
+ public Collection<SearchGroup> getTopGroups(int groupOffset, boolean fillFields) {
+
+ //System.out.println("FP.getTopGroups groupOffset=" + groupOffset + " fillFields=" + fillFields + " groupMap.size()=" + groupMap.size());
+
+ if (groupOffset < 0) {
+ throw new IllegalArgumentException("groupOffset must be >= 0 (got " + groupOffset + ")");
+ }
+
+ if (groupMap.size() <= groupOffset) {
+ return null;
+ }
+
+ if (orderedGroups == null) {
+ buildSortedSet();
+ }
+
+ final Collection<SearchGroup> result = new ArrayList<SearchGroup>();
+ int upto = 0;
+ final int sortFieldCount = groupSort.getSort().length;
+ for(CollectedSearchGroup group : orderedGroups) {
+ if (upto++ < groupOffset) {
+ continue;
+ }
+ //System.out.println(" group=" + (group.groupValue == null ? "null" : group.groupValue.utf8ToString()));
+ SearchGroup searchGroup = new SearchGroup();
+ searchGroup.groupValue = group.groupValue;
+ if (fillFields) {
+ searchGroup.sortValues = new Comparable[sortFieldCount];
+ for(int sortFieldIDX=0;sortFieldIDX<sortFieldCount;sortFieldIDX++) {
+ searchGroup.sortValues[sortFieldIDX] = comparators[sortFieldIDX].value(group.comparatorSlot);
+ }
+ }
+ result.add(searchGroup);
+ }
+ //System.out.println(" return " + result.size() + " groups");
+ return result;
+ }
+
+ public String getGroupField() {
+ return groupField;
+ }
+
+ @Override
+ public void setScorer(Scorer scorer) throws IOException {
+ for (FieldComparator comparator : comparators) {
+ comparator.setScorer(scorer);
+ }
+ }
+
+ @Override
+ public void collect(int doc) throws IOException {
+ //System.out.println("FP.collect doc=" + doc);
+
+ // If orderedGroups != null we already have collected N groups and
+ // can short circuit by comparing this document to the bottom group,
+ // without having to find what group this document belongs to.
+
+ // Even if this document belongs to a group in the top N, we'll know that
+ // we don't have to update that group.
+
+ // Downside: if the number of unique groups is very low, this is
+ // wasted effort as we will most likely be updating an existing group.
+ if (orderedGroups != null) {
+ for (int compIDX = 0;; compIDX++) {
+ final int c = reversed[compIDX] * comparators[compIDX].compareBottom(doc);
+ if (c < 0) {
+ // Definitely not competitive. So don't even bother to continue
+ return;
+ } else if (c > 0) {
+ // Definitely competitive.
+ break;
+ } else if (compIDX == compIDXEnd) {
+ // Here c=0. If we're at the last comparator, this doc is not
+ // competitive, since docs are visited in doc Id order, which means
+ // this doc cannot compete with any other document in the queue.
+ return;
+ }
+ }
+ }
+
+ // TODO: should we add option to mean "ignore docs that
+ // don't have the group field" (instead of stuffing them
+ // under null group)?
+ final int ord = index.getOrd(doc);
+ //System.out.println(" ord=" + ord);
+
+ final BytesRef br = ord == 0 ? null : index.lookup(ord, scratchBytesRef);
+ //System.out.println(" group=" + (br == null ? "null" : br.utf8ToString()));
+
+ final CollectedSearchGroup group = groupMap.get(br);
+
+ if (group == null) {
+
+ // First time we are seeing this group, or, we've seen
+ // it before but it fell out of the top N and is now
+ // coming back
+
+ if (groupMap.size() < topNGroups) {
+
+ // Still in startup transient: we have not
+ // seen enough unique groups to start pruning them;
+ // just keep collecting them
+
+ // Add a new CollectedSearchGroup:
+ CollectedSearchGroup sg = new CollectedSearchGroup();
+ sg.groupValue = ord == 0 ? null : new BytesRef(scratchBytesRef);
+ sg.comparatorSlot = groupMap.size();
+ sg.topDoc = docBase + doc;
+ for (FieldComparator fc : comparators) {
+ fc.copy(sg.comparatorSlot, doc);
+ }
+ groupMap.put(sg.groupValue, sg);
+
+ if (groupMap.size() == topNGroups) {
+ // End of startup transient: we now have max
+ // number of groups; from here on we will drop
+ // bottom group when we insert new one:
+ buildSortedSet();
+ }
+
+ return;
+ }
+
+ // We already tested that the document is competitive, so replace
+ // the bottom group with this new group.
+
+ final CollectedSearchGroup bottomGroup = orderedGroups.pollLast();
+ assert orderedGroups.size() == topNGroups -1;
+
+ groupMap.remove(bottomGroup.groupValue);
+
+ // reuse the removed CollectedSearchGroup
+ if (br == null) {
+ bottomGroup.groupValue = null;
+ } else if (bottomGroup.groupValue != null) {
+ bottomGroup.groupValue.copy(br);
+ } else {
+ bottomGroup.groupValue = new BytesRef(br);
+ }
+ bottomGroup.topDoc = docBase + doc;
+
+ for (FieldComparator fc : comparators) {
+ fc.copy(bottomGroup.comparatorSlot, doc);
+ }
+
+ groupMap.put(bottomGroup.groupValue, bottomGroup);
+ orderedGroups.add(bottomGroup);
+ assert orderedGroups.size() == topNGroups;
+
+ final int lastComparatorSlot = orderedGroups.last().comparatorSlot;
+ for (FieldComparator fc : comparators) {
+ fc.setBottom(lastComparatorSlot);
+ }
+
+ return;
+ }
+
+ // Update existing group:
+ for (int compIDX = 0;; compIDX++) {
+ final FieldComparator fc = comparators[compIDX];
+ fc.copy(spareSlot, doc);
+
+ final int c = reversed[compIDX] * fc.compare(group.comparatorSlot, spareSlot);
+ if (c < 0) {
+ // Definitely not competitive.
+ return;
+ } else if (c > 0) {
+ // Definitely competitive; set remaining comparators:
+ for (int compIDX2=compIDX+1; compIDX2<comparators.length; compIDX2++) {
+ comparators[compIDX2].copy(spareSlot, doc);
+ }
+ break;
+ } else if (compIDX == compIDXEnd) {
+ // Here c=0. If we're at the last comparator, this doc is not
+ // competitive, since docs are visited in doc Id order, which means
+ // this doc cannot compete with any other document in the queue.
+ return;
+ }
+ }
+
+ // Remove before updating the group since lookup is done via comparators
+ // TODO: optimize this
+
+ final CollectedSearchGroup prevLast;
+ if (orderedGroups != null) {
+ prevLast = orderedGroups.last();
+ orderedGroups.remove(group);
+ assert orderedGroups.size() == topNGroups-1;
+ } else {
+ prevLast = null;
+ }
+
+ group.topDoc = docBase + doc;
+
+ // Swap slots
+ final int tmp = spareSlot;
+ spareSlot = group.comparatorSlot;
+ group.comparatorSlot = tmp;
+
+ // Re-add the changed group
+ if (orderedGroups != null) {
+ orderedGroups.add(group);
+ assert orderedGroups.size() == topNGroups;
+ final CollectedSearchGroup newLast = orderedGroups.last();
+ // If we changed the value of the last group, or changed which group was last, then update bottom:
+ if (group == newLast || prevLast != newLast) {
+ for (FieldComparator fc : comparators) {
+ fc.setBottom(newLast.comparatorSlot);
+ }
+ }
+ }
+ }
+
+ private void buildSortedSet() {
+ final Comparator<CollectedSearchGroup> comparator = new Comparator<CollectedSearchGroup>() {
+ public int compare(CollectedSearchGroup o1, CollectedSearchGroup o2) {
+ for (int compIDX = 0;; compIDX++) {
+ FieldComparator fc = comparators[compIDX];
+ final int c = reversed[compIDX] * fc.compare(o1.comparatorSlot, o2.comparatorSlot);
+ if (c != 0) {
+ return c;
+ } else if (compIDX == compIDXEnd) {
+ return o1.topDoc - o2.topDoc;
+ }
+ }
+ }
+ };
+
+ orderedGroups = new TreeSet<CollectedSearchGroup>(comparator);
+ orderedGroups.addAll(groupMap.values());
+ assert orderedGroups.size() > 0;
+
+ for (FieldComparator fc : comparators) {
+ fc.setBottom(orderedGroups.last().comparatorSlot);
+ }
+ }
+
+ @Override
+ public boolean acceptsDocsOutOfOrder() {
+ return false;
+ }
+
+ @Override
+ public void setNextReader(AtomicReaderContext readerContext) throws IOException {
+ docBase = readerContext.docBase;
+ index = FieldCache.DEFAULT.getTermsIndex(readerContext.reader, groupField);
+
+ for (int i=0; i<comparators.length; i++) {
+ comparators[i] = comparators[i].setNextReader(readerContext);
+ }
+ }
+}
+
+class CollectedSearchGroup extends SearchGroup {
+ int topDoc;
+ int comparatorSlot;
+}
Added: lucene/dev/trunk/modules/grouping/src/java/org/apache/lucene/search/grouping/GroupDocs.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/grouping/src/java/org/apache/lucene/search/grouping/GroupDocs.java?rev=1103024&view=auto
==============================================================================
--- lucene/dev/trunk/modules/grouping/src/java/org/apache/lucene/search/grouping/GroupDocs.java (added)
+++ lucene/dev/trunk/modules/grouping/src/java/org/apache/lucene/search/grouping/GroupDocs.java Sat May 14 10:48:29 2011
@@ -0,0 +1,57 @@
+package org.apache.lucene.search.grouping;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.search.ScoreDoc;
+import org.apache.lucene.util.BytesRef;
+
+/** Represents one group in the results.
+ *
+ * @lucene.experimental */
+public class GroupDocs {
+ /** The groupField value for all docs in this group; this
+ * may be null if hits did not have the groupField. */
+ public final BytesRef groupValue;
+
+ /** Max score in this group */
+ public final float maxScore;
+
+ /** Hits; this may be {@link
+ * org.apache.lucene.search.FieldDoc} instances if the
+ * withinGroupSort sorted by fields. */
+ public final ScoreDoc[] scoreDocs;
+
+ /** Total hits within this group */
+ public final int totalHits;
+
+ /** Matches the groupSort passed to {@link
+ * FirstPassGroupingCollector}. */
+ public final Comparable[] groupSortValues;
+
+ public GroupDocs(float maxScore,
+ int totalHits,
+ ScoreDoc[] scoreDocs,
+ BytesRef groupValue,
+ Comparable[] groupSortValues) {
+ this.maxScore = maxScore;
+ this.totalHits = totalHits;
+ this.scoreDocs = scoreDocs;
+ this.groupValue = groupValue;
+ this.groupSortValues = groupSortValues;
+ }
+}
Added: lucene/dev/trunk/modules/grouping/src/java/org/apache/lucene/search/grouping/SearchGroup.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/grouping/src/java/org/apache/lucene/search/grouping/SearchGroup.java?rev=1103024&view=auto
==============================================================================
--- lucene/dev/trunk/modules/grouping/src/java/org/apache/lucene/search/grouping/SearchGroup.java (added)
+++ lucene/dev/trunk/modules/grouping/src/java/org/apache/lucene/search/grouping/SearchGroup.java Sat May 14 10:48:29 2011
@@ -0,0 +1,26 @@
+package org.apache.lucene.search.grouping;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.util.BytesRef;
+
+/** @lucene.experimental */
+class SearchGroup {
+ public BytesRef groupValue;
+ public Comparable[] sortValues;
+}
Added: lucene/dev/trunk/modules/grouping/src/java/org/apache/lucene/search/grouping/SecondPassGroupingCollector.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/grouping/src/java/org/apache/lucene/search/grouping/SecondPassGroupingCollector.java?rev=1103024&view=auto
==============================================================================
--- lucene/dev/trunk/modules/grouping/src/java/org/apache/lucene/search/grouping/SecondPassGroupingCollector.java (added)
+++ lucene/dev/trunk/modules/grouping/src/java/org/apache/lucene/search/grouping/SecondPassGroupingCollector.java Sat May 14 10:48:29 2011
@@ -0,0 +1,165 @@
+package org.apache.lucene.search.grouping;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.Collection;
+import java.util.HashMap;
+
+import org.apache.lucene.index.IndexReader.AtomicReaderContext;
+import org.apache.lucene.search.Collector;
+import org.apache.lucene.search.FieldCache;
+import org.apache.lucene.search.Scorer;
+import org.apache.lucene.search.Sort;
+import org.apache.lucene.search.TopDocs;
+import org.apache.lucene.search.TopDocsCollector;
+import org.apache.lucene.search.TopFieldCollector;
+import org.apache.lucene.search.TopScoreDocCollector;
+import org.apache.lucene.util.BytesRef;
+
+/**
+ * See {@link FirstPassGroupingCollector}.
+ * @lucene.experimental
+ */
+public class SecondPassGroupingCollector extends Collector {
+ private final HashMap<BytesRef, SearchGroupDocs> groupMap;
+
+ private FieldCache.DocTermsIndex index;
+ private final String groupField;
+ private final int maxDocsPerGroup;
+ private final SentinelIntSet ordSet;
+ private final SearchGroupDocs[] groupDocs;
+ private final BytesRef spareBytesRef = new BytesRef();
+ private final Collection<SearchGroup> groups;
+ private final Sort withinGroupSort;
+ private final Sort groupSort;
+
+ private int totalHitCount;
+ private int totalGroupedHitCount;
+
+ public SecondPassGroupingCollector(String groupField, Collection<SearchGroup> groups, Sort groupSort, Sort withinGroupSort,
+ int maxDocsPerGroup, boolean getScores, boolean getMaxScores, boolean fillSortFields)
+ throws IOException {
+
+ //System.out.println("SP init");
+ if (groups.size() == 0) {
+ throw new IllegalArgumentException("no groups to collect (groups.size() is 0)");
+ }
+
+ this.groupSort = groupSort;
+ this.withinGroupSort = withinGroupSort;
+ this.groups = groups;
+ this.groupField = groupField;
+ this.maxDocsPerGroup = maxDocsPerGroup;
+
+ groupMap = new HashMap<BytesRef, SearchGroupDocs>(groups.size());
+
+ for (SearchGroup group : groups) {
+ //System.out.println(" prep group=" + (group.groupValue == null ? "null" : group.groupValue.utf8ToString()));
+ final TopDocsCollector collector;
+ if (withinGroupSort == null) {
+ // Sort by score
+ collector = TopScoreDocCollector.create(maxDocsPerGroup, true);
+ } else {
+ // Sort by fields
+ collector = TopFieldCollector.create(withinGroupSort, maxDocsPerGroup, fillSortFields, getScores, getMaxScores, true);
+ }
+ groupMap.put(group.groupValue,
+ new SearchGroupDocs(group.groupValue,
+ collector));
+ }
+
+ ordSet = new SentinelIntSet(groupMap.size(), -1);
+ groupDocs = new SearchGroupDocs[ordSet.keys.length];
+ }
+
+ @Override
+ public void setScorer(Scorer scorer) throws IOException {
+ for (SearchGroupDocs group : groupMap.values()) {
+ group.collector.setScorer(scorer);
+ }
+ }
+
+ @Override
+ public void collect(int doc) throws IOException {
+ final int slot = ordSet.find(index.getOrd(doc));
+ //System.out.println("SP.collect doc=" + doc + " slot=" + slot);
+ totalHitCount++;
+ if (slot >= 0) {
+ totalGroupedHitCount++;
+ groupDocs[slot].collector.collect(doc);
+ }
+ }
+
+ @Override
+ public void setNextReader(AtomicReaderContext readerContext) throws IOException {
+ //System.out.println("SP.setNextReader");
+ for (SearchGroupDocs group : groupMap.values()) {
+ group.collector.setNextReader(readerContext);
+ }
+ index = FieldCache.DEFAULT.getTermsIndex(readerContext.reader, groupField);
+
+ // Rebuild ordSet
+ ordSet.clear();
+ for (SearchGroupDocs group : groupMap.values()) {
+ //System.out.println(" group=" + (group.groupValue == null ? "null" : group.groupValue.utf8ToString()));
+ int ord = group.groupValue == null ? 0 : index.binarySearchLookup(group.groupValue, spareBytesRef);
+ if (ord >= 0) {
+ groupDocs[ordSet.put(ord)] = group;
+ }
+ }
+ }
+
+ @Override
+ public boolean acceptsDocsOutOfOrder() {
+ return false;
+ }
+
+ public TopGroups getTopGroups(int withinGroupOffset) {
+ final GroupDocs[] groupDocsResult = new GroupDocs[groups.size()];
+
+ int groupIDX = 0;
+ for(SearchGroup group : groups) {
+ final SearchGroupDocs groupDocs = groupMap.get(group.groupValue);
+ final TopDocs topDocs = groupDocs.collector.topDocs(withinGroupOffset, maxDocsPerGroup);
+ groupDocsResult[groupIDX++] = new GroupDocs(topDocs.getMaxScore(),
+ topDocs.totalHits,
+ topDocs.scoreDocs,
+ groupDocs.groupValue,
+ group.sortValues);
+ }
+
+ return new TopGroups(groupSort.getSort(),
+ withinGroupSort == null ? null : withinGroupSort.getSort(),
+ totalHitCount, totalGroupedHitCount, groupDocsResult);
+ }
+}
+
+
+// TODO: merge with SearchGroup or not?
+// ad: don't need to build a new hashmap
+// disad: blows up the size of SearchGroup if we need many of them, and couples implementations
+class SearchGroupDocs {
+ public final BytesRef groupValue;
+ public final TopDocsCollector collector;
+
+ public SearchGroupDocs(BytesRef groupValue, TopDocsCollector collector) {
+ this.groupValue = groupValue;
+ this.collector = collector;
+ }
+}
Added: lucene/dev/trunk/modules/grouping/src/java/org/apache/lucene/search/grouping/SentinelIntSet.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/grouping/src/java/org/apache/lucene/search/grouping/SentinelIntSet.java?rev=1103024&view=auto
==============================================================================
--- lucene/dev/trunk/modules/grouping/src/java/org/apache/lucene/search/grouping/SentinelIntSet.java (added)
+++ lucene/dev/trunk/modules/grouping/src/java/org/apache/lucene/search/grouping/SentinelIntSet.java Sat May 14 10:48:29 2011
@@ -0,0 +1,116 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.lucene.search.grouping;
+
+import java.util.Arrays;
+
+/** A native int set where one value is reserved to mean "EMPTY" */
+class SentinelIntSet {
+ public int[] keys;
+ public int count;
+ public final int emptyVal;
+ public int rehashCount; // the count at which a rehash should be done
+
+ public SentinelIntSet(int size, int emptyVal) {
+ this.emptyVal = emptyVal;
+ int tsize = Math.max(org.apache.lucene.util.BitUtil.nextHighestPowerOfTwo(size), 1);
+ rehashCount = tsize - (tsize>>2);
+ if (size >= rehashCount) { // should be able to hold "size" w/o rehashing
+ tsize <<= 1;
+ rehashCount = tsize - (tsize>>2);
+ }
+ keys = new int[tsize];
+ if (emptyVal != 0)
+ clear();
+ }
+
+ public void clear() {
+ Arrays.fill(keys, emptyVal);
+ count = 0;
+ }
+
+ public int hash(int key) {
+ return key;
+ }
+
+ public int size() { return count; }
+
+ /** returns the slot for this key */
+ public int getSlot(int key) {
+ assert key != emptyVal;
+ int h = hash(key);
+ int s = h & (keys.length-1);
+ if (keys[s] == key || keys[s]== emptyVal) return s;
+
+ int increment = (h>>7)|1;
+ do {
+ s = (s + increment) & (keys.length-1);
+ } while (keys[s] != key && keys[s] != emptyVal);
+ return s;
+ }
+
+ /** returns the slot for this key, or -slot-1 if not found */
+ public int find(int key) {
+ assert key != emptyVal;
+ int h = hash(key);
+ int s = h & (keys.length-1);
+ if (keys[s] == key) return s;
+ if (keys[s] == emptyVal) return -s-1;
+
+ int increment = (h>>7)|1;
+ for(;;) {
+ s = (s + increment) & (keys.length-1);
+ if (keys[s] == key) return s;
+ if (keys[s] == emptyVal) return -s-1;
+ }
+ }
+
+ public boolean exists(int key) {
+ return find(key) >= 0;
+ }
+
+ public int put(int key) {
+ int s = find(key);
+ if (s < 0) {
+ if (count >= rehashCount) {
+ rehash();
+ s = getSlot(key);
+ } else {
+ s = -s-1;
+ }
+ count++;
+ keys[s] = key;
+ }
+ return s;
+ }
+
+ public void rehash() {
+ int newSize = keys.length << 1;
+ int[] oldKeys = keys;
+ keys = new int[newSize];
+ if (emptyVal != 0) Arrays.fill(keys, emptyVal);
+
+ for (int i=0; i<oldKeys.length; i++) {
+ int key = oldKeys[i];
+ if (key == emptyVal) continue;
+ int newSlot = getSlot(key);
+ keys[newSlot] = key;
+ }
+ rehashCount = newSize - (newSize>>2);
+ }
+}
Added: lucene/dev/trunk/modules/grouping/src/java/org/apache/lucene/search/grouping/TopGroups.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/grouping/src/java/org/apache/lucene/search/grouping/TopGroups.java?rev=1103024&view=auto
==============================================================================
--- lucene/dev/trunk/modules/grouping/src/java/org/apache/lucene/search/grouping/TopGroups.java (added)
+++ lucene/dev/trunk/modules/grouping/src/java/org/apache/lucene/search/grouping/TopGroups.java Sat May 14 10:48:29 2011
@@ -0,0 +1,51 @@
+package org.apache.lucene.search.grouping;
+
+import org.apache.lucene.search.SortField;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/** Represents result returned by a grouping search.
+ *
+ * Note that we do not return the total number of unique
+ * groups; doing so would be costly.
+ *
+ * @lucene.experimental */
+public class TopGroups {
+ /** Number of documents matching the search */
+ public final int totalHitCount;
+
+ /** Number of documents grouped into the topN groups */
+ public final int totalGroupedHitCount;
+
+ /** Group results in groupSort order */
+ public final GroupDocs[] groups;
+
+ /** How groups are sorted against each other */
+ public final SortField[] groupSort;
+
+ /** How docs are sorted within each group */
+ public final SortField[] withinGroupSort;
+
+ public TopGroups(SortField[] groupSort, SortField[] withinGroupSort, int totalHitCount, int totalGroupedHitCount, GroupDocs[] groups) {
+ this.groupSort = groupSort;
+ this.withinGroupSort = withinGroupSort;
+ this.totalHitCount = totalHitCount;
+ this.totalGroupedHitCount = totalGroupedHitCount;
+ this.groups = groups;
+ }
+}
Added: lucene/dev/trunk/modules/grouping/src/java/overview.html
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/grouping/src/java/overview.html?rev=1103024&view=auto
==============================================================================
--- lucene/dev/trunk/modules/grouping/src/java/overview.html (added)
+++ lucene/dev/trunk/modules/grouping/src/java/overview.html Sat May 14 10:48:29 2011
@@ -0,0 +1,105 @@
+<html>
+<body>
+
+<p>This module enables search result grouping with Lucene, where hits
+with the same value in the specified single-valued group field are
+grouped together. For example, if you group by the <tt>author</tt>
+field, then all documents with the same value in the <tt>author</tt>
+field fall into a single group.</p>
+
+<p>Grouping requires a number of inputs:</p>
+
+ <ul>
+ <li> <tt>groupField</tt>: this is the field used for grouping.
+ For example, if you use the <tt>author</tt> field then each
+ group has all books by the same author. Documents that don't
+ have this field are grouped under a single group with
+ a <tt>null</tt> group value.
+
+ <li> <tt>groupSort</tt>: how the groups are sorted. For sorting
+ purposes, each group is "represented" by the highest-sorted
+ document according to the <tt>groupSort</tt> within it. For
+ example, if you specify "price" (ascending) then the first group
+ is the one with the lowest price book within it. Or if you
+ specify relevance group sort, then the first group is the one
+ containing the highest scoring book.
+
+ <li> <tt>topNGroups</tt>: how many top groups to keep. For
+ example, 10 means the top 10 groups are computed.
+
+ <li> <tt>groupOffset</tt>: which "slice" of top groups you want to
+ retrieve. For example, 3 means you'll get 7 groups back
+ (assuming <tt>topNGroups</tt> is 10). This is useful for
+ paging, where you might show 5 groups per page.
+
+ <li> <tt>withinGroupSort</tt>: how the documents within each group
+ are sorted. This can be different from the group sort.
+
+ <li> <tt>maxDocsPerGroup</tt>: how many top documents within each
+ group to keep.
+
+ <li> <tt>withinGroupOffset</tt>: which "slice" of top
+ documents you want to retrieve from each group.
+
+ </ul>
+
+<p>The implementation is two-pass: the first pass ({@link
+ org.apache.lucene.search.grouping.FirstPassGroupingCollector})
+ gathers the top groups, and the second pass ({@link
+ org.apache.lucene.search.grouping.SecondPassGroupingCollector})
+ gathers documents within those groups. If the search is costly to
+ run you may want to use the {@link
+ org.apache.lucene.search.grouping.CachingCollector} class, which
+ caches hits and can (quickly) replay them for the second pass. This
+ way you only run the query once, but you pay a RAM cost to (briefly)
+ hold all hits. Results are returned as a {@link
+ org.apache.lucene.search.grouping.TopGroups} instance.</p>
+
+<p>Known limitations:</p>
+<ul>
+ <li> The group field must be a single-valued indexed field.
+ {@link org.apache.lucene.search.FieldCache} is used to load the {@link org.apache.lucene.search.FieldCache.DocTermsIndex} for this field.
+ <li> Unlike Solr's implementation, this module cannot group by
+ function query values nor by arbitrary queries.
+ <li> Sharding is not directly supported, though is not too
+ difficult, if you can merge the top groups and top documents per
+ group yourself.
+</ul>
+
+<p>Typical usage looks like this (using the {@link org.apache.lucene.search.grouping.CachingCollector}):</p>
+
+<pre>
+ FirstPassGroupingCollector c1 = new FirstPassGroupingCollector("author", groupSort, groupOffset+topNGroups);
+
+ boolean cacheScores = true;
+ double maxCacheRAMMB = 4.0;
+ CachingCollector cachedCollector = new CachingCollector(c1, cacheScores, maxCacheRAMMB);
+ s.search(new TermQuery(new Term("content", searchTerm)), cachedCollector);
+
+ Collection<SearchGroup> topGroups = c1.getTopGroups(groupOffset, fillFields);
+
+ if (topGroups == null) {
+ // No groups matched
+ return;
+ }
+
+ boolean getScores = true;
+ boolean getMaxScores = true;
+ boolean fillFields = true;
+ SecondPassGroupingCollector c2 = new SecondPassGroupingCollector("author", topGroups, groupSort, docSort, docOffset+docsPerGroup, getScores, getMaxScores, fillFields);
+
+ if (cachedCollector.isCached()) {
+ // Cache fit within maxCacheRAMMB, so we can replay it:
+ cachedCollector.replay(c2);
+ } else {
+ // Cache was too large; must re-execute query:
+ s.search(new TermQuery(new Term("content", searchTerm)), c2);
+ }
+
+ TopGroups groupsResult = c2.getTopGroups(docOffset);
+
+ // Render groupsResult...
+</pre>
+
+</body>
+</html>
Added: lucene/dev/trunk/modules/grouping/src/test/org/apache/lucene/search/grouping/TestGrouping.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/grouping/src/test/org/apache/lucene/search/grouping/TestGrouping.java?rev=1103024&view=auto
==============================================================================
--- lucene/dev/trunk/modules/grouping/src/test/org/apache/lucene/search/grouping/TestGrouping.java (added)
+++ lucene/dev/trunk/modules/grouping/src/test/org/apache/lucene/search/grouping/TestGrouping.java Sat May 14 10:48:29 2011
@@ -0,0 +1,539 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.lucene.search.grouping;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.HashMap;
+import java.util.List;
+
+import org.apache.lucene.analysis.MockAnalyzer;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.document.NumericField;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.RandomIndexWriter;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.search.Collector;
+import org.apache.lucene.search.FieldCache;
+import org.apache.lucene.search.FieldDoc;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.ScoreDoc;
+import org.apache.lucene.search.Sort;
+import org.apache.lucene.search.SortField;
+import org.apache.lucene.search.TermQuery;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.util._TestUtil;
+
+// TODO
+// - should test relevance sort too
+// - test null
+// - test ties
+// - test compound sort
+
+public class TestGrouping extends LuceneTestCase {
+
+ public void testBasic() throws Exception {
+
+ final String groupField = "author";
+
+ Directory dir = newDirectory();
+ RandomIndexWriter w = new RandomIndexWriter(
+ random,
+ dir,
+ newIndexWriterConfig(TEST_VERSION_CURRENT,
+ new MockAnalyzer(random)).setMergePolicy(newLogMergePolicy()));
+ // 0
+ Document doc = new Document();
+ doc.add(new Field(groupField, "author1", Field.Store.YES, Field.Index.ANALYZED));
+ doc.add(new Field("content", "random text", Field.Store.YES, Field.Index.ANALYZED));
+ doc.add(new Field("id", "1", Field.Store.YES, Field.Index.NO));
+ w.addDocument(doc);
+
+ // 1
+ doc = new Document();
+ doc.add(new Field(groupField, "author1", Field.Store.YES, Field.Index.ANALYZED));
+ doc.add(new Field("content", "some more random text", Field.Store.YES, Field.Index.ANALYZED));
+ doc.add(new Field("id", "2", Field.Store.YES, Field.Index.NO));
+ w.addDocument(doc);
+
+ // 2
+ doc = new Document();
+ doc.add(new Field(groupField, "author1", Field.Store.YES, Field.Index.ANALYZED));
+ doc.add(new Field("content", "some more random textual data", Field.Store.YES, Field.Index.ANALYZED));
+ doc.add(new Field("id", "3", Field.Store.YES, Field.Index.NO));
+ w.addDocument(doc);
+
+ // 3
+ doc = new Document();
+ doc.add(new Field(groupField, "author2", Field.Store.YES, Field.Index.ANALYZED));
+ doc.add(new Field("content", "some random text", Field.Store.YES, Field.Index.ANALYZED));
+ doc.add(new Field("id", "4", Field.Store.YES, Field.Index.NO));
+ w.addDocument(doc);
+
+ // 4
+ doc = new Document();
+ doc.add(new Field(groupField, "author3", Field.Store.YES, Field.Index.ANALYZED));
+ doc.add(new Field("content", "some more random text", Field.Store.YES, Field.Index.ANALYZED));
+ doc.add(new Field("id", "5", Field.Store.YES, Field.Index.NO));
+ w.addDocument(doc);
+
+ // 5
+ doc = new Document();
+ doc.add(new Field(groupField, "author3", Field.Store.YES, Field.Index.ANALYZED));
+ doc.add(new Field("content", "random", Field.Store.YES, Field.Index.ANALYZED));
+ doc.add(new Field("id", "6", Field.Store.YES, Field.Index.NO));
+ w.addDocument(doc);
+
+ // 6 -- no author field
+ doc = new Document();
+ doc.add(new Field("content", "random word stuck in alot of other text", Field.Store.YES, Field.Index.ANALYZED));
+ doc.add(new Field("id", "6", Field.Store.YES, Field.Index.NO));
+ w.addDocument(doc);
+
+ IndexSearcher indexSearcher = new IndexSearcher(w.getReader());
+ w.close();
+
+ final Sort groupSort = Sort.RELEVANCE;
+ final FirstPassGroupingCollector c1 = new FirstPassGroupingCollector(groupField, groupSort, 10);
+ indexSearcher.search(new TermQuery(new Term("content", "random")), c1);
+
+ final SecondPassGroupingCollector c2 = new SecondPassGroupingCollector(groupField, c1.getTopGroups(0, true), groupSort, null, 5, true, false, true);
+ indexSearcher.search(new TermQuery(new Term("content", "random")), c2);
+
+ final TopGroups groups = c2.getTopGroups(0);
+
+ assertEquals(7, groups.totalHitCount);
+ assertEquals(7, groups.totalGroupedHitCount);
+ assertEquals(4, groups.groups.length);
+
+ // relevance order: 5, 0, 3, 4, 1, 2, 6
+
+ // the later a document is added the higher this docId
+ // value
+ GroupDocs group = groups.groups[0];
+ assertEquals(new BytesRef("author3"), group.groupValue);
+ assertEquals(2, group.scoreDocs.length);
+ assertEquals(5, group.scoreDocs[0].doc);
+ assertEquals(4, group.scoreDocs[1].doc);
+ assertTrue(group.scoreDocs[0].score > group.scoreDocs[1].score);
+
+ group = groups.groups[1];
+ assertEquals(new BytesRef("author1"), group.groupValue);
+ assertEquals(3, group.scoreDocs.length);
+ assertEquals(0, group.scoreDocs[0].doc);
+ assertEquals(1, group.scoreDocs[1].doc);
+ assertEquals(2, group.scoreDocs[2].doc);
+ assertTrue(group.scoreDocs[0].score > group.scoreDocs[1].score);
+ assertTrue(group.scoreDocs[1].score > group.scoreDocs[2].score);
+
+ group = groups.groups[2];
+ assertEquals(new BytesRef("author2"), group.groupValue);
+ assertEquals(1, group.scoreDocs.length);
+ assertEquals(3, group.scoreDocs[0].doc);
+
+ group = groups.groups[3];
+ assertNull(group.groupValue);
+ assertEquals(1, group.scoreDocs.length);
+ assertEquals(6, group.scoreDocs[0].doc);
+
+ indexSearcher.getIndexReader().close();
+ dir.close();
+ }
+
+ private static class GroupDoc {
+ final int id;
+ final BytesRef group;
+ final BytesRef sort1;
+ final BytesRef sort2;
+ final String content;
+
+ public GroupDoc(int id, BytesRef group, BytesRef sort1, BytesRef sort2, String content) {
+ this.id = id;
+ this.group = group;
+ this.sort1 = sort1;
+ this.sort2 = sort2;
+ this.content = content;
+ }
+ }
+
+ private Sort getRandomSort() {
+ final List<SortField> sortFields = new ArrayList<SortField>();
+ if (random.nextBoolean()) {
+ if (random.nextBoolean()) {
+ sortFields.add(new SortField("sort1", SortField.STRING, random.nextBoolean()));
+ } else {
+ sortFields.add(new SortField("sort2", SortField.STRING, random.nextBoolean()));
+ }
+ } else if (random.nextBoolean()) {
+ sortFields.add(new SortField("sort1", SortField.STRING, random.nextBoolean()));
+ sortFields.add(new SortField("sort2", SortField.STRING, random.nextBoolean()));
+ }
+ sortFields.add(new SortField("id", SortField.INT));
+ return new Sort(sortFields.toArray(new SortField[sortFields.size()]));
+ }
+
+ private Comparator<GroupDoc> getComparator(Sort sort) {
+ final SortField[] sortFields = sort.getSort();
+ return new Comparator<GroupDoc>() {
+ public int compare(GroupDoc d1, GroupDoc d2) {
+ for(SortField sf : sortFields) {
+ final int cmp;
+ if (sf.getField().equals("sort1")) {
+ cmp = d1.sort1.compareTo(d2.sort1);
+ } else if (sf.getField().equals("sort2")) {
+ cmp = d1.sort2.compareTo(d2.sort2);
+ } else {
+ assertEquals(sf.getField(), "id");
+ cmp = d1.id - d2.id;
+ }
+ if (cmp != 0) {
+ return sf.getReverse() ? -cmp : cmp;
+ }
+ }
+ // Our sort always fully tie breaks:
+ fail();
+ return 0;
+ }
+ };
+ }
+
+ private Comparable[] fillFields(GroupDoc d, Sort sort) {
+ final SortField[] sortFields = sort.getSort();
+ final Comparable[] fields = new Comparable[sortFields.length];
+ for(int fieldIDX=0;fieldIDX<sortFields.length;fieldIDX++) {
+ final Comparable c;
+ final SortField sf = sortFields[fieldIDX];
+ if (sf.getField().equals("sort1")) {
+ c = d.sort1;
+ } else if (sf.getField().equals("sort2")) {
+ c = d.sort2;
+ } else {
+ assertEquals("id", sf.getField());
+ c = new Integer(d.id);
+ }
+ fields[fieldIDX] = c;
+ }
+ return fields;
+ }
+
+ private TopGroups slowGrouping(GroupDoc[] groupDocs,
+ String searchTerm,
+ boolean fillFields,
+ boolean getScores,
+ boolean getMaxScores,
+ Sort groupSort,
+ Sort docSort,
+ int topNGroups,
+ int docsPerGroup,
+ int groupOffset,
+ int docOffset) {
+
+ final Comparator<GroupDoc> groupSortComp = getComparator(groupSort);
+
+ Arrays.sort(groupDocs, groupSortComp);
+ final HashMap<BytesRef,List<GroupDoc>> groups = new HashMap<BytesRef,List<GroupDoc>>();
+ final List<BytesRef> sortedGroups = new ArrayList<BytesRef>();
+ final List<Comparable[]> sortedGroupFields = new ArrayList<Comparable[]>();
+
+ int totalHitCount = 0;
+
+ for(GroupDoc d : groupDocs) {
+ // TODO: would be better to filter by searchTerm before sorting!
+ if (!d.content.equals(searchTerm)) {
+ continue;
+ }
+ totalHitCount++;
+ List<GroupDoc> l = groups.get(d.group);
+ if (l == null) {
+ sortedGroups.add(d.group);
+ if (fillFields) {
+ sortedGroupFields.add(fillFields(d, groupSort));
+ }
+ l = new ArrayList<GroupDoc>();
+ groups.put(d.group, l);
+ }
+ l.add(d);
+ }
+
+ if (groupOffset >= sortedGroups.size()) {
+ // slice is out of bounds
+ return null;
+ }
+
+ final int limit = Math.min(groupOffset + topNGroups, groups.size());
+
+ final Comparator<GroupDoc> docSortComp = getComparator(docSort);
+ final GroupDocs[] result = new GroupDocs[limit-groupOffset];
+ int totalGroupedHitCount = 0;
+ for(int idx=groupOffset;idx < limit;idx++) {
+ final BytesRef group = sortedGroups.get(idx);
+ final List<GroupDoc> docs = groups.get(group);
+ totalGroupedHitCount += docs.size();
+ Collections.sort(docs, docSortComp);
+ final ScoreDoc[] hits;
+ if (docs.size() > docOffset) {
+ final int docIDXLimit = Math.min(docOffset + docsPerGroup, docs.size());
+ hits = new ScoreDoc[docIDXLimit - docOffset];
+ for(int docIDX=docOffset; docIDX < docIDXLimit; docIDX++) {
+ final GroupDoc d = docs.get(docIDX);
+ final FieldDoc fd;
+ if (fillFields) {
+ fd = new FieldDoc(d.id, 0.0f, fillFields(d, docSort));
+ } else {
+ fd = new FieldDoc(d.id, 0.0f);
+ }
+ hits[docIDX-docOffset] = fd;
+ }
+ } else {
+ hits = new ScoreDoc[0];
+ }
+
+ result[idx-groupOffset] = new GroupDocs(0.0f,
+ docs.size(),
+ hits,
+ group,
+ fillFields ? sortedGroupFields.get(idx) : null);
+ }
+
+ return new TopGroups(groupSort.getSort(), docSort.getSort(), totalHitCount, totalGroupedHitCount, result);
+ }
+
+ public void testRandom() throws Exception {
+ for(int iter=0;iter<3;iter++) {
+
+ if (VERBOSE) {
+ System.out.println("TEST: iter=" + iter);
+ }
+
+ final int numDocs = _TestUtil.nextInt(random, 100, 1000) * RANDOM_MULTIPLIER;
+ //final int numDocs = _TestUtil.nextInt(random, 5, 20);
+
+ final int numGroups = _TestUtil.nextInt(random, 1, numDocs);
+
+ if (VERBOSE) {
+ System.out.println("TEST: numDocs=" + numDocs + " numGroups=" + numGroups);
+ }
+
+ final List<BytesRef> groups = new ArrayList<BytesRef>();
+ for(int i=0;i<numGroups;i++) {
+ groups.add(new BytesRef(_TestUtil.randomRealisticUnicodeString(random)));
+ //groups.add(new BytesRef(_TestUtil.randomSimpleString(random)));
+ }
+ final String[] contentStrings = new String[] {"a", "b", "c", "d"};
+
+ Directory dir = newDirectory();
+ RandomIndexWriter w = new RandomIndexWriter(
+ random,
+ dir,
+ newIndexWriterConfig(TEST_VERSION_CURRENT,
+ new MockAnalyzer(random)).setMergePolicy(newLogMergePolicy()));
+
+ Document doc = new Document();
+ Document docNoGroup = new Document();
+ Field group = newField("group", "", Field.Index.NOT_ANALYZED);
+ doc.add(group);
+ Field sort1 = newField("sort1", "", Field.Index.NOT_ANALYZED);
+ doc.add(sort1);
+ docNoGroup.add(sort1);
+ Field sort2 = newField("sort2", "", Field.Index.NOT_ANALYZED);
+ doc.add(sort2);
+ docNoGroup.add(sort2);
+ Field content = newField("content", "", Field.Index.NOT_ANALYZED);
+ doc.add(content);
+ docNoGroup.add(content);
+ NumericField id = new NumericField("id");
+ doc.add(id);
+ docNoGroup.add(id);
+ final GroupDoc[] groupDocs = new GroupDoc[numDocs];
+ for(int i=0;i<numDocs;i++) {
+ final BytesRef groupValue;
+ if (random.nextInt(24) == 17) {
+ // So we test the "doc doesn't have the group'd
+ // field" case:
+ groupValue = null;
+ } else {
+ groupValue = groups.get(random.nextInt(groups.size()));
+ }
+ final GroupDoc groupDoc = new GroupDoc(i,
+ groupValue,
+ groups.get(random.nextInt(groups.size())),
+ groups.get(random.nextInt(groups.size())),
+ contentStrings[random.nextInt(contentStrings.length)]);
+ if (VERBOSE) {
+ System.out.println(" doc content=" + groupDoc.content + " id=" + i + " group=" + (groupDoc.group == null ? "null" : groupDoc.group.utf8ToString()) + " sort1=" + groupDoc.sort1.utf8ToString() + " sort2=" + groupDoc.sort2.utf8ToString());
+ }
+
+ groupDocs[i] = groupDoc;
+ if (groupDoc.group != null) {
+ group.setValue(groupDoc.group.utf8ToString());
+ }
+ sort1.setValue(groupDoc.sort1.utf8ToString());
+ sort2.setValue(groupDoc.sort2.utf8ToString());
+ content.setValue(groupDoc.content);
+ id.setIntValue(groupDoc.id);
+ if (groupDoc.group == null) {
+ w.addDocument(docNoGroup);
+ } else {
+ w.addDocument(doc);
+ }
+ }
+
+ final IndexReader r = w.getReader();
+ w.close();
+
+ final IndexSearcher s = new IndexSearcher(r);
+
+ for(int searchIter=0;searchIter<100;searchIter++) {
+
+ if (VERBOSE) {
+ System.out.println("TEST: searchIter=" + searchIter);
+ }
+
+ final String searchTerm = contentStrings[random.nextInt(contentStrings.length)];
+ final boolean fillFields = random.nextBoolean();
+ final boolean getScores = random.nextBoolean();
+ final boolean getMaxScores = random.nextBoolean();
+ final Sort groupSort = getRandomSort();
+ // TODO: also test null (= sort by relevance)
+ final Sort docSort = getRandomSort();
+
+ final int topNGroups = _TestUtil.nextInt(random, 1, 30);
+ final int docsPerGroup = _TestUtil.nextInt(random, 1, 50);
+ final int groupOffset = _TestUtil.nextInt(random, 0, (topNGroups-1)/2);
+ //final int groupOffset = 0;
+
+ final int docOffset = _TestUtil.nextInt(random, 0, docsPerGroup-1);
+ //final int docOffset = 0;
+
+ final boolean doCache = random.nextBoolean();
+ if (VERBOSE) {
+ System.out.println("TEST: groupSort=" + groupSort + " docSort=" + docSort + " searchTerm=" + searchTerm + " topNGroups=" + topNGroups + " groupOffset=" + groupOffset + " docOffset=" + docOffset + " doCache=" + doCache + " docsPerGroup=" + docsPerGroup);
+ }
+
+ final FirstPassGroupingCollector c1 = new FirstPassGroupingCollector("group", groupSort, groupOffset+topNGroups);
+ final CachingCollector cCache;
+ final Collector c;
+ if (doCache) {
+ final double maxCacheMB = random.nextDouble();
+ if (VERBOSE) {
+ System.out.println("TEST: maxCacheMB=" + maxCacheMB);
+ }
+ c = cCache = new CachingCollector(c1, true, maxCacheMB);
+ } else {
+ c = c1;
+ cCache = null;
+ }
+ s.search(new TermQuery(new Term("content", searchTerm)), c);
+
+ final Collection<SearchGroup> topGroups = c1.getTopGroups(groupOffset, fillFields);
+ final TopGroups groupsResult;
+
+ if (topGroups != null) {
+
+ if (VERBOSE) {
+ System.out.println("TEST: topGroups");
+ for (SearchGroup searchGroup : topGroups) {
+ System.out.println(" " + (searchGroup.groupValue == null ? "null" : searchGroup.groupValue.utf8ToString()) + ": " + Arrays.deepToString(searchGroup.sortValues));
+ }
+ }
+
+ final SecondPassGroupingCollector c2 = new SecondPassGroupingCollector("group", topGroups, groupSort, docSort, docOffset+docsPerGroup, getScores, getMaxScores, fillFields);
+ if (doCache) {
+ if (cCache.isCached()) {
+ if (VERBOSE) {
+ System.out.println("TEST: cache is intact");
+ }
+ cCache.replay(c2);
+ } else {
+ if (VERBOSE) {
+ System.out.println("TEST: cache was too large");
+ }
+ s.search(new TermQuery(new Term("content", searchTerm)), c2);
+ }
+ } else {
+ s.search(new TermQuery(new Term("content", searchTerm)), c2);
+ }
+
+ groupsResult = c2.getTopGroups(docOffset);
+ } else {
+ groupsResult = null;
+ if (VERBOSE) {
+ System.out.println("TEST: no results");
+ }
+ }
+
+ final TopGroups expectedGroups = slowGrouping(groupDocs, searchTerm, fillFields, getScores, getMaxScores, groupSort, docSort, topNGroups, docsPerGroup, groupOffset, docOffset);
+
+ try {
+ // NOTE: intentional but temporary field cache insanity!
+ assertEquals(FieldCache.DEFAULT.getInts(r, "id"), expectedGroups, groupsResult);
+ } finally {
+ FieldCache.DEFAULT.purge(r);
+ }
+ }
+
+ r.close();
+ dir.close();
+ }
+ }
+
+ private void assertEquals(int[] docIDtoID, TopGroups expected, TopGroups actual) {
+ if (expected == null) {
+ assertNull(actual);
+ return;
+ }
+ assertNotNull(actual);
+
+ assertEquals(expected.groups.length, actual.groups.length);
+ assertEquals(expected.totalHitCount, actual.totalHitCount);
+ assertEquals(expected.totalGroupedHitCount, actual.totalGroupedHitCount);
+
+ for(int groupIDX=0;groupIDX<expected.groups.length;groupIDX++) {
+ if (VERBOSE) {
+ System.out.println(" check groupIDX=" + groupIDX);
+ }
+ final GroupDocs expectedGroup = expected.groups[groupIDX];
+ final GroupDocs actualGroup = actual.groups[groupIDX];
+ assertEquals(expectedGroup.groupValue, actualGroup.groupValue);
+ assertEquals(expectedGroup.groupSortValues, actualGroup.groupSortValues);
+
+ // TODO
+ // assertEquals(expectedGroup.maxScore, actualGroup.maxScore);
+ assertEquals(expectedGroup.totalHits, actualGroup.totalHits);
+
+ final ScoreDoc[] expectedFDs = expectedGroup.scoreDocs;
+ final ScoreDoc[] actualFDs = actualGroup.scoreDocs;
+
+ assertEquals(expectedFDs.length, actualFDs.length);
+ for(int docIDX=0;docIDX<expectedFDs.length;docIDX++) {
+ final FieldDoc expectedFD = (FieldDoc) expectedFDs[docIDX];
+ final FieldDoc actualFD = (FieldDoc) actualFDs[docIDX];
+ assertEquals(expectedFD.doc, docIDtoID[actualFD.doc]);
+ // TODO
+ // assertEquals(expectedFD.score, actualFD.score);
+ assertEquals(expectedFD.fields, actualFD.fields);
+ }
+ }
+ }
+}