You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by sh...@apache.org on 2011/05/18 20:51:02 UTC
svn commit: r1124378 - in /lucene/dev/branches/branch_3x/lucene: ./
contrib/grouping/src/test/org/apache/lucene/search/grouping/
src/java/org/apache/lucene/search/ src/test/org/apache/lucene/search/
Author: shaie
Date: Wed May 18 18:51:01 2011
New Revision: 1124378
URL: http://svn.apache.org/viewvc?rev=1124378&view=rev
Log:
LUCENE-3102: add no-wrap ability to CachingCollector
Modified:
lucene/dev/branches/branch_3x/lucene/CHANGES.txt
lucene/dev/branches/branch_3x/lucene/contrib/grouping/src/test/org/apache/lucene/search/grouping/TestGrouping.java
lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/search/CachingCollector.java
lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/search/TestCachingCollector.java
Modified: lucene/dev/branches/branch_3x/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/CHANGES.txt?rev=1124378&r1=1124377&r2=1124378&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/CHANGES.txt (original)
+++ lucene/dev/branches/branch_3x/lucene/CHANGES.txt Wed May 18 18:51:01 2011
@@ -56,7 +56,7 @@ New features
PathHierarchyTokenizer (Olivier Favre via ryan)
* LUCENE-1421, LUCENE-3102: added CachingCollector which allows you to cache
- document IDs and scores encountered during the search, and "reply" them to
+ document IDs and scores encountered during the search, and "replay" them to
another Collector. (Mike McCandless, Shai Erera)
API Changes
Modified: lucene/dev/branches/branch_3x/lucene/contrib/grouping/src/test/org/apache/lucene/search/grouping/TestGrouping.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/grouping/src/test/org/apache/lucene/search/grouping/TestGrouping.java?rev=1124378&r1=1124377&r2=1124378&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/grouping/src/test/org/apache/lucene/search/grouping/TestGrouping.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/grouping/src/test/org/apache/lucene/search/grouping/TestGrouping.java Wed May 18 18:51:01 2011
@@ -435,37 +435,64 @@ public class TestGrouping extends Lucene
System.out.println("TEST: groupSort=" + groupSort + " docSort=" + docSort + " searchTerm=" + searchTerm + " topNGroups=" + topNGroups + " groupOffset=" + groupOffset + " docOffset=" + docOffset + " doCache=" + doCache + " docsPerGroup=" + docsPerGroup + " doAllGroups=" + doAllGroups);
}
- final AllGroupsCollector groupCountCollector;
+ final AllGroupsCollector allGroupsCollector;
if (doAllGroups) {
- groupCountCollector = new AllGroupsCollector("group");
+ allGroupsCollector = new AllGroupsCollector("group");
} else {
- groupCountCollector = null;
+ allGroupsCollector = null;
}
final FirstPassGroupingCollector c1 = new FirstPassGroupingCollector("group", groupSort, groupOffset+topNGroups);
final CachingCollector cCache;
final Collector c;
+
+ final boolean useWrappingCollector = random.nextBoolean();
+
if (doCache) {
final double maxCacheMB = random.nextDouble();
if (VERBOSE) {
System.out.println("TEST: maxCacheMB=" + maxCacheMB);
}
- if (doAllGroups) {
- cCache = CachingCollector.create(c1, true, maxCacheMB);
- c = MultiCollector.wrap(cCache, groupCountCollector);
+ if (useWrappingCollector) {
+ if (doAllGroups) {
+ cCache = CachingCollector.create(c1, true, maxCacheMB);
+ c = MultiCollector.wrap(cCache, allGroupsCollector);
+ } else {
+ c = cCache = CachingCollector.create(c1, true, maxCacheMB);
+ }
} else {
- c = cCache = CachingCollector.create(c1, true, maxCacheMB);
+ // Collect only into cache, then replay multiple times:
+ c = cCache = CachingCollector.create(false, true, maxCacheMB);
}
- } else if (doAllGroups) {
- c = MultiCollector.wrap(c1, groupCountCollector);
- cCache = null;
} else {
- c = c1;
cCache = null;
+ if (doAllGroups) {
+ c = MultiCollector.wrap(c1, allGroupsCollector);
+ } else {
+ c = c1;
+ }
}
+
s.search(new TermQuery(new Term("content", searchTerm)), c);
+ if (doCache && !useWrappingCollector) {
+ if (cCache.isCached()) {
+ // Replay for first-pass grouping
+ cCache.replay(c1);
+ if (doAllGroups) {
+ // Replay for all groups:
+ cCache.replay(allGroupsCollector);
+ }
+ } else {
+ // Replay by re-running search:
+ s.search(new TermQuery(new Term("content", searchTerm)), c1);
+ if (doAllGroups) {
+ s.search(new TermQuery(new Term("content", searchTerm)), allGroupsCollector);
+ }
+ }
+ }
+
final Collection<SearchGroup> topGroups = c1.getTopGroups(groupOffset, fillFields);
final TopGroups groupsResult;
@@ -497,7 +524,7 @@ public class TestGrouping extends Lucene
if (doAllGroups) {
TopGroups tempTopGroups = c2.getTopGroups(docOffset);
- groupsResult = new TopGroups(tempTopGroups, groupCountCollector.getGroupCount());
+ groupsResult = new TopGroups(tempTopGroups, allGroupsCollector.getGroupCount());
} else {
groupsResult = c2.getTopGroups(docOffset);
}
Modified: lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/search/CachingCollector.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/search/CachingCollector.java?rev=1124378&r1=1124377&r2=1124378&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/search/CachingCollector.java (original)
+++ lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/search/CachingCollector.java Wed May 18 18:51:01 2011
@@ -310,6 +310,48 @@ public abstract class CachingCollector e
protected int base;
protected int lastDocBase;
+ /**
+ * Creates a {@link CachingCollector} which does not wrap another collector.
+ * The cached documents and scores can later be {@link #replay(Collector)
+ * replayed}.
+ *
+ * @param acceptDocsOutOfOrder
+ * whether documents are allowed to be collected out-of-order
+ */
+ public static CachingCollector create(final boolean acceptDocsOutOfOrder, boolean cacheScores, double maxRAMMB) {
+ Collector other = new Collector() {
+ @Override
+ public boolean acceptsDocsOutOfOrder() {
+ return acceptDocsOutOfOrder;
+ }
+
+ @Override
+ public void setScorer(Scorer scorer) throws IOException {}
+
+ @Override
+ public void collect(int doc) throws IOException {}
+
+ @Override
+ public void setNextReader(IndexReader reader, int docBase) throws IOException {}
+
+ };
+ return create(other, cacheScores, maxRAMMB);
+ }
+
+ /**
+ * Create a new {@link CachingCollector} that wraps the given collector and
+ * caches documents and scores up to the specified RAM threshold.
+ *
+ * @param other
+ * the Collector to wrap and delegate calls to.
+ * @param cacheScores
+ * whether to cache scores in addition to document IDs. Note that
+ * this increases the RAM consumed per doc
+ * @param maxRAMMB
+ * the maximum RAM in MB to consume for caching the documents and
+ * scores. If the collector exceeds the threshold, no documents and
+ * scores are cached.
+ */
public static CachingCollector create(Collector other, boolean cacheScores, double maxRAMMB) {
return cacheScores ? new ScoreCachingCollector(other, maxRAMMB) : new NoScoreCachingCollector(other, maxRAMMB);
}
Modified: lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/search/TestCachingCollector.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/search/TestCachingCollector.java?rev=1124378&r1=1124377&r2=1124378&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/search/TestCachingCollector.java (original)
+++ lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/search/TestCachingCollector.java Wed May 18 18:51:01 2011
@@ -171,5 +171,18 @@ public class TestCachingCollector extend
assertFalse(cc.isCached());
}
}
+
+ public void testNoWrappedCollector() throws Exception {
+ for (boolean cacheScores : new boolean[] { false, true }) {
+ // create w/o a wrapped collector, and test that the methods work
+ CachingCollector cc = CachingCollector.create(true, cacheScores, 50 * ONE_BYTE);
+ cc.setNextReader(null, 0);
+ cc.setScorer(new MockScorer());
+ cc.collect(0);
+
+ assertTrue(cc.isCached());
+ cc.replay(new NoOpCollector(true));
+ }
+ }
}