You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by sh...@apache.org on 2011/05/18 20:51:02 UTC

svn commit: r1124378 - in /lucene/dev/branches/branch_3x/lucene: ./ contrib/grouping/src/test/org/apache/lucene/search/grouping/ src/java/org/apache/lucene/search/ src/test/org/apache/lucene/search/

Author: shaie
Date: Wed May 18 18:51:01 2011
New Revision: 1124378

URL: http://svn.apache.org/viewvc?rev=1124378&view=rev
Log:
LUCENE-3102: add no-wrap ability to CachingCollector

Modified:
    lucene/dev/branches/branch_3x/lucene/CHANGES.txt
    lucene/dev/branches/branch_3x/lucene/contrib/grouping/src/test/org/apache/lucene/search/grouping/TestGrouping.java
    lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/search/CachingCollector.java
    lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/search/TestCachingCollector.java

Modified: lucene/dev/branches/branch_3x/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/CHANGES.txt?rev=1124378&r1=1124377&r2=1124378&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/CHANGES.txt (original)
+++ lucene/dev/branches/branch_3x/lucene/CHANGES.txt Wed May 18 18:51:01 2011
@@ -56,7 +56,7 @@ New features
   PathHierarchyTokenizer (Olivier Favre via ryan)
 
 * LUCENE-1421, LUCENE-3102: added CachingCollector which allow you to cache 
-  document IDs and scores encountered during the search, and "reply" them to 
+  document IDs and scores encountered during the search, and "replay" them to 
   another Collector. (Mike McCandless, Shai Erera)
   
 API Changes

Modified: lucene/dev/branches/branch_3x/lucene/contrib/grouping/src/test/org/apache/lucene/search/grouping/TestGrouping.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/contrib/grouping/src/test/org/apache/lucene/search/grouping/TestGrouping.java?rev=1124378&r1=1124377&r2=1124378&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/contrib/grouping/src/test/org/apache/lucene/search/grouping/TestGrouping.java (original)
+++ lucene/dev/branches/branch_3x/lucene/contrib/grouping/src/test/org/apache/lucene/search/grouping/TestGrouping.java Wed May 18 18:51:01 2011
@@ -435,37 +435,64 @@ public class TestGrouping extends Lucene
           System.out.println("TEST: groupSort=" + groupSort + " docSort=" + docSort + " searchTerm=" + searchTerm + " topNGroups=" + topNGroups + " groupOffset=" + groupOffset + " docOffset=" + docOffset + " doCache=" + doCache + " docsPerGroup=" + docsPerGroup + " doAllGroups=" + doAllGroups);
         }
 
-        final AllGroupsCollector groupCountCollector;
+        final AllGroupsCollector allGroupsCollector;
         if (doAllGroups) {
-          groupCountCollector = new AllGroupsCollector("group");
+          allGroupsCollector = new AllGroupsCollector("group");
         } else {
-          groupCountCollector = null;
+          allGroupsCollector = null;
         }
 
         final FirstPassGroupingCollector c1 = new FirstPassGroupingCollector("group", groupSort, groupOffset+topNGroups);
         final CachingCollector cCache;
         final Collector c;
+
+        final boolean useWrappingCollector = random.nextBoolean();
+
         if (doCache) {
           final double maxCacheMB = random.nextDouble();
           if (VERBOSE) {
             System.out.println("TEST: maxCacheMB=" + maxCacheMB);
           }
 
-          if (doAllGroups) {
-            cCache = CachingCollector.create(c1, true, maxCacheMB);
-            c = MultiCollector.wrap(cCache, groupCountCollector);
+          if (useWrappingCollector) {
+            if (doAllGroups) {
+              cCache = CachingCollector.create(c1, true, maxCacheMB);              
+              c = MultiCollector.wrap(cCache, allGroupsCollector);
+            } else {
+              c = cCache = CachingCollector.create(c1, true, maxCacheMB);              
+            }
           } else {
-            c = cCache = CachingCollector.create(c1, true, maxCacheMB);
+            // Collect only into cache, then replay multiple times:
+            c = cCache = CachingCollector.create(false, true, maxCacheMB);
           }
-        } else if (doAllGroups) {
-          c = MultiCollector.wrap(c1, groupCountCollector);
-          cCache = null;
         } else {
-          c = c1;
           cCache = null;
+          if (doAllGroups) {
+            c = MultiCollector.wrap(c1, allGroupsCollector);
+          } else {
+            c = c1;
+          }
         }
+
         s.search(new TermQuery(new Term("content", searchTerm)), c);
 
+        if (doCache && !useWrappingCollector) {
+          if (cCache.isCached()) {
+            // Replay for first-pass grouping
+            cCache.replay(c1);
+            if (doAllGroups) {
+              // Replay for all groups:
+              cCache.replay(allGroupsCollector);
+            }
+          } else {
+            // Replay by re-running search:
+            s.search(new TermQuery(new Term("content", searchTerm)), c1);
+            if (doAllGroups) {
+              s.search(new TermQuery(new Term("content", searchTerm)), allGroupsCollector);
+            }
+          }
+        }
+
         final Collection<SearchGroup> topGroups = c1.getTopGroups(groupOffset, fillFields);
         final TopGroups groupsResult;
 
@@ -497,7 +524,7 @@ public class TestGrouping extends Lucene
         
           if (doAllGroups) {
             TopGroups tempTopGroups = c2.getTopGroups(docOffset);
-            groupsResult = new TopGroups(tempTopGroups, groupCountCollector.getGroupCount());
+            groupsResult = new TopGroups(tempTopGroups, allGroupsCollector.getGroupCount());
           } else {
             groupsResult = c2.getTopGroups(docOffset);
           }

Modified: lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/search/CachingCollector.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/search/CachingCollector.java?rev=1124378&r1=1124377&r2=1124378&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/search/CachingCollector.java (original)
+++ lucene/dev/branches/branch_3x/lucene/src/java/org/apache/lucene/search/CachingCollector.java Wed May 18 18:51:01 2011
@@ -310,6 +310,48 @@ public abstract class CachingCollector e
   protected int base;
   protected int lastDocBase;
 
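+  /**
+   * Creates a {@link CachingCollector} which does not wrap another collector.
+   * The cached documents and scores can later be {@link #replay(Collector) replayed}.
+   * 
+   * @param acceptDocsOutOfOrder whether documents are allowed to be collected out-of-order
+   * @param cacheScores whether to cache scores in addition to document IDs
+   * @param maxRAMMB the maximum RAM in MB to consume for caching the documents and scores
+   */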
+  public static CachingCollector create(final boolean acceptDocsOutOfOrder, boolean cacheScores, double maxRAMMB) {
+    Collector other = new Collector() {
+      @Override
+      public boolean acceptsDocsOutOfOrder() {
+        return acceptDocsOutOfOrder;
+      }
+      
+      @Override
+      public void setScorer(Scorer scorer) throws IOException {}
+
+      @Override
+      public void collect(int doc) throws IOException {}
+
+      @Override
+      public void setNextReader(IndexReader reader, int docBase) throws IOException {}
+
+    };
+    return create(other, cacheScores, maxRAMMB);
+  }
+
+  /**
+   * Create a new {@link CachingCollector} that wraps the given collector and
+   * caches documents and scores up to the specified RAM threshold.
+   * 
+   * @param other
+   *          the Collector to wrap and delegate calls to.
+   * @param cacheScores
+   *          whether to cache scores in addition to document IDs. Note that
+   *          this increases the RAM consumed per doc
+   * @param maxRAMMB
+   *          the maximum RAM in MB to consume for caching the documents and
+   *          scores. If the collector exceeds the threshold, no documents and
+   *          scores are cached.
+   */
   public static CachingCollector create(Collector other, boolean cacheScores, double maxRAMMB) {
     return cacheScores ? new ScoreCachingCollector(other, maxRAMMB) : new NoScoreCachingCollector(other, maxRAMMB);
   }

Modified: lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/search/TestCachingCollector.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/search/TestCachingCollector.java?rev=1124378&r1=1124377&r2=1124378&view=diff
==============================================================================
--- lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/search/TestCachingCollector.java (original)
+++ lucene/dev/branches/branch_3x/lucene/src/test/org/apache/lucene/search/TestCachingCollector.java Wed May 18 18:51:01 2011
@@ -171,5 +171,18 @@ public class TestCachingCollector extend
       assertFalse(cc.isCached());
     }
   }
+
+  public void testNoWrappedCollector() throws Exception {
+    for (boolean cacheScores : new boolean[] { false, true }) {
+      // create w/o a wrapped collector (no-wrap factory), and test that the methods work
+      CachingCollector cc = CachingCollector.create(true, cacheScores, 50 * ONE_BYTE);
+      cc.setNextReader(null, 0);
+      cc.setScorer(new MockScorer());
+      cc.collect(0);
+      
+      assertTrue(cc.isCached());
+      cc.replay(new NoOpCollector(true));
+    }
+  }
   
 }