You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by mv...@apache.org on 2012/03/15 10:31:07 UTC

svn commit: r1300860 - in /lucene/dev/trunk: lucene/contrib/ modules/grouping/src/java/org/apache/lucene/search/grouping/ modules/grouping/src/java/org/apache/lucene/search/grouping/dv/ modules/grouping/src/java/org/apache/lucene/search/grouping/term/ ...

Author: mvg
Date: Thu Mar 15 09:31:06 2012
New Revision: 1300860

URL: http://svn.apache.org/viewvc?rev=1300860&view=rev
Log:
LUCENE-3856: Added docvalues based grouped facet collector.

Added:
    lucene/dev/trunk/modules/grouping/src/java/org/apache/lucene/search/grouping/dv/DVGroupFacetCollector.java
    lucene/dev/trunk/modules/grouping/src/test/org/apache/lucene/search/grouping/GroupFacetCollectorTest.java
      - copied, changed from r1300853, lucene/dev/trunk/modules/grouping/src/test/org/apache/lucene/search/grouping/TermGroupFacetCollectorTest.java
Removed:
    lucene/dev/trunk/modules/grouping/src/test/org/apache/lucene/search/grouping/TermGroupFacetCollectorTest.java
Modified:
    lucene/dev/trunk/lucene/contrib/CHANGES.txt
    lucene/dev/trunk/modules/grouping/src/java/org/apache/lucene/search/grouping/AbstractGroupFacetCollector.java
    lucene/dev/trunk/modules/grouping/src/java/org/apache/lucene/search/grouping/term/TermGroupFacetCollector.java

Modified: lucene/dev/trunk/lucene/contrib/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/contrib/CHANGES.txt?rev=1300860&r1=1300859&r2=1300860&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/contrib/CHANGES.txt (original)
+++ lucene/dev/trunk/lucene/contrib/CHANGES.txt Thu Mar 15 09:31:06 2012
@@ -72,7 +72,7 @@ New Features
    start/endOffset, if offsets are indexed. (Alan Woodward via Mike
    McCandless)
 
- * LUCENE-3802: Support for grouped faceting. (Martijn van Groningen)
+ * LUCENE-3802, LUCENE-3856: Support for grouped faceting. (Martijn van Groningen)
 
 API Changes
 

Modified: lucene/dev/trunk/modules/grouping/src/java/org/apache/lucene/search/grouping/AbstractGroupFacetCollector.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/grouping/src/java/org/apache/lucene/search/grouping/AbstractGroupFacetCollector.java?rev=1300860&r1=1300859&r2=1300860&view=diff
==============================================================================
--- lucene/dev/trunk/modules/grouping/src/java/org/apache/lucene/search/grouping/AbstractGroupFacetCollector.java (original)
+++ lucene/dev/trunk/modules/grouping/src/java/org/apache/lucene/search/grouping/AbstractGroupFacetCollector.java Thu Mar 15 09:31:06 2012
@@ -20,6 +20,7 @@ package org.apache.lucene.search.groupin
 import org.apache.lucene.search.Collector;
 import org.apache.lucene.search.Scorer;
 import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.PriorityQueue;
 
 import java.io.IOException;
 import java.util.*;
@@ -34,11 +35,18 @@ public abstract class AbstractGroupFacet
   protected final String groupField;
   protected final String facetField;
   protected final BytesRef facetPrefix;
+  protected final List<SegmentResult> segmentResults;
+
+  protected int[] segmentFacetCounts;
+  protected int segmentTotalCount;
+  protected int startFacetOrd;
+  protected int endFacetOrd;
 
   protected AbstractGroupFacetCollector(String groupField, String facetField, BytesRef facetPrefix) {
     this.groupField = groupField;
     this.facetField = facetField;
     this.facetPrefix = facetPrefix;
+    segmentResults = new ArrayList<SegmentResult>();
   }
 
   /**
@@ -52,7 +60,49 @@ public abstract class AbstractGroupFacet
    * @return grouped facet results
    * @throws IOException If I/O related errors occur during merging segment grouped facet counts.
    */
-  public abstract GroupedFacetResult mergeSegmentResults(int size, int minCount, boolean orderByCount) throws IOException;
+  public GroupedFacetResult mergeSegmentResults(int size, int minCount, boolean orderByCount) throws IOException {
+    if (segmentFacetCounts != null) {
+      segmentResults.add(createSegmentResult());
+      segmentFacetCounts = null; // reset
+    }
+
+    int totalCount = 0;
+    int missingCount = 0;
+    SegmentResultPriorityQueue segments = new SegmentResultPriorityQueue(segmentResults.size());
+    for (SegmentResult segmentResult : segmentResults) {
+      missingCount += segmentResult.missing;
+      if (segmentResult.mergePos >= segmentResult.maxTermPos) {
+        continue;
+      }
+      totalCount += segmentResult.total;
+      segments.add(segmentResult);
+    }
+
+    GroupedFacetResult facetResult = new GroupedFacetResult(size, minCount, orderByCount, totalCount, missingCount);
+    while (segments.size() > 0) {
+      SegmentResult segmentResult = segments.top();
+      BytesRef currentFacetValue = BytesRef.deepCopyOf(segmentResult.mergeTerm);
+      int count = 0;
+
+      do {
+        count += segmentResult.counts[segmentResult.mergePos++];
+        if (segmentResult.mergePos < segmentResult.maxTermPos) {
+          segmentResult.nextTerm();
+          segmentResult = segments.updateTop();
+        } else {
+          segments.pop();
+          segmentResult = segments.top();
+          if (segmentResult == null) {
+            break;
+          }
+        }
+      } while (currentFacetValue.equals(segmentResult.mergeTerm));
+      facetResult.addFacetCount(currentFacetValue, count);
+    }
+    return facetResult;
+  }
+
+  protected abstract SegmentResult createSegmentResult() throws IOException;
 
   public void setScorer(Scorer scorer) throws IOException {
   }
@@ -221,4 +271,45 @@ public abstract class AbstractGroupFacet
     }
   }
 
+  /**
+   * Contains the local grouped segment counts for a particular segment.
+   * The counts of all <code>SegmentResult</code> instances are summed per facet value when merging.
+   */
+  protected abstract static class SegmentResult {
+
+    protected final int[] counts;
+    protected final int total;
+    protected final int missing;
+    protected final int maxTermPos;
+
+    protected BytesRef mergeTerm;
+    protected int mergePos;
+
+    protected SegmentResult(int[] counts, int total, int missing, int maxTermPos) {
+      this.counts = counts;
+      this.total = total;
+      this.missing = missing;
+      this.maxTermPos = maxTermPos;
+    }
+
+    /**
+     * Go to next term in this <code>SegmentResult</code> in order to retrieve the grouped facet counts.
+     *
+     * @throws IOException If I/O related errors occur
+     */
+    protected abstract void nextTerm() throws IOException;
+
+  }
+
+  private static class SegmentResultPriorityQueue extends PriorityQueue<SegmentResult> {
+
+    SegmentResultPriorityQueue(int maxSize) {
+      super(maxSize);
+    }
+
+    protected boolean lessThan(SegmentResult a, SegmentResult b) {
+      return a.mergeTerm.compareTo(b.mergeTerm) < 0;
+    }
+  }
+
 }

Added: lucene/dev/trunk/modules/grouping/src/java/org/apache/lucene/search/grouping/dv/DVGroupFacetCollector.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/grouping/src/java/org/apache/lucene/search/grouping/dv/DVGroupFacetCollector.java?rev=1300860&view=auto
==============================================================================
--- lucene/dev/trunk/modules/grouping/src/java/org/apache/lucene/search/grouping/dv/DVGroupFacetCollector.java (added)
+++ lucene/dev/trunk/modules/grouping/src/java/org/apache/lucene/search/grouping/dv/DVGroupFacetCollector.java Thu Mar 15 09:31:06 2012
@@ -0,0 +1,288 @@
+package org.apache.lucene.search.grouping.dv;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.index.AtomicReader;
+import org.apache.lucene.index.AtomicReaderContext;
+import org.apache.lucene.index.DocValues;
+import org.apache.lucene.search.grouping.AbstractGroupFacetCollector;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.PriorityQueue;
+import org.apache.lucene.util.SentinelIntSet;
+import org.apache.lucene.util.UnicodeUtil;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * An implementation of {@link AbstractGroupFacetCollector} that computes grouped facets based on docvalues.
+ *
+ * @lucene.experimental
+ */
+public abstract class DVGroupFacetCollector extends AbstractGroupFacetCollector {
+
+  final DocValues.Type groupDvType;
+  final boolean groupDiskResident;
+  final DocValues.Type facetFieldDvType;
+  final boolean facetDiskResident;
+
+  final List<GroupedFacetHit> groupedFacetHits;
+  final SentinelIntSet segmentGroupedFacetHits;
+
+  /**
+   * Factory method for creating the right implementation based on the group docvalues type and the facet docvalues
+   * type.
+   *
+   * Currently {@link DocValues.Type#BYTES_VAR_SORTED} and {@link DocValues.Type#BYTES_FIXED_SORTED} are
+   * the only docvalues types supported for both the group and facet field.
+   *
+   * @param groupField        The group field
+   * @param groupDvType       The docvalues type for the group field
+   * @param groupDiskResident Whether the group docvalues should be disk resident
+   * @param facetField        The facet field
+   * @param facetDvType       The docvalues type for the facet field
+   * @param facetDiskResident Whether the facet docvalues should be disk resident
+   * @param facetPrefix       The facet prefix a facet entry should start with to be included.
+   * @param initialSize       The initial allocation size of the internal int set and group facet list which should roughly
+   *                          match the total number of expected unique groups. Be aware that the heap usage is
+   *                          4 bytes * initialSize.
+   * @return a <code>DVGroupFacetCollector</code> implementation
+   */
+  public static DVGroupFacetCollector createDvGroupFacetCollector(String groupField,
+                                                                  DocValues.Type groupDvType,
+                                                                  boolean groupDiskResident,
+                                                                  String facetField,
+                                                                  DocValues.Type facetDvType,
+                                                                  boolean facetDiskResident,
+                                                                  BytesRef facetPrefix,
+                                                                  int initialSize) {
+    switch (groupDvType) {
+      case VAR_INTS:
+      case FIXED_INTS_8:
+      case FIXED_INTS_16:
+      case FIXED_INTS_32:
+      case FIXED_INTS_64:
+      case FLOAT_32:
+      case FLOAT_64:
+      case BYTES_FIXED_STRAIGHT:
+      case BYTES_FIXED_DEREF:
+      case BYTES_VAR_STRAIGHT:
+      case BYTES_VAR_DEREF:
+        throw new IllegalArgumentException(String.format("Group valueType %s not supported", groupDvType));
+      case BYTES_VAR_SORTED:
+      case BYTES_FIXED_SORTED:
+        return GroupSortedBR.createGroupSortedFacetCollector(groupField, groupDvType, groupDiskResident, facetField, facetDvType, facetDiskResident, facetPrefix, initialSize);
+      default:
+        throw new IllegalArgumentException(String.format("Group valueType %s not supported", groupDvType));
+    }
+  }
+
+  DVGroupFacetCollector(String groupField, DocValues.Type groupDvType, boolean groupDiskResident, String facetField, DocValues.Type facetFieldDvType, boolean facetDiskResident, BytesRef facetPrefix, int initialSize) {
+    super(groupField, facetField, facetPrefix);
+    this.groupDvType = groupDvType;
+    this.groupDiskResident = groupDiskResident;
+    this.facetFieldDvType = facetFieldDvType;
+    this.facetDiskResident = facetDiskResident;
+    groupedFacetHits = new ArrayList<GroupedFacetHit>(initialSize);
+    segmentGroupedFacetHits = new SentinelIntSet(initialSize, -1);
+  }
+
+  static abstract class GroupSortedBR extends DVGroupFacetCollector {
+
+    final BytesRef facetSpare = new BytesRef();
+    final BytesRef groupSpare = new BytesRef();
+    DocValues.SortedSource groupFieldSource;
+
+    GroupSortedBR(String groupField, DocValues.Type groupDvType, boolean groupDiskResident, String facetField, DocValues.Type facetFieldDvType, boolean facetDiskResident, BytesRef facetPrefix, int initialSize) {
+      super(groupField, groupDvType, groupDiskResident, facetField, facetFieldDvType, facetDiskResident, facetPrefix, initialSize);
+    }
+
+    static DVGroupFacetCollector createGroupSortedFacetCollector(String groupField,
+                                                                 DocValues.Type groupDvType,
+                                                                 boolean groupDiskResident,
+                                                                 String facetField,
+                                                                 DocValues.Type facetDvType,
+                                                                 boolean facetDiskResident,
+                                                                 BytesRef facetPrefix,
+                                                                 int initialSize) {
+      switch (facetDvType) {
+        case VAR_INTS:
+        case FIXED_INTS_8:
+        case FIXED_INTS_16:
+        case FIXED_INTS_32:
+        case FIXED_INTS_64:
+        case FLOAT_32:
+        case FLOAT_64:
+        case BYTES_FIXED_STRAIGHT:
+        case BYTES_FIXED_DEREF:
+        case BYTES_VAR_STRAIGHT:
+        case BYTES_VAR_DEREF:
+          throw new IllegalArgumentException(String.format("Facet valueType %s not supported", facetDvType));
+        case BYTES_VAR_SORTED:
+        case BYTES_FIXED_SORTED:
+          return new FacetSortedBR(groupField, groupDvType, groupDiskResident, facetField, facetDvType, facetDiskResident, facetPrefix, initialSize);
+        default:
+          throw new IllegalArgumentException(String.format("Facet valueType %s not supported", facetDvType));
+      }
+    }
+
+
+    static class FacetSortedBR extends GroupSortedBR {
+
+      private DocValues.SortedSource facetFieldSource;
+
+      FacetSortedBR(String groupField, DocValues.Type groupDvType, boolean groupDiskResident, String facetField, DocValues.Type facetDvType, boolean diskResident, BytesRef facetPrefix, int initialSize) {
+        super(groupField, groupDvType, groupDiskResident, facetField, facetDvType, diskResident, facetPrefix, initialSize);
+      }
+
+      public void collect(int doc) throws IOException {
+        int facetOrd = facetFieldSource.ord(doc);
+        if (facetOrd < startFacetOrd || facetOrd >= endFacetOrd) {
+          return;
+        }
+
+        int groupOrd = groupFieldSource.ord(doc);
+        int segmentGroupedFacetsIndex = (groupOrd * facetFieldSource.getValueCount()) + facetOrd;
+        if (segmentGroupedFacetHits.exists(segmentGroupedFacetsIndex)) {
+          return;
+        }
+
+        segmentTotalCount++;
+        segmentFacetCounts[facetOrd]++;
+
+        segmentGroupedFacetHits.put(segmentGroupedFacetsIndex);
+        groupedFacetHits.add(
+            new GroupedFacetHit(
+                groupFieldSource.getByOrd(groupOrd, new BytesRef()),
+                facetFieldSource.getByOrd(facetOrd, new BytesRef())
+            )
+        );
+      }
+
+      public void setNextReader(AtomicReaderContext context) throws IOException {
+        if (segmentFacetCounts != null) {
+          segmentResults.add(createSegmentResult());
+        }
+
+        groupFieldSource = getDocValuesSortedSource(groupField, groupDvType, groupDiskResident, context.reader());
+        facetFieldSource = getDocValuesSortedSource(facetField, facetFieldDvType, facetDiskResident, context.reader());
+        segmentFacetCounts = new int[facetFieldSource.getValueCount()];
+        segmentTotalCount = 0;
+
+        segmentGroupedFacetHits.clear();
+        for (GroupedFacetHit groupedFacetHit : groupedFacetHits) {
+          int facetOrd = facetFieldSource.getOrdByValue(groupedFacetHit.facetValue, facetSpare);
+          if (facetOrd < 0) {
+            continue;
+          }
+
+          int groupOrd = groupFieldSource.getOrdByValue(groupedFacetHit.groupValue, groupSpare);
+          if (groupOrd < 0) {
+            continue;
+          }
+
+          int segmentGroupedFacetsIndex = (groupOrd * facetFieldSource.getValueCount()) + facetOrd;
+          segmentGroupedFacetHits.put(segmentGroupedFacetsIndex);
+        }
+
+        if (facetPrefix != null) {
+          startFacetOrd = facetFieldSource.getOrdByValue(facetPrefix, facetSpare);
+          if (startFacetOrd < 0) {
+            // Points to the ord one higher than facetPrefix
+            startFacetOrd = -startFacetOrd - 1;
+          }
+          BytesRef facetEndPrefix = BytesRef.deepCopyOf(facetPrefix);
+          facetEndPrefix.append(UnicodeUtil.BIG_TERM);
+          endFacetOrd = facetFieldSource.getOrdByValue(facetEndPrefix, facetSpare);
+          endFacetOrd = -endFacetOrd - 1; // Points to the ord one higher than facetEndPrefix
+        } else {
+          startFacetOrd = 0;
+          endFacetOrd = facetFieldSource.getValueCount();
+        }
+      }
+
+      protected SegmentResult createSegmentResult() throws IOException {
+        if (startFacetOrd == 0 && facetFieldSource.getByOrd(startFacetOrd, facetSpare).length == 0) {
+          int missing = segmentFacetCounts[0];
+          int total = segmentTotalCount - segmentFacetCounts[0];
+          return new SegmentResult(segmentFacetCounts, total, missing, facetFieldSource, endFacetOrd);
+        } else {
+          return new SegmentResult(segmentFacetCounts, segmentTotalCount, facetFieldSource, startFacetOrd, endFacetOrd);
+        }
+      }
+
+      private DocValues.SortedSource getDocValuesSortedSource(String field, DocValues.Type dvType, boolean diskResident, AtomicReader reader) throws IOException {
+        DocValues dv = reader.docValues(field);
+        DocValues.Source dvSource;
+        if (dv != null) {
+          dvSource = diskResident ? dv.getDirectSource() : dv.getSource();
+        } else {
+          dvSource = DocValues.getDefaultSortedSource(dvType, reader.maxDoc());
+        }
+        return dvSource.asSortedSource();
+      }
+
+      private static class SegmentResult extends AbstractGroupFacetCollector.SegmentResult {
+
+        final DocValues.SortedSource facetFieldSource;
+        final BytesRef spare = new BytesRef();
+
+        SegmentResult(int[] counts, int total, int missing, DocValues.SortedSource facetFieldSource, int endFacetOrd) {
+          super(counts, total, missing, endFacetOrd);
+          this.facetFieldSource = facetFieldSource;
+          this.mergePos = 1;
+          if (mergePos < maxTermPos) {
+            mergeTerm = facetFieldSource.getByOrd(mergePos, spare);
+          }
+        }
+
+        SegmentResult(int[] counts, int total, DocValues.SortedSource facetFieldSource, int startFacetOrd, int endFacetOrd) {
+          super(counts, total, 0, endFacetOrd);
+          this.facetFieldSource = facetFieldSource;
+          this.mergePos = startFacetOrd;
+          if (mergePos < maxTermPos) {
+            mergeTerm = facetFieldSource.getByOrd(mergePos, spare);
+          }
+        }
+
+        /**
+         * {@inheritDoc}
+         */
+        protected void nextTerm() throws IOException {
+          mergeTerm = facetFieldSource.getByOrd(mergePos, spare);
+        }
+
+      }
+
+    }
+
+  }
+
+}
+
+class GroupedFacetHit {
+
+  final BytesRef groupValue;
+  final BytesRef facetValue;
+
+  GroupedFacetHit(BytesRef groupValue, BytesRef facetValue) {
+    this.groupValue = groupValue;
+    this.facetValue = facetValue;
+  }
+}

Modified: lucene/dev/trunk/modules/grouping/src/java/org/apache/lucene/search/grouping/term/TermGroupFacetCollector.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/grouping/src/java/org/apache/lucene/search/grouping/term/TermGroupFacetCollector.java?rev=1300860&r1=1300859&r2=1300860&view=diff
==============================================================================
--- lucene/dev/trunk/modules/grouping/src/java/org/apache/lucene/search/grouping/term/TermGroupFacetCollector.java (original)
+++ lucene/dev/trunk/modules/grouping/src/java/org/apache/lucene/search/grouping/term/TermGroupFacetCollector.java Thu Mar 15 09:31:06 2012
@@ -38,14 +38,9 @@ public abstract class TermGroupFacetColl
 
   final List<GroupedFacetHit> groupedFacetHits;
   final SentinelIntSet segmentGroupedFacetHits;
-  final List<SegmentResult> segmentResults;
   final BytesRef spare = new BytesRef();
 
   FieldCache.DocTermsIndex groupFieldTermsIndex;
-  int[] segmentFacetCounts;
-  int segmentTotalCount;
-  int startFacetOrd;
-  int endFacetOrd;
 
   /**
    * Factory method for creating the right implementation based on the fact whether the facet field contains
@@ -76,57 +71,8 @@ public abstract class TermGroupFacetColl
     super(groupField, facetField, facetPrefix);
     groupedFacetHits = new ArrayList<GroupedFacetHit>(initialSize);
     segmentGroupedFacetHits = new SentinelIntSet(initialSize, -1);
-    segmentResults = new ArrayList<SegmentResult>();
   }
 
-  /**
-   * {@inheritDoc}
-   */
-  public GroupedFacetResult mergeSegmentResults(int size, int minCount, boolean orderByCount) throws IOException {
-    if (segmentFacetCounts != null) {
-      segmentResults.add(createSegmentResult());
-      segmentFacetCounts = null; // reset
-    }
-
-    int totalCount = 0;
-    int missingCount = 0;
-    SegmentResultPriorityQueue segments = new SegmentResultPriorityQueue(segmentResults.size());
-    for (SegmentResult segmentResult : segmentResults) {
-      missingCount += segmentResult.missing;
-      if (segmentResult.mergePos >= segmentResult.maxTermPos) {
-        continue;
-      }
-      totalCount += segmentResult.total;
-      segmentResult.initializeForMerge();
-      segments.add(segmentResult);
-    }
-
-    GroupedFacetResult facetResult = new GroupedFacetResult(size, minCount, orderByCount, totalCount, missingCount);
-    while (segments.size() > 0) {
-      SegmentResult segmentResult = segments.top();
-      BytesRef currentFacetValue = BytesRef.deepCopyOf(segmentResult.mergeTerm);
-      int count = 0;
-
-      do {
-        count += segmentResult.counts[segmentResult.mergePos++];
-        if (segmentResult.mergePos < segmentResult.maxTermPos) {
-          segmentResult.nextTerm();
-          segmentResult = segments.updateTop();
-        } else {
-          segments.pop();
-          segmentResult = segments.top();
-          if (segmentResult == null) {
-            break;
-          }
-        }
-      } while (currentFacetValue.equals(segmentResult.mergeTerm));
-      facetResult.addFacetCount(currentFacetValue, count);
-    }
-    return facetResult;
-  }
-
-  protected abstract SegmentResult createSegmentResult();
-
   // Implementation for single valued facet fields.
   static class SV extends TermGroupFacetCollector {
 
@@ -202,9 +148,30 @@ public abstract class TermGroupFacetColl
       }
     }
 
-    protected SegmentResult createSegmentResult() {
+    protected SegmentResult createSegmentResult() throws IOException {
       return new SegmentResult(segmentFacetCounts, segmentTotalCount, facetFieldTermsIndex.getTermsEnum(), startFacetOrd, endFacetOrd);
     }
+
+    private static class SegmentResult extends AbstractGroupFacetCollector.SegmentResult {
+
+      final TermsEnum tenum;
+
+      SegmentResult(int[] counts, int total, TermsEnum tenum, int startFacetOrd, int endFacetOrd) throws IOException {
+        super(counts, total - counts[0], counts[0], endFacetOrd);
+        this.tenum = tenum;
+        this.mergePos = startFacetOrd == 0 ? 1 : startFacetOrd;
+        if (mergePos < maxTermPos) {
+          tenum.seekExact(mergePos);
+          mergeTerm = tenum.term();
+        }
+      }
+
+      protected void nextTerm() throws IOException {
+        mergeTerm = tenum.next();
+      }
+
+    }
+
   }
 
   // Implementation for multi valued facet fields.
@@ -316,54 +283,28 @@ public abstract class TermGroupFacetColl
       }
     }
 
-    protected SegmentResult createSegmentResult() {
+    protected SegmentResult createSegmentResult() throws IOException {
       return new SegmentResult(segmentFacetCounts, segmentTotalCount, facetFieldDocTermOrds.numTerms(), facetOrdTermsEnum, startFacetOrd, endFacetOrd);
     }
-  }
 
-}
+    private static class SegmentResult extends AbstractGroupFacetCollector.SegmentResult {
 
-class SegmentResult {
-
-  final int[] counts;
-  final int total;
-  final int missing;
-
-  // Used for merging the segment results
-  BytesRef mergeTerm;
-  int mergePos;
-  final int maxTermPos;
-  final TermsEnum tenum;
-
-  SegmentResult(int[] counts, int total, TermsEnum tenum, int startFacetOrd, int endFacetOrd) {
-    this.counts = counts;
-    this.missing = counts[0];
-    this.total = total - missing;
-    this.tenum = tenum;
-    this.mergePos = startFacetOrd == 0 ? 1 : startFacetOrd;
-    this.maxTermPos = endFacetOrd;
-  }
+      final TermsEnum tenum;
 
-  SegmentResult(int[] counts, int total, int missingCountIndex, TermsEnum tenum, int startFacetOrd, int endFacetOrd) {
-    this.counts = counts;
-    this.missing = counts[missingCountIndex];
-    this.total = total - missing;
-    this.tenum = tenum;
-    this.mergePos = startFacetOrd;
-    if (endFacetOrd == missingCountIndex + 1) {
-      this.maxTermPos = missingCountIndex;
-    } else {
-      this.maxTermPos = endFacetOrd;
-    }
-  }
+      SegmentResult(int[] counts, int total, int missingCountIndex, TermsEnum tenum, int startFacetOrd, int endFacetOrd) throws IOException {
+        super(counts, total - counts[missingCountIndex], counts[missingCountIndex],
+            endFacetOrd == missingCountIndex + 1 ?  missingCountIndex : endFacetOrd);
+        this.tenum = tenum;
+        this.mergePos = startFacetOrd;
+        tenum.seekExact(mergePos);
+        mergeTerm = tenum.term();
+      }
 
-  void initializeForMerge() throws IOException {
-    tenum.seekExact(mergePos);
-    mergeTerm = tenum.term();
-  }
+      protected void nextTerm() throws IOException {
+        mergeTerm = tenum.next();
+      }
 
-  void nextTerm() throws IOException {
-    mergeTerm = tenum.next();
+    }
   }
 
 }
@@ -377,15 +318,4 @@ class GroupedFacetHit {
     this.groupValue = groupValue;
     this.facetValue = facetValue;
   }
-}
-
-class SegmentResultPriorityQueue extends PriorityQueue<SegmentResult> {
-
-  SegmentResultPriorityQueue(int maxSize) {
-    super(maxSize);
-  }
-
-  protected boolean lessThan(SegmentResult a, SegmentResult b) {
-    return a.mergeTerm.compareTo(b.mergeTerm) < 0;
-  }
-}
+}
\ No newline at end of file

Copied: lucene/dev/trunk/modules/grouping/src/test/org/apache/lucene/search/grouping/GroupFacetCollectorTest.java (from r1300853, lucene/dev/trunk/modules/grouping/src/test/org/apache/lucene/search/grouping/TermGroupFacetCollectorTest.java)
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/grouping/src/test/org/apache/lucene/search/grouping/GroupFacetCollectorTest.java?p2=lucene/dev/trunk/modules/grouping/src/test/org/apache/lucene/search/grouping/GroupFacetCollectorTest.java&p1=lucene/dev/trunk/modules/grouping/src/test/org/apache/lucene/search/grouping/TermGroupFacetCollectorTest.java&r1=1300853&r2=1300860&rev=1300860&view=diff
==============================================================================
--- lucene/dev/trunk/modules/grouping/src/test/org/apache/lucene/search/grouping/TermGroupFacetCollectorTest.java (original)
+++ lucene/dev/trunk/modules/grouping/src/test/org/apache/lucene/search/grouping/GroupFacetCollectorTest.java Thu Mar 15 09:31:06 2012
@@ -26,6 +26,7 @@ import org.apache.lucene.index.Term;
 import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.MatchAllDocsQuery;
 import org.apache.lucene.search.TermQuery;
+import org.apache.lucene.search.grouping.dv.DVGroupFacetCollector;
 import org.apache.lucene.search.grouping.term.TermGroupFacetCollector;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.util.BytesRef;
@@ -34,7 +35,7 @@ import org.apache.lucene.util._TestUtil;
 import java.io.IOException;
 import java.util.*;
 
-public class TermGroupFacetCollectorTest extends AbstractGroupingTestCase {
+public class GroupFacetCollectorTest extends AbstractGroupingTestCase {
 
   public void testSimple() throws Exception {
     final String groupField = "hotel";
@@ -47,47 +48,47 @@ public class TermGroupFacetCollectorTest
         dir,
         newIndexWriterConfig(TEST_VERSION_CURRENT,
             new MockAnalyzer(random)).setMergePolicy(newLogMergePolicy()));
-    boolean canUseIDV = false;// Enable later... !"Lucene3x".equals(w.w.getConfig().getCodec().getName());
+    boolean canUseDV = !"Lucene3x".equals(w.w.getConfig().getCodec().getName());
+    boolean useDv = canUseDV && random.nextBoolean();
 
     // 0
     Document doc = new Document();
-    addGroupField(doc, groupField, "a", canUseIDV);
-    doc.add(new Field("airport", "ams", TextField.TYPE_UNSTORED));
-    doc.add(new Field("duration", "5", TextField.TYPE_UNSTORED));
+    addField(doc, groupField, "a", canUseDV);
+    addField(doc, "airport", "ams", canUseDV);
+    addField(doc, "duration", "5", canUseDV);
     w.addDocument(doc);
 
     // 1
     doc = new Document();
-    addGroupField(doc, groupField, "a", canUseIDV);
-    doc.add(new Field("airport", "dus", TextField.TYPE_STORED));
-    doc.add(new Field("duration", "10", TextField.TYPE_UNSTORED));
+    addField(doc, groupField, "a", canUseDV);
+    addField(doc, "airport", "dus", canUseDV);
+    addField(doc, "duration", "10", canUseDV);
     w.addDocument(doc);
 
     // 2
     doc = new Document();
-    addGroupField(doc, groupField, "b", canUseIDV);
-    doc.add(new Field("airport", "ams", TextField.TYPE_UNSTORED));
-    doc.add(new Field("duration", "10", TextField.TYPE_UNSTORED));
+    addField(doc, groupField, "b", canUseDV);
+    addField(doc, "airport", "ams", canUseDV);
+    addField(doc, "duration", "10", canUseDV);
     w.addDocument(doc);
     w.commit(); // To ensure a second segment
 
     // 3
     doc = new Document();
-    addGroupField(doc, groupField, "b", canUseIDV);
-    doc.add(new Field("airport", "ams", TextField.TYPE_UNSTORED));
-    doc.add(new Field("duration", "5", TextField.TYPE_UNSTORED));
+    addField(doc, groupField, "b", canUseDV);
+    addField(doc, "airport", "ams", canUseDV);
+    addField(doc, "duration", "5", canUseDV);
     w.addDocument(doc);
 
     // 4
     doc = new Document();
-    addGroupField(doc, groupField, "b", canUseIDV);
-    doc.add(new Field("airport", "ams", TextField.TYPE_UNSTORED));
-    doc.add(new Field("duration", "5", TextField.TYPE_UNSTORED));
+    addField(doc, groupField, "b", canUseDV);
+    addField(doc, "airport", "ams", canUseDV);
+    addField(doc, "duration", "5", canUseDV);
     w.addDocument(doc);
 
     IndexSearcher indexSearcher = new IndexSearcher(w.getReader());
-    TermGroupFacetCollector groupedAirportFacetCollector =
-        TermGroupFacetCollector.createTermGroupFacetCollector(groupField, "airport", false, null, 128);
+    AbstractGroupFacetCollector groupedAirportFacetCollector = createRandomCollector(groupField, "airport", null, false, useDv);
     indexSearcher.search(new MatchAllDocsQuery(), groupedAirportFacetCollector);
     TermGroupFacetCollector.GroupedFacetResult airportResult = groupedAirportFacetCollector.mergeSegmentResults(10, 0, false);
     assertEquals(3, airportResult.getTotalCount());
@@ -101,8 +102,7 @@ public class TermGroupFacetCollectorTest
     assertEquals(1, entries.get(1).getCount());
 
 
-    TermGroupFacetCollector groupedDurationFacetCollector =
-        TermGroupFacetCollector.createTermGroupFacetCollector(groupField, "duration", false, null, 128);
+    AbstractGroupFacetCollector groupedDurationFacetCollector = createRandomCollector(groupField, "duration", null, false, useDv);
     indexSearcher.search(new MatchAllDocsQuery(), groupedDurationFacetCollector);
     TermGroupFacetCollector.GroupedFacetResult durationResult = groupedDurationFacetCollector.mergeSegmentResults(10, 0, false);
     assertEquals(4, durationResult.getTotalCount());
@@ -117,34 +117,34 @@ public class TermGroupFacetCollectorTest
 
     // 5
     doc = new Document();
-    addGroupField(doc, groupField, "b", canUseIDV);
-    doc.add(new Field("duration", "5", TextField.TYPE_UNSTORED));
+    addField(doc, groupField, "b", canUseDV);
+    addField(doc, "duration", "5", canUseDV);
     w.addDocument(doc);
 
     // 6
     doc = new Document();
-    addGroupField(doc, groupField, "b", canUseIDV);
-    doc.add(new Field("airport", "bru", TextField.TYPE_UNSTORED));
-    doc.add(new Field("duration", "10", TextField.TYPE_UNSTORED));
+    addField(doc, groupField, "b", canUseDV);
+    addField(doc, "airport", "bru", canUseDV);
+    addField(doc, "duration", "10", canUseDV);
     w.addDocument(doc);
 
     // 7
     doc = new Document();
-    addGroupField(doc, groupField, "b", canUseIDV);
-    doc.add(new Field("airport", "bru", TextField.TYPE_UNSTORED));
-    doc.add(new Field("duration", "15", TextField.TYPE_UNSTORED));
+    addField(doc, groupField, "b", canUseDV);
+    addField(doc, "airport", "bru", canUseDV);
+    addField(doc, "duration", "15", canUseDV);
     w.addDocument(doc);
 
     // 8
     doc = new Document();
-    addGroupField(doc, groupField, "a", canUseIDV);
-    doc.add(new Field("airport", "bru", TextField.TYPE_UNSTORED));
-    doc.add(new Field("duration", "10", TextField.TYPE_UNSTORED));
+    addField(doc, groupField, "a", canUseDV);
+    addField(doc, "airport", "bru", canUseDV);
+    addField(doc, "duration", "10", canUseDV);
     w.addDocument(doc);
 
     indexSearcher.getIndexReader().close();
     indexSearcher = new IndexSearcher(w.getReader());
-    groupedAirportFacetCollector = TermGroupFacetCollector.createTermGroupFacetCollector(groupField, "airport", true, null, 128);
+    groupedAirportFacetCollector = createRandomCollector(groupField, "airport", null, true, useDv);
     indexSearcher.search(new MatchAllDocsQuery(), groupedAirportFacetCollector);
     airportResult = groupedAirportFacetCollector.mergeSegmentResults(3, 0, true);
     assertEquals(5, airportResult.getTotalCount());
@@ -157,7 +157,7 @@ public class TermGroupFacetCollectorTest
     assertEquals("dus", entries.get(1).getValue().utf8ToString());
     assertEquals(1, entries.get(1).getCount());
 
-    groupedDurationFacetCollector = TermGroupFacetCollector.createTermGroupFacetCollector(groupField, "duration", false, null, 128);
+    groupedDurationFacetCollector = createRandomCollector(groupField, "duration", null, false, useDv);
     indexSearcher.search(new MatchAllDocsQuery(), groupedDurationFacetCollector);
     durationResult = groupedDurationFacetCollector.mergeSegmentResults(10, 2, true);
     assertEquals(5, durationResult.getTotalCount());
@@ -170,21 +170,21 @@ public class TermGroupFacetCollectorTest
 
     // 9
     doc = new Document();
-    addGroupField(doc, groupField, "c", canUseIDV);
-    doc.add(new Field("airport", "bru", TextField.TYPE_UNSTORED));
-    doc.add(new Field("duration", "15", TextField.TYPE_UNSTORED));
+    addField(doc, groupField, "c", canUseDV);
+    addField(doc, "airport", "bru", canUseDV);
+    addField(doc, "duration", "15", canUseDV);
     w.addDocument(doc);
 
     // 10
     doc = new Document();
-    addGroupField(doc, groupField, "c", canUseIDV);
-    doc.add(new Field("airport", "dus", TextField.TYPE_UNSTORED));
-    doc.add(new Field("duration", "10", TextField.TYPE_UNSTORED));
+    addField(doc, groupField, "c", canUseDV);
+    addField(doc, "airport", "dus", canUseDV);
+    addField(doc, "duration", "10", canUseDV);
     w.addDocument(doc);
 
     indexSearcher.getIndexReader().close();
     indexSearcher = new IndexSearcher(w.getReader());
-    groupedAirportFacetCollector = TermGroupFacetCollector.createTermGroupFacetCollector(groupField, "airport", false, null, 128);
+    groupedAirportFacetCollector = createRandomCollector(groupField, "airport", null, false, useDv);
     indexSearcher.search(new MatchAllDocsQuery(), groupedAirportFacetCollector);
     airportResult = groupedAirportFacetCollector.mergeSegmentResults(10, 0, false);
     assertEquals(7, airportResult.getTotalCount());
@@ -199,7 +199,7 @@ public class TermGroupFacetCollectorTest
     assertEquals("dus", entries.get(2).getValue().utf8ToString());
     assertEquals(2, entries.get(2).getCount());
 
-    groupedDurationFacetCollector = TermGroupFacetCollector.createTermGroupFacetCollector(groupField, "duration", false, new BytesRef("1"), 128);
+    groupedDurationFacetCollector = createRandomCollector(groupField, "duration", "1", false, useDv);
     indexSearcher.search(new MatchAllDocsQuery(), groupedDurationFacetCollector);
     durationResult = groupedDurationFacetCollector.mergeSegmentResults(10, 0, true);
     assertEquals(5, durationResult.getTotalCount());
@@ -217,10 +217,10 @@ public class TermGroupFacetCollectorTest
     dir.close();
   }
 
-  private void addGroupField(Document doc, String groupField, String value, boolean canUseIDV) {
-    doc.add(new Field(groupField, value, TextField.TYPE_UNSTORED));
+  private void addField(Document doc, String field, String value, boolean canUseIDV) {
+    doc.add(new Field(field, value, StringField.TYPE_UNSTORED));
     if (canUseIDV) {
-      doc.add(new DocValuesField(groupField, new BytesRef(value), DocValues.Type.BYTES_VAR_SORTED));
+      doc.add(new DocValuesField(field, new BytesRef(value), DocValues.Type.BYTES_VAR_SORTED));
     }
   }
 
@@ -232,6 +232,7 @@ public class TermGroupFacetCollectorTest
       final IndexSearcher searcher = newSearcher(context.indexReader);
 
       for (int searchIter = 0; searchIter < 100; searchIter++) {
+        boolean useDv = context.useDV && random.nextBoolean();
         String searchTerm = context.contentStrings[random.nextInt(context.contentStrings.length)];
         int limit = random.nextInt(context.facetValues.size());
         int offset = random.nextInt(context.facetValues.size() - limit);
@@ -254,7 +255,7 @@ public class TermGroupFacetCollectorTest
         }
 
         GroupedFacetResult expectedFacetResult = createExpectedFacetResult(searchTerm, context, offset, limit, minCount, orderByCount, facetPrefix);
-        TermGroupFacetCollector groupFacetCollector = createRandomCollector("group", "facet", facetPrefix, multipleFacetsPerDocument);
+        AbstractGroupFacetCollector groupFacetCollector = createRandomCollector("group", "facet", facetPrefix, multipleFacetsPerDocument, useDv);
         searcher.search(new TermQuery(new Term("content", searchTerm)), groupFacetCollector);
         TermGroupFacetCollector.GroupedFacetResult actualFacetResult = groupFacetCollector.mergeSegmentResults(size, minCount, orderByCount);
 
@@ -357,19 +358,37 @@ public class TermGroupFacetCollectorTest
             new MockAnalyzer(random)
         )
     );
+    boolean canUseDV = !"Lucene3x".equals(writer.w.getConfig().getCodec().getName());
+    boolean useDv = canUseDV && random.nextBoolean();
 
     Document doc = new Document();
     Document docNoGroup = new Document();
     Document docNoFacet = new Document();
     Document docNoGroupNoFacet = new Document();
     Field group = newField("group", "", StringField.TYPE_UNSTORED);
+    DocValuesField groupDc = new DocValuesField("group", new BytesRef(), DocValues.Type.BYTES_VAR_SORTED);
+    if (useDv) {
+      doc.add(groupDc);
+      docNoFacet.add(groupDc);
+    }
     doc.add(group);
     docNoFacet.add(group);
-    Field[] facetFields = multipleFacetValuesPerDocument? new Field[2 + random.nextInt(6)] : new Field[1];
-    for (int i = 0; i < facetFields.length; i++) {
-      facetFields[i] = newField("facet", "", StringField.TYPE_UNSTORED);
-      doc.add(facetFields[i]);
-      docNoGroup.add(facetFields[i]);
+    Field[] facetFields;
+    if (useDv) {
+      facetFields = new Field[2];
+      facetFields[0] = newField("facet", "", StringField.TYPE_UNSTORED);
+      doc.add(facetFields[0]);
+      docNoGroup.add(facetFields[0]);
+      facetFields[1] = new DocValuesField("facet", new BytesRef(), DocValues.Type.BYTES_VAR_SORTED);
+      doc.add(facetFields[1]);
+      docNoGroup.add(facetFields[1]);
+    } else {
+      facetFields = multipleFacetValuesPerDocument ? new Field[2 + random.nextInt(6)] : new Field[1];
+      for (int i = 0; i < facetFields.length; i++) {
+        facetFields[i] = newField("facet", "", StringField.TYPE_UNSTORED);
+        doc.add(facetFields[i]);
+        docNoGroup.add(facetFields[i]);
+      }
     }
     Field content = newField("content", "", StringField.TYPE_UNSTORED);
     doc.add(content);
@@ -412,7 +431,7 @@ public class TermGroupFacetCollectorTest
 
       List<String> facetVals = new ArrayList<String>();
       if (random.nextInt(24) != 18) {
-        for (Field facetField : facetFields) {
+        if (useDv) {
           String facetValue = facetValues.get(random.nextInt(facetValues.size()));
           uniqueFacetValues.add(facetValue);
           if (!facetToGroups.containsKey(facetValue)) {
@@ -423,8 +442,24 @@ public class TermGroupFacetCollectorTest
           if (groupsInFacet.size() > facetWithMostGroups) {
             facetWithMostGroups = groupsInFacet.size();
           }
-          facetField.setStringValue(facetValue);
+          facetFields[0].setStringValue(facetValue);
+          facetFields[1].setBytesValue(new BytesRef(facetValue));
           facetVals.add(facetValue);
+        } else {
+          for (Field facetField : facetFields) {
+            String facetValue = facetValues.get(random.nextInt(facetValues.size()));
+            uniqueFacetValues.add(facetValue);
+            if (!facetToGroups.containsKey(facetValue)) {
+              facetToGroups.put(facetValue, new HashSet<String>());
+            }
+            Set<String> groupsInFacet = facetToGroups.get(facetValue);
+            groupsInFacet.add(groupValue);
+            if (groupsInFacet.size() > facetWithMostGroups) {
+              facetWithMostGroups = groupsInFacet.size();
+            }
+            facetField.setStringValue(facetValue);
+            facetVals.add(facetValue);
+          }
         }
       } else {
         uniqueFacetValues.add(null);
@@ -443,6 +478,9 @@ public class TermGroupFacetCollectorTest
       }
 
       if (groupValue != null) {
+        if (useDv) {
+          groupDc.setBytesValue(new BytesRef(groupValue));
+        }
         group.setStringValue(groupValue);
       }
       content.setStringValue(contentStr);
@@ -460,7 +498,7 @@ public class TermGroupFacetCollectorTest
     DirectoryReader reader = writer.getReader();
     writer.close();
 
-    return new IndexContext(searchTermToFacetToGroups, reader, numDocs, dir, facetWithMostGroups, numGroups, contentBrs, uniqueFacetValues);
+    return new IndexContext(searchTermToFacetToGroups, reader, numDocs, dir, facetWithMostGroups, numGroups, contentBrs, uniqueFacetValues, useDv);
   }
 
   private GroupedFacetResult createExpectedFacetResult(String searchTerm, IndexContext context, int offset, int limit, int minCount, final boolean orderByCount, String facetPrefix) {
@@ -532,9 +570,14 @@ public class TermGroupFacetCollectorTest
     return new GroupedFacetResult(totalCount, totalMissCount, entriesResult);
   }
 
-  private TermGroupFacetCollector createRandomCollector(String groupField, String facetField, String facetPrefix, boolean multipleFacetsPerDocument) {
+  private AbstractGroupFacetCollector createRandomCollector(String groupField, String facetField, String facetPrefix, boolean multipleFacetsPerDocument, boolean useDv) {
     BytesRef facetPrefixBR = facetPrefix == null ? null : new BytesRef(facetPrefix);
-    return TermGroupFacetCollector.createTermGroupFacetCollector(groupField, facetField, multipleFacetsPerDocument, facetPrefixBR, random.nextInt(1024));
+    if (useDv) {
+      return DVGroupFacetCollector.createDvGroupFacetCollector(groupField, DocValues.Type.BYTES_VAR_SORTED,
+          random.nextBoolean(), facetField, DocValues.Type.BYTES_VAR_SORTED, random.nextBoolean(), facetPrefixBR, random.nextInt(1024));
+    } else {
+      return TermGroupFacetCollector.createTermGroupFacetCollector(groupField, facetField, multipleFacetsPerDocument, facetPrefixBR, random.nextInt(1024));
+    }
   }
 
   private String getFromSet(Set<String> set, int index) {
@@ -558,9 +601,10 @@ public class TermGroupFacetCollectorTest
     final int facetWithMostGroups;
     final int numGroups;
     final String[] contentStrings;
+    final boolean useDV;
 
     public IndexContext(Map<String, Map<String, Set<String>>> searchTermToFacetGroups, DirectoryReader r,
-                        int numDocs, Directory dir, int facetWithMostGroups, int numGroups, String[] contentStrings, NavigableSet<String> facetValues) {
+                        int numDocs, Directory dir, int facetWithMostGroups, int numGroups, String[] contentStrings, NavigableSet<String> facetValues, boolean useDV) {
       this.searchTermToFacetGroups = searchTermToFacetGroups;
       this.indexReader = r;
       this.numDocs = numDocs;
@@ -569,6 +613,7 @@ public class TermGroupFacetCollectorTest
       this.numGroups = numGroups;
       this.contentStrings = contentStrings;
       this.facetValues = facetValues;
+      this.useDV = useDV;
     }
   }