You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by mv...@apache.org on 2012/03/15 10:31:07 UTC
svn commit: r1300860 - in /lucene/dev/trunk: lucene/contrib/
modules/grouping/src/java/org/apache/lucene/search/grouping/
modules/grouping/src/java/org/apache/lucene/search/grouping/dv/
modules/grouping/src/java/org/apache/lucene/search/grouping/term/ ...
Author: mvg
Date: Thu Mar 15 09:31:06 2012
New Revision: 1300860
URL: http://svn.apache.org/viewvc?rev=1300860&view=rev
Log:
LUCENE-3856: Added docvalues based grouped facet collector.
Added:
lucene/dev/trunk/modules/grouping/src/java/org/apache/lucene/search/grouping/dv/DVGroupFacetCollector.java
lucene/dev/trunk/modules/grouping/src/test/org/apache/lucene/search/grouping/GroupFacetCollectorTest.java
- copied, changed from r1300853, lucene/dev/trunk/modules/grouping/src/test/org/apache/lucene/search/grouping/TermGroupFacetCollectorTest.java
Removed:
lucene/dev/trunk/modules/grouping/src/test/org/apache/lucene/search/grouping/TermGroupFacetCollectorTest.java
Modified:
lucene/dev/trunk/lucene/contrib/CHANGES.txt
lucene/dev/trunk/modules/grouping/src/java/org/apache/lucene/search/grouping/AbstractGroupFacetCollector.java
lucene/dev/trunk/modules/grouping/src/java/org/apache/lucene/search/grouping/term/TermGroupFacetCollector.java
Modified: lucene/dev/trunk/lucene/contrib/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/contrib/CHANGES.txt?rev=1300860&r1=1300859&r2=1300860&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/contrib/CHANGES.txt (original)
+++ lucene/dev/trunk/lucene/contrib/CHANGES.txt Thu Mar 15 09:31:06 2012
@@ -72,7 +72,7 @@ New Features
start/endOffset, if offsets are indexed. (Alan Woodward via Mike
McCandless)
- * LUCENE-3802: Support for grouped faceting. (Martijn van Groningen)
+ * LUCENE-3802, LUCENE-3856: Support for grouped faceting. (Martijn van Groningen)
API Changes
Modified: lucene/dev/trunk/modules/grouping/src/java/org/apache/lucene/search/grouping/AbstractGroupFacetCollector.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/grouping/src/java/org/apache/lucene/search/grouping/AbstractGroupFacetCollector.java?rev=1300860&r1=1300859&r2=1300860&view=diff
==============================================================================
--- lucene/dev/trunk/modules/grouping/src/java/org/apache/lucene/search/grouping/AbstractGroupFacetCollector.java (original)
+++ lucene/dev/trunk/modules/grouping/src/java/org/apache/lucene/search/grouping/AbstractGroupFacetCollector.java Thu Mar 15 09:31:06 2012
@@ -20,6 +20,7 @@ package org.apache.lucene.search.groupin
import org.apache.lucene.search.Collector;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.PriorityQueue;
import java.io.IOException;
import java.util.*;
@@ -34,11 +35,18 @@ public abstract class AbstractGroupFacet
protected final String groupField;
protected final String facetField;
protected final BytesRef facetPrefix;
+ protected final List<SegmentResult> segmentResults;
+
+ protected int[] segmentFacetCounts;
+ protected int segmentTotalCount;
+ protected int startFacetOrd;
+ protected int endFacetOrd;
protected AbstractGroupFacetCollector(String groupField, String facetField, BytesRef facetPrefix) {
this.groupField = groupField;
this.facetField = facetField;
this.facetPrefix = facetPrefix;
+ segmentResults = new ArrayList<SegmentResult>();
}
/**
@@ -52,7 +60,49 @@ public abstract class AbstractGroupFacet
* @return grouped facet results
* @throws IOException If I/O related errors occur during merging segment grouped facet counts.
*/
- public abstract GroupedFacetResult mergeSegmentResults(int size, int minCount, boolean orderByCount) throws IOException;
+ public GroupedFacetResult mergeSegmentResults(int size, int minCount, boolean orderByCount) throws IOException {
+ // Flush the counts of the segment that was being collected last, if it has not been flushed yet.
+ if (segmentFacetCounts != null) {
+ segmentResults.add(createSegmentResult());
+ segmentFacetCounts = null; // reset
+ }
+
+ int totalCount = 0;
+ int missingCount = 0;
+ // Queue of the segments that still have terms to merge; ordering is defined by SegmentResultPriorityQueue.
+ SegmentResultPriorityQueue segments = new SegmentResultPriorityQueue(segmentResults.size());
+ for (SegmentResult segmentResult : segmentResults) {
+ // The missing count is summed for every segment, including the ones skipped below.
+ missingCount += segmentResult.missing;
+ // Nothing to merge for this segment: its total is not added and it never enters the queue.
+ if (segmentResult.mergePos >= segmentResult.maxTermPos) {
+ continue;
+ }
+ totalCount += segmentResult.total;
+ segments.add(segmentResult);
+ }
+
+ GroupedFacetResult facetResult = new GroupedFacetResult(size, minCount, orderByCount, totalCount, missingCount);
+ while (segments.size() > 0) {
+ SegmentResult segmentResult = segments.top();
+ // Deep copy: mergeTerm is reassigned by nextTerm() below, and implementations may reuse the same BytesRef.
+ BytesRef currentFacetValue = BytesRef.deepCopyOf(segmentResult.mergeTerm);
+ int count = 0;
+
+ // Sum the counts of every segment whose current term equals currentFacetValue.
+ do {
+ count += segmentResult.counts[segmentResult.mergePos++];
+ if (segmentResult.mergePos < segmentResult.maxTermPos) {
+ // Segment has more terms: advance it and let the queue re-establish its top.
+ segmentResult.nextTerm();
+ segmentResult = segments.updateTop();
+ } else {
+ // Segment is exhausted: drop it and continue with the new top, if there is one.
+ segments.pop();
+ segmentResult = segments.top();
+ if (segmentResult == null) {
+ break;
+ }
+ }
+ } while (currentFacetValue.equals(segmentResult.mergeTerm));
+ facetResult.addFacetCount(currentFacetValue, count);
+ }
+ return facetResult;
+ }
+
+ /**
+ * Creates the {@link SegmentResult} for the segment whose counts are currently held in
+ * <code>segmentFacetCounts</code>. Called by {@link #mergeSegmentResults(int, int, boolean)} when
+ * <code>segmentFacetCounts</code> is not <code>null</code>.
+ *
+ * @return the result of the current segment
+ * @throws IOException If I/O related errors occur
+ */
+ protected abstract SegmentResult createSegmentResult() throws IOException;
public void setScorer(Scorer scorer) throws IOException {
}
@@ -221,4 +271,45 @@ public abstract class AbstractGroupFacet
}
}
+ /**
+ * Contains the local grouped segment counts for a particular segment.
+ * The results of all segments are combined into one result by
+ * {@link AbstractGroupFacetCollector#mergeSegmentResults(int, int, boolean)}.
+ */
+ protected abstract static class SegmentResult {
+
+ // Count per term position; the merge reads counts[mergePos].
+ protected final int[] counts;
+ // Segment total; the merge adds it to the overall total count.
+ protected final int total;
+ // Segment missing count; the merge adds it to the overall missing count.
+ protected final int missing;
+ // Exclusive upper bound for mergePos; the segment is exhausted once mergePos reaches it.
+ protected final int maxTermPos;
+
+ // Term at the current merge position; segments are ordered by this term while merging.
+ protected BytesRef mergeTerm;
+ // Current position in counts; set by subclasses and incremented by the merge.
+ protected int mergePos;
+
+ protected SegmentResult(int[] counts, int total, int missing, int maxTermPos) {
+ this.counts = counts;
+ this.total = total;
+ this.missing = missing;
+ this.maxTermPos = maxTermPos;
+ }
+
+ /**
+ * Go to next term in this <code>SegmentResult</code> in order to retrieve the grouped facet counts.
+ * The merge increments <code>mergePos</code> before calling this method.
+ *
+ * @throws IOException If I/O related errors occur
+ */
+ protected abstract void nextTerm() throws IOException;
+
+ }
+
+ /**
+  * Priority queue that keeps the {@link SegmentResult} with the smallest current merge term on top.
+  */
+ private static class SegmentResultPriorityQueue extends PriorityQueue<SegmentResult> {
+
+   SegmentResultPriorityQueue(int maxSize) {
+     super(maxSize);
+   }
+
+   /**
+    * Orders two segment results by their current <code>mergeTerm</code>.
+    */
+   @Override
+   protected boolean lessThan(SegmentResult a, SegmentResult b) {
+     final int order = a.mergeTerm.compareTo(b.mergeTerm);
+     return order < 0;
+   }
+ }
+
}
Added: lucene/dev/trunk/modules/grouping/src/java/org/apache/lucene/search/grouping/dv/DVGroupFacetCollector.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/grouping/src/java/org/apache/lucene/search/grouping/dv/DVGroupFacetCollector.java?rev=1300860&view=auto
==============================================================================
--- lucene/dev/trunk/modules/grouping/src/java/org/apache/lucene/search/grouping/dv/DVGroupFacetCollector.java (added)
+++ lucene/dev/trunk/modules/grouping/src/java/org/apache/lucene/search/grouping/dv/DVGroupFacetCollector.java Thu Mar 15 09:31:06 2012
@@ -0,0 +1,288 @@
+package org.apache.lucene.search.grouping.dv;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.index.AtomicReader;
+import org.apache.lucene.index.AtomicReaderContext;
+import org.apache.lucene.index.DocValues;
+import org.apache.lucene.search.grouping.AbstractGroupFacetCollector;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.PriorityQueue;
+import org.apache.lucene.util.SentinelIntSet;
+import org.apache.lucene.util.UnicodeUtil;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * An implementation of {@link AbstractGroupFacetCollector} that computes grouped facets based on docvalues.
+ *
+ * @lucene.experimental
+ */
+public abstract class DVGroupFacetCollector extends AbstractGroupFacetCollector {
+
+ final DocValues.Type groupDvType;
+ final boolean groupDiskResident;
+ final DocValues.Type facetFieldDvType;
+ final boolean facetDiskResident;
+
+ final List<GroupedFacetHit> groupedFacetHits;
+ final SentinelIntSet segmentGroupedFacetHits;
+
+ /**
+  * Creates the collector implementation that matches the docvalues type of the group field and of the
+  * facet field.
+  *
+  * Currently {@link DocValues.Type#BYTES_VAR_SORTED} and {@link DocValues.Type#BYTES_FIXED_SORTED} are the
+  * only docvalues types supported, for both the group and the facet field.
+  *
+  * @param groupField        The group field
+  * @param groupDvType       The docvalues type for the group field
+  * @param groupDiskResident Whether the group docvalues should be disk resident
+  * @param facetField        The facet field
+  * @param facetDvType       The docvalues type for the facet field
+  * @param facetDiskResident Whether the facet docvalues should be disk resident
+  * @param facetPrefix       The facet prefix a facet entry should start with to be included.
+  * @param initialSize       The initial allocation size of the internal int set and group facet list which should roughly
+  *                          match the total number of expected unique groups. Be aware that the heap usage is
+  *                          4 bytes * initialSize.
+  * @return a <code>DVGroupFacetCollector</code> implementation
+  * @throws IllegalArgumentException if the group docvalues type is not one of the supported types
+  */
+ public static DVGroupFacetCollector createDvGroupFacetCollector(String groupField,
+                                                                 DocValues.Type groupDvType,
+                                                                 boolean groupDiskResident,
+                                                                 String facetField,
+                                                                 DocValues.Type facetDvType,
+                                                                 boolean facetDiskResident,
+                                                                 BytesRef facetPrefix,
+                                                                 int initialSize) {
+   switch (groupDvType) {
+     case BYTES_VAR_SORTED:
+     case BYTES_FIXED_SORTED:
+       // The facet docvalues type is checked by the nested factory.
+       return GroupSortedBR.createGroupSortedFacetCollector(groupField, groupDvType, groupDiskResident, facetField, facetDvType, facetDiskResident, facetPrefix, initialSize);
+     default:
+       // Every other docvalues type is unsupported for the group field.
+       throw new IllegalArgumentException(String.format("Group valueType %s not supported", groupDvType));
+   }
+ }
+
+ /**
+ * Stores the docvalues settings of the group and facet field and allocates the list of grouped facet hits
+ * and the per segment int set, both with <code>initialSize</code>.
+ */
+ DVGroupFacetCollector(String groupField, DocValues.Type groupDvType, boolean groupDiskResident, String facetField, DocValues.Type facetFieldDvType, boolean facetDiskResident, BytesRef facetPrefix, int initialSize) {
+ super(groupField, facetField, facetPrefix);
+ this.groupDvType = groupDvType;
+ this.groupDiskResident = groupDiskResident;
+ this.facetFieldDvType = facetFieldDvType;
+ this.facetDiskResident = facetDiskResident;
+ groupedFacetHits = new ArrayList<GroupedFacetHit>(initialSize);
+ // -1 is passed as the second SentinelIntSet argument.
+ segmentGroupedFacetHits = new SentinelIntSet(initialSize, -1);
+ }
+
+ static abstract class GroupSortedBR extends DVGroupFacetCollector {
+
+ final BytesRef facetSpare = new BytesRef();
+ final BytesRef groupSpare = new BytesRef();
+ DocValues.SortedSource groupFieldSource;
+
+ // Passes all arguments through to the DVGroupFacetCollector constructor unchanged.
+ GroupSortedBR(String groupField, DocValues.Type groupDvType, boolean groupDiskResident, String facetField, DocValues.Type facetFieldDvType, boolean facetDiskResident, BytesRef facetPrefix, int initialSize) {
+ super(groupField, groupDvType, groupDiskResident, facetField, facetFieldDvType, facetDiskResident, facetPrefix, initialSize);
+ }
+
+ /**
+  * Creates the collector for a sorted bytes group field, selecting the implementation by the docvalues
+  * type of the facet field.
+  *
+  * @return a {@link FacetSortedBR} for the sorted bytes facet docvalues types
+  * @throws IllegalArgumentException if the facet docvalues type is not a sorted bytes type
+  */
+ static DVGroupFacetCollector createGroupSortedFacetCollector(String groupField,
+                                                              DocValues.Type groupDvType,
+                                                              boolean groupDiskResident,
+                                                              String facetField,
+                                                              DocValues.Type facetDvType,
+                                                              boolean facetDiskResident,
+                                                              BytesRef facetPrefix,
+                                                              int initialSize) {
+   switch (facetDvType) {
+     case BYTES_VAR_SORTED:
+     case BYTES_FIXED_SORTED:
+       return new FacetSortedBR(groupField, groupDvType, groupDiskResident, facetField, facetDvType, facetDiskResident, facetPrefix, initialSize);
+     default:
+       // Every other docvalues type is unsupported for the facet field.
+       throw new IllegalArgumentException(String.format("Facet valueType %s not supported", facetDvType));
+   }
+ }
+
+
+ static class FacetSortedBR extends GroupSortedBR {
+
+ private DocValues.SortedSource facetFieldSource;
+
+ // Passes all arguments through to the GroupSortedBR constructor; diskResident applies to the facet field.
+ FacetSortedBR(String groupField, DocValues.Type groupDvType, boolean groupDiskResident, String facetField, DocValues.Type facetDvType, boolean diskResident, BytesRef facetPrefix, int initialSize) {
+ super(groupField, groupDvType, groupDiskResident, facetField, facetDvType, diskResident, facetPrefix, initialSize);
+ }
+
+ /**
+  * Counts the facet value of the given document, unless its ord lies outside the
+  * [startFacetOrd, endFacetOrd) range or the group / facet combination has been counted before.
+  *
+  * @param doc the segment local document id
+  * @throws IOException If I/O related errors occur
+  */
+ public void collect(int doc) throws IOException {
+   final int facetOrd = facetFieldSource.ord(doc);
+   final boolean withinFacetRange = facetOrd >= startFacetOrd && facetOrd < endFacetOrd;
+   if (!withinFacetRange) {
+     return;
+   }
+
+   final int groupOrd = groupFieldSource.ord(doc);
+   // One int key per (group ord, facet ord) pair of this segment.
+   // NOTE(review): int arithmetic; looks like this could overflow for very large ord counts - confirm.
+   final int groupedFacetKey = (groupOrd * facetFieldSource.getValueCount()) + facetOrd;
+   if (segmentGroupedFacetHits.exists(groupedFacetKey)) {
+     return;
+   }
+
+   segmentTotalCount++;
+   segmentFacetCounts[facetOrd]++;
+   segmentGroupedFacetHits.put(groupedFacetKey);
+
+   // Keep the actual values, so the combination can be looked up again in the next segment.
+   final BytesRef groupValue = groupFieldSource.getByOrd(groupOrd, new BytesRef());
+   final BytesRef facetValue = facetFieldSource.getByOrd(facetOrd, new BytesRef());
+   groupedFacetHits.add(new GroupedFacetHit(groupValue, facetValue));
+ }
+
+ /**
+  * Switches the collector to the next segment: stores the result of the previous segment, loads the
+  * group and facet docvalues sources of the new segment, re-registers the group / facet combinations
+  * that were already counted, and computes the facet ord range selected by the facet prefix.
+  *
+  * @param context the context of the segment that is collected next
+  * @throws IOException If I/O related errors occur
+  */
+ public void setNextReader(AtomicReaderContext context) throws IOException {
+   // Store the counts of the previous segment before they are replaced.
+   if (segmentFacetCounts != null) {
+     segmentResults.add(createSegmentResult());
+   }
+
+   groupFieldSource = getDocValuesSortedSource(groupField, groupDvType, groupDiskResident, context.reader());
+   facetFieldSource = getDocValuesSortedSource(facetField, facetFieldDvType, facetDiskResident, context.reader());
+   segmentFacetCounts = new int[facetFieldSource.getValueCount()];
+   segmentTotalCount = 0;
+
+   // Ords are segment local, so the combinations counted so far are translated to the ords of this segment.
+   segmentGroupedFacetHits.clear();
+   for (GroupedFacetHit groupedFacetHit : groupedFacetHits) {
+     int facetOrd = facetFieldSource.getOrdByValue(groupedFacetHit.facetValue, facetSpare);
+     if (facetOrd < 0) {
+       continue; // facet value does not occur in this segment
+     }
+
+     int groupOrd = groupFieldSource.getOrdByValue(groupedFacetHit.groupValue, groupSpare);
+     if (groupOrd < 0) {
+       continue; // group value does not occur in this segment
+     }
+
+     int segmentGroupedFacetsIndex = (groupOrd * facetFieldSource.getValueCount()) + facetOrd;
+     segmentGroupedFacetHits.put(segmentGroupedFacetsIndex);
+   }
+
+   if (facetPrefix != null) {
+     startFacetOrd = facetFieldSource.getOrdByValue(facetPrefix, facetSpare);
+     if (startFacetOrd < 0) {
+       // Points to the ord one higher than facetPrefix
+       startFacetOrd = -startFacetOrd - 1;
+     }
+     BytesRef facetEndPrefix = BytesRef.deepCopyOf(facetPrefix);
+     facetEndPrefix.append(UnicodeUtil.BIG_TERM);
+     endFacetOrd = facetFieldSource.getOrdByValue(facetEndPrefix, facetSpare);
+     if (endFacetOrd < 0) {
+       // Points to the ord one higher than facetEndPrefix
+       endFacetOrd = -endFacetOrd - 1;
+     } else {
+       // facetEndPrefix itself is a value of this segment. Decoding a non-negative ord as an insertion
+       // point would make the exclusive end negative and reject every document, so step past the match.
+       endFacetOrd++;
+     }
+   } else {
+     startFacetOrd = 0;
+     endFacetOrd = facetFieldSource.getValueCount();
+   }
+ }
+
+ /**
+  * Creates the result of the current segment. When the facet range starts at ord 0 and the value at
+  * ord 0 has length 0, the count of ord 0 is reported as the missing count and left out of the total.
+  */
+ protected SegmentResult createSegmentResult() throws IOException {
+   final boolean firstOrdIsEmptyValue =
+       startFacetOrd == 0 && facetFieldSource.getByOrd(startFacetOrd, facetSpare).length == 0;
+   if (!firstOrdIsEmptyValue) {
+     return new SegmentResult(segmentFacetCounts, segmentTotalCount, facetFieldSource, startFacetOrd, endFacetOrd);
+   }
+
+   final int missing = segmentFacetCounts[0];
+   final int total = segmentTotalCount - segmentFacetCounts[0];
+   return new SegmentResult(segmentFacetCounts, total, missing, facetFieldSource, endFacetOrd);
+ }
+
+ /**
+  * Loads the sorted docvalues source of a field for the given segment reader.
+  *
+  * @param field        the field to load the docvalues for
+  * @param dvType       the docvalues type, used for the default source when the reader has no docvalues for the field
+  * @param diskResident whether the direct source should be used instead of the regular source
+  * @param reader       the segment reader
+  * @return the source as sorted source
+  * @throws IOException If I/O related errors occur
+  */
+ private DocValues.SortedSource getDocValuesSortedSource(String field, DocValues.Type dvType, boolean diskResident, AtomicReader reader) throws IOException {
+   final DocValues docValues = reader.docValues(field);
+   final DocValues.Source source;
+   if (docValues == null) {
+     // No docvalues for this field in this segment: fall back to the default sorted source.
+     source = DocValues.getDefaultSortedSource(dvType, reader.maxDoc());
+   } else if (diskResident) {
+     source = docValues.getDirectSource();
+   } else {
+     source = docValues.getSource();
+   }
+   return source.asSortedSource();
+ }
+
+ /**
+  * Segment result that resolves the merge terms from the sorted facet docvalues source by ord.
+  */
+ private static class SegmentResult extends AbstractGroupFacetCollector.SegmentResult {
+
+   final DocValues.SortedSource facetFieldSource;
+   final BytesRef spare = new BytesRef();
+
+   /**
+    * Result with an explicit missing count; merging starts at ord 1.
+    */
+   SegmentResult(int[] counts, int total, int missing, DocValues.SortedSource facetFieldSource, int endFacetOrd) {
+     this(counts, total, missing, facetFieldSource, 1, endFacetOrd);
+   }
+
+   /**
+    * Result with a missing count of 0; merging starts at <code>startFacetOrd</code>.
+    */
+   SegmentResult(int[] counts, int total, DocValues.SortedSource facetFieldSource, int startFacetOrd, int endFacetOrd) {
+     this(counts, total, 0, facetFieldSource, startFacetOrd, endFacetOrd);
+   }
+
+   // Shared initialisation of both constructors above.
+   private SegmentResult(int[] counts, int total, int missing, DocValues.SortedSource facetFieldSource, int firstMergePos, int endFacetOrd) {
+     super(counts, total, missing, endFacetOrd);
+     this.facetFieldSource = facetFieldSource;
+     this.mergePos = firstMergePos;
+     // Only resolve the first merge term when there is at least one ord to merge.
+     if (mergePos < maxTermPos) {
+       mergeTerm = facetFieldSource.getByOrd(mergePos, spare);
+     }
+   }
+
+   /**
+    * {@inheritDoc}
+    */
+   protected void nextTerm() throws IOException {
+     mergeTerm = facetFieldSource.getByOrd(mergePos, spare);
+   }
+
+ }
+
+ }
+
+ }
+
+}
+
+// Pairs a group value with a facet value. The collector keeps a list of these for the combinations it has
+// counted and looks the values up again by value when it moves to the next segment.
+class GroupedFacetHit {
+
+ final BytesRef groupValue;
+ final BytesRef facetValue;
+
+ GroupedFacetHit(BytesRef groupValue, BytesRef facetValue) {
+ this.groupValue = groupValue;
+ this.facetValue = facetValue;
+ }
+}
Modified: lucene/dev/trunk/modules/grouping/src/java/org/apache/lucene/search/grouping/term/TermGroupFacetCollector.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/grouping/src/java/org/apache/lucene/search/grouping/term/TermGroupFacetCollector.java?rev=1300860&r1=1300859&r2=1300860&view=diff
==============================================================================
--- lucene/dev/trunk/modules/grouping/src/java/org/apache/lucene/search/grouping/term/TermGroupFacetCollector.java (original)
+++ lucene/dev/trunk/modules/grouping/src/java/org/apache/lucene/search/grouping/term/TermGroupFacetCollector.java Thu Mar 15 09:31:06 2012
@@ -38,14 +38,9 @@ public abstract class TermGroupFacetColl
final List<GroupedFacetHit> groupedFacetHits;
final SentinelIntSet segmentGroupedFacetHits;
- final List<SegmentResult> segmentResults;
final BytesRef spare = new BytesRef();
FieldCache.DocTermsIndex groupFieldTermsIndex;
- int[] segmentFacetCounts;
- int segmentTotalCount;
- int startFacetOrd;
- int endFacetOrd;
/**
* Factory method for creating the right implementation based on the fact whether the facet field contains
@@ -76,57 +71,8 @@ public abstract class TermGroupFacetColl
super(groupField, facetField, facetPrefix);
groupedFacetHits = new ArrayList<GroupedFacetHit>(initialSize);
segmentGroupedFacetHits = new SentinelIntSet(initialSize, -1);
- segmentResults = new ArrayList<SegmentResult>();
}
- /**
- * {@inheritDoc}
- */
- public GroupedFacetResult mergeSegmentResults(int size, int minCount, boolean orderByCount) throws IOException {
- if (segmentFacetCounts != null) {
- segmentResults.add(createSegmentResult());
- segmentFacetCounts = null; // reset
- }
-
- int totalCount = 0;
- int missingCount = 0;
- SegmentResultPriorityQueue segments = new SegmentResultPriorityQueue(segmentResults.size());
- for (SegmentResult segmentResult : segmentResults) {
- missingCount += segmentResult.missing;
- if (segmentResult.mergePos >= segmentResult.maxTermPos) {
- continue;
- }
- totalCount += segmentResult.total;
- segmentResult.initializeForMerge();
- segments.add(segmentResult);
- }
-
- GroupedFacetResult facetResult = new GroupedFacetResult(size, minCount, orderByCount, totalCount, missingCount);
- while (segments.size() > 0) {
- SegmentResult segmentResult = segments.top();
- BytesRef currentFacetValue = BytesRef.deepCopyOf(segmentResult.mergeTerm);
- int count = 0;
-
- do {
- count += segmentResult.counts[segmentResult.mergePos++];
- if (segmentResult.mergePos < segmentResult.maxTermPos) {
- segmentResult.nextTerm();
- segmentResult = segments.updateTop();
- } else {
- segments.pop();
- segmentResult = segments.top();
- if (segmentResult == null) {
- break;
- }
- }
- } while (currentFacetValue.equals(segmentResult.mergeTerm));
- facetResult.addFacetCount(currentFacetValue, count);
- }
- return facetResult;
- }
-
- protected abstract SegmentResult createSegmentResult();
-
// Implementation for single valued facet fields.
static class SV extends TermGroupFacetCollector {
@@ -202,9 +148,30 @@ public abstract class TermGroupFacetColl
}
}
- protected SegmentResult createSegmentResult() {
+ protected SegmentResult createSegmentResult() throws IOException {
return new SegmentResult(segmentFacetCounts, segmentTotalCount, facetFieldTermsIndex.getTermsEnum(), startFacetOrd, endFacetOrd);
}
+
+ // Segment result for the single valued case; merge terms are read from a TermsEnum.
+ private static class SegmentResult extends AbstractGroupFacetCollector.SegmentResult {
+
+ final TermsEnum tenum;
+
+ SegmentResult(int[] counts, int total, TermsEnum tenum, int startFacetOrd, int endFacetOrd) throws IOException {
+ // counts[0] is passed as the missing count and subtracted from the total.
+ super(counts, total - counts[0], counts[0], endFacetOrd);
+ this.tenum = tenum;
+ // Position 0 is never merged: merging starts at 1 when the range starts at 0.
+ this.mergePos = startFacetOrd == 0 ? 1 : startFacetOrd;
+ // Only position the enum when there is at least one term to merge.
+ if (mergePos < maxTermPos) {
+ tenum.seekExact(mergePos);
+ mergeTerm = tenum.term();
+ }
+ }
+
+ // Advances the enum by one term and makes that term the current merge term.
+ protected void nextTerm() throws IOException {
+ mergeTerm = tenum.next();
+ }
+
+ }
+
+
}
// Implementation for multi valued facet fields.
@@ -316,54 +283,28 @@ public abstract class TermGroupFacetColl
}
}
- protected SegmentResult createSegmentResult() {
+ protected SegmentResult createSegmentResult() throws IOException {
return new SegmentResult(segmentFacetCounts, segmentTotalCount, facetFieldDocTermOrds.numTerms(), facetOrdTermsEnum, startFacetOrd, endFacetOrd);
}
- }
-}
+ private static class SegmentResult extends AbstractGroupFacetCollector.SegmentResult {
-class SegmentResult {
-
- final int[] counts;
- final int total;
- final int missing;
-
- // Used for merging the segment results
- BytesRef mergeTerm;
- int mergePos;
- final int maxTermPos;
- final TermsEnum tenum;
-
- SegmentResult(int[] counts, int total, TermsEnum tenum, int startFacetOrd, int endFacetOrd) {
- this.counts = counts;
- this.missing = counts[0];
- this.total = total - missing;
- this.tenum = tenum;
- this.mergePos = startFacetOrd == 0 ? 1 : startFacetOrd;
- this.maxTermPos = endFacetOrd;
- }
+ final TermsEnum tenum;
- SegmentResult(int[] counts, int total, int missingCountIndex, TermsEnum tenum, int startFacetOrd, int endFacetOrd) {
- this.counts = counts;
- this.missing = counts[missingCountIndex];
- this.total = total - missing;
- this.tenum = tenum;
- this.mergePos = startFacetOrd;
- if (endFacetOrd == missingCountIndex + 1) {
- this.maxTermPos = missingCountIndex;
- } else {
- this.maxTermPos = endFacetOrd;
- }
- }
+ /**
+  * Creates the segment result for the multi valued case.
+  *
+  * @param counts            the facet counts of the segment
+  * @param total             the total count of the segment, including the missing count
+  * @param missingCountIndex the index in <code>counts</code> that holds the missing count
+  * @param tenum             the terms enum the merge terms are read from
+  * @param startFacetOrd     the position merging starts at
+  * @param endFacetOrd       the exclusive end position; when it equals <code>missingCountIndex + 1</code>
+  *                          the missing count position is excluded from merging
+  * @throws IOException If I/O related errors occur
+  */
+ SegmentResult(int[] counts, int total, int missingCountIndex, TermsEnum tenum, int startFacetOrd, int endFacetOrd) throws IOException {
+   super(counts, total - counts[missingCountIndex], counts[missingCountIndex],
+       endFacetOrd == missingCountIndex + 1 ? missingCountIndex : endFacetOrd);
+   this.tenum = tenum;
+   this.mergePos = startFacetOrd;
+   // Only position the enum when there is a term to merge. The merge skips results whose mergePos is not
+   // below maxTermPos, and the single valued sibling constructor applies the same guard before seeking.
+   if (mergePos < maxTermPos) {
+     tenum.seekExact(mergePos);
+     mergeTerm = tenum.term();
+   }
+ }
- void initializeForMerge() throws IOException {
- tenum.seekExact(mergePos);
- mergeTerm = tenum.term();
- }
+ // Advances the enum by one term and makes that term the current merge term.
+ protected void nextTerm() throws IOException {
+ mergeTerm = tenum.next();
+ }
- void nextTerm() throws IOException {
- mergeTerm = tenum.next();
+ }
}
}
@@ -377,15 +318,4 @@ class GroupedFacetHit {
this.groupValue = groupValue;
this.facetValue = facetValue;
}
-}
-
-class SegmentResultPriorityQueue extends PriorityQueue<SegmentResult> {
-
- SegmentResultPriorityQueue(int maxSize) {
- super(maxSize);
- }
-
- protected boolean lessThan(SegmentResult a, SegmentResult b) {
- return a.mergeTerm.compareTo(b.mergeTerm) < 0;
- }
-}
+}
\ No newline at end of file
Copied: lucene/dev/trunk/modules/grouping/src/test/org/apache/lucene/search/grouping/GroupFacetCollectorTest.java (from r1300853, lucene/dev/trunk/modules/grouping/src/test/org/apache/lucene/search/grouping/TermGroupFacetCollectorTest.java)
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/grouping/src/test/org/apache/lucene/search/grouping/GroupFacetCollectorTest.java?p2=lucene/dev/trunk/modules/grouping/src/test/org/apache/lucene/search/grouping/GroupFacetCollectorTest.java&p1=lucene/dev/trunk/modules/grouping/src/test/org/apache/lucene/search/grouping/TermGroupFacetCollectorTest.java&r1=1300853&r2=1300860&rev=1300860&view=diff
==============================================================================
--- lucene/dev/trunk/modules/grouping/src/test/org/apache/lucene/search/grouping/TermGroupFacetCollectorTest.java (original)
+++ lucene/dev/trunk/modules/grouping/src/test/org/apache/lucene/search/grouping/GroupFacetCollectorTest.java Thu Mar 15 09:31:06 2012
@@ -26,6 +26,7 @@ import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.TermQuery;
+import org.apache.lucene.search.grouping.dv.DVGroupFacetCollector;
import org.apache.lucene.search.grouping.term.TermGroupFacetCollector;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;
@@ -34,7 +35,7 @@ import org.apache.lucene.util._TestUtil;
import java.io.IOException;
import java.util.*;
-public class TermGroupFacetCollectorTest extends AbstractGroupingTestCase {
+public class GroupFacetCollectorTest extends AbstractGroupingTestCase {
public void testSimple() throws Exception {
final String groupField = "hotel";
@@ -47,47 +48,47 @@ public class TermGroupFacetCollectorTest
dir,
newIndexWriterConfig(TEST_VERSION_CURRENT,
new MockAnalyzer(random)).setMergePolicy(newLogMergePolicy()));
- boolean canUseIDV = false;// Enable later... !"Lucene3x".equals(w.w.getConfig().getCodec().getName());
+ boolean canUseDV = !"Lucene3x".equals(w.w.getConfig().getCodec().getName());
+ boolean useDv = canUseDV && random.nextBoolean();
// 0
Document doc = new Document();
- addGroupField(doc, groupField, "a", canUseIDV);
- doc.add(new Field("airport", "ams", TextField.TYPE_UNSTORED));
- doc.add(new Field("duration", "5", TextField.TYPE_UNSTORED));
+ addField(doc, groupField, "a", canUseDV);
+ addField(doc, "airport", "ams", canUseDV);
+ addField(doc, "duration", "5", canUseDV);
w.addDocument(doc);
// 1
doc = new Document();
- addGroupField(doc, groupField, "a", canUseIDV);
- doc.add(new Field("airport", "dus", TextField.TYPE_STORED));
- doc.add(new Field("duration", "10", TextField.TYPE_UNSTORED));
+ addField(doc, groupField, "a", canUseDV);
+ addField(doc, "airport", "dus", canUseDV);
+ addField(doc, "duration", "10", canUseDV);
w.addDocument(doc);
// 2
doc = new Document();
- addGroupField(doc, groupField, "b", canUseIDV);
- doc.add(new Field("airport", "ams", TextField.TYPE_UNSTORED));
- doc.add(new Field("duration", "10", TextField.TYPE_UNSTORED));
+ addField(doc, groupField, "b", canUseDV);
+ addField(doc, "airport", "ams", canUseDV);
+ addField(doc, "duration", "10", canUseDV);
w.addDocument(doc);
w.commit(); // To ensure a second segment
// 3
doc = new Document();
- addGroupField(doc, groupField, "b", canUseIDV);
- doc.add(new Field("airport", "ams", TextField.TYPE_UNSTORED));
- doc.add(new Field("duration", "5", TextField.TYPE_UNSTORED));
+ addField(doc, groupField, "b", canUseDV);
+ addField(doc, "airport", "ams", canUseDV);
+ addField(doc, "duration", "5", canUseDV);
w.addDocument(doc);
// 4
doc = new Document();
- addGroupField(doc, groupField, "b", canUseIDV);
- doc.add(new Field("airport", "ams", TextField.TYPE_UNSTORED));
- doc.add(new Field("duration", "5", TextField.TYPE_UNSTORED));
+ addField(doc, groupField, "b", canUseDV);
+ addField(doc, "airport", "ams", canUseDV);
+ addField(doc, "duration", "5", canUseDV);
w.addDocument(doc);
IndexSearcher indexSearcher = new IndexSearcher(w.getReader());
- TermGroupFacetCollector groupedAirportFacetCollector =
- TermGroupFacetCollector.createTermGroupFacetCollector(groupField, "airport", false, null, 128);
+ AbstractGroupFacetCollector groupedAirportFacetCollector = createRandomCollector(groupField, "airport", null, false, useDv);
indexSearcher.search(new MatchAllDocsQuery(), groupedAirportFacetCollector);
TermGroupFacetCollector.GroupedFacetResult airportResult = groupedAirportFacetCollector.mergeSegmentResults(10, 0, false);
assertEquals(3, airportResult.getTotalCount());
@@ -101,8 +102,7 @@ public class TermGroupFacetCollectorTest
assertEquals(1, entries.get(1).getCount());
- TermGroupFacetCollector groupedDurationFacetCollector =
- TermGroupFacetCollector.createTermGroupFacetCollector(groupField, "duration", false, null, 128);
+ AbstractGroupFacetCollector groupedDurationFacetCollector = createRandomCollector(groupField, "duration", null, false, useDv);
indexSearcher.search(new MatchAllDocsQuery(), groupedDurationFacetCollector);
TermGroupFacetCollector.GroupedFacetResult durationResult = groupedDurationFacetCollector.mergeSegmentResults(10, 0, false);
assertEquals(4, durationResult.getTotalCount());
@@ -117,34 +117,34 @@ public class TermGroupFacetCollectorTest
// 5
doc = new Document();
- addGroupField(doc, groupField, "b", canUseIDV);
- doc.add(new Field("duration", "5", TextField.TYPE_UNSTORED));
+ addField(doc, groupField, "b", canUseDV);
+ addField(doc, "duration", "5", canUseDV);
w.addDocument(doc);
// 6
doc = new Document();
- addGroupField(doc, groupField, "b", canUseIDV);
- doc.add(new Field("airport", "bru", TextField.TYPE_UNSTORED));
- doc.add(new Field("duration", "10", TextField.TYPE_UNSTORED));
+ addField(doc, groupField, "b", canUseDV);
+ addField(doc, "airport", "bru", canUseDV);
+ addField(doc, "duration", "10", canUseDV);
w.addDocument(doc);
// 7
doc = new Document();
- addGroupField(doc, groupField, "b", canUseIDV);
- doc.add(new Field("airport", "bru", TextField.TYPE_UNSTORED));
- doc.add(new Field("duration", "15", TextField.TYPE_UNSTORED));
+ addField(doc, groupField, "b", canUseDV);
+ addField(doc, "airport", "bru", canUseDV);
+ addField(doc, "duration", "15", canUseDV);
w.addDocument(doc);
// 8
doc = new Document();
- addGroupField(doc, groupField, "a", canUseIDV);
- doc.add(new Field("airport", "bru", TextField.TYPE_UNSTORED));
- doc.add(new Field("duration", "10", TextField.TYPE_UNSTORED));
+ addField(doc, groupField, "a", canUseDV);
+ addField(doc, "airport", "bru", canUseDV);
+ addField(doc, "duration", "10", canUseDV);
w.addDocument(doc);
indexSearcher.getIndexReader().close();
indexSearcher = new IndexSearcher(w.getReader());
- groupedAirportFacetCollector = TermGroupFacetCollector.createTermGroupFacetCollector(groupField, "airport", true, null, 128);
+ groupedAirportFacetCollector = createRandomCollector(groupField, "airport", null, true, useDv);
indexSearcher.search(new MatchAllDocsQuery(), groupedAirportFacetCollector);
airportResult = groupedAirportFacetCollector.mergeSegmentResults(3, 0, true);
assertEquals(5, airportResult.getTotalCount());
@@ -157,7 +157,7 @@ public class TermGroupFacetCollectorTest
assertEquals("dus", entries.get(1).getValue().utf8ToString());
assertEquals(1, entries.get(1).getCount());
- groupedDurationFacetCollector = TermGroupFacetCollector.createTermGroupFacetCollector(groupField, "duration", false, null, 128);
+ groupedDurationFacetCollector = createRandomCollector(groupField, "duration", null, false, useDv);
indexSearcher.search(new MatchAllDocsQuery(), groupedDurationFacetCollector);
durationResult = groupedDurationFacetCollector.mergeSegmentResults(10, 2, true);
assertEquals(5, durationResult.getTotalCount());
@@ -170,21 +170,21 @@ public class TermGroupFacetCollectorTest
// 9
doc = new Document();
- addGroupField(doc, groupField, "c", canUseIDV);
- doc.add(new Field("airport", "bru", TextField.TYPE_UNSTORED));
- doc.add(new Field("duration", "15", TextField.TYPE_UNSTORED));
+ addField(doc, groupField, "c", canUseDV);
+ addField(doc, "airport", "bru", canUseDV);
+ addField(doc, "duration", "15", canUseDV);
w.addDocument(doc);
// 10
doc = new Document();
- addGroupField(doc, groupField, "c", canUseIDV);
- doc.add(new Field("airport", "dus", TextField.TYPE_UNSTORED));
- doc.add(new Field("duration", "10", TextField.TYPE_UNSTORED));
+ addField(doc, groupField, "c", canUseDV);
+ addField(doc, "airport", "dus", canUseDV);
+ addField(doc, "duration", "10", canUseDV);
w.addDocument(doc);
indexSearcher.getIndexReader().close();
indexSearcher = new IndexSearcher(w.getReader());
- groupedAirportFacetCollector = TermGroupFacetCollector.createTermGroupFacetCollector(groupField, "airport", false, null, 128);
+ groupedAirportFacetCollector = createRandomCollector(groupField, "airport", null, false, useDv);
indexSearcher.search(new MatchAllDocsQuery(), groupedAirportFacetCollector);
airportResult = groupedAirportFacetCollector.mergeSegmentResults(10, 0, false);
assertEquals(7, airportResult.getTotalCount());
@@ -199,7 +199,7 @@ public class TermGroupFacetCollectorTest
assertEquals("dus", entries.get(2).getValue().utf8ToString());
assertEquals(2, entries.get(2).getCount());
- groupedDurationFacetCollector = TermGroupFacetCollector.createTermGroupFacetCollector(groupField, "duration", false, new BytesRef("1"), 128);
+ groupedDurationFacetCollector = createRandomCollector(groupField, "duration", "1", false, useDv);
indexSearcher.search(new MatchAllDocsQuery(), groupedDurationFacetCollector);
durationResult = groupedDurationFacetCollector.mergeSegmentResults(10, 0, true);
assertEquals(5, durationResult.getTotalCount());
@@ -217,10 +217,10 @@ public class TermGroupFacetCollectorTest
dir.close();
}
- private void addGroupField(Document doc, String groupField, String value, boolean canUseIDV) {
- doc.add(new Field(groupField, value, TextField.TYPE_UNSTORED));
+ private void addField(Document doc, String field, String value, boolean canUseIDV) {
+ doc.add(new Field(field, value, StringField.TYPE_UNSTORED));
if (canUseIDV) {
- doc.add(new DocValuesField(groupField, new BytesRef(value), DocValues.Type.BYTES_VAR_SORTED));
+ doc.add(new DocValuesField(field, new BytesRef(value), DocValues.Type.BYTES_VAR_SORTED));
}
}
@@ -232,6 +232,7 @@ public class TermGroupFacetCollectorTest
final IndexSearcher searcher = newSearcher(context.indexReader);
for (int searchIter = 0; searchIter < 100; searchIter++) {
+ boolean useDv = context.useDV && random.nextBoolean();
String searchTerm = context.contentStrings[random.nextInt(context.contentStrings.length)];
int limit = random.nextInt(context.facetValues.size());
int offset = random.nextInt(context.facetValues.size() - limit);
@@ -254,7 +255,7 @@ public class TermGroupFacetCollectorTest
}
GroupedFacetResult expectedFacetResult = createExpectedFacetResult(searchTerm, context, offset, limit, minCount, orderByCount, facetPrefix);
- TermGroupFacetCollector groupFacetCollector = createRandomCollector("group", "facet", facetPrefix, multipleFacetsPerDocument);
+ AbstractGroupFacetCollector groupFacetCollector = createRandomCollector("group", "facet", facetPrefix, multipleFacetsPerDocument, useDv);
searcher.search(new TermQuery(new Term("content", searchTerm)), groupFacetCollector);
TermGroupFacetCollector.GroupedFacetResult actualFacetResult = groupFacetCollector.mergeSegmentResults(size, minCount, orderByCount);
@@ -357,19 +358,37 @@ public class TermGroupFacetCollectorTest
new MockAnalyzer(random)
)
);
+ boolean canUseDV = !"Lucene3x".equals(writer.w.getConfig().getCodec().getName());
+ boolean useDv = canUseDV && random.nextBoolean();
Document doc = new Document();
Document docNoGroup = new Document();
Document docNoFacet = new Document();
Document docNoGroupNoFacet = new Document();
Field group = newField("group", "", StringField.TYPE_UNSTORED);
+ DocValuesField groupDc = new DocValuesField("group", new BytesRef(), DocValues.Type.BYTES_VAR_SORTED);
+ if (useDv) {
+ doc.add(groupDc);
+ docNoFacet.add(groupDc);
+ }
doc.add(group);
docNoFacet.add(group);
- Field[] facetFields = multipleFacetValuesPerDocument? new Field[2 + random.nextInt(6)] : new Field[1];
- for (int i = 0; i < facetFields.length; i++) {
- facetFields[i] = newField("facet", "", StringField.TYPE_UNSTORED);
- doc.add(facetFields[i]);
- docNoGroup.add(facetFields[i]);
+ Field[] facetFields;
+ if (useDv) {
+ facetFields = new Field[2];
+ facetFields[0] = newField("facet", "", StringField.TYPE_UNSTORED);
+ doc.add(facetFields[0]);
+ docNoGroup.add(facetFields[0]);
+ facetFields[1] = new DocValuesField("facet", new BytesRef(), DocValues.Type.BYTES_VAR_SORTED);
+ doc.add(facetFields[1]);
+ docNoGroup.add(facetFields[1]);
+ } else {
+ facetFields = multipleFacetValuesPerDocument ? new Field[2 + random.nextInt(6)] : new Field[1];
+ for (int i = 0; i < facetFields.length; i++) {
+ facetFields[i] = newField("facet", "", StringField.TYPE_UNSTORED);
+ doc.add(facetFields[i]);
+ docNoGroup.add(facetFields[i]);
+ }
}
Field content = newField("content", "", StringField.TYPE_UNSTORED);
doc.add(content);
@@ -412,7 +431,7 @@ public class TermGroupFacetCollectorTest
List<String> facetVals = new ArrayList<String>();
if (random.nextInt(24) != 18) {
- for (Field facetField : facetFields) {
+ if (useDv) {
String facetValue = facetValues.get(random.nextInt(facetValues.size()));
uniqueFacetValues.add(facetValue);
if (!facetToGroups.containsKey(facetValue)) {
@@ -423,8 +442,24 @@ public class TermGroupFacetCollectorTest
if (groupsInFacet.size() > facetWithMostGroups) {
facetWithMostGroups = groupsInFacet.size();
}
- facetField.setStringValue(facetValue);
+ facetFields[0].setStringValue(facetValue);
+ facetFields[1].setBytesValue(new BytesRef(facetValue));
facetVals.add(facetValue);
+ } else {
+ for (Field facetField : facetFields) {
+ String facetValue = facetValues.get(random.nextInt(facetValues.size()));
+ uniqueFacetValues.add(facetValue);
+ if (!facetToGroups.containsKey(facetValue)) {
+ facetToGroups.put(facetValue, new HashSet<String>());
+ }
+ Set<String> groupsInFacet = facetToGroups.get(facetValue);
+ groupsInFacet.add(groupValue);
+ if (groupsInFacet.size() > facetWithMostGroups) {
+ facetWithMostGroups = groupsInFacet.size();
+ }
+ facetField.setStringValue(facetValue);
+ facetVals.add(facetValue);
+ }
}
} else {
uniqueFacetValues.add(null);
@@ -443,6 +478,9 @@ public class TermGroupFacetCollectorTest
}
if (groupValue != null) {
+ if (useDv) {
+ groupDc.setBytesValue(new BytesRef(groupValue));
+ }
group.setStringValue(groupValue);
}
content.setStringValue(contentStr);
@@ -460,7 +498,7 @@ public class TermGroupFacetCollectorTest
DirectoryReader reader = writer.getReader();
writer.close();
- return new IndexContext(searchTermToFacetToGroups, reader, numDocs, dir, facetWithMostGroups, numGroups, contentBrs, uniqueFacetValues);
+ return new IndexContext(searchTermToFacetToGroups, reader, numDocs, dir, facetWithMostGroups, numGroups, contentBrs, uniqueFacetValues, useDv);
}
private GroupedFacetResult createExpectedFacetResult(String searchTerm, IndexContext context, int offset, int limit, int minCount, final boolean orderByCount, String facetPrefix) {
@@ -532,9 +570,14 @@ public class TermGroupFacetCollectorTest
return new GroupedFacetResult(totalCount, totalMissCount, entriesResult);
}
- private TermGroupFacetCollector createRandomCollector(String groupField, String facetField, String facetPrefix, boolean multipleFacetsPerDocument) {
+ private AbstractGroupFacetCollector createRandomCollector(String groupField, String facetField, String facetPrefix, boolean multipleFacetsPerDocument, boolean useDv) {
BytesRef facetPrefixBR = facetPrefix == null ? null : new BytesRef(facetPrefix);
- return TermGroupFacetCollector.createTermGroupFacetCollector(groupField, facetField, multipleFacetsPerDocument, facetPrefixBR, random.nextInt(1024));
+ if (useDv) {
+ return DVGroupFacetCollector.createDvGroupFacetCollector(groupField, DocValues.Type.BYTES_VAR_SORTED,
+ random.nextBoolean(), facetField, DocValues.Type.BYTES_VAR_SORTED, random.nextBoolean(), facetPrefixBR, random.nextInt(1024));
+ } else {
+ return TermGroupFacetCollector.createTermGroupFacetCollector(groupField, facetField, multipleFacetsPerDocument, facetPrefixBR, random.nextInt(1024));
+ }
}
private String getFromSet(Set<String> set, int index) {
@@ -558,9 +601,10 @@ public class TermGroupFacetCollectorTest
final int facetWithMostGroups;
final int numGroups;
final String[] contentStrings;
+ final boolean useDV;
public IndexContext(Map<String, Map<String, Set<String>>> searchTermToFacetGroups, DirectoryReader r,
- int numDocs, Directory dir, int facetWithMostGroups, int numGroups, String[] contentStrings, NavigableSet<String> facetValues) {
+ int numDocs, Directory dir, int facetWithMostGroups, int numGroups, String[] contentStrings, NavigableSet<String> facetValues, boolean useDV) {
this.searchTermToFacetGroups = searchTermToFacetGroups;
this.indexReader = r;
this.numDocs = numDocs;
@@ -569,6 +613,7 @@ public class TermGroupFacetCollectorTest
this.numGroups = numGroups;
this.contentStrings = contentStrings;
this.facetValues = facetValues;
+ this.useDV = useDV;
}
}