You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by mv...@apache.org on 2011/11/24 20:08:00 UTC
svn commit: r1205960 [1/2] - in /lucene/dev/trunk/modules/grouping: ./
src/java/org/apache/lucene/search/grouping/
src/java/org/apache/lucene/search/grouping/dv/
src/test/org/apache/lucene/search/grouping/
Author: mvg
Date: Thu Nov 24 19:07:59 2011
New Revision: 1205960
URL: http://svn.apache.org/viewvc?rev=1205960&view=rev
Log:
LUCENE-3496: Support grouping by IndexDocValues
Added:
lucene/dev/trunk/modules/grouping/src/java/org/apache/lucene/search/grouping/dv/
lucene/dev/trunk/modules/grouping/src/java/org/apache/lucene/search/grouping/dv/DVAllGroupHeadsCollector.java
lucene/dev/trunk/modules/grouping/src/java/org/apache/lucene/search/grouping/dv/DVAllGroupsCollector.java
lucene/dev/trunk/modules/grouping/src/java/org/apache/lucene/search/grouping/dv/DVFirstPassGroupingCollector.java
lucene/dev/trunk/modules/grouping/src/java/org/apache/lucene/search/grouping/dv/DVSecondPassGroupingCollector.java
Modified:
lucene/dev/trunk/modules/grouping/CHANGES.txt
lucene/dev/trunk/modules/grouping/src/java/org/apache/lucene/search/grouping/package.html
lucene/dev/trunk/modules/grouping/src/test/org/apache/lucene/search/grouping/AllGroupHeadsCollectorTest.java
lucene/dev/trunk/modules/grouping/src/test/org/apache/lucene/search/grouping/AllGroupsCollectorTest.java
lucene/dev/trunk/modules/grouping/src/test/org/apache/lucene/search/grouping/TestGrouping.java
Modified: lucene/dev/trunk/modules/grouping/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/grouping/CHANGES.txt?rev=1205960&r1=1205959&r2=1205960&view=diff
==============================================================================
--- lucene/dev/trunk/modules/grouping/CHANGES.txt (original)
+++ lucene/dev/trunk/modules/grouping/CHANGES.txt Thu Nov 24 19:07:59 2011
@@ -14,3 +14,7 @@ API Changes
LUCENE-3483: Move Function grouping collectors from Solr to
grouping module. (Martijn van Groningen)
+
+New features
+
+LUCENE-3496: Support grouping by IndexDocValues. (Martijn van Groningen)
\ No newline at end of file
Added: lucene/dev/trunk/modules/grouping/src/java/org/apache/lucene/search/grouping/dv/DVAllGroupHeadsCollector.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/grouping/src/java/org/apache/lucene/search/grouping/dv/DVAllGroupHeadsCollector.java?rev=1205960&view=auto
==============================================================================
--- lucene/dev/trunk/modules/grouping/src/java/org/apache/lucene/search/grouping/dv/DVAllGroupHeadsCollector.java (added)
+++ lucene/dev/trunk/modules/grouping/src/java/org/apache/lucene/search/grouping/dv/DVAllGroupHeadsCollector.java Thu Nov 24 19:07:59 2011
@@ -0,0 +1,305 @@
+package org.apache.lucene.search.grouping.dv;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.values.IndexDocValues;
+import org.apache.lucene.index.values.ValueType;
+import org.apache.lucene.search.*;
+import org.apache.lucene.search.grouping.AbstractAllGroupHeadsCollector;
+import org.apache.lucene.util.BytesRef;
+
+import java.io.IOException;
+import java.util.Collection;
+import java.util.HashMap;
+import java.util.Map;
+
+/**
+ * A base implementation of {@link org.apache.lucene.search.grouping.AbstractAllGroupHeadsCollector} for retrieving
+ * the most relevant groups when grouping on an indexed doc values field.
+ *
+ * @lucene.experimental
+ */
+//TODO - (MvG): Add more optimized implementations
+public abstract class DVAllGroupHeadsCollector<GH extends AbstractAllGroupHeadsCollector.GroupHead> extends AbstractAllGroupHeadsCollector<GH> {
+
+ final String groupField;
+ final boolean diskResident;
+ final ValueType valueType;
+ final BytesRef scratchBytesRef = new BytesRef();
+
+ IndexReader.AtomicReaderContext readerContext;
+ Scorer scorer;
+
+ DVAllGroupHeadsCollector(String groupField, ValueType valueType, int numberOfSorts, boolean diskResident) {
+ super(numberOfSorts);
+ this.groupField = groupField;
+ this.valueType = valueType;
+ this.diskResident = diskResident;
+ }
+
+ /**
+ * Creates an <code>AbstractAllGroupHeadsCollector</code> instance based on the supplied arguments.
+ * This factory method decides which implementation is best suited.
+ *
+ * @param groupField The field to group by
+ * @param sortWithinGroup The sort within each group
+ * @param type The {@link ValueType} which is used to select a concrete implementation.
+ * @param diskResident Whether the values to group by should be disk resident
+ * @return an <code>AbstractAllGroupHeadsCollector</code> instance based on the supplied arguments
+ * @throws IOException If I/O related errors occur
+ */
+ public static AbstractAllGroupHeadsCollector create(String groupField, Sort sortWithinGroup, ValueType type, boolean diskResident) throws IOException {
+ switch (type) {
+ case VAR_INTS:
+ case FIXED_INTS_8:
+ case FIXED_INTS_16:
+ case FIXED_INTS_32:
+ case FIXED_INTS_64:
+ return new GeneralAllGroupHeadsCollector.Lng(groupField, type, sortWithinGroup, diskResident);
+ case FLOAT_32:
+ case FLOAT_64:
+ return new GeneralAllGroupHeadsCollector.Dbl(groupField, type, sortWithinGroup, diskResident);
+ case BYTES_FIXED_STRAIGHT:
+ case BYTES_FIXED_DEREF:
+ case BYTES_VAR_STRAIGHT:
+ case BYTES_VAR_DEREF:
+ return new GeneralAllGroupHeadsCollector.BR(groupField, type, sortWithinGroup, diskResident);
+ case BYTES_VAR_SORTED:
+ case BYTES_FIXED_SORTED:
+ return new GeneralAllGroupHeadsCollector.SortedBR(groupField, type, sortWithinGroup, diskResident);
+ default:
+ throw new IllegalArgumentException(String.format("ValueType %s not supported", type));
+ }
+ }
+
+ /**
+  * Group head used by the general implementation. It keeps one {@link FieldComparator}
+  * per sort field; slot 0 of each comparator holds the values of the current head
+  * document and is set as that comparator's bottom, so candidate documents can be
+  * checked with <code>compareBottom</code>.
+  */
+ static class GroupHead extends AbstractAllGroupHeadsCollector.GroupHead<Comparable> {
+
+   final FieldComparator[] comparators;
+   IndexReader.AtomicReaderContext readerContext;
+   Scorer scorer;
+
+   /**
+    * Creates the head for a group from its first collected document.
+    *
+    * @param groupValue The value of the group this head belongs to
+    * @param sort The sort within the group; one comparator is created per sort field
+    * @param doc The document id, relative to <code>readerContext</code>
+    * @param readerContext The reader context the document was collected in
+    * @param scorer The scorer handed to every created comparator
+    * @throws IOException If a comparator cannot be created or positioned
+    */
+   GroupHead(Comparable groupValue, Sort sort, int doc, IndexReader.AtomicReaderContext readerContext, Scorer scorer) throws IOException {
+     super(groupValue, doc + readerContext.docBase);
+     final SortField[] fields = sort.getSort();
+     comparators = new FieldComparator[fields.length];
+     for (int i = 0; i < fields.length; i++) {
+       final FieldComparator comparator = fields[i].getComparator(1, i).setNextReader(readerContext);
+       comparator.setScorer(scorer);
+       // Slot 0 always carries the current head document.
+       comparator.copy(0, doc);
+       comparator.setBottom(0);
+       comparators[i] = comparator;
+     }
+
+     this.readerContext = readerContext;
+     this.scorer = scorer;
+   }
+
+   /**
+    * Compares the current head document against <code>doc</code> using the comparator
+    * at index <code>compIDX</code>.
+    */
+   public int compare(int compIDX, int doc) throws IOException {
+     final FieldComparator comparator = comparators[compIDX];
+     return comparator.compareBottom(doc);
+   }
+
+   /**
+    * Makes <code>doc</code> (relative to the current reader context) the new head of
+    * this group.
+    */
+   public void updateDocHead(int doc) throws IOException {
+     for (int i = 0; i < comparators.length; i++) {
+       final FieldComparator comparator = comparators[i];
+       comparator.copy(0, doc);
+       comparator.setBottom(0);
+     }
+     this.doc = readerContext.docBase + doc;
+   }
+ }
+
+ /**
+  * Remembers the new reader context and hands the doc values source of the group field
+  * for that reader to {@link #setDocValuesSources(IndexDocValues.Source)}.
+  *
+  * @param readerContext The reader context that subsequently collected documents belong to
+  * @throws IOException If the doc values source cannot be obtained
+  */
+ @Override
+ public void setNextReader(IndexReader.AtomicReaderContext readerContext) throws IOException {
+   this.readerContext = readerContext;
+
+   final IndexDocValues docValues = readerContext.reader.docValues(groupField);
+   if (docValues == null) {
+     // The reader has no doc values for the group field: use the default source.
+     setDocValuesSources(getDefaultSource(readerContext));
+     return;
+   }
+   setDocValuesSources(diskResident ? docValues.getDirectSource() : docValues.getSource());
+ }
+
+ /**
+ * Sets the idv source for concrete implementations to use.
+ *
+ * @param source The idv source to be used by concrete implementations
+ */
+ protected abstract void setDocValuesSources(IndexDocValues.Source source);
+
+ /**
+ * @return The default source when no doc values are available.
+ * @param readerContext The current reader context
+ */
+ protected IndexDocValues.Source getDefaultSource(IndexReader.AtomicReaderContext readerContext) {
+ return IndexDocValues.getDefaultSource(valueType);
+ }
+
+ // A general impl that works for any group sort.
+ static abstract class GeneralAllGroupHeadsCollector extends DVAllGroupHeadsCollector<DVAllGroupHeadsCollector.GroupHead> {
+
+ private final Sort sortWithinGroup;
+ private final Map<Comparable, GroupHead> groups;
+
+ GeneralAllGroupHeadsCollector(String groupField, ValueType valueType, Sort sortWithinGroup, boolean diskResident) throws IOException {
+ super(groupField, valueType, sortWithinGroup.getSort().length, diskResident);
+ this.sortWithinGroup = sortWithinGroup;
+ groups = new HashMap<Comparable, GroupHead>();
+
+ final SortField[] sortFields = sortWithinGroup.getSort();
+ for (int i = 0; i < sortFields.length; i++) {
+ reversed[i] = sortFields[i].getReverse() ? -1 : 1;
+ }
+ }
+
+ /**
+  * Looks up the group head of the group that <code>doc</code> belongs to and creates and
+  * registers a new head when the group has not been seen before. The outcome is reported
+  * through <code>temporalResult</code>: <code>groupHead</code> is the head that was found or
+  * created, and <code>stop</code> is <code>true</code> only when a new head was created.
+  *
+  * @param doc The document id, relative to the current reader context
+  * @throws IOException If the comparators of a new group head cannot be created
+  */
+ protected void retrieveGroupHeadAndAddIfNotExist(int doc) throws IOException {
+   final Comparable groupValue = getGroupValue(doc);
+   GroupHead groupHead = groups.get(groupValue);
+   if (groupHead == null) {
+     // getGroupValue may return a reused scratch instance (the bytes based
+     // implementations fill scratchBytesRef). Take the private copy first and let both
+     // the map key and the group head reference it, so the head's group value is not
+     // overwritten when the next document is collected.
+     final Comparable ownedValue = groupValue == null ? null : duplicate(groupValue);
+     groupHead = new GroupHead(ownedValue, sortWithinGroup, doc, readerContext, scorer);
+     groups.put(ownedValue, groupHead);
+     temporalResult.stop = true;
+   } else {
+     temporalResult.stop = false;
+   }
+   temporalResult.groupHead = groupHead;
+ }
+
+ protected abstract Comparable getGroupValue(int doc);
+
+ protected abstract Comparable duplicate(Comparable value);
+
+ protected Collection<GroupHead> getCollectedGroupHeads() {
+ return groups.values();
+ }
+
+ /**
+  * Switches this collector and every group head collected so far to the next reader.
+  *
+  * @param context The reader context that subsequently collected documents belong to
+  * @throws IOException If the doc values source or a comparator cannot be switched
+  */
+ @Override
+ public void setNextReader(IndexReader.AtomicReaderContext context) throws IOException {
+   super.setNextReader(context);
+   for (GroupHead groupHead : groups.values()) {
+     // Update the context once per head, independent of how many comparators it has.
+     groupHead.readerContext = context;
+     final FieldComparator[] comparators = groupHead.comparators;
+     for (int i = 0; i < comparators.length; i++) {
+       // Store the comparator returned by setNextReader back into the array.
+       comparators[i] = comparators[i].setNextReader(context);
+     }
+   }
+ }
+
+ /**
+  * Remembers the scorer and passes it on to every group head collected so far and to
+  * each of that head's comparators.
+  *
+  * @param scorer The scorer for the documents collected next
+  * @throws IOException If a comparator fails to accept the scorer
+  */
+ public void setScorer(Scorer scorer) throws IOException {
+   this.scorer = scorer;
+   for (GroupHead head : groups.values()) {
+     head.scorer = scorer;
+     final FieldComparator[] headComparators = head.comparators;
+     for (int i = 0; i < headComparators.length; i++) {
+       headComparators[i].setScorer(scorer);
+     }
+   }
+ }
+
+ static class SortedBR extends GeneralAllGroupHeadsCollector {
+
+ private IndexDocValues.SortedSource source;
+
+ SortedBR(String groupField, ValueType valueType, Sort sortWithinGroup, boolean diskResident) throws IOException {
+ super(groupField, valueType, sortWithinGroup, diskResident);
+ }
+
+ protected Comparable getGroupValue(int doc) {
+ return source.getBytes(doc, scratchBytesRef);
+ }
+
+ protected Comparable duplicate(Comparable value) {
+ return new BytesRef((BytesRef) value);
+ }
+
+ protected void setDocValuesSources(IndexDocValues.Source source) {
+ this.source = source.asSortedSource();
+ }
+
+ @Override
+ protected IndexDocValues.Source getDefaultSource(IndexReader.AtomicReaderContext readerContext) {
+ return IndexDocValues.getDefaultSortedSource(valueType, readerContext.reader.maxDoc());
+ }
+ }
+
+ static class BR extends GeneralAllGroupHeadsCollector {
+
+ private IndexDocValues.Source source;
+
+ BR(String groupField, ValueType valueType, Sort sortWithinGroup, boolean diskResident) throws IOException {
+ super(groupField, valueType, sortWithinGroup, diskResident);
+ }
+
+ protected Comparable getGroupValue(int doc) {
+ return source.getBytes(doc, scratchBytesRef);
+ }
+
+ protected Comparable duplicate(Comparable value) {
+ return new BytesRef((BytesRef) value);
+ }
+
+ protected void setDocValuesSources(IndexDocValues.Source source) {
+ this.source = source;
+ }
+
+ }
+
+ static class Lng extends GeneralAllGroupHeadsCollector {
+
+ private IndexDocValues.Source source;
+
+ Lng(String groupField, ValueType valueType, Sort sortWithinGroup, boolean diskResident) throws IOException {
+ super(groupField, valueType, sortWithinGroup, diskResident);
+ }
+
+ protected Comparable getGroupValue(int doc) {
+ return source.getInt(doc);
+ }
+
+ protected Comparable duplicate(Comparable value) {
+ return value;
+ }
+
+ protected void setDocValuesSources(IndexDocValues.Source source) {
+ this.source = source;
+ }
+ }
+
+ static class Dbl extends GeneralAllGroupHeadsCollector {
+
+ private IndexDocValues.Source source;
+
+ Dbl(String groupField, ValueType valueType, Sort sortWithinGroup, boolean diskResident) throws IOException {
+ super(groupField, valueType, sortWithinGroup, diskResident);
+ }
+
+ protected Comparable getGroupValue(int doc) {
+ return source.getFloat(doc);
+ }
+
+ protected Comparable duplicate(Comparable value) {
+ return value;
+ }
+
+ protected void setDocValuesSources(IndexDocValues.Source source) {
+ this.source = source;
+ }
+
+ }
+
+ }
+
+}
Added: lucene/dev/trunk/modules/grouping/src/java/org/apache/lucene/search/grouping/dv/DVAllGroupsCollector.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/grouping/src/java/org/apache/lucene/search/grouping/dv/DVAllGroupsCollector.java?rev=1205960&view=auto
==============================================================================
--- lucene/dev/trunk/modules/grouping/src/java/org/apache/lucene/search/grouping/dv/DVAllGroupsCollector.java (added)
+++ lucene/dev/trunk/modules/grouping/src/java/org/apache/lucene/search/grouping/dv/DVAllGroupsCollector.java Thu Nov 24 19:07:59 2011
@@ -0,0 +1,255 @@
+package org.apache.lucene.search.grouping.dv;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.values.IndexDocValues;
+import org.apache.lucene.index.values.ValueType;
+import org.apache.lucene.search.grouping.AbstractAllGroupsCollector;
+import org.apache.lucene.search.grouping.SentinelIntSet;
+import org.apache.lucene.util.BytesRef;
+
+import java.io.IOException;
+import java.util.*;
+
+/**
+ * Implementation of {@link AbstractAllGroupsCollector} that groups documents based on
+ * {@link IndexDocValues} fields.
+ *
+ * @lucene.experimental
+ */
+public abstract class DVAllGroupsCollector<GROUP_VALUE_TYPE> extends AbstractAllGroupsCollector<GROUP_VALUE_TYPE> {
+
+ private static final int DEFAULT_INITIAL_SIZE = 128;
+
+ /**
+ * Expert: Constructs a {@link DVAllGroupsCollector}.
+ * Selects and constructs the most optimal all groups collector implementation for grouping by {@link IndexDocValues}.
+ *
+ *
+ * @param groupField The field to group by
+ * @param type The {@link ValueType} which is used to select a concrete implementation.
+ * @param diskResident Whether the values to group by should be disk resident
+ * @param initialSize The initial allocation size of the
+ * internal int set and group list
+ * which should roughly match the total
+ * number of expected unique groups. Be aware that the
+ * heap usage is 4 bytes * initialSize. Not all concrete implementations use this!
+ * @return the most optimal all groups collector implementation for grouping by {@link IndexDocValues}
+ */
+ public static DVAllGroupsCollector create(String groupField, ValueType type, boolean diskResident, int initialSize) {
+ switch (type) {
+ case VAR_INTS:
+ case FIXED_INTS_8:
+ case FIXED_INTS_16:
+ case FIXED_INTS_32:
+ case FIXED_INTS_64:
+ return new Lng(groupField, type, diskResident);
+ case FLOAT_32:
+ case FLOAT_64:
+ return new Dbl(groupField, type, diskResident);
+ case BYTES_FIXED_STRAIGHT:
+ case BYTES_FIXED_DEREF:
+ case BYTES_VAR_STRAIGHT:
+ case BYTES_VAR_DEREF:
+ return new BR(groupField, type, diskResident);
+ case BYTES_VAR_SORTED:
+ case BYTES_FIXED_SORTED:
+ return new SortedBR(groupField, type, diskResident, initialSize);
+ default:
+ throw new IllegalArgumentException(String.format("ValueType %s not supported", type));
+ }
+ }
+
+ /**
+ * Constructs a {@link DVAllGroupsCollector}.
+ * Selects and constructs the most optimal all groups collector implementation for grouping by {@link IndexDocValues}.
+ * If implementations require an initial allocation size then this will be set to 128.
+ *
+ *
+ * @param groupField The field to group by
+ * @param type The {@link ValueType} which is used to select a concrete implementation.
+ * @param diskResident Whether the values to group by should be disk resident
+ * @return the most optimal all groups collector implementation for grouping by {@link IndexDocValues}
+ */
+ public static DVAllGroupsCollector create(String groupField, ValueType type, boolean diskResident) {
+ return create(groupField, type, diskResident, DEFAULT_INITIAL_SIZE);
+ }
+
+ final String groupField;
+ final ValueType valueType;
+ final boolean diskResident;
+ final Collection<GROUP_VALUE_TYPE> groups;
+
+ DVAllGroupsCollector(String groupField, ValueType valueType, boolean diskResident, Collection<GROUP_VALUE_TYPE> groups) {
+ this.groupField = groupField;
+ this.valueType = valueType;
+ this.diskResident = diskResident;
+ this.groups = groups;
+ }
+
+ /**
+  * Resolves the doc values source of the group field for the next reader and hands it,
+  * together with the reader context, to the concrete implementation.
+  *
+  * @param readerContext The reader context that subsequently collected documents belong to
+  * @throws IOException If the doc values source cannot be obtained
+  */
+ @Override
+ public void setNextReader(IndexReader.AtomicReaderContext readerContext) throws IOException {
+   final IndexDocValues docValues = readerContext.reader.docValues(groupField);
+   if (docValues == null) {
+     // The reader has no doc values for the group field: use the default source.
+     setDocValuesSources(getDefaultSource(readerContext), readerContext);
+     return;
+   }
+   setDocValuesSources(diskResident ? docValues.getDirectSource() : docValues.getSource(), readerContext);
+ }
+
+ /**
+ * Sets the idv source for concrete implementations to use.
+ *
+ * @param source The idv source to be used by concrete implementations
+ * @param readerContext The current reader context
+ */
+ protected abstract void setDocValuesSources(IndexDocValues.Source source, IndexReader.AtomicReaderContext readerContext);
+
+ /**
+ * @return The default source when no doc values are available.
+ * @param readerContext The current reader context
+ */
+ protected IndexDocValues.Source getDefaultSource(IndexReader.AtomicReaderContext readerContext) {
+ return IndexDocValues.getDefaultSource(valueType);
+ }
+
+ static class Lng extends DVAllGroupsCollector<Long> {
+
+ private IndexDocValues.Source source;
+
+ Lng(String groupField, ValueType valueType, boolean diskResident) {
+ super(groupField, valueType, diskResident, new TreeSet<Long>());
+ }
+
+ /**
+  * Records the group value of the given document.
+  *
+  * @param doc The document id, relative to the current reader
+  * @throws IOException Declared by the collector contract; not thrown by this code itself
+  */
+ public void collect(int doc) throws IOException {
+   final long value = source.getInt(doc);
+   // groups is the TreeSet created in the constructor. Adding a value that is already
+   // present leaves the set unchanged, so a separate contains() lookup (and a second
+   // boxing of the value) is unnecessary.
+   groups.add(value);
+ }
+
+ public Collection<Long> getGroups() {
+ return groups;
+ }
+
+ protected void setDocValuesSources(IndexDocValues.Source source, IndexReader.AtomicReaderContext readerContext) {
+ this.source = source;
+ }
+
+ }
+
+ static class Dbl extends DVAllGroupsCollector<Double> {
+
+ private IndexDocValues.Source source;
+
+ Dbl(String groupField, ValueType valueType, boolean diskResident) {
+ super(groupField, valueType, diskResident, new TreeSet<Double>());
+ }
+
+ /**
+  * Records the group value of the given document.
+  *
+  * @param doc The document id, relative to the current reader
+  * @throws IOException Declared by the collector contract; not thrown by this code itself
+  */
+ public void collect(int doc) throws IOException {
+   final double value = source.getFloat(doc);
+   // groups is the TreeSet created in the constructor. Adding a value that is already
+   // present leaves the set unchanged, so a separate contains() lookup (and a second
+   // boxing of the value) is unnecessary.
+   groups.add(value);
+ }
+
+ public Collection<Double> getGroups() {
+ return groups;
+ }
+
+ protected void setDocValuesSources(IndexDocValues.Source source, IndexReader.AtomicReaderContext readerContext) {
+ this.source = source;
+ }
+
+ }
+
+ static class BR extends DVAllGroupsCollector<BytesRef> {
+
+ private final BytesRef spare = new BytesRef();
+
+ private IndexDocValues.Source source;
+
+ BR(String groupField, ValueType valueType, boolean diskResident) {
+ super(groupField, valueType, diskResident, new TreeSet<BytesRef>());
+ }
+
+ /**
+  * Records the group value of the given document if it has not been seen before.
+  *
+  * @param doc The document id, relative to the current reader
+  * @throws IOException Declared by the collector contract
+  */
+ public void collect(int doc) throws IOException {
+   final BytesRef current = source.getBytes(doc, spare);
+   if (groups.contains(current)) {
+     return;
+   }
+   // spare is reused for every document, so store a copy rather than the instance read into.
+   groups.add(new BytesRef(current));
+ }
+
+ public Collection<BytesRef> getGroups() {
+ return groups;
+ }
+
+ protected void setDocValuesSources(IndexDocValues.Source source, IndexReader.AtomicReaderContext readerContext) {
+ this.source = source;
+ }
+
+ }
+
+ static class SortedBR extends DVAllGroupsCollector<BytesRef> {
+
+ private final SentinelIntSet ordSet;
+ private final BytesRef spare = new BytesRef();
+
+ private IndexDocValues.SortedSource source;
+
+ SortedBR(String groupField, ValueType valueType, boolean diskResident, int initialSize) {
+ super(groupField, valueType, diskResident, new ArrayList<BytesRef>(initialSize));
+ ordSet = new SentinelIntSet(initialSize, -1);
+ }
+
+ /**
+  * Records the group value of the given document when its ordinal has not been seen yet
+  * in the current reader.
+  *
+  * @param doc The document id, relative to the current reader
+  * @throws IOException Declared by the collector contract
+  */
+ public void collect(int doc) throws IOException {
+   final int ordinal = source.ord(doc);
+   if (ordSet.exists(ordinal)) {
+     return;
+   }
+   ordSet.put(ordinal);
+   // A fresh BytesRef is passed in because the value is kept in the groups list.
+   final BytesRef groupValue = source.getBytes(doc, new BytesRef());
+   groups.add(groupValue);
+ }
+
+ public Collection<BytesRef> getGroups() {
+ return groups;
+ }
+
+ protected void setDocValuesSources(IndexDocValues.Source source, IndexReader.AtomicReaderContext readerContext) {
+ this.source = source.asSortedSource();
+
+ ordSet.clear();
+ for (BytesRef countedGroup : groups) {
+ int ord = this.source.getByValue(countedGroup, spare);
+ if (ord >= 0) {
+ ordSet.put(ord);
+ }
+ }
+ }
+
+ @Override
+ protected IndexDocValues.Source getDefaultSource(IndexReader.AtomicReaderContext readerContext) {
+ return IndexDocValues.getDefaultSortedSource(valueType, readerContext.reader.maxDoc());
+ }
+
+ }
+
+}
Added: lucene/dev/trunk/modules/grouping/src/java/org/apache/lucene/search/grouping/dv/DVFirstPassGroupingCollector.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/grouping/src/java/org/apache/lucene/search/grouping/dv/DVFirstPassGroupingCollector.java?rev=1205960&view=auto
==============================================================================
--- lucene/dev/trunk/modules/grouping/src/java/org/apache/lucene/search/grouping/dv/DVFirstPassGroupingCollector.java (added)
+++ lucene/dev/trunk/modules/grouping/src/java/org/apache/lucene/search/grouping/dv/DVFirstPassGroupingCollector.java Thu Nov 24 19:07:59 2011
@@ -0,0 +1,205 @@
+package org.apache.lucene.search.grouping.dv;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.values.IndexDocValues;
+import org.apache.lucene.index.values.ValueType;
+import org.apache.lucene.search.Sort;
+import org.apache.lucene.search.grouping.AbstractFirstPassGroupingCollector;
+import org.apache.lucene.util.BytesRef;
+
+import java.io.IOException;
+
+/**
+ * IDV based Implementations of {@link AbstractFirstPassGroupingCollector}.
+ *
+ * @lucene.experimental
+ */
+public abstract class DVFirstPassGroupingCollector<GROUP_VALUE_TYPE> extends AbstractFirstPassGroupingCollector<GROUP_VALUE_TYPE> {
+
+ final String groupField;
+ final boolean diskResident;
+ final ValueType valueType;
+
+ public static DVFirstPassGroupingCollector create(Sort groupSort, int topNGroups, String groupField, ValueType type, boolean diskResident) throws IOException {
+ switch (type) {
+ case VAR_INTS:
+ case FIXED_INTS_8:
+ case FIXED_INTS_16:
+ case FIXED_INTS_32:
+ case FIXED_INTS_64:
+ return new Lng(groupSort, topNGroups, groupField, diskResident, type);
+ case FLOAT_32:
+ case FLOAT_64:
+ return new Dbl(groupSort, topNGroups, groupField, diskResident, type);
+ case BYTES_FIXED_STRAIGHT:
+ case BYTES_FIXED_DEREF:
+ case BYTES_VAR_STRAIGHT:
+ case BYTES_VAR_DEREF:
+ return new BR(groupSort, topNGroups, groupField, diskResident, type);
+ case BYTES_VAR_SORTED:
+ case BYTES_FIXED_SORTED:
+ return new SortedBR(groupSort, topNGroups, groupField, diskResident, type);
+ default:
+ throw new IllegalArgumentException(String.format("ValueType %s not supported", type));
+ }
+ }
+
+ DVFirstPassGroupingCollector(Sort groupSort, int topNGroups, String groupField, boolean diskResident, ValueType valueType) throws IOException {
+ super(groupSort, topNGroups);
+ this.groupField = groupField;
+ this.diskResident = diskResident;
+ this.valueType = valueType;
+ }
+
+ /**
+  * Lets the base collector switch readers, then resolves the doc values source of the
+  * group field for the new reader and hands it to the concrete implementation.
+  *
+  * @param readerContext The reader context that subsequently collected documents belong to
+  * @throws IOException If the doc values source cannot be obtained
+  */
+ @Override
+ public void setNextReader(IndexReader.AtomicReaderContext readerContext) throws IOException {
+   super.setNextReader(readerContext);
+
+   final IndexDocValues docValues = readerContext.reader.docValues(groupField);
+   if (docValues == null) {
+     // The reader has no doc values for the group field: use the default source.
+     setDocValuesSources(getDefaultSource(readerContext));
+     return;
+   }
+   setDocValuesSources(diskResident ? docValues.getDirectSource() : docValues.getSource());
+ }
+
+ /**
+ * Sets the idv source for concrete implementations to use.
+ *
+ * @param source The idv source to be used by concrete implementations
+ */
+ protected abstract void setDocValuesSources(IndexDocValues.Source source);
+
+ /**
+ * @return The default source when no doc values are available.
+ * @param readerContext The current reader context
+ */
+ protected IndexDocValues.Source getDefaultSource(IndexReader.AtomicReaderContext readerContext) {
+ return IndexDocValues.getDefaultSource(valueType);
+ }
+
+ static class Lng extends DVFirstPassGroupingCollector<Long> {
+
+ private IndexDocValues.Source source;
+
+ Lng(Sort groupSort, int topNGroups, String groupField, boolean diskResident, ValueType type) throws IOException {
+ super(groupSort, topNGroups, groupField, diskResident, type);
+ }
+
+ protected Long getDocGroupValue(int doc) {
+ return source.getInt(doc);
+ }
+
+ protected Long copyDocGroupValue(Long groupValue, Long reuse) {
+ return groupValue;
+ }
+
+ protected void setDocValuesSources(IndexDocValues.Source source) {
+ this.source = source;
+ }
+ }
+
+ static class Dbl extends DVFirstPassGroupingCollector<Double> {
+
+ private IndexDocValues.Source source;
+
+ Dbl(Sort groupSort, int topNGroups, String groupField, boolean diskResident, ValueType type) throws IOException {
+ super(groupSort, topNGroups, groupField, diskResident, type);
+ }
+
+ protected Double getDocGroupValue(int doc) {
+ return source.getFloat(doc);
+ }
+
+ protected Double copyDocGroupValue(Double groupValue, Double reuse) {
+ return groupValue;
+ }
+
+ protected void setDocValuesSources(IndexDocValues.Source source) {
+ this.source = source;
+ }
+ }
+
+ static class BR extends DVFirstPassGroupingCollector<BytesRef> {
+
+ private IndexDocValues.Source source;
+ private final BytesRef spare = new BytesRef();
+
+ BR(Sort groupSort, int topNGroups, String groupField, boolean diskResident, ValueType type) throws IOException {
+ super(groupSort, topNGroups, groupField, diskResident, type);
+ }
+
+ protected BytesRef getDocGroupValue(int doc) {
+ return source.getBytes(doc, spare);
+ }
+
+ /**
+  * Copies <code>groupValue</code>, writing into <code>reuse</code> when one is supplied.
+  *
+  * @param groupValue The value to copy
+  * @param reuse An instance to overwrite with the copy, or <code>null</code> to allocate a new one
+  * @return <code>reuse</code> holding the copied value, or a new {@link BytesRef} when <code>reuse</code> is <code>null</code>
+  */
+ protected BytesRef copyDocGroupValue(BytesRef groupValue, BytesRef reuse) {
+   if (reuse == null) {
+     return new BytesRef(groupValue);
+   }
+   reuse.copy(groupValue);
+   return reuse;
+ }
+
+ @Override
+ protected void setDocValuesSources(IndexDocValues.Source source) {
+ this.source = source;
+ }
+ }
+
+ static class SortedBR extends DVFirstPassGroupingCollector<BytesRef> {
+
+ private IndexDocValues.SortedSource sortedSource;
+ private final BytesRef spare = new BytesRef();
+
+ SortedBR(Sort groupSort, int topNGroups, String groupField, boolean diskResident, ValueType type) throws IOException {
+ super(groupSort, topNGroups, groupField, diskResident, type);
+ }
+
+ @Override
+ protected BytesRef getDocGroupValue(int doc) {
+ return sortedSource.getBytes(doc, spare);
+ }
+
+ /**
+  * Copies <code>groupValue</code>, writing into <code>reuse</code> when one is supplied.
+  *
+  * @param groupValue The value to copy
+  * @param reuse An instance to overwrite with the copy, or <code>null</code> to allocate a new one
+  * @return <code>reuse</code> holding the copied value, or a new {@link BytesRef} when <code>reuse</code> is <code>null</code>
+  */
+ @Override
+ protected BytesRef copyDocGroupValue(BytesRef groupValue, BytesRef reuse) {
+   if (reuse == null) {
+     return new BytesRef(groupValue);
+   }
+   reuse.copy(groupValue);
+   return reuse;
+ }
+
+ @Override
+ protected void setDocValuesSources(IndexDocValues.Source source) {
+ this.sortedSource = source.asSortedSource();
+ }
+
+ @Override
+ protected IndexDocValues.Source getDefaultSource(IndexReader.AtomicReaderContext readerContext) {
+ return IndexDocValues.getDefaultSortedSource(valueType, readerContext.reader.maxDoc());
+ }
+ }
+
+}
Added: lucene/dev/trunk/modules/grouping/src/java/org/apache/lucene/search/grouping/dv/DVSecondPassGroupingCollector.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/grouping/src/java/org/apache/lucene/search/grouping/dv/DVSecondPassGroupingCollector.java?rev=1205960&view=auto
==============================================================================
--- lucene/dev/trunk/modules/grouping/src/java/org/apache/lucene/search/grouping/dv/DVSecondPassGroupingCollector.java (added)
+++ lucene/dev/trunk/modules/grouping/src/java/org/apache/lucene/search/grouping/dv/DVSecondPassGroupingCollector.java Thu Nov 24 19:07:59 2011
@@ -0,0 +1,230 @@
+package org.apache.lucene.search.grouping.dv;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.values.IndexDocValues;
+import org.apache.lucene.index.values.ValueType;
+import org.apache.lucene.search.Sort;
+import org.apache.lucene.search.grouping.AbstractSecondPassGroupingCollector;
+import org.apache.lucene.search.grouping.SearchGroup;
+import org.apache.lucene.search.grouping.SentinelIntSet;
+import org.apache.lucene.util.BytesRef;
+
+import java.io.IOException;
+import java.util.Collection;
+
+/**
+ * IDV based implementation of {@link AbstractSecondPassGroupingCollector}.
+ *
+ * @lucene.experimental
+ */
+public abstract class DVSecondPassGroupingCollector<GROUP_VALUE> extends AbstractSecondPassGroupingCollector<GROUP_VALUE> {
+
+ /**
+ * Constructs a {@link DVSecondPassGroupingCollector}.
+ * Selects and constructs the most optimal second pass collector implementation for grouping by {@link IndexDocValues}.
+ * <p>
+ * Integer value types are handled by {@code Lng}, floating point types by {@code Dbl}, straight and deref
+ * bytes types by {@code BR} and sorted bytes types by {@code SortedBR}.
+ *
+ * @param groupField The field to group by
+ * @param diskResident Whether the values to group by should be disk resident
+ * @param type The {@link org.apache.lucene.index.values.ValueType} which is used to select a concrete implementation.
+ * @param searchGroups The groups from the first phase search
+ * @param groupSort The sort used for the groups
+ * @param withinGroupSort The sort used for documents inside a group
+ * @param maxDocsPerGroup The maximum number of documents to collect per group
+ * @param getScores Whether to include scores for the documents inside a group
+ * @param getMaxScores Whether to keep track of the highest score per group
+ * @param fillSortFields Whether to include the sort values
+ * @return the most optimal second pass collector implementation for grouping by {@link IndexDocValues}
+ * @throws IOException If I/O related errors occur
+ * @throws IllegalArgumentException If no implementation exists for the supplied value type
+ */
+ @SuppressWarnings("unchecked")
+ public static DVSecondPassGroupingCollector create(String groupField,
+ boolean diskResident,
+ ValueType type,
+ Collection<SearchGroup> searchGroups,
+ Sort groupSort,
+ Sort withinGroupSort,
+ int maxDocsPerGroup,
+ boolean getScores,
+ boolean getMaxScores,
+ boolean fillSortFields) throws IOException {
+ switch (type) {
+ case VAR_INTS:
+ case FIXED_INTS_8:
+ case FIXED_INTS_16:
+ case FIXED_INTS_32:
+ case FIXED_INTS_64:
+ // Type erasure b/c otherwise we have inconvertible types...
+ return new Lng(groupField, type, diskResident, (Collection) searchGroups, groupSort, withinGroupSort, maxDocsPerGroup, getScores, getMaxScores, fillSortFields);
+ case FLOAT_32:
+ case FLOAT_64:
+ // Type erasure b/c otherwise we have inconvertible types...
+ return new Dbl(groupField, type, diskResident, (Collection) searchGroups, groupSort, withinGroupSort, maxDocsPerGroup, getScores, getMaxScores, fillSortFields);
+ case BYTES_FIXED_STRAIGHT:
+ case BYTES_FIXED_DEREF:
+ case BYTES_VAR_STRAIGHT:
+ case BYTES_VAR_DEREF:
+ // Type erasure b/c otherwise we have inconvertible types...
+ return new BR(groupField, type, diskResident, (Collection) searchGroups, groupSort, withinGroupSort, maxDocsPerGroup, getScores, getMaxScores, fillSortFields);
+ case BYTES_VAR_SORTED:
+ case BYTES_FIXED_SORTED:
+ // Type erasure b/c otherwise we have inconvertible types...
+ return new SortedBR(groupField, type, diskResident, (Collection) searchGroups, groupSort, withinGroupSort, maxDocsPerGroup, getScores, getMaxScores, fillSortFields);
+ default:
+ // Every value type without a matching implementation above is rejected.
+ throw new IllegalArgumentException(String.format("ValueType %s not supported", type));
+ }
+ }
+
+ // Name of the field whose doc values are looked up on every reader.
+ final String groupField;
+ // Value type of the group field; passed to the default source lookup when a reader has no doc values.
+ final ValueType valueType;
+ // When true the direct source is requested from the doc values, otherwise the regular source.
+ final boolean diskResident;
+
+ /**
+ * Passes the grouping arguments on to the super class and remembers the doc values specific settings.
+ *
+ * @param groupField The field to group by
+ * @param valueType The value type of the group field
+ * @param diskResident Whether the direct source should be used instead of the regular source
+ */
+ DVSecondPassGroupingCollector(String groupField, ValueType valueType, boolean diskResident, Collection<SearchGroup<GROUP_VALUE>> searchGroups, Sort groupSort, Sort withinGroupSort, int maxDocsPerGroup, boolean getScores, boolean getMaxScores, boolean fillSortFields) throws IOException {
+ super(searchGroups, groupSort, withinGroupSort, maxDocsPerGroup, getScores, getMaxScores, fillSortFields);
+ this.groupField = groupField;
+ this.valueType = valueType;
+ this.diskResident = diskResident;
+ }
+
+ /**
+  * Resolves the doc values source of the group field for the given reader and passes it to
+  * {@link #setDocValuesSources(IndexDocValues.Source, IndexReader.AtomicReaderContext)}.
+  * When the reader has no doc values for the group field the source from
+  * {@link #getDefaultSource(IndexReader.AtomicReaderContext)} is used instead.
+  */
+ @Override
+ public void setNextReader(IndexReader.AtomicReaderContext readerContext) throws IOException {
+   super.setNextReader(readerContext);
+
+   final IndexDocValues docValues = readerContext.reader.docValues(groupField);
+   final IndexDocValues.Source resolved;
+   if (docValues == null) {
+     resolved = getDefaultSource(readerContext);
+   } else if (diskResident) {
+     resolved = docValues.getDirectSource();
+   } else {
+     resolved = docValues.getSource();
+   }
+   setDocValuesSources(resolved, readerContext);
+ }
+
+ /**
+ * Sets the idv source for concrete implementations to use.
+ * Invoked from {@link #setNextReader(IndexReader.AtomicReaderContext)} with either the source of the
+ * group field or the default source.
+ *
+ * @param source The idv source to be used by concrete implementations
+ * @param readerContext The current reader context
+ */
+ protected abstract void setDocValuesSources(IndexDocValues.Source source, IndexReader.AtomicReaderContext readerContext);
+
+ /**
+ * Provides the source that is used when a reader has no doc values for the group field.
+ *
+ * @param readerContext The current reader context
+ * @return The default source when no doc values are available.
+ */
+ protected IndexDocValues.Source getDefaultSource(IndexReader.AtomicReaderContext readerContext) {
+ return IndexDocValues.getDefaultSource(valueType);
+ }
+
+ /**
+  * Second pass collector for the integer value types; a document's group is looked up in the group map
+  * by the value its source returns from {@code getInt}.
+  */
+ static class Lng extends DVSecondPassGroupingCollector<Long> {
+
+   private IndexDocValues.Source source;
+
+   Lng(String groupField, ValueType valueType, boolean diskResident, Collection<SearchGroup<Long>> searchGroups, Sort groupSort, Sort withinGroupSort, int maxDocsPerGroup, boolean getScores, boolean getMaxScores, boolean fillSortFields) throws IOException {
+     super(groupField, valueType, diskResident, searchGroups, groupSort, withinGroupSort, maxDocsPerGroup, getScores, getMaxScores, fillSortFields);
+   }
+
+   /**
+    * @param doc The document to find the group for
+    * @return The entry of the group map for the document's value, or whatever the map yields when there is none
+    */
+   @Override
+   protected SearchGroupDocs<Long> retrieveGroup(int doc) throws IOException {
+     return groupMap.get(source.getInt(doc));
+   }
+
+   /** Keeps the supplied source for subsequent lookups; the reader context is not needed here. */
+   @Override
+   protected void setDocValuesSources(IndexDocValues.Source source, IndexReader.AtomicReaderContext readerContext) {
+     this.source = source;
+   }
+ }
+
+ /**
+  * Second pass collector for the floating point value types; a document's group is looked up in the
+  * group map by the value its source returns from {@code getFloat}.
+  */
+ static class Dbl extends DVSecondPassGroupingCollector<Double> {
+
+   private IndexDocValues.Source source;
+
+   Dbl(String groupField, ValueType valueType, boolean diskResident, Collection<SearchGroup<Double>> searchGroups, Sort groupSort, Sort withinGroupSort, int maxDocsPerGroup, boolean getScores, boolean getMaxScores, boolean fillSortFields) throws IOException {
+     super(groupField, valueType, diskResident, searchGroups, groupSort, withinGroupSort, maxDocsPerGroup, getScores, getMaxScores, fillSortFields);
+   }
+
+   /**
+    * @param doc The document to find the group for
+    * @return The entry of the group map for the document's value, or whatever the map yields when there is none
+    */
+   @Override
+   protected SearchGroupDocs<Double> retrieveGroup(int doc) throws IOException {
+     return groupMap.get(source.getFloat(doc));
+   }
+
+   /** Keeps the supplied source for subsequent lookups; the reader context is not needed here. */
+   @Override
+   protected void setDocValuesSources(IndexDocValues.Source source, IndexReader.AtomicReaderContext readerContext) {
+     this.source = source;
+   }
+ }
+
+ /**
+  * Second pass collector for the straight and deref bytes value types; a document's group is looked up
+  * in the group map by the bytes its source returns.
+  */
+ static class BR extends DVSecondPassGroupingCollector<BytesRef> {
+
+   private IndexDocValues.Source source;
+   // Scratch instance handed to the source on every lookup.
+   private final BytesRef spare = new BytesRef();
+
+   BR(String groupField, ValueType valueType, boolean diskResident, Collection<SearchGroup<BytesRef>> searchGroups, Sort groupSort, Sort withinGroupSort, int maxDocsPerGroup, boolean getScores, boolean getMaxScores, boolean fillSortFields) throws IOException {
+     super(groupField, valueType, diskResident, searchGroups, groupSort, withinGroupSort, maxDocsPerGroup, getScores, getMaxScores, fillSortFields);
+   }
+
+   /**
+    * @param doc The document to find the group for
+    * @return The entry of the group map for the document's bytes, or whatever the map yields when there is none
+    */
+   @Override
+   protected SearchGroupDocs<BytesRef> retrieveGroup(int doc) throws IOException {
+     return groupMap.get(source.getBytes(doc, spare));
+   }
+
+   /** Keeps the supplied source for subsequent lookups; the reader context is not needed here. */
+   @Override
+   protected void setDocValuesSources(IndexDocValues.Source source, IndexReader.AtomicReaderContext readerContext) {
+     this.source = source;
+   }
+
+ }
+
+ /**
+  * Second pass collector for the sorted bytes value types. Instead of comparing bytes per document, the
+  * ords of the requested groups are resolved once per source and documents are matched by ord.
+  */
+ static class SortedBR extends DVSecondPassGroupingCollector<BytesRef> {
+
+   private IndexDocValues.SortedSource source;
+   // Scratch instance handed to the source when resolving group values to ords.
+   private final BytesRef spare = new BytesRef();
+   // Holds the ords of the requested groups in the current source; the slot of an ord indexes groupDocs.
+   private final SentinelIntSet ordSet;
+
+   @SuppressWarnings("unchecked")
+   SortedBR(String groupField, ValueType valueType, boolean diskResident, Collection<SearchGroup<BytesRef>> searchGroups, Sort groupSort, Sort withinGroupSort, int maxDocsPerGroup, boolean getScores, boolean getMaxScores, boolean fillSortFields) throws IOException {
+     super(groupField, valueType, diskResident, searchGroups, groupSort, withinGroupSort, maxDocsPerGroup, getScores, getMaxScores, fillSortFields);
+     ordSet = new SentinelIntSet(groupMap.size(), -1);
+     // One array position per slot of the ord set, so a slot can be used directly as index.
+     groupDocs = (SearchGroupDocs<BytesRef>[]) new SearchGroupDocs[ordSet.keys.length];
+   }
+
+   /**
+    * @param doc The document to find the group for
+    * @return The group registered for the document's ord, or {@code null} when the ord is not in the ord set
+    */
+   @Override
+   protected SearchGroupDocs<BytesRef> retrieveGroup(int doc) throws IOException {
+     int slot = ordSet.find(source.ord(doc));
+     if (slot >= 0) {
+       return groupDocs[slot];
+     }
+
+     return null;
+   }
+
+   /**
+    * Switches to the sorted view of the supplied source and rebuilds the ord set from scratch against it.
+    * Groups whose value yields a negative ord from the source are left out.
+    */
+   @Override
+   protected void setDocValuesSources(IndexDocValues.Source source, IndexReader.AtomicReaderContext readerContext) {
+     this.source = source.asSortedSource();
+
+     ordSet.clear();
+     for (SearchGroupDocs<BytesRef> group : groupMap.values()) {
+       int ord = this.source.getByValue(group.groupValue, spare);
+       if (ord >= 0) {
+         groupDocs[ordSet.put(ord)] = group;
+       }
+     }
+   }
+
+   /** Supplies the default sorted source, sized by the reader's {@code maxDoc()}. */
+   @Override
+   protected IndexDocValues.Source getDefaultSource(IndexReader.AtomicReaderContext readerContext) {
+     return IndexDocValues.getDefaultSortedSource(valueType, readerContext.reader.maxDoc());
+   }
+ }
+
+}
Modified: lucene/dev/trunk/modules/grouping/src/java/org/apache/lucene/search/grouping/package.html
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/grouping/src/java/org/apache/lucene/search/grouping/package.html?rev=1205960&r1=1205959&r2=1205960&view=diff
==============================================================================
--- lucene/dev/trunk/modules/grouping/src/java/org/apache/lucene/search/grouping/package.html (original)
+++ lucene/dev/trunk/modules/grouping/src/java/org/apache/lucene/search/grouping/package.html Thu Nov 24 19:07:59 2011
@@ -179,11 +179,44 @@ fields, <code>FieldCache</code>, etc.).
FixedBitSet groupHeadsBitSet = c.retrieveGroupHeads(maxDoc)
</pre>
-<p>For each of the above collectors there is also a variant that works with <code>ValueSource</code> instead of
+<p>For each of the above collector types there is also a variant that works with <code>ValueSource</code> instead
of fields. Concretely this means that these variants can work with functions. These variants are slower than
there term based counter parts. These implementations are located in the
<code>org.apache.lucene.search.grouping.function</code> package.
</p>
+<p>
+ There are also IndexDocValues based implementations available for the group collectors. There are factory methods
+ available for creating idv based instances. A typical example using idv based grouping collectors:
+</p>
+
+<pre class="prettyprint">
+ boolean diskResident = true; // Whether values should be fetched directly from disk, bypassing the Java heap space.
+ AbstractFirstPassGroupingCollector c1 = DVFirstPassGroupingCollector.create(
+ groupSort, groupOffset+topNGroups, "author", ValueType.BYTES_VAR_SORTED, diskResident
+ );
+
+ s.search(new TermQuery(new Term("content", searchTerm)), c1);
+
+ Collection<SearchGroup<BytesRef>> topGroups = c1.getTopGroups(groupOffset, fillFields);
+
+ if (topGroups == null) {
+ // No groups matched
+ return;
+ }
+
+ boolean getScores = true;
+ boolean getMaxScores = true;
+ boolean fillFields = true;
+ AbstractSecondPassGroupingCollector<BytesRef> c2 = DVSecondPassGroupingCollector.create(
+ "author", diskResident, ValueType.BYTES_VAR_SORTED, topGroups, groupSort, docSort,
+ docOffset+docsPerGroup, getScores, getMaxScores, fillFields
+ );
+
+ s.search(new TermQuery(new Term("content", searchTerm)), c2);
+ TopGroups<BytesRef> groupsResult = c2.getTopGroups(docOffset);
+ // Render groupsResult...
+</pre>
+
</body>
</html>
Modified: lucene/dev/trunk/modules/grouping/src/test/org/apache/lucene/search/grouping/AllGroupHeadsCollectorTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/grouping/src/test/org/apache/lucene/search/grouping/AllGroupHeadsCollectorTest.java?rev=1205960&r1=1205959&r2=1205960&view=diff
==============================================================================
--- lucene/dev/trunk/modules/grouping/src/test/org/apache/lucene/search/grouping/AllGroupHeadsCollectorTest.java (original)
+++ lucene/dev/trunk/modules/grouping/src/test/org/apache/lucene/search/grouping/AllGroupHeadsCollectorTest.java Thu Nov 24 19:07:59 2011
@@ -18,17 +18,16 @@ package org.apache.lucene.search.groupin
*/
import org.apache.lucene.analysis.MockAnalyzer;
-import org.apache.lucene.document.Document;
-import org.apache.lucene.document.Field;
-import org.apache.lucene.document.NumericField;
-import org.apache.lucene.document.StringField;
-import org.apache.lucene.document.TextField;
+import org.apache.lucene.document.*;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.RandomIndexWriter;
+import org.apache.lucene.index.SlowMultiReaderWrapper;
import org.apache.lucene.index.Term;
+import org.apache.lucene.index.values.ValueType;
import org.apache.lucene.queries.function.ValueSource;
import org.apache.lucene.queries.function.valuesource.BytesRefFieldSource;
import org.apache.lucene.search.*;
+import org.apache.lucene.search.grouping.dv.DVAllGroupHeadsCollector;
import org.apache.lucene.search.grouping.function.FunctionAllGroupHeadsCollector;
import org.apache.lucene.search.grouping.term.TermAllGroupHeadsCollector;
import org.apache.lucene.store.Directory;
@@ -42,6 +41,10 @@ import java.util.*;
public class AllGroupHeadsCollectorTest extends LuceneTestCase {
+ private static final ValueType[] vts = new ValueType[]{
+ ValueType.BYTES_VAR_DEREF, ValueType.BYTES_VAR_STRAIGHT, ValueType.BYTES_VAR_SORTED
+ };
+
public void testBasic() throws Exception {
final String groupField = "author";
Directory dir = newDirectory();
@@ -50,24 +53,26 @@ public class AllGroupHeadsCollectorTest
dir,
newIndexWriterConfig(TEST_VERSION_CURRENT,
new MockAnalyzer(random)).setMergePolicy(newLogMergePolicy()));
+ boolean canUseIDV = !"Lucene3x".equals(w.w.getConfig().getCodec().getName());
+ ValueType valueType = vts[random.nextInt(vts.length)];
// 0
Document doc = new Document();
- doc.add(newField(groupField, "author1", TextField.TYPE_STORED));
+ addGroupField(doc, groupField, "author1", canUseIDV, valueType);
doc.add(newField("content", "random text", TextField.TYPE_STORED));
doc.add(newField("id", "1", StringField.TYPE_STORED));
w.addDocument(doc);
// 1
doc = new Document();
- doc.add(newField(groupField, "author1", TextField.TYPE_STORED));
+ addGroupField(doc, groupField, "author1", canUseIDV, valueType);
doc.add(newField("content", "some more random text blob", TextField.TYPE_STORED));
doc.add(newField("id", "2", StringField.TYPE_STORED));
w.addDocument(doc);
// 2
doc = new Document();
- doc.add(newField(groupField, "author1", TextField.TYPE_STORED));
+ addGroupField(doc, groupField, "author1", canUseIDV, valueType);
doc.add(newField("content", "some more random textual data", TextField.TYPE_STORED));
doc.add(newField("id", "3", StringField.TYPE_STORED));
w.addDocument(doc);
@@ -75,21 +80,21 @@ public class AllGroupHeadsCollectorTest
// 3
doc = new Document();
- doc.add(newField(groupField, "author2", TextField.TYPE_STORED));
+ addGroupField(doc, groupField, "author2", canUseIDV, valueType);
doc.add(newField("content", "some random text", TextField.TYPE_STORED));
doc.add(newField("id", "4", StringField.TYPE_STORED));
w.addDocument(doc);
// 4
doc = new Document();
- doc.add(newField(groupField, "author3", TextField.TYPE_STORED));
+ addGroupField(doc, groupField, "author3", canUseIDV, valueType);
doc.add(newField("content", "some more random text", TextField.TYPE_STORED));
doc.add(newField("id", "5", StringField.TYPE_STORED));
w.addDocument(doc);
// 5
doc = new Document();
- doc.add(newField(groupField, "author3", TextField.TYPE_STORED));
+ addGroupField(doc, groupField, "author3", canUseIDV, valueType);
doc.add(newField("content", "random blob", TextField.TYPE_STORED));
doc.add(newField("id", "6", StringField.TYPE_STORED));
w.addDocument(doc);
@@ -108,34 +113,38 @@ public class AllGroupHeadsCollectorTest
IndexReader reader = w.getReader();
IndexSearcher indexSearcher = new IndexSearcher(reader);
+ if (SlowMultiReaderWrapper.class.isAssignableFrom(reader.getClass())) {
+ canUseIDV = false;
+ }
+
w.close();
int maxDoc = reader.maxDoc();
Sort sortWithinGroup = new Sort(new SortField("id", SortField.Type.INT, true));
- AbstractAllGroupHeadsCollector c1 = createRandomCollector(groupField, sortWithinGroup);
+ AbstractAllGroupHeadsCollector c1 = createRandomCollector(groupField, sortWithinGroup, canUseIDV, valueType);
indexSearcher.search(new TermQuery(new Term("content", "random")), c1);
assertTrue(arrayContains(new int[]{2, 3, 5, 7}, c1.retrieveGroupHeads()));
assertTrue(openBitSetContains(new int[]{2, 3, 5, 7}, c1.retrieveGroupHeads(maxDoc), maxDoc));
- AbstractAllGroupHeadsCollector c2 = createRandomCollector(groupField, sortWithinGroup);
+ AbstractAllGroupHeadsCollector c2 = createRandomCollector(groupField, sortWithinGroup, canUseIDV, valueType);
indexSearcher.search(new TermQuery(new Term("content", "some")), c2);
assertTrue(arrayContains(new int[]{2, 3, 4}, c2.retrieveGroupHeads()));
assertTrue(openBitSetContains(new int[]{2, 3, 4}, c2.retrieveGroupHeads(maxDoc), maxDoc));
- AbstractAllGroupHeadsCollector c3 = createRandomCollector(groupField, sortWithinGroup);
+ AbstractAllGroupHeadsCollector c3 = createRandomCollector(groupField, sortWithinGroup, canUseIDV, valueType);
indexSearcher.search(new TermQuery(new Term("content", "blob")), c3);
assertTrue(arrayContains(new int[]{1, 5}, c3.retrieveGroupHeads()));
assertTrue(openBitSetContains(new int[]{1, 5}, c3.retrieveGroupHeads(maxDoc), maxDoc));
// STRING sort type triggers different implementation
Sort sortWithinGroup2 = new Sort(new SortField("id", SortField.Type.STRING, true));
- AbstractAllGroupHeadsCollector c4 = createRandomCollector(groupField, sortWithinGroup2);
+ AbstractAllGroupHeadsCollector c4 = createRandomCollector(groupField, sortWithinGroup2, canUseIDV, valueType);
indexSearcher.search(new TermQuery(new Term("content", "random")), c4);
assertTrue(arrayContains(new int[]{2, 3, 5, 7}, c4.retrieveGroupHeads()));
assertTrue(openBitSetContains(new int[]{2, 3, 5, 7}, c4.retrieveGroupHeads(maxDoc), maxDoc));
Sort sortWithinGroup3 = new Sort(new SortField("id", SortField.Type.STRING, false));
- AbstractAllGroupHeadsCollector c5 = createRandomCollector(groupField, sortWithinGroup3);
+ AbstractAllGroupHeadsCollector c5 = createRandomCollector(groupField, sortWithinGroup3, canUseIDV, valueType);
indexSearcher.search(new TermQuery(new Term("content", "random")), c5);
// 7 b/c higher doc id wins, even if order of field is in not in reverse.
assertTrue(arrayContains(new int[]{0, 3, 4, 6}, c5.retrieveGroupHeads()));
@@ -161,7 +170,13 @@ public class AllGroupHeadsCollectorTest
final List<BytesRef> groups = new ArrayList<BytesRef>();
for (int i = 0; i < numGroups; i++) {
- groups.add(new BytesRef(_TestUtil.randomRealisticUnicodeString(random)));
+ String randomValue;
+ do {
+ // B/c of DV based impl we can't see the difference between an empty string and a null value.
+ // For that reason we don't generate empty string groups.
+ randomValue = _TestUtil.randomRealisticUnicodeString(random);
+ } while ("".equals(randomValue));
+ groups.add(new BytesRef(randomValue));
}
final String[] contentStrings = new String[_TestUtil.nextInt(random, 2, 20)];
if (VERBOSE) {
@@ -186,11 +201,19 @@ public class AllGroupHeadsCollectorTest
dir,
newIndexWriterConfig(TEST_VERSION_CURRENT,
new MockAnalyzer(random)));
+ boolean preFlex = "Lucene3x".equals(w.w.getConfig().getCodec().getName());
+ boolean canUseIDV = !preFlex;
+ ValueType valueType = vts[random.nextInt(vts.length)];
Document doc = new Document();
Document docNoGroup = new Document();
Field group = newField("group", "", StringField.TYPE_UNSTORED);
doc.add(group);
+ IndexDocValuesField valuesField = null;
+ if (canUseIDV) {
+ valuesField = new IndexDocValuesField("group");
+ doc.add(valuesField);
+ }
Field sort1 = newField("sort1", "", StringField.TYPE_UNSTORED);
doc.add(sort1);
docNoGroup.add(sort1);
@@ -233,6 +256,9 @@ public class AllGroupHeadsCollectorTest
groupDocs[i] = groupDoc;
if (groupDoc.group != null) {
group.setValue(groupDoc.group.utf8ToString());
+ if (canUseIDV) {
+ valuesField.setBytes(new BytesRef(groupDoc.group.utf8ToString()), valueType);
+ }
}
sort1.setValue(groupDoc.sort1.utf8ToString());
sort2.setValue(groupDoc.sort2.utf8ToString());
@@ -259,6 +285,11 @@ public class AllGroupHeadsCollectorTest
try {
final IndexSearcher s = newSearcher(r);
+ if (SlowMultiReaderWrapper.class.isAssignableFrom(s.getIndexReader().getClass())) {
+ canUseIDV = false;
+ } else {
+ canUseIDV = !preFlex;
+ }
for (int contentID = 0; contentID < 3; contentID++) {
final ScoreDoc[] hits = s.search(new TermQuery(new Term("content", "real" + contentID)), numDocs).scoreDocs;
@@ -284,7 +315,7 @@ public class AllGroupHeadsCollectorTest
final String searchTerm = "real" + random.nextInt(3);
boolean sortByScoreOnly = random.nextBoolean();
Sort sortWithinGroup = getRandomSort(sortByScoreOnly);
- AbstractAllGroupHeadsCollector allGroupHeadsCollector = createRandomCollector("group", sortWithinGroup);
+ AbstractAllGroupHeadsCollector allGroupHeadsCollector = createRandomCollector("group", sortWithinGroup, canUseIDV, valueType);
s.search(new TermQuery(new Term("content", searchTerm)), allGroupHeadsCollector);
int[] expectedGroupHeads = createExpectedGroupHeads(searchTerm, groupDocs, sortWithinGroup, sortByScoreOnly, fieldIdToDocID);
int[] actualGroupHeads = allGroupHeadsCollector.retrieveGroupHeads();
@@ -475,15 +506,33 @@ public class AllGroupHeadsCollectorTest
};
}
- private AbstractAllGroupHeadsCollector createRandomCollector(String groupField, Sort sortWithinGroup) throws IOException {
+ private AbstractAllGroupHeadsCollector createRandomCollector(String groupField, Sort sortWithinGroup, boolean canUseIDV, ValueType valueType) throws IOException {
+ AbstractAllGroupHeadsCollector collector;
if (random.nextBoolean()) {
ValueSource vs = new BytesRefFieldSource(groupField);
- return new FunctionAllGroupHeadsCollector(vs, new HashMap(), sortWithinGroup);
+ collector = new FunctionAllGroupHeadsCollector(vs, new HashMap(), sortWithinGroup);
+ } else if (canUseIDV && random.nextBoolean()) {
+ boolean diskResident = random.nextBoolean();
+ collector = DVAllGroupHeadsCollector.create(groupField, sortWithinGroup, valueType, diskResident);
} else {
- return TermAllGroupHeadsCollector.create(groupField, sortWithinGroup);
+ collector = TermAllGroupHeadsCollector.create(groupField, sortWithinGroup);
+ }
+
+ if (VERBOSE) {
+ System.out.println("Selected implementation: " + collector.getClass().getSimpleName());
}
+
+ return collector;
}
+ // Adds the group value as a regular stored text field and, when IDV can be used, also as a doc values field.
+ private void addGroupField(Document doc, String groupField, String value, boolean canUseIDV, ValueType valueType) {
+   doc.add(new Field(groupField, value, TextField.TYPE_STORED));
+   if (!canUseIDV) {
+     return;
+   }
+
+   IndexDocValuesField dvField = new IndexDocValuesField(groupField);
+   dvField.setBytes(new BytesRef(value), valueType);
+   doc.add(dvField);
+ }
private static class GroupDoc {
final int id;
Modified: lucene/dev/trunk/modules/grouping/src/test/org/apache/lucene/search/grouping/AllGroupsCollectorTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/modules/grouping/src/test/org/apache/lucene/search/grouping/AllGroupsCollectorTest.java?rev=1205960&r1=1205959&r2=1205960&view=diff
==============================================================================
--- lucene/dev/trunk/modules/grouping/src/test/org/apache/lucene/search/grouping/AllGroupsCollectorTest.java (original)
+++ lucene/dev/trunk/modules/grouping/src/test/org/apache/lucene/search/grouping/AllGroupsCollectorTest.java Thu Nov 24 19:07:59 2011
@@ -18,19 +18,19 @@ package org.apache.lucene.search.groupin
*/
import org.apache.lucene.analysis.MockAnalyzer;
-import org.apache.lucene.document.Document;
-import org.apache.lucene.document.Field;
-import org.apache.lucene.document.FieldType;
-import org.apache.lucene.document.TextField;
+import org.apache.lucene.document.*;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Term;
+import org.apache.lucene.index.values.ValueType;
import org.apache.lucene.queries.function.ValueSource;
import org.apache.lucene.queries.function.valuesource.BytesRefFieldSource;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.grouping.function.FunctionAllGroupsCollector;
+import org.apache.lucene.search.grouping.dv.DVAllGroupsCollector;
import org.apache.lucene.search.grouping.term.TermAllGroupsCollector;
import org.apache.lucene.store.Directory;
+import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LuceneTestCase;
import java.io.IOException;
@@ -46,27 +46,29 @@ public class AllGroupsCollectorTest exte
Directory dir = newDirectory();
RandomIndexWriter w = new RandomIndexWriter(
- random,
- dir,
- newIndexWriterConfig(TEST_VERSION_CURRENT,
- new MockAnalyzer(random)).setMergePolicy(newLogMergePolicy()));
+ random,
+ dir,
+ newIndexWriterConfig(TEST_VERSION_CURRENT,
+ new MockAnalyzer(random)).setMergePolicy(newLogMergePolicy()));
+ boolean canUseIDV = !"Lucene3x".equals(w.w.getConfig().getCodec().getName());
+
// 0
Document doc = new Document();
- doc.add(new Field(groupField, "author1", TextField.TYPE_STORED));
+ addGroupField(doc, groupField, "author1", canUseIDV);
doc.add(new Field("content", "random text", TextField.TYPE_STORED));
doc.add(new Field("id", "1", customType));
w.addDocument(doc);
// 1
doc = new Document();
- doc.add(new Field(groupField, "author1", TextField.TYPE_STORED));
+ addGroupField(doc, groupField, "author1", canUseIDV);
doc.add(new Field("content", "some more random text blob", TextField.TYPE_STORED));
doc.add(new Field("id", "2", customType));
w.addDocument(doc);
// 2
doc = new Document();
- doc.add(new Field(groupField, "author1", TextField.TYPE_STORED));
+ addGroupField(doc, groupField, "author1", canUseIDV);
doc.add(new Field("content", "some more random textual data", TextField.TYPE_STORED));
doc.add(new Field("id", "3", customType));
w.addDocument(doc);
@@ -74,21 +76,21 @@ public class AllGroupsCollectorTest exte
// 3
doc = new Document();
- doc.add(new Field(groupField, "author2", TextField.TYPE_STORED));
+ addGroupField(doc, groupField, "author2", canUseIDV);
doc.add(new Field("content", "some random text", TextField.TYPE_STORED));
doc.add(new Field("id", "4", customType));
w.addDocument(doc);
// 4
doc = new Document();
- doc.add(new Field(groupField, "author3", TextField.TYPE_STORED));
+ addGroupField(doc, groupField, "author3", canUseIDV);
doc.add(new Field("content", "some more random text", TextField.TYPE_STORED));
doc.add(new Field("id", "5", customType));
w.addDocument(doc);
// 5
doc = new Document();
- doc.add(new Field(groupField, "author3", TextField.TYPE_STORED));
+ addGroupField(doc, groupField, "author3", canUseIDV);
doc.add(new Field("content", "random blob", TextField.TYPE_STORED));
doc.add(new Field("id", "6", customType));
w.addDocument(doc);
@@ -102,15 +104,15 @@ public class AllGroupsCollectorTest exte
IndexSearcher indexSearcher = new IndexSearcher(w.getReader());
w.close();
- AbstractAllGroupsCollector c1 = createRandomCollector(groupField);
+ AbstractAllGroupsCollector c1 = createRandomCollector(groupField, canUseIDV);
indexSearcher.search(new TermQuery(new Term("content", "random")), c1);
assertEquals(4, c1.getGroupCount());
- AbstractAllGroupsCollector c2 = createRandomCollector(groupField);
+ AbstractAllGroupsCollector c2 = createRandomCollector(groupField, canUseIDV);
indexSearcher.search(new TermQuery(new Term("content", "some")), c2);
assertEquals(3, c2.getGroupCount());
- AbstractAllGroupsCollector c3 = createRandomCollector(groupField);
+ AbstractAllGroupsCollector c3 = createRandomCollector(groupField, canUseIDV);
indexSearcher.search(new TermQuery(new Term("content", "blob")), c3);
assertEquals(2, c3.getGroupCount());
@@ -118,13 +120,32 @@ public class AllGroupsCollectorTest exte
dir.close();
}
- private AbstractAllGroupsCollector createRandomCollector(String groupField) throws IOException {
- if (random.nextBoolean()) {
- return new TermAllGroupsCollector(groupField);
+ // Adds the group value as a regular stored text field and, when IDV can be used, also as a
+ // BYTES_VAR_SORTED doc values field.
+ private void addGroupField(Document doc, String groupField, String value, boolean canUseIDV) {
+   doc.add(new Field(groupField, value, TextField.TYPE_STORED));
+   if (!canUseIDV) {
+     return;
+   }
+
+   IndexDocValuesField dvField = new IndexDocValuesField(groupField);
+   dvField.setBytes(new BytesRef(value), ValueType.BYTES_VAR_SORTED);
+   doc.add(dvField);
+ }
+
+ private AbstractAllGroupsCollector createRandomCollector(String groupField, boolean canUseIDV) throws IOException {
+ AbstractAllGroupsCollector selected;
+ if (random.nextBoolean() && canUseIDV) {
+ boolean diskResident = random.nextBoolean();
+ selected = DVAllGroupsCollector.create(groupField, ValueType.BYTES_VAR_SORTED, diskResident);
+ } else if (random.nextBoolean()) {
+ selected = new TermAllGroupsCollector(groupField);
} else {
ValueSource vs = new BytesRefFieldSource(groupField);
- return new FunctionAllGroupsCollector(vs, new HashMap());
+ selected = new FunctionAllGroupsCollector(vs, new HashMap());
}
+
+ if (VERBOSE) {
+ System.out.println("Selected implementation: " + selected.getClass().getName());
+ }
+
+ return selected;
}
}