You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2013/02/07 21:48:28 UTC
svn commit: r1443717 [11/14] - in /lucene/dev/trunk: ./ dev-tools/ lucene/
lucene/analysis/ lucene/analysis/common/
lucene/analysis/icu/src/java/org/apache/lucene/collation/
lucene/analysis/icu/src/test/org/apache/lucene/collation/ lucene/backwards/
lu...
Modified: lucene/dev/trunk/lucene/grouping/src/test/org/apache/lucene/search/grouping/AllGroupHeadsCollectorTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/grouping/src/test/org/apache/lucene/search/grouping/AllGroupHeadsCollectorTest.java?rev=1443717&r1=1443716&r2=1443717&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/grouping/src/test/org/apache/lucene/search/grouping/AllGroupHeadsCollectorTest.java (original)
+++ lucene/dev/trunk/lucene/grouping/src/test/org/apache/lucene/search/grouping/AllGroupHeadsCollectorTest.java Thu Feb 7 20:48:21 2013
@@ -23,7 +23,7 @@ import java.util.*;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.*;
import org.apache.lucene.index.DirectoryReader;
-import org.apache.lucene.index.DocValues.Type;
+import org.apache.lucene.index.FieldInfo.DocValuesType;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.SlowCompositeReaderWrapper;
@@ -31,7 +31,6 @@ import org.apache.lucene.index.Term;
import org.apache.lucene.queries.function.ValueSource;
import org.apache.lucene.queries.function.valuesource.BytesRefFieldSource;
import org.apache.lucene.search.*;
-import org.apache.lucene.search.grouping.dv.DVAllGroupHeadsCollector;
import org.apache.lucene.search.grouping.function.FunctionAllGroupHeadsCollector;
import org.apache.lucene.search.grouping.term.TermAllGroupHeadsCollector;
import org.apache.lucene.store.Directory;
@@ -42,8 +41,8 @@ import org.apache.lucene.util._TestUtil;
public class AllGroupHeadsCollectorTest extends LuceneTestCase {
- private static final Type[] vts = new Type[]{
- Type.BYTES_VAR_DEREF, Type.BYTES_VAR_STRAIGHT, Type.BYTES_VAR_SORTED
+ private static final DocValuesType[] vts = new DocValuesType[]{
+ DocValuesType.BINARY, DocValuesType.SORTED
};
public void testBasic() throws Exception {
@@ -55,7 +54,7 @@ public class AllGroupHeadsCollectorTest
newIndexWriterConfig(TEST_VERSION_CURRENT,
new MockAnalyzer(random())).setMergePolicy(newLogMergePolicy()));
boolean canUseIDV = true;
- Type valueType = vts[random().nextInt(vts.length)];
+ DocValuesType valueType = vts[random().nextInt(vts.length)];
// 0
Document doc = new Document();
@@ -203,7 +202,7 @@ public class AllGroupHeadsCollectorTest
newIndexWriterConfig(TEST_VERSION_CURRENT,
new MockAnalyzer(random())));
boolean canUseIDV = true;
- Type valueType = vts[random().nextInt(vts.length)];
+ DocValuesType valueType = vts[random().nextInt(vts.length)];
Document doc = new Document();
Document docNoGroup = new Document();
@@ -212,14 +211,11 @@ public class AllGroupHeadsCollectorTest
Field valuesField = null;
if (canUseIDV) {
switch(valueType) {
- case BYTES_VAR_DEREF:
- valuesField = new DerefBytesDocValuesField("group", new BytesRef());
+ case BINARY:
+ valuesField = new BinaryDocValuesField("group_dv", new BytesRef());
break;
- case BYTES_VAR_STRAIGHT:
- valuesField = new StraightBytesDocValuesField("group", new BytesRef());
- break;
- case BYTES_VAR_SORTED:
- valuesField = new SortedBytesDocValuesField("group", new BytesRef());
+ case SORTED:
+ valuesField = new SortedDocValuesField("group_dv", new BytesRef());
break;
default:
fail("unhandled type");
@@ -288,10 +284,10 @@ public class AllGroupHeadsCollectorTest
w.close();
// NOTE: intentional but temporary field cache insanity!
- final int[] docIdToFieldId = FieldCache.DEFAULT.getInts(new SlowCompositeReaderWrapper(r), "id", false);
+ final FieldCache.Ints docIdToFieldId = FieldCache.DEFAULT.getInts(new SlowCompositeReaderWrapper(r), "id", false);
final int[] fieldIdToDocID = new int[numDocs];
- for (int i = 0; i < docIdToFieldId.length; i++) {
- int fieldId = docIdToFieldId[i];
+ for (int i = 0; i < numDocs; i++) {
+ int fieldId = docIdToFieldId.get(i);
fieldIdToDocID[fieldId] = i;
}
@@ -306,11 +302,11 @@ public class AllGroupHeadsCollectorTest
for (int contentID = 0; contentID < 3; contentID++) {
final ScoreDoc[] hits = s.search(new TermQuery(new Term("content", "real" + contentID)), numDocs).scoreDocs;
for (ScoreDoc hit : hits) {
- final GroupDoc gd = groupDocs[docIdToFieldId[hit.doc]];
+ final GroupDoc gd = groupDocs[docIdToFieldId.get(hit.doc)];
assertTrue(gd.score == 0.0);
gd.score = hit.score;
int docId = gd.id;
- assertEquals(docId, docIdToFieldId[hit.doc]);
+ assertEquals(docId, docIdToFieldId.get(hit.doc));
}
}
@@ -333,7 +329,7 @@ public class AllGroupHeadsCollectorTest
int[] actualGroupHeads = allGroupHeadsCollector.retrieveGroupHeads();
// The actual group heads contains Lucene ids. Need to change them into our id value.
for (int i = 0; i < actualGroupHeads.length; i++) {
- actualGroupHeads[i] = docIdToFieldId[actualGroupHeads[i]];
+ actualGroupHeads[i] = docIdToFieldId.get(actualGroupHeads[i]);
}
// Allows us the easily iterate and assert the actual and expected results.
Arrays.sort(expectedGroupHeads);
@@ -518,14 +514,11 @@ public class AllGroupHeadsCollectorTest
}
@SuppressWarnings({"unchecked","rawtypes"})
- private AbstractAllGroupHeadsCollector<?> createRandomCollector(String groupField, Sort sortWithinGroup, boolean canUseIDV, Type valueType) {
+ private AbstractAllGroupHeadsCollector<?> createRandomCollector(String groupField, Sort sortWithinGroup, boolean canUseIDV, DocValuesType valueType) {
AbstractAllGroupHeadsCollector<? extends AbstractAllGroupHeadsCollector.GroupHead> collector;
if (random().nextBoolean()) {
ValueSource vs = new BytesRefFieldSource(groupField);
collector = new FunctionAllGroupHeadsCollector(vs, new HashMap<Object, Object>(), sortWithinGroup);
- } else if (canUseIDV && random().nextBoolean()) {
- boolean diskResident = random().nextBoolean();
- collector = DVAllGroupHeadsCollector.create(groupField, sortWithinGroup, valueType, diskResident);
} else {
collector = TermAllGroupHeadsCollector.create(groupField, sortWithinGroup);
}
@@ -537,19 +530,16 @@ public class AllGroupHeadsCollectorTest
return collector;
}
- private void addGroupField(Document doc, String groupField, String value, boolean canUseIDV, Type valueType) {
+ private void addGroupField(Document doc, String groupField, String value, boolean canUseIDV, DocValuesType valueType) {
doc.add(new TextField(groupField, value, Field.Store.YES));
if (canUseIDV) {
Field valuesField = null;
switch(valueType) {
- case BYTES_VAR_DEREF:
- valuesField = new DerefBytesDocValuesField(groupField, new BytesRef(value));
- break;
- case BYTES_VAR_STRAIGHT:
- valuesField = new StraightBytesDocValuesField(groupField, new BytesRef(value));
+ case BINARY:
+ valuesField = new BinaryDocValuesField(groupField + "_dv", new BytesRef(value));
break;
- case BYTES_VAR_SORTED:
- valuesField = new SortedBytesDocValuesField(groupField, new BytesRef(value));
+ case SORTED:
+ valuesField = new SortedDocValuesField(groupField + "_dv", new BytesRef(value));
break;
default:
fail("unhandled type");
Modified: lucene/dev/trunk/lucene/grouping/src/test/org/apache/lucene/search/grouping/AllGroupsCollectorTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/grouping/src/test/org/apache/lucene/search/grouping/AllGroupsCollectorTest.java?rev=1443717&r1=1443716&r2=1443717&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/grouping/src/test/org/apache/lucene/search/grouping/AllGroupsCollectorTest.java (original)
+++ lucene/dev/trunk/lucene/grouping/src/test/org/apache/lucene/search/grouping/AllGroupsCollectorTest.java Thu Feb 7 20:48:21 2013
@@ -21,13 +21,12 @@ import org.apache.lucene.analysis.MockAn
import org.apache.lucene.document.*;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Term;
-import org.apache.lucene.index.DocValues.Type;
+import org.apache.lucene.index.FieldInfo.DocValuesType;
import org.apache.lucene.queries.function.ValueSource;
import org.apache.lucene.queries.function.valuesource.BytesRefFieldSource;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.grouping.function.FunctionAllGroupsCollector;
-import org.apache.lucene.search.grouping.dv.DVAllGroupsCollector;
import org.apache.lucene.search.grouping.term.TermAllGroupsCollector;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;
@@ -122,16 +121,13 @@ public class AllGroupsCollectorTest exte
private void addGroupField(Document doc, String groupField, String value, boolean canUseIDV) {
doc.add(new TextField(groupField, value, Field.Store.YES));
if (canUseIDV) {
- doc.add(new SortedBytesDocValuesField(groupField, new BytesRef(value)));
+ doc.add(new SortedDocValuesField(groupField, new BytesRef(value)));
}
}
private AbstractAllGroupsCollector<?> createRandomCollector(String groupField, boolean canUseIDV) {
AbstractAllGroupsCollector<?> selected;
- if (random().nextBoolean() && canUseIDV) {
- boolean diskResident = random().nextBoolean();
- selected = DVAllGroupsCollector.create(groupField, Type.BYTES_VAR_SORTED, diskResident);
- } else if (random().nextBoolean()) {
+ if (random().nextBoolean()) {
selected = new TermAllGroupsCollector(groupField);
} else {
ValueSource vs = new BytesRefFieldSource(groupField);
Modified: lucene/dev/trunk/lucene/grouping/src/test/org/apache/lucene/search/grouping/DistinctValuesCollectorTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/grouping/src/test/org/apache/lucene/search/grouping/DistinctValuesCollectorTest.java?rev=1443717&r1=1443716&r2=1443717&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/grouping/src/test/org/apache/lucene/search/grouping/DistinctValuesCollectorTest.java (original)
+++ lucene/dev/trunk/lucene/grouping/src/test/org/apache/lucene/search/grouping/DistinctValuesCollectorTest.java Thu Feb 7 20:48:21 2013
@@ -23,13 +23,12 @@ import java.util.*;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.*;
import org.apache.lucene.index.*;
+import org.apache.lucene.index.FieldInfo.DocValuesType;
import org.apache.lucene.queries.function.valuesource.BytesRefFieldSource;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.TermQuery;
-import org.apache.lucene.search.grouping.dv.DVDistinctValuesCollector;
-import org.apache.lucene.search.grouping.dv.DVFirstPassGroupingCollector;
import org.apache.lucene.search.grouping.function.FunctionDistinctValuesCollector;
import org.apache.lucene.search.grouping.function.FunctionFirstPassGroupingCollector;
import org.apache.lucene.search.grouping.term.TermDistinctValuesCollector;
@@ -45,15 +44,16 @@ public class DistinctValuesCollectorTest
private final static NullComparator nullComparator = new NullComparator();
private final String groupField = "author";
+ private final String dvGroupField = "author_dv";
private final String countField = "publisher";
+ private final String dvCountField = "publisher_dv";
public void testSimple() throws Exception {
Random random = random();
- DocValues.Type[] dvTypes = new DocValues.Type[]{
- DocValues.Type.VAR_INTS,
- DocValues.Type.FLOAT_64,
- DocValues.Type.BYTES_VAR_STRAIGHT,
- DocValues.Type.BYTES_VAR_SORTED
+ DocValuesType[] dvTypes = new DocValuesType[]{
+ DocValuesType.NUMERIC,
+ DocValuesType.BINARY,
+ DocValuesType.SORTED,
};
Directory dir = newDirectory();
RandomIndexWriter w = new RandomIndexWriter(
@@ -62,7 +62,7 @@ public class DistinctValuesCollectorTest
newIndexWriterConfig(TEST_VERSION_CURRENT,
new MockAnalyzer(random)).setMergePolicy(newLogMergePolicy()));
boolean canUseDV = true;
- DocValues.Type dvType = canUseDV ? dvTypes[random.nextInt(dvTypes.length)] : null;
+ DocValuesType dvType = canUseDV ? dvTypes[random.nextInt(dvTypes.length)] : null;
Document doc = new Document();
addField(doc, groupField, "1", dvType);
@@ -232,7 +232,7 @@ public class DistinctValuesCollectorTest
for (int searchIter = 0; searchIter < 100; searchIter++) {
final IndexSearcher searcher = newSearcher(context.indexReader);
boolean useDv = context.dvType != null && random.nextBoolean();
- DocValues.Type dvType = useDv ? context.dvType : null;
+ DocValuesType dvType = useDv ? context.dvType : null;
String term = context.contentStrings[random.nextInt(context.contentStrings.length)];
Sort groupSort = new Sort(new SortField("id", SortField.Type.STRING));
int topN = 1 + random.nextInt(10);
@@ -250,7 +250,15 @@ public class DistinctValuesCollectorTest
if (VERBOSE) {
System.out.println("Index iter=" + indexIter);
System.out.println("Search iter=" + searchIter);
- System.out.println("Collector class name=" + distinctValuesCollector.getClass().getName());
+ System.out.println("1st pass collector class name=" + firstCollector.getClass().getName());
+ System.out.println("2nd pass collector class name=" + distinctValuesCollector.getClass().getName());
+ System.out.println("Search term=" + term);
+ System.out.println("DVType=" + dvType);
+ System.out.println("1st pass groups=" + firstCollector.getTopGroups(0, false));
+ System.out.println("Expected:");
+ printGroups(expectedResult);
+ System.out.println("Actual:");
+ printGroups(actualResult);
}
assertEquals(expectedResult.size(), actualResult.size());
@@ -263,7 +271,7 @@ public class DistinctValuesCollectorTest
Collections.sort(expectedUniqueValues, nullComparator);
List<Comparable<?>> actualUniqueValues = new ArrayList<Comparable<?>>(actual.uniqueValues);
Collections.sort(actualUniqueValues, nullComparator);
- for (int j = 0; j < expected.uniqueValues.size(); j++) {
+ for (int j = 0; j < expectedUniqueValues.size(); j++) {
assertValues(expectedUniqueValues.get(j), actualUniqueValues.get(j));
}
}
@@ -273,6 +281,25 @@ public class DistinctValuesCollectorTest
}
}
+ private void printGroups(List<AbstractDistinctValuesCollector.GroupCount<Comparable<?>>> results) {
+ for(int i=0;i<results.size();i++) {
+ AbstractDistinctValuesCollector.GroupCount<Comparable<?>> group = results.get(i);
+ Object gv = group.groupValue;
+ if (gv instanceof BytesRef) {
+ System.out.println(i + ": groupValue=" + ((BytesRef) gv).utf8ToString());
+ } else {
+ System.out.println(i + ": groupValue=" + gv);
+ }
+ for(Object o : group.uniqueValues) {
+ if (o instanceof BytesRef) {
+ System.out.println(" " + ((BytesRef) o).utf8ToString());
+ } else {
+ System.out.println(" " + o);
+ }
+ }
+ }
+ }
+
private void assertValues(Object expected, Object actual) {
if (expected == null) {
compareNull(actual);
@@ -316,25 +343,23 @@ public class DistinctValuesCollectorTest
}
}
- private void addField(Document doc, String field, String value, DocValues.Type type) {
- doc.add(new StringField(field, value, Field.Store.NO));
+ private void addField(Document doc, String field, String value, DocValuesType type) {
+ doc.add(new StringField(field, value, Field.Store.YES));
if (type == null) {
return;
}
+ String dvField = field + "_dv";
Field valuesField = null;
switch (type) {
- case VAR_INTS:
- valuesField = new PackedLongDocValuesField(field, Integer.parseInt(value));
- break;
- case FLOAT_64:
- valuesField = new DoubleDocValuesField(field, Double.parseDouble(value));
+ case NUMERIC:
+ valuesField = new NumericDocValuesField(dvField, Integer.parseInt(value));
break;
- case BYTES_VAR_STRAIGHT:
- valuesField = new StraightBytesDocValuesField(field, new BytesRef(value));
+ case BINARY:
+ valuesField = new BinaryDocValuesField(dvField, new BytesRef(value));
break;
- case BYTES_VAR_SORTED:
- valuesField = new SortedBytesDocValuesField(field, new BytesRef(value));
+ case SORTED:
+ valuesField = new SortedDocValuesField(dvField, new BytesRef(value));
break;
}
doc.add(valuesField);
@@ -344,13 +369,10 @@ public class DistinctValuesCollectorTest
private <T extends Comparable> AbstractDistinctValuesCollector<AbstractDistinctValuesCollector.GroupCount<T>> createDistinctCountCollector(AbstractFirstPassGroupingCollector<T> firstPassGroupingCollector,
String groupField,
String countField,
- DocValues.Type dvType) {
+ DocValuesType dvType) {
Random random = random();
Collection<SearchGroup<T>> searchGroups = firstPassGroupingCollector.getTopGroups(0, false);
- if (DVFirstPassGroupingCollector.class.isAssignableFrom(firstPassGroupingCollector.getClass())) {
- boolean diskResident = random.nextBoolean();
- return DVDistinctValuesCollector.create(groupField, countField, searchGroups, diskResident, dvType);
- } else if (FunctionFirstPassGroupingCollector.class.isAssignableFrom(firstPassGroupingCollector.getClass())) {
+ if (FunctionFirstPassGroupingCollector.class.isAssignableFrom(firstPassGroupingCollector.getClass())) {
return (AbstractDistinctValuesCollector) new FunctionDistinctValuesCollector(new HashMap<Object, Object>(), new BytesRefFieldSource(groupField), new BytesRefFieldSource(countField), (Collection) searchGroups);
} else {
return (AbstractDistinctValuesCollector) new TermDistinctValuesCollector(groupField, countField, (Collection) searchGroups);
@@ -358,13 +380,10 @@ public class DistinctValuesCollectorTest
}
@SuppressWarnings({"unchecked","rawtypes"})
- private <T> AbstractFirstPassGroupingCollector<T> createRandomFirstPassCollector(DocValues.Type dvType, Sort groupSort, String groupField, int topNGroups) throws IOException {
+ private <T> AbstractFirstPassGroupingCollector<T> createRandomFirstPassCollector(DocValuesType dvType, Sort groupSort, String groupField, int topNGroups) throws IOException {
Random random = random();
if (dvType != null) {
if (random.nextBoolean()) {
- boolean diskResident = random.nextBoolean();
- return DVFirstPassGroupingCollector.create(groupSort, topNGroups, groupField, dvType, diskResident);
- } else if (random.nextBoolean()) {
return (AbstractFirstPassGroupingCollector<T>) new FunctionFirstPassGroupingCollector(new BytesRefFieldSource(groupField), new HashMap<Object, Object>(), groupSort, topNGroups);
} else {
return (AbstractFirstPassGroupingCollector<T>) new TermFirstPassGroupingCollector(groupField, groupSort, topNGroups);
@@ -405,9 +424,9 @@ public class DistinctValuesCollectorTest
private IndexContext createIndexContext() throws Exception {
Random random = random();
- DocValues.Type[] dvTypes = new DocValues.Type[]{
- DocValues.Type.BYTES_VAR_STRAIGHT,
- DocValues.Type.BYTES_VAR_SORTED
+ DocValuesType[] dvTypes = new DocValuesType[]{
+ DocValuesType.BINARY,
+ DocValuesType.SORTED
};
Directory dir = newDirectory();
@@ -419,7 +438,7 @@ public class DistinctValuesCollectorTest
);
boolean canUseDV = true;
- DocValues.Type dvType = canUseDV ? dvTypes[random.nextInt(dvTypes.length)] : null;
+ DocValuesType dvType = canUseDV ? dvTypes[random.nextInt(dvTypes.length)] : null;
int numDocs = 86 + random.nextInt(1087) * RANDOM_MULTIPLIER;
String[] groupValues = new String[numDocs / 5];
@@ -451,18 +470,25 @@ public class DistinctValuesCollectorTest
countsVals.add(countValue);
Document doc = new Document();
- doc.add(new StringField("id", String.format(Locale.ROOT, "%09d", i), Field.Store.NO));
+ doc.add(new StringField("id", String.format(Locale.ROOT, "%09d", i), Field.Store.YES));
if (groupValue != null) {
addField(doc, groupField, groupValue, dvType);
}
if (countValue != null) {
addField(doc, countField, countValue, dvType);
}
- doc.add(new TextField("content", content, Field.Store.NO));
+ doc.add(new TextField("content", content, Field.Store.YES));
w.addDocument(doc);
}
DirectoryReader reader = w.getReader();
+ if (VERBOSE) {
+ for(int docID=0;docID<reader.maxDoc();docID++) {
+ StoredDocument doc = reader.document(docID);
+ System.out.println("docID=" + docID + " id=" + doc.get("id") + " content=" + doc.get("content") + " author=" + doc.get("author") + " publisher=" + doc.get("publisher"));
+ }
+ }
+
w.close();
return new IndexContext(dir, reader, dvType, searchTermToGroupCounts, contentStrings.toArray(new String[contentStrings.size()]));
}
@@ -471,11 +497,11 @@ public class DistinctValuesCollectorTest
final Directory directory;
final DirectoryReader indexReader;
- final DocValues.Type dvType;
+ final DocValuesType dvType;
final Map<String, Map<String, Set<String>>> searchTermToGroupCounts;
final String[] contentStrings;
- IndexContext(Directory directory, DirectoryReader indexReader, DocValues.Type dvType,
+ IndexContext(Directory directory, DirectoryReader indexReader, DocValuesType dvType,
Map<String, Map<String, Set<String>>> searchTermToGroupCounts, String[] contentStrings) {
this.directory = directory;
this.indexReader = indexReader;
Modified: lucene/dev/trunk/lucene/grouping/src/test/org/apache/lucene/search/grouping/GroupFacetCollectorTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/grouping/src/test/org/apache/lucene/search/grouping/GroupFacetCollectorTest.java?rev=1443717&r1=1443716&r2=1443717&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/grouping/src/test/org/apache/lucene/search/grouping/GroupFacetCollectorTest.java (original)
+++ lucene/dev/trunk/lucene/grouping/src/test/org/apache/lucene/search/grouping/GroupFacetCollectorTest.java Thu Feb 7 20:48:21 2013
@@ -20,14 +20,12 @@ package org.apache.lucene.search.groupin
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.*;
import org.apache.lucene.index.DirectoryReader;
-import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.NoMergePolicy;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.TermQuery;
-import org.apache.lucene.search.grouping.dv.DVGroupFacetCollector;
import org.apache.lucene.search.grouping.term.TermGroupFacetCollector;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
@@ -55,42 +53,42 @@ public class GroupFacetCollectorTest ext
// 0
Document doc = new Document();
- addField(doc, groupField, "a", canUseDV);
- addField(doc, "airport", "ams", canUseDV);
- addField(doc, "duration", "5", canUseDV);
+ addField(doc, groupField, "a", useDv);
+ addField(doc, "airport", "ams", useDv);
+ addField(doc, "duration", "5", useDv);
w.addDocument(doc);
// 1
doc = new Document();
- addField(doc, groupField, "a", canUseDV);
- addField(doc, "airport", "dus", canUseDV);
- addField(doc, "duration", "10", canUseDV);
+ addField(doc, groupField, "a", useDv);
+ addField(doc, "airport", "dus", useDv);
+ addField(doc, "duration", "10", useDv);
w.addDocument(doc);
// 2
doc = new Document();
- addField(doc, groupField, "b", canUseDV);
- addField(doc, "airport", "ams", canUseDV);
- addField(doc, "duration", "10", canUseDV);
+ addField(doc, groupField, "b", useDv);
+ addField(doc, "airport", "ams", useDv);
+ addField(doc, "duration", "10", useDv);
w.addDocument(doc);
w.commit(); // To ensure a second segment
// 3
doc = new Document();
- addField(doc, groupField, "b", canUseDV);
- addField(doc, "airport", "ams", canUseDV);
- addField(doc, "duration", "5", canUseDV);
+ addField(doc, groupField, "b", useDv);
+ addField(doc, "airport", "ams", useDv);
+ addField(doc, "duration", "5", useDv);
w.addDocument(doc);
// 4
doc = new Document();
- addField(doc, groupField, "b", canUseDV);
- addField(doc, "airport", "ams", canUseDV);
- addField(doc, "duration", "5", canUseDV);
+ addField(doc, groupField, "b", useDv);
+ addField(doc, "airport", "ams", useDv);
+ addField(doc, "duration", "5", useDv);
w.addDocument(doc);
IndexSearcher indexSearcher = new IndexSearcher(w.getReader());
- AbstractGroupFacetCollector groupedAirportFacetCollector = createRandomCollector(groupField, "airport", null, false, useDv);
+ AbstractGroupFacetCollector groupedAirportFacetCollector = createRandomCollector(useDv ? "hotel_dv" : "hotel", useDv ? "airport_dv" : "airport", null, false);
indexSearcher.search(new MatchAllDocsQuery(), groupedAirportFacetCollector);
TermGroupFacetCollector.GroupedFacetResult airportResult = groupedAirportFacetCollector.mergeSegmentResults(10, 0, false);
assertEquals(3, airportResult.getTotalCount());
@@ -104,7 +102,7 @@ public class GroupFacetCollectorTest ext
assertEquals(1, entries.get(1).getCount());
- AbstractGroupFacetCollector groupedDurationFacetCollector = createRandomCollector(groupField, "duration", null, false, useDv);
+ AbstractGroupFacetCollector groupedDurationFacetCollector = createRandomCollector(useDv ? "hotel_dv" : "hotel", useDv ? "duration_dv" : "duration", null, false);
indexSearcher.search(new MatchAllDocsQuery(), groupedDurationFacetCollector);
TermGroupFacetCollector.GroupedFacetResult durationResult = groupedDurationFacetCollector.mergeSegmentResults(10, 0, false);
assertEquals(4, durationResult.getTotalCount());
@@ -119,47 +117,59 @@ public class GroupFacetCollectorTest ext
// 5
doc = new Document();
- addField(doc, groupField, "b", canUseDV);
- addField(doc, "duration", "5", canUseDV);
+ addField(doc, groupField, "b", useDv);
+ // missing airport
+ if (useDv) {
+ addField(doc, "airport", "", useDv);
+ }
+ addField(doc, "duration", "5", useDv);
w.addDocument(doc);
// 6
doc = new Document();
- addField(doc, groupField, "b", canUseDV);
- addField(doc, "airport", "bru", canUseDV);
- addField(doc, "duration", "10", canUseDV);
+ addField(doc, groupField, "b", useDv);
+ addField(doc, "airport", "bru", useDv);
+ addField(doc, "duration", "10", useDv);
w.addDocument(doc);
// 7
doc = new Document();
- addField(doc, groupField, "b", canUseDV);
- addField(doc, "airport", "bru", canUseDV);
- addField(doc, "duration", "15", canUseDV);
+ addField(doc, groupField, "b", useDv);
+ addField(doc, "airport", "bru", useDv);
+ addField(doc, "duration", "15", useDv);
w.addDocument(doc);
// 8
doc = new Document();
- addField(doc, groupField, "a", canUseDV);
- addField(doc, "airport", "bru", canUseDV);
- addField(doc, "duration", "10", canUseDV);
+ addField(doc, groupField, "a", useDv);
+ addField(doc, "airport", "bru", useDv);
+ addField(doc, "duration", "10", useDv);
w.addDocument(doc);
indexSearcher.getIndexReader().close();
indexSearcher = new IndexSearcher(w.getReader());
- groupedAirportFacetCollector = createRandomCollector(groupField, "airport", null, true, useDv);
+ groupedAirportFacetCollector = createRandomCollector(useDv ? "hotel_dv" : "hotel", useDv ? "airport_dv" : "airport", null, !useDv);
indexSearcher.search(new MatchAllDocsQuery(), groupedAirportFacetCollector);
airportResult = groupedAirportFacetCollector.mergeSegmentResults(3, 0, true);
- assertEquals(5, airportResult.getTotalCount());
- assertEquals(1, airportResult.getTotalMissingCount());
-
entries = airportResult.getFacetEntries(1, 2);
assertEquals(2, entries.size());
- assertEquals("bru", entries.get(0).getValue().utf8ToString());
- assertEquals(2, entries.get(0).getCount());
- assertEquals("dus", entries.get(1).getValue().utf8ToString());
- assertEquals(1, entries.get(1).getCount());
+ if (useDv) {
+ assertEquals(6, airportResult.getTotalCount());
+ assertEquals(0, airportResult.getTotalMissingCount());
+ assertEquals("bru", entries.get(0).getValue().utf8ToString());
+ assertEquals(2, entries.get(0).getCount());
+ assertEquals("", entries.get(1).getValue().utf8ToString());
+ assertEquals(1, entries.get(1).getCount());
+ } else {
+ assertEquals(5, airportResult.getTotalCount());
+ assertEquals(1, airportResult.getTotalMissingCount());
+ assertEquals("bru", entries.get(0).getValue().utf8ToString());
+ assertEquals(2, entries.get(0).getCount());
+ assertEquals("dus", entries.get(1).getValue().utf8ToString());
+ assertEquals(1, entries.get(1).getCount());
+ }
- groupedDurationFacetCollector = createRandomCollector(groupField, "duration", null, false, useDv);
+ groupedDurationFacetCollector = createRandomCollector(useDv ? "hotel_dv" : "hotel", useDv ? "duration_dv" : "duration", null, false);
indexSearcher.search(new MatchAllDocsQuery(), groupedDurationFacetCollector);
durationResult = groupedDurationFacetCollector.mergeSegmentResults(10, 2, true);
assertEquals(5, durationResult.getTotalCount());
@@ -172,36 +182,49 @@ public class GroupFacetCollectorTest ext
// 9
doc = new Document();
- addField(doc, groupField, "c", canUseDV);
- addField(doc, "airport", "bru", canUseDV);
- addField(doc, "duration", "15", canUseDV);
+ addField(doc, groupField, "c", useDv);
+ addField(doc, "airport", "bru", useDv);
+ addField(doc, "duration", "15", useDv);
w.addDocument(doc);
// 10
doc = new Document();
- addField(doc, groupField, "c", canUseDV);
- addField(doc, "airport", "dus", canUseDV);
- addField(doc, "duration", "10", canUseDV);
+ addField(doc, groupField, "c", useDv);
+ addField(doc, "airport", "dus", useDv);
+ addField(doc, "duration", "10", useDv);
w.addDocument(doc);
indexSearcher.getIndexReader().close();
indexSearcher = new IndexSearcher(w.getReader());
- groupedAirportFacetCollector = createRandomCollector(groupField, "airport", null, false, useDv);
+ groupedAirportFacetCollector = createRandomCollector(useDv ? "hotel_dv" : "hotel", useDv ? "airport_dv" : "airport", null, false);
indexSearcher.search(new MatchAllDocsQuery(), groupedAirportFacetCollector);
airportResult = groupedAirportFacetCollector.mergeSegmentResults(10, 0, false);
- assertEquals(7, airportResult.getTotalCount());
- assertEquals(1, airportResult.getTotalMissingCount());
-
entries = airportResult.getFacetEntries(0, 10);
- assertEquals(3, entries.size());
- assertEquals("ams", entries.get(0).getValue().utf8ToString());
- assertEquals(2, entries.get(0).getCount());
- assertEquals("bru", entries.get(1).getValue().utf8ToString());
- assertEquals(3, entries.get(1).getCount());
- assertEquals("dus", entries.get(2).getValue().utf8ToString());
- assertEquals(2, entries.get(2).getCount());
+ if (useDv) {
+ assertEquals(8, airportResult.getTotalCount());
+ assertEquals(0, airportResult.getTotalMissingCount());
+ assertEquals(4, entries.size());
+ assertEquals("", entries.get(0).getValue().utf8ToString());
+ assertEquals(1, entries.get(0).getCount());
+ assertEquals("ams", entries.get(1).getValue().utf8ToString());
+ assertEquals(2, entries.get(1).getCount());
+ assertEquals("bru", entries.get(2).getValue().utf8ToString());
+ assertEquals(3, entries.get(2).getCount());
+ assertEquals("dus", entries.get(3).getValue().utf8ToString());
+ assertEquals(2, entries.get(3).getCount());
+ } else {
+ assertEquals(7, airportResult.getTotalCount());
+ assertEquals(1, airportResult.getTotalMissingCount());
+ assertEquals(3, entries.size());
+ assertEquals("ams", entries.get(0).getValue().utf8ToString());
+ assertEquals(2, entries.get(0).getCount());
+ assertEquals("bru", entries.get(1).getValue().utf8ToString());
+ assertEquals(3, entries.get(1).getCount());
+ assertEquals("dus", entries.get(2).getValue().utf8ToString());
+ assertEquals(2, entries.get(2).getCount());
+ }
- groupedDurationFacetCollector = createRandomCollector(groupField, "duration", "1", false, useDv);
+ groupedDurationFacetCollector = createRandomCollector(useDv ? "hotel_dv" : "hotel", useDv ? "duration_dv" : "duration", "1", false);
indexSearcher.search(new MatchAllDocsQuery(), groupedDurationFacetCollector);
durationResult = groupedDurationFacetCollector.mergeSegmentResults(10, 0, true);
assertEquals(5, durationResult.getTotalCount());
@@ -237,13 +260,13 @@ public class GroupFacetCollectorTest ext
// 0
Document doc = new Document();
- addField(doc, "x", "x", useDv);
+ doc.add(new StringField("x", "x", Field.Store.NO));
w.addDocument(doc);
// 1
doc = new Document();
addField(doc, groupField, "a", useDv);
- addField(doc, "airport", "ams", useDv);
+ doc.add(new StringField("airport", "ams", Field.Store.NO));
w.addDocument(doc);
w.commit();
@@ -252,43 +275,44 @@ public class GroupFacetCollectorTest ext
// 2
doc = new Document();
addField(doc, groupField, "a", useDv);
- addField(doc, "airport", "ams", useDv);
+ doc.add(new StringField("airport", "ams", Field.Store.NO));
w.addDocument(doc);
// 3
doc = new Document();
addField(doc, groupField, "a", useDv);
- addField(doc, "airport", "dus", useDv);
+ doc.add(new StringField("airport", "dus", Field.Store.NO));
+
w.addDocument(doc);
// 4
doc = new Document();
addField(doc, groupField, "b", useDv);
- addField(doc, "airport", "ams", useDv);
+ doc.add(new StringField("airport", "ams", Field.Store.NO));
w.addDocument(doc);
// 5
doc = new Document();
addField(doc, groupField, "b", useDv);
- addField(doc, "airport", "ams", useDv);
+ doc.add(new StringField("airport", "ams", Field.Store.NO));
w.addDocument(doc);
// 6
doc = new Document();
addField(doc, groupField, "b", useDv);
- addField(doc, "airport", "ams", useDv);
+ doc.add(new StringField("airport", "ams", Field.Store.NO));
w.addDocument(doc);
w.commit();
// 7
doc = new Document();
- addField(doc, "x", "x", useDv);
+ doc.add(new StringField("x", "x", Field.Store.NO));
w.addDocument(doc);
w.commit();
w.close();
IndexSearcher indexSearcher = new IndexSearcher(DirectoryReader.open(dir));
- AbstractGroupFacetCollector groupedAirportFacetCollector = createRandomCollector(groupField, "airport", null, true, useDv);
+ AbstractGroupFacetCollector groupedAirportFacetCollector = createRandomCollector(groupField, "airport", null, true);
indexSearcher.search(new MatchAllDocsQuery(), groupedAirportFacetCollector);
TermGroupFacetCollector.GroupedFacetResult airportResult = groupedAirportFacetCollector.mergeSegmentResults(10, 0, false);
assertEquals(3, airportResult.getTotalCount());
@@ -308,7 +332,7 @@ public class GroupFacetCollectorTest ext
private void addField(Document doc, String field, String value, boolean canUseIDV) {
doc.add(new StringField(field, value, Field.Store.NO));
if (canUseIDV) {
- doc.add(new SortedBytesDocValuesField(field, new BytesRef(value)));
+ doc.add(new SortedDocValuesField(field + "_dv", new BytesRef(value)));
}
}
@@ -320,8 +344,15 @@ public class GroupFacetCollectorTest ext
IndexContext context = createIndexContext(multipleFacetsPerDocument);
final IndexSearcher searcher = newSearcher(context.indexReader);
+ if (VERBOSE) {
+ System.out.println("TEST: searcher=" + searcher);
+ }
+
for (int searchIter = 0; searchIter < 100; searchIter++) {
- boolean useDv = context.useDV && random.nextBoolean();
+ if (VERBOSE) {
+ System.out.println("TEST: searchIter=" + searchIter);
+ }
+ boolean useDv = !multipleFacetsPerDocument && context.useDV && random.nextBoolean();
String searchTerm = context.contentStrings[random.nextInt(context.contentStrings.length)];
int limit = random.nextInt(context.facetValues.size());
int offset = random.nextInt(context.facetValues.size() - limit);
@@ -344,7 +375,7 @@ public class GroupFacetCollectorTest ext
}
GroupedFacetResult expectedFacetResult = createExpectedFacetResult(searchTerm, context, offset, limit, minCount, orderByCount, facetPrefix);
- AbstractGroupFacetCollector groupFacetCollector = createRandomCollector("group", "facet", facetPrefix, multipleFacetsPerDocument, useDv);
+ AbstractGroupFacetCollector groupFacetCollector = createRandomCollector(useDv ? "group_dv" : "group", useDv ? "facet_dv" : "facet", facetPrefix, multipleFacetsPerDocument);
searcher.search(new TermQuery(new Term("content", searchTerm)), groupFacetCollector);
TermGroupFacetCollector.GroupedFacetResult actualFacetResult = groupFacetCollector.mergeSegmentResults(size, minCount, orderByCount);
@@ -352,6 +383,7 @@ public class GroupFacetCollectorTest ext
List<TermGroupFacetCollector.FacetEntry> actualFacetEntries = actualFacetResult.getFacetEntries(offset, limit);
if (VERBOSE) {
+ System.out.println("Use DV: " + useDv);
System.out.println("Collector: " + groupFacetCollector.getClass().getSimpleName());
System.out.println("Num group: " + context.numGroups);
System.out.println("Num doc: " + context.numDocs);
@@ -369,7 +401,7 @@ public class GroupFacetCollectorTest ext
System.out.println("\n=== Expected: \n");
System.out.println("Total count " + expectedFacetResult.getTotalCount());
System.out.println("Total missing count " + expectedFacetResult.getTotalMissingCount());
- int counter = 1;
+ int counter = 0;
for (TermGroupFacetCollector.FacetEntry expectedFacetEntry : expectedFacetEntries) {
System.out.println(
String.format(Locale.ROOT,
@@ -382,7 +414,7 @@ public class GroupFacetCollectorTest ext
System.out.println("\n=== Actual: \n");
System.out.println("Total count " + actualFacetResult.getTotalCount());
System.out.println("Total missing count " + actualFacetResult.getTotalMissingCount());
- counter = 1;
+ counter = 0;
for (TermGroupFacetCollector.FacetEntry actualFacetEntry : actualFacetEntries) {
System.out.println(
String.format(Locale.ROOT,
@@ -393,15 +425,15 @@ public class GroupFacetCollectorTest ext
}
System.out.println("\n===================================================================================");
}
-
+
assertEquals(expectedFacetResult.getTotalCount(), actualFacetResult.getTotalCount());
assertEquals(expectedFacetResult.getTotalMissingCount(), actualFacetResult.getTotalMissingCount());
assertEquals(expectedFacetEntries.size(), actualFacetEntries.size());
for (int i = 0; i < expectedFacetEntries.size(); i++) {
TermGroupFacetCollector.FacetEntry expectedFacetEntry = expectedFacetEntries.get(i);
TermGroupFacetCollector.FacetEntry actualFacetEntry = actualFacetEntries.get(i);
- assertEquals(expectedFacetEntry.getValue().utf8ToString() + " != " + actualFacetEntry.getValue().utf8ToString(), expectedFacetEntry.getValue(), actualFacetEntry.getValue());
- assertEquals(expectedFacetEntry.getCount() + " != " + actualFacetEntry.getCount(), expectedFacetEntry.getCount(), actualFacetEntry.getCount());
+ assertEquals("i=" + i + ": " + expectedFacetEntry.getValue().utf8ToString() + " != " + actualFacetEntry.getValue().utf8ToString(), expectedFacetEntry.getValue(), actualFacetEntry.getValue());
+ assertEquals("i=" + i + ": " + expectedFacetEntry.getCount() + " != " + actualFacetEntry.getCount(), expectedFacetEntry.getCount(), actualFacetEntry.getCount());
}
}
@@ -449,14 +481,14 @@ public class GroupFacetCollectorTest ext
)
);
boolean canUseDV = true;
- boolean useDv = canUseDV && random.nextBoolean();
+ boolean useDv = canUseDV && !multipleFacetValuesPerDocument && random.nextBoolean();
Document doc = new Document();
Document docNoGroup = new Document();
Document docNoFacet = new Document();
Document docNoGroupNoFacet = new Document();
Field group = newStringField("group", "", Field.Store.NO);
- Field groupDc = new SortedBytesDocValuesField("group", new BytesRef());
+ Field groupDc = new SortedDocValuesField("group_dv", new BytesRef());
if (useDv) {
doc.add(groupDc);
docNoFacet.add(groupDc);
@@ -465,11 +497,12 @@ public class GroupFacetCollectorTest ext
docNoFacet.add(group);
Field[] facetFields;
if (useDv) {
+ assert !multipleFacetValuesPerDocument;
facetFields = new Field[2];
facetFields[0] = newStringField("facet", "", Field.Store.NO);
doc.add(facetFields[0]);
docNoGroup.add(facetFields[0]);
- facetFields[1] = new SortedBytesDocValuesField("facet", new BytesRef());
+ facetFields[1] = new SortedDocValuesField("facet_dv", new BytesRef());
doc.add(facetFields[1]);
docNoGroup.add(facetFields[1]);
} else {
@@ -509,7 +542,11 @@ public class GroupFacetCollectorTest ext
if (random.nextInt(24) == 17) {
// So we test the "doc doesn't have the group'd
// field" case:
- groupValue = null;
+ if (useDv) {
+ groupValue = "";
+ } else {
+ groupValue = null;
+ }
} else {
groupValue = groups.get(random.nextInt(groups.size()));
}
@@ -521,7 +558,7 @@ public class GroupFacetCollectorTest ext
Map<String, Set<String>> facetToGroups = searchTermToFacetToGroups.get(contentStr);
List<String> facetVals = new ArrayList<String>();
- if (random.nextInt(24) != 18) {
+ if (useDv || random.nextInt(24) != 18) {
if (useDv) {
String facetValue = facetValues.get(random.nextInt(facetValues.size()));
uniqueFacetValues.add(facetValue);
@@ -573,6 +610,9 @@ public class GroupFacetCollectorTest ext
groupDc.setBytesValue(new BytesRef(groupValue));
}
group.setStringValue(groupValue);
+ } else if (useDv) {
+ // DV cannot have missing values:
+ groupDc.setBytesValue(new BytesRef());
}
content.setStringValue(contentStr);
if (groupValue == null && facetVals.isEmpty()) {
@@ -662,14 +702,11 @@ public class GroupFacetCollectorTest ext
return new GroupedFacetResult(totalCount, totalMissCount, entriesResult);
}
- private AbstractGroupFacetCollector createRandomCollector(String groupField, String facetField, String facetPrefix, boolean multipleFacetsPerDocument, boolean useDv) {
+ private AbstractGroupFacetCollector createRandomCollector(String groupField, String facetField, String facetPrefix, boolean multipleFacetsPerDocument) {
BytesRef facetPrefixBR = facetPrefix == null ? null : new BytesRef(facetPrefix);
- if (useDv) {
- return DVGroupFacetCollector.createDvGroupFacetCollector(groupField, DocValues.Type.BYTES_VAR_SORTED,
- random().nextBoolean(), facetField, DocValues.Type.BYTES_VAR_SORTED, random().nextBoolean(), facetPrefixBR, random().nextInt(1024));
- } else {
- return TermGroupFacetCollector.createTermGroupFacetCollector(groupField, facetField, multipleFacetsPerDocument, facetPrefixBR, random().nextInt(1024));
- }
+ // DocValues cannot be multi-valued:
+ assert !multipleFacetsPerDocument || !groupField.endsWith("_dv");
+ return TermGroupFacetCollector.createTermGroupFacetCollector(groupField, facetField, multipleFacetsPerDocument, facetPrefixBR, random().nextInt(1024));
}
private String getFromSet(Set<String> set, int index) {
Modified: lucene/dev/trunk/lucene/grouping/src/test/org/apache/lucene/search/grouping/GroupingSearchTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/grouping/src/test/org/apache/lucene/search/grouping/GroupingSearchTest.java?rev=1443717&r1=1443716&r2=1443717&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/grouping/src/test/org/apache/lucene/search/grouping/GroupingSearchTest.java (original)
+++ lucene/dev/trunk/lucene/grouping/src/test/org/apache/lucene/search/grouping/GroupingSearchTest.java Thu Feb 7 20:48:21 2013
@@ -21,10 +21,9 @@ import org.apache.lucene.analysis.MockAn
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
-import org.apache.lucene.document.SortedBytesDocValuesField;
+import org.apache.lucene.document.SortedDocValuesField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
-import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.queries.function.ValueSource;
@@ -177,7 +176,7 @@ public class GroupingSearchTest extends
private void addGroupField(Document doc, String groupField, String value, boolean canUseIDV) {
doc.add(new TextField(groupField, value, Field.Store.YES));
if (canUseIDV) {
- doc.add(new SortedBytesDocValuesField(groupField, new BytesRef(value)));
+ doc.add(new SortedDocValuesField(groupField, new BytesRef(value)));
}
}
@@ -210,12 +209,7 @@ public class GroupingSearchTest extends
ValueSource vs = new BytesRefFieldSource(groupField);
groupingSearch = new GroupingSearch(vs, new HashMap<Object, Object>());
} else {
- if (canUseIDV && random().nextBoolean()) {
- boolean diskResident = random().nextBoolean();
- groupingSearch = new GroupingSearch(groupField, DocValues.Type.BYTES_VAR_SORTED, diskResident);
- } else {
- groupingSearch = new GroupingSearch(groupField);
- }
+ groupingSearch = new GroupingSearch(groupField);
}
groupingSearch.setGroupSort(groupSort);
Modified: lucene/dev/trunk/lucene/grouping/src/test/org/apache/lucene/search/grouping/TestGrouping.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/grouping/src/test/org/apache/lucene/search/grouping/TestGrouping.java?rev=1443717&r1=1443716&r2=1443717&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/grouping/src/test/org/apache/lucene/search/grouping/TestGrouping.java (original)
+++ lucene/dev/trunk/lucene/grouping/src/test/org/apache/lucene/search/grouping/TestGrouping.java Thu Feb 7 20:48:21 2013
@@ -19,9 +19,7 @@ package org.apache.lucene.search.groupin
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.*;
-import org.apache.lucene.index.AtomicReader;
import org.apache.lucene.index.AtomicReaderContext;
-import org.apache.lucene.index.CompositeReaderContext;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.ReaderUtil;
import org.apache.lucene.index.FieldInfo.IndexOptions;
@@ -29,13 +27,9 @@ import org.apache.lucene.index.RandomInd
import org.apache.lucene.index.IndexReaderContext;
import org.apache.lucene.index.SlowCompositeReaderWrapper;
import org.apache.lucene.index.Term;
-import org.apache.lucene.index.DocValues.Type;
import org.apache.lucene.queries.function.ValueSource;
import org.apache.lucene.queries.function.valuesource.BytesRefFieldSource;
import org.apache.lucene.search.*;
-import org.apache.lucene.search.grouping.dv.DVAllGroupsCollector;
-import org.apache.lucene.search.grouping.dv.DVFirstPassGroupingCollector;
-import org.apache.lucene.search.grouping.dv.DVSecondPassGroupingCollector;
import org.apache.lucene.search.grouping.function.FunctionAllGroupsCollector;
import org.apache.lucene.search.grouping.function.FunctionFirstPassGroupingCollector;
import org.apache.lucene.search.grouping.function.FunctionSecondPassGroupingCollector;
@@ -62,7 +56,7 @@ public class TestGrouping extends Lucene
public void testBasic() throws Exception {
- final String groupField = "author";
+ String groupField = "author";
FieldType customType = new FieldType();
customType.setStored(true);
@@ -126,7 +120,12 @@ public class TestGrouping extends Lucene
w.close();
final Sort groupSort = Sort.RELEVANCE;
- final AbstractFirstPassGroupingCollector<?> c1 = createRandomFirstPassCollector(groupField, groupSort, 10, canUseIDV);
+
+ if (canUseIDV && random().nextBoolean()) {
+ groupField += "_dv";
+ }
+
+ final AbstractFirstPassGroupingCollector<?> c1 = createRandomFirstPassCollector(groupField, groupSort, 10);
indexSearcher.search(new TermQuery(new Term("content", "random")), c1);
final AbstractSecondPassGroupingCollector<?> c2 = createSecondPassCollector(c1, groupField, groupSort, null, 0, 5, true, true, true);
@@ -176,16 +175,13 @@ public class TestGrouping extends Lucene
private void addGroupField(Document doc, String groupField, String value, boolean canUseIDV) {
doc.add(new TextField(groupField, value, Field.Store.YES));
if (canUseIDV) {
- doc.add(new SortedBytesDocValuesField(groupField, new BytesRef(value)));
+ doc.add(new SortedDocValuesField(groupField + "_dv", new BytesRef(value)));
}
}
- private AbstractFirstPassGroupingCollector<?> createRandomFirstPassCollector(String groupField, Sort groupSort, int topDocs, boolean canUseIDV) throws IOException {
+ private AbstractFirstPassGroupingCollector<?> createRandomFirstPassCollector(String groupField, Sort groupSort, int topDocs) throws IOException {
AbstractFirstPassGroupingCollector<?> selected;
- if (canUseIDV && random().nextBoolean()) {
- boolean diskResident = random().nextBoolean();
- selected = DVFirstPassGroupingCollector.create(groupSort, topDocs, groupField, Type.BYTES_VAR_SORTED, diskResident);
- } else if (random().nextBoolean()) {
+ if (random().nextBoolean()) {
ValueSource vs = new BytesRefFieldSource(groupField);
selected = new FunctionFirstPassGroupingCollector(vs, new HashMap<Object, Object>(), groupSort, topDocs);
} else {
@@ -198,10 +194,7 @@ public class TestGrouping extends Lucene
}
private AbstractFirstPassGroupingCollector<?> createFirstPassCollector(String groupField, Sort groupSort, int topDocs, AbstractFirstPassGroupingCollector<?> firstPassGroupingCollector) throws IOException {
- if (DVFirstPassGroupingCollector.class.isAssignableFrom(firstPassGroupingCollector.getClass())) {
- boolean diskResident = random().nextBoolean();
- return DVFirstPassGroupingCollector.create(groupSort, topDocs, groupField, Type.BYTES_VAR_SORTED, diskResident);
- } else if (TermFirstPassGroupingCollector.class.isAssignableFrom(firstPassGroupingCollector.getClass())) {
+ if (TermFirstPassGroupingCollector.class.isAssignableFrom(firstPassGroupingCollector.getClass())) {
ValueSource vs = new BytesRefFieldSource(groupField);
return new FunctionFirstPassGroupingCollector(vs, new HashMap<Object, Object>(), groupSort, topDocs);
} else {
@@ -220,11 +213,7 @@ public class TestGrouping extends Lucene
boolean getMaxScores,
boolean fillSortFields) throws IOException {
- if (DVFirstPassGroupingCollector.class.isAssignableFrom(firstPassGroupingCollector.getClass())) {
- boolean diskResident = random().nextBoolean();
- Collection<SearchGroup<T>> searchGroups = firstPassGroupingCollector.getTopGroups(groupOffset, fillSortFields);
- return DVSecondPassGroupingCollector.create(groupField, diskResident, Type.BYTES_VAR_SORTED, searchGroups, groupSort, sortWithinGroup, maxDocsPerGroup, getScores, getMaxScores, fillSortFields);
- } else if (TermFirstPassGroupingCollector.class.isAssignableFrom(firstPassGroupingCollector.getClass())) {
+ if (TermFirstPassGroupingCollector.class.isAssignableFrom(firstPassGroupingCollector.getClass())) {
Collection<SearchGroup<BytesRef>> searchGroups = firstPassGroupingCollector.getTopGroups(groupOffset, fillSortFields);
return (AbstractSecondPassGroupingCollector) new TermSecondPassGroupingCollector(groupField, searchGroups, groupSort, sortWithinGroup, maxDocsPerGroup , getScores, getMaxScores, fillSortFields);
} else {
@@ -245,10 +234,7 @@ public class TestGrouping extends Lucene
boolean getScores,
boolean getMaxScores,
boolean fillSortFields) throws IOException {
- if (DVFirstPassGroupingCollector.class.isAssignableFrom(firstPassGroupingCollector.getClass())) {
- boolean diskResident = random().nextBoolean();
- return DVSecondPassGroupingCollector.create(groupField, diskResident, Type.BYTES_VAR_SORTED, (Collection) searchGroups, groupSort, sortWithinGroup, maxDocsPerGroup, getScores, getMaxScores, fillSortFields);
- } else if (firstPassGroupingCollector.getClass().isAssignableFrom(TermFirstPassGroupingCollector.class)) {
+ if (firstPassGroupingCollector.getClass().isAssignableFrom(TermFirstPassGroupingCollector.class)) {
return new TermSecondPassGroupingCollector(groupField, searchGroups, groupSort, sortWithinGroup, maxDocsPerGroup , getScores, getMaxScores, fillSortFields);
} else {
ValueSource vs = new BytesRefFieldSource(groupField);
@@ -275,9 +261,6 @@ public class TestGrouping extends Lucene
String groupField) {
if (firstPassGroupingCollector.getClass().isAssignableFrom(TermFirstPassGroupingCollector.class)) {
return new TermAllGroupsCollector(groupField);
- } else if (firstPassGroupingCollector.getClass().isAssignableFrom(DVFirstPassGroupingCollector.class)) {
- boolean diskResident = random().nextBoolean();
- return DVAllGroupsCollector.create(groupField, Type.BYTES_VAR_SORTED, diskResident);
} else {
ValueSource vs = new BytesRefFieldSource(groupField);
return new FunctionAllGroupsCollector(vs, new HashMap<Object, Object>());
@@ -324,10 +307,6 @@ public class TestGrouping extends Lucene
groups.add(sg);
}
return groups;
- } else if (DVFirstPassGroupingCollector.class.isAssignableFrom(c.getClass())) {
- @SuppressWarnings("unchecked")
- Collection<SearchGroup<BytesRef>> topGroups = ((DVFirstPassGroupingCollector<BytesRef>) c).getTopGroups(groupOffset, fillFields);
- return topGroups;
}
fail();
return null;
@@ -345,8 +324,6 @@ public class TestGrouping extends Lucene
groups.add(new GroupDocs<BytesRef>(Float.NaN, mvalGd.maxScore, mvalGd.totalHits, mvalGd.scoreDocs, groupValue, mvalGd.groupSortValues));
}
return new TopGroups<BytesRef>(mvalTopGroups.groupSort, mvalTopGroups.withinGroupSort, mvalTopGroups.totalHitCount, mvalTopGroups.totalGroupedHitCount, groups.toArray(new GroupDocs[groups.size()]), Float.NaN);
- } else if (DVSecondPassGroupingCollector.class.isAssignableFrom(c.getClass())) {
- return ((DVSecondPassGroupingCollector<BytesRef>) c).getTopGroups(withinGroupOffset);
}
fail();
return null;
@@ -665,8 +642,10 @@ public class TestGrouping extends Lucene
String randomValue;
do {
// B/c of DV based impl we can't see the difference between an empty string and a null value.
- // For that reason we don't generate empty string groups.
+ // For that reason we don't generate empty string
+ // groups.
randomValue = _TestUtil.randomRealisticUnicodeString(random());
+ //randomValue = _TestUtil.randomSimpleString(random());
} while ("".equals(randomValue));
groups.add(new BytesRef(randomValue));
@@ -698,9 +677,10 @@ public class TestGrouping extends Lucene
Document doc = new Document();
Document docNoGroup = new Document();
- Field idvGroupField = new SortedBytesDocValuesField("group", new BytesRef());
+ Field idvGroupField = new SortedDocValuesField("group_dv", new BytesRef());
if (canUseIDV) {
doc.add(idvGroupField);
+ docNoGroup.add(idvGroupField);
}
Field group = newStringField("group", "", Field.Store.NO);
@@ -742,6 +722,11 @@ public class TestGrouping extends Lucene
if (canUseIDV) {
idvGroupField.setBytesValue(BytesRef.deepCopyOf(groupDoc.group));
}
+ } else if (canUseIDV) {
+ // Must explicitly set empty string, else eg if
+ // the segment has all docs missing the field then
+ // we get null back instead of empty BytesRef:
+ idvGroupField.setBytesValue(new BytesRef());
}
sort1.setStringValue(groupDoc.sort1.utf8ToString());
sort2.setStringValue(groupDoc.sort2.utf8ToString());
@@ -761,12 +746,16 @@ public class TestGrouping extends Lucene
w.close();
// NOTE: intentional but temporary field cache insanity!
- final int[] docIDToID = FieldCache.DEFAULT.getInts(new SlowCompositeReaderWrapper(r), "id", false);
+ final FieldCache.Ints docIDToID = FieldCache.DEFAULT.getInts(new SlowCompositeReaderWrapper(r), "id", false);
DirectoryReader rBlocks = null;
Directory dirBlocks = null;
try {
final IndexSearcher s = newSearcher(r);
+ if (VERBOSE) {
+ System.out.println("\nTEST: searcher=" + s);
+ }
+
if (SlowCompositeReaderWrapper.class.isAssignableFrom(s.getIndexReader().getClass())) {
canUseIDV = false;
} else {
@@ -777,11 +766,10 @@ public class TestGrouping extends Lucene
for(int contentID=0;contentID<3;contentID++) {
final ScoreDoc[] hits = s.search(new TermQuery(new Term("content", "real"+contentID)), numDocs).scoreDocs;
for(ScoreDoc hit : hits) {
- final GroupDoc gd = groupDocs[docIDToID[hit.doc]];
+ final GroupDoc gd = groupDocs[docIDToID.get(hit.doc)];
assertTrue(gd.score == 0.0);
gd.score = hit.score;
- assertEquals(gd.id, docIDToID[hit.doc]);
- //System.out.println(" score=" + hit.score + " id=" + docIDToID[hit.doc]);
+ assertEquals(gd.id, docIDToID.get(hit.doc));
}
}
@@ -794,7 +782,7 @@ public class TestGrouping extends Lucene
dirBlocks = newDirectory();
rBlocks = getDocBlockReader(dirBlocks, groupDocs);
final Filter lastDocInBlock = new CachingWrapperFilter(new QueryWrapperFilter(new TermQuery(new Term("groupend", "x"))));
- final int[] docIDToIDBlocks = FieldCache.DEFAULT.getInts(new SlowCompositeReaderWrapper(rBlocks), "id", false);
+ final FieldCache.Ints docIDToIDBlocks = FieldCache.DEFAULT.getInts(new SlowCompositeReaderWrapper(rBlocks), "id", false);
final IndexSearcher sBlocks = newSearcher(rBlocks);
final ShardState shardsBlocks = new ShardState(sBlocks);
@@ -815,11 +803,11 @@ public class TestGrouping extends Lucene
//" dfnew=" + sBlocks.docFreq(new Term("content", "real"+contentID)));
final ScoreDoc[] hits = sBlocks.search(new TermQuery(new Term("content", "real"+contentID)), numDocs).scoreDocs;
for(ScoreDoc hit : hits) {
- final GroupDoc gd = groupDocsByID[docIDToIDBlocks[hit.doc]];
+ final GroupDoc gd = groupDocsByID[docIDToIDBlocks.get(hit.doc)];
assertTrue(gd.score2 == 0.0);
gd.score2 = hit.score;
- assertEquals(gd.id, docIDToIDBlocks[hit.doc]);
- //System.out.println(" score=" + gd.score + " score2=" + hit.score + " id=" + docIDToIDBlocks[hit.doc]);
+ assertEquals(gd.id, docIDToIDBlocks.get(hit.doc));
+ //System.out.println(" score=" + gd.score + " score2=" + hit.score + " id=" + docIDToIDBlocks.get(hit.doc));
termScoreMap.put(gd.score, gd.score2);
}
}
@@ -867,13 +855,20 @@ public class TestGrouping extends Lucene
System.out.println("TEST: groupSort=" + groupSort + " docSort=" + docSort + " searchTerm=" + searchTerm + " dF=" + r.docFreq(new Term("content", searchTerm)) +" dFBlock=" + rBlocks.docFreq(new Term("content", searchTerm)) + " topNGroups=" + topNGroups + " groupOffset=" + groupOffset + " docOffset=" + docOffset + " doCache=" + doCache + " docsPerGroup=" + docsPerGroup + " doAllGroups=" + doAllGroups + " getScores=" + getScores + " getMaxScores=" + getMaxScores);
}
- final AbstractFirstPassGroupingCollector<?> c1 = createRandomFirstPassCollector("group", groupSort, groupOffset+topNGroups, canUseIDV);
+ String groupField = "group";
+ if (canUseIDV && random().nextBoolean()) {
+ groupField += "_dv";
+ }
+ if (VERBOSE) {
+ System.out.println(" groupField=" + groupField);
+ }
+ final AbstractFirstPassGroupingCollector<?> c1 = createRandomFirstPassCollector(groupField, groupSort, groupOffset+topNGroups);
final CachingCollector cCache;
final Collector c;
final AbstractAllGroupsCollector<?> allGroupsCollector;
if (doAllGroups) {
- allGroupsCollector = createAllGroupsCollector(c1, "group");
+ allGroupsCollector = createAllGroupsCollector(c1, groupField);
} else {
allGroupsCollector = null;
}
@@ -908,6 +903,7 @@ public class TestGrouping extends Lucene
// Search top reader:
final Query query = new TermQuery(new Term("content", searchTerm));
+
s.search(query, c);
if (doCache && !useWrappingCollector) {
@@ -956,7 +952,7 @@ public class TestGrouping extends Lucene
}
}
- c2 = createSecondPassCollector(c1, "group", groupSort, docSort, groupOffset, docOffset + docsPerGroup, getScores, getMaxScores, fillFields);
+ c2 = createSecondPassCollector(c1, groupField, groupSort, docSort, groupOffset, docOffset + docsPerGroup, getScores, getMaxScores, fillFields);
if (doCache) {
if (cCache.isCached()) {
if (VERBOSE) {
@@ -995,7 +991,7 @@ public class TestGrouping extends Lucene
} else {
System.out.println("TEST: expected groups totalGroupedHitCount=" + expectedGroups.totalGroupedHitCount);
for(GroupDocs<BytesRef> gd : expectedGroups.groups) {
- System.out.println(" group=" + (gd.groupValue == null ? "null" : gd.groupValue) + " totalHits=" + gd.totalHits);
+ System.out.println(" group=" + (gd.groupValue == null ? "null" : gd.groupValue) + " totalHits=" + gd.totalHits + " scoreDocs.len=" + gd.scoreDocs.length);
for(ScoreDoc sd : gd.scoreDocs) {
System.out.println(" id=" + sd.doc + " score=" + sd.score);
}
@@ -1009,13 +1005,13 @@ public class TestGrouping extends Lucene
for(GroupDocs<BytesRef> gd : groupsResult.groups) {
System.out.println(" group=" + (gd.groupValue == null ? "null" : gd.groupValue) + " totalHits=" + gd.totalHits);
for(ScoreDoc sd : gd.scoreDocs) {
- System.out.println(" id=" + docIDToID[sd.doc] + " score=" + sd.score);
+ System.out.println(" id=" + docIDToID.get(sd.doc) + " score=" + sd.score);
}
}
if (searchIter == 14) {
for(int docIDX=0;docIDX<s.getIndexReader().maxDoc();docIDX++) {
- System.out.println("ID=" + docIDToID[docIDX] + " explain=" + s.explain(query, docIDX));
+ System.out.println("ID=" + docIDToID.get(docIDX) + " explain=" + s.explain(query, docIDX));
}
}
}
@@ -1027,14 +1023,13 @@ public class TestGrouping extends Lucene
for(GroupDocs<BytesRef> gd : topGroupsShards.groups) {
System.out.println(" group=" + (gd.groupValue == null ? "null" : gd.groupValue) + " totalHits=" + gd.totalHits);
for(ScoreDoc sd : gd.scoreDocs) {
- System.out.println(" id=" + docIDToID[sd.doc] + " score=" + sd.score);
+ System.out.println(" id=" + docIDToID.get(sd.doc) + " score=" + sd.score);
}
}
}
}
- boolean idvBasedImplsUsed = DVFirstPassGroupingCollector.class.isAssignableFrom(c1.getClass());
- assertEquals(docIDToID, expectedGroups, groupsResult, true, true, true, getScores, idvBasedImplsUsed);
+ assertEquals(docIDToID, expectedGroups, groupsResult, true, true, true, getScores, groupField.endsWith("_dv"));
// Confirm merged shards match:
assertEquals(docIDToID, expectedGroups, topGroupsShards, true, false, fillFields, getScores, idvBasedImplsUsedSharded.value);
@@ -1047,6 +1042,9 @@ public class TestGrouping extends Lucene
final TermAllGroupsCollector allGroupsCollector2;
final Collector c4;
if (doAllGroups) {
+ // NOTE: must be "group" and not "group_dv"
+ // (groupField) because we didn't index doc
+ // values in the block index:
allGroupsCollector2 = new TermAllGroupsCollector("group");
c4 = MultiCollector.wrap(c3, allGroupsCollector2);
} else {
@@ -1074,7 +1072,7 @@ public class TestGrouping extends Lucene
for(GroupDocs<BytesRef> gd : groupsResultBlocks.groups) {
System.out.println(" group=" + (gd.groupValue == null ? "null" : gd.groupValue.utf8ToString()) + " totalHits=" + gd.totalHits);
for(ScoreDoc sd : gd.scoreDocs) {
- System.out.println(" id=" + docIDToIDBlocks[sd.doc] + " score=" + sd.score);
+ System.out.println(" id=" + docIDToIDBlocks.get(sd.doc) + " score=" + sd.score);
if (first) {
System.out.println("explain: " + sBlocks.explain(query, sd.doc));
first = false;
@@ -1085,8 +1083,10 @@ public class TestGrouping extends Lucene
}
// Get shard'd block grouping result:
+ // Block index does not index DocValues so we pass
+ // false for canUseIDV:
final TopGroups<BytesRef> topGroupsBlockShards = searchShards(sBlocks, shardsBlocks.subSearchers, query,
- groupSort, docSort, groupOffset, topNGroups, docOffset, docsPerGroup, getScores, getMaxScores, false, true, new ValueHolder<Boolean>(false));
+ groupSort, docSort, groupOffset, topNGroups, docOffset, docsPerGroup, getScores, getMaxScores, false, false, new ValueHolder<Boolean>(false));
if (expectedGroups != null) {
// Fixup scores for reader2
@@ -1164,34 +1164,49 @@ public class TestGrouping extends Lucene
// TODO: swap in caching, all groups collector hereassertEquals(expected.totalHitCount, actual.totalHitCount);
// too...
if (VERBOSE) {
- System.out.println("TEST: " + subSearchers.length + " shards: " + Arrays.toString(subSearchers));
+ System.out.println("TEST: " + subSearchers.length + " shards: " + Arrays.toString(subSearchers) + " canUseIDV=" + canUseIDV);
}
// Run 1st pass collector to get top groups per shard
final Weight w = topSearcher.createNormalizedWeight(query);
final List<Collection<SearchGroup<BytesRef>>> shardGroups = new ArrayList<Collection<SearchGroup<BytesRef>>>();
List<AbstractFirstPassGroupingCollector<?>> firstPassGroupingCollectors = new ArrayList<AbstractFirstPassGroupingCollector<?>>();
AbstractFirstPassGroupingCollector<?> firstPassCollector = null;
- for(int shardIDX=0;shardIDX<subSearchers.length;shardIDX++) {
- if (SlowCompositeReaderWrapper.class.isAssignableFrom(subSearchers[shardIDX].getIndexReader().getClass())) {
- canUseIDV = false;
+ boolean shardsCanUseIDV;
+ if (canUseIDV) {
+ if (SlowCompositeReaderWrapper.class.isAssignableFrom(subSearchers[0].getIndexReader().getClass())) {
+ shardsCanUseIDV = false;
} else {
- canUseIDV = !preFlex;
+ shardsCanUseIDV = !preFlex;
}
+ } else {
+ shardsCanUseIDV = false;
+ }
+
+ String groupField = "group";
+ if (shardsCanUseIDV && random().nextBoolean()) {
+ groupField += "_dv";
+ usedIdvBasedImpl.value = true;
+ }
+
+ for(int shardIDX=0;shardIDX<subSearchers.length;shardIDX++) {
+ // First shard determines whether we use IDV or not;
+ // all other shards match that:
if (firstPassCollector == null) {
- firstPassCollector = createRandomFirstPassCollector("group", groupSort, groupOffset + topNGroups, canUseIDV);
- if (DVFirstPassGroupingCollector.class.isAssignableFrom(firstPassCollector.getClass())) {
- usedIdvBasedImpl.value = true;
- }
+ firstPassCollector = createRandomFirstPassCollector(groupField, groupSort, groupOffset + topNGroups);
} else {
- firstPassCollector = createFirstPassCollector("group", groupSort, groupOffset + topNGroups, firstPassCollector);
+ firstPassCollector = createFirstPassCollector(groupField, groupSort, groupOffset + topNGroups, firstPassCollector);
+ }
+ if (VERBOSE) {
+ System.out.println(" shard=" + shardIDX + " groupField=" + groupField);
+ System.out.println(" 1st pass collector=" + firstPassCollector);
}
firstPassGroupingCollectors.add(firstPassCollector);
subSearchers[shardIDX].search(w, firstPassCollector);
final Collection<SearchGroup<BytesRef>> topGroups = getSearchGroups(firstPassCollector, 0, true);
if (topGroups != null) {
if (VERBOSE) {
- System.out.println(" shard " + shardIDX + " s=" + subSearchers[shardIDX] + " " + topGroups.size() + " groups:");
+ System.out.println(" shard " + shardIDX + " s=" + subSearchers[shardIDX] + " totalGroupedHitCount=?" + " " + topGroups.size() + " groups:");
for(SearchGroup<BytesRef> group : topGroups) {
System.out.println(" " + groupToString(group.groupValue) + " groupSort=" + Arrays.toString(group.sortValues));
}
@@ -1219,7 +1234,7 @@ public class TestGrouping extends Lucene
final TopGroups<BytesRef>[] shardTopGroups = new TopGroups[subSearchers.length];
for(int shardIDX=0;shardIDX<subSearchers.length;shardIDX++) {
final AbstractSecondPassGroupingCollector<?> secondPassCollector = createSecondPassCollector(firstPassGroupingCollectors.get(shardIDX),
- "group", mergedTopGroups, groupSort, docSort, docOffset + topNDocs, getScores, getMaxScores, true);
+ groupField, mergedTopGroups, groupSort, docSort, docOffset + topNDocs, getScores, getMaxScores, true);
subSearchers[shardIDX].search(w, secondPassCollector);
shardTopGroups[shardIDX] = getTopGroups(secondPassCollector, 0);
if (VERBOSE) {
@@ -1243,7 +1258,7 @@ public class TestGrouping extends Lucene
}
}
- private void assertEquals(int[] docIDtoID, TopGroups<BytesRef> expected, TopGroups<BytesRef> actual, boolean verifyGroupValues, boolean verifyTotalGroupCount, boolean verifySortValues, boolean testScores, boolean idvBasedImplsUsed) {
+ private void assertEquals(FieldCache.Ints docIDtoID, TopGroups<BytesRef> expected, TopGroups<BytesRef> actual, boolean verifyGroupValues, boolean verifyTotalGroupCount, boolean verifySortValues, boolean testScores, boolean idvBasedImplsUsed) {
if (expected == null) {
assertNull(actual);
return;
@@ -1290,8 +1305,8 @@ public class TestGrouping extends Lucene
for(int docIDX=0;docIDX<expectedFDs.length;docIDX++) {
final FieldDoc expectedFD = (FieldDoc) expectedFDs[docIDX];
final FieldDoc actualFD = (FieldDoc) actualFDs[docIDX];
- //System.out.println(" actual doc=" + docIDtoID[actualFD.doc] + " score=" + actualFD.score);
- assertEquals(expectedFD.doc, docIDtoID[actualFD.doc]);
+ //System.out.println(" actual doc=" + docIDtoID.get(actualFD.doc) + " score=" + actualFD.score);
+ assertEquals(expectedFD.doc, docIDtoID.get(actualFD.doc));
if (testScores) {
assertEquals(expectedFD.score, actualFD.score, 0.1);
} else {
Modified: lucene/dev/trunk/lucene/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java?rev=1443717&r1=1443716&r2=1443717&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java (original)
+++ lucene/dev/trunk/lucene/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTermExtractor.java Thu Feb 7 20:48:21 2013
@@ -31,11 +31,13 @@ import org.apache.lucene.analysis.Cachin
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.index.AtomicReader;
import org.apache.lucene.index.AtomicReaderContext;
-import org.apache.lucene.index.DocValues;
+import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.Fields;
import org.apache.lucene.index.FilterAtomicReader;
import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.NumericDocValues;
+import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermContext;
import org.apache.lucene.index.Terms;
@@ -402,13 +404,23 @@ public class WeightedSpanTermExtractor {
}
@Override
- public DocValues docValues(String field) throws IOException {
- return super.docValues(FIELD_NAME);
+ public NumericDocValues getNumericDocValues(String field) throws IOException {
+ return super.getNumericDocValues(FIELD_NAME);
}
-
+
+ @Override
+ public BinaryDocValues getBinaryDocValues(String field) throws IOException {
+ return super.getBinaryDocValues(FIELD_NAME);
+ }
+
+ @Override
+ public SortedDocValues getSortedDocValues(String field) throws IOException {
+ return super.getSortedDocValues(FIELD_NAME);
+ }
+
@Override
- public DocValues normValues(String field) throws IOException {
- return super.normValues(FIELD_NAME);
+ public NumericDocValues getNormValues(String field) throws IOException {
+ return super.getNormValues(FIELD_NAME);
}
}
Modified: lucene/dev/trunk/lucene/join/src/java/org/apache/lucene/search/join/TermsCollector.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/join/src/java/org/apache/lucene/search/join/TermsCollector.java?rev=1443717&r1=1443716&r2=1443717&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/join/src/java/org/apache/lucene/search/join/TermsCollector.java (original)
+++ lucene/dev/trunk/lucene/join/src/java/org/apache/lucene/search/join/TermsCollector.java Thu Feb 7 20:48:21 2013
@@ -17,7 +17,10 @@ package org.apache.lucene.search.join;
* limitations under the License.
*/
+import java.io.IOException;
+
import org.apache.lucene.index.AtomicReaderContext;
+import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.DocTermOrds;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.Collector;
@@ -26,8 +29,6 @@ import org.apache.lucene.search.Scorer;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefHash;
-import java.io.IOException;
-
/**
* A collector that collects all terms from a specified field matching the query.
*
@@ -109,7 +110,7 @@ abstract class TermsCollector extends Co
static class SV extends TermsCollector {
final BytesRef spare = new BytesRef();
- private FieldCache.DocTerms fromDocTerms;
+ private BinaryDocValues fromDocTerms;
SV(String field) {
super(field);
@@ -117,7 +118,8 @@ abstract class TermsCollector extends Co
@Override
public void collect(int doc) throws IOException {
- collectorTerms.add(fromDocTerms.getTerm(doc, spare));
+ fromDocTerms.get(doc, spare);
+ collectorTerms.add(spare);
}
@Override
Modified: lucene/dev/trunk/lucene/join/src/java/org/apache/lucene/search/join/TermsWithScoreCollector.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/join/src/java/org/apache/lucene/search/join/TermsWithScoreCollector.java?rev=1443717&r1=1443716&r2=1443717&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/join/src/java/org/apache/lucene/search/join/TermsWithScoreCollector.java (original)
+++ lucene/dev/trunk/lucene/join/src/java/org/apache/lucene/search/join/TermsWithScoreCollector.java Thu Feb 7 20:48:21 2013
@@ -17,7 +17,10 @@ package org.apache.lucene.search.join;
* limitations under the License.
*/
+import java.io.IOException;
+
import org.apache.lucene.index.AtomicReaderContext;
+import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.DocTermOrds;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.Collector;
@@ -27,8 +30,6 @@ import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefHash;
-import java.io.IOException;
-
abstract class TermsWithScoreCollector extends Collector {
private final static int INITIAL_ARRAY_SIZE = 256;
@@ -92,7 +93,7 @@ abstract class TermsWithScoreCollector e
static class SV extends TermsWithScoreCollector {
final BytesRef spare = new BytesRef();
- FieldCache.DocTerms fromDocTerms;
+ BinaryDocValues fromDocTerms;
SV(String field, ScoreMode scoreMode) {
super(field, scoreMode);
@@ -100,7 +101,8 @@ abstract class TermsWithScoreCollector e
@Override
public void collect(int doc) throws IOException {
- int ord = collectedTerms.add(fromDocTerms.getTerm(doc, spare));
+ fromDocTerms.get(doc, spare);
+ int ord = collectedTerms.add(spare);
if (ord < 0) {
ord = -ord - 1;
} else {
@@ -141,7 +143,8 @@ abstract class TermsWithScoreCollector e
@Override
public void collect(int doc) throws IOException {
- int ord = collectedTerms.add(fromDocTerms.getTerm(doc, spare));
+ fromDocTerms.get(doc, spare);
+ int ord = collectedTerms.add(spare);
if (ord < 0) {
ord = -ord - 1;
} else {
Modified: lucene/dev/trunk/lucene/join/src/test/org/apache/lucene/search/join/TestJoinUtil.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/join/src/test/org/apache/lucene/search/join/TestJoinUtil.java?rev=1443717&r1=1443716&r2=1443717&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/join/src/test/org/apache/lucene/search/join/TestJoinUtil.java (original)
+++ lucene/dev/trunk/lucene/join/src/test/org/apache/lucene/search/join/TestJoinUtil.java Thu Feb 7 20:48:21 2013
@@ -17,6 +17,9 @@ package org.apache.lucene.search.join;
* limitations under the License.
*/
+import java.io.IOException;
+import java.util.*;
+
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.document.Document;
@@ -24,6 +27,7 @@ import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.AtomicReader;
import org.apache.lucene.index.AtomicReaderContext;
+import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.DocTermOrds;
import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.IndexReader;
@@ -49,14 +53,11 @@ import org.apache.lucene.search.TopScore
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.FixedBitSet;
+import org.apache.lucene.util.LuceneTestCase.Slow;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util._TestUtil;
-import org.apache.lucene.util.LuceneTestCase.Slow;
import org.junit.Test;
-import java.io.IOException;
-import java.util.*;
-
public class TestJoinUtil extends LuceneTestCase {
public void testSimple() throws Exception {
@@ -523,13 +524,14 @@ public class TestJoinUtil extends Lucene
fromSearcher.search(new TermQuery(new Term("value", uniqueRandomValue)), new Collector() {
private Scorer scorer;
- private FieldCache.DocTerms terms;
+ private BinaryDocValues terms;
private final BytesRef spare = new BytesRef();
@Override
public void collect(int doc) throws IOException {
- BytesRef joinValue = terms.getTerm(doc, spare);
- if (joinValue == null) {
+ terms.get(doc, spare);
+ BytesRef joinValue = spare;
+ if (joinValue.bytes == BinaryDocValues.MISSING) {
return;
}
@@ -641,13 +643,14 @@ public class TestJoinUtil extends Lucene
} else {
toSearcher.search(new MatchAllDocsQuery(), new Collector() {
- private FieldCache.DocTerms terms;
+ private BinaryDocValues terms;
private int docBase;
private final BytesRef spare = new BytesRef();
@Override
public void collect(int doc) {
- JoinScore joinScore = joinValueToJoinScores.get(terms.getTerm(doc, spare));
+ terms.get(doc, spare);
+ JoinScore joinScore = joinValueToJoinScores.get(spare);
if (joinScore == null) {
return;
}
Modified: lucene/dev/trunk/lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java?rev=1443717&r1=1443716&r2=1443717&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java (original)
+++ lucene/dev/trunk/lucene/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java Thu Feb 7 20:48:21 2013
@@ -35,21 +35,21 @@ import org.apache.lucene.analysis.tokena
import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute;
import org.apache.lucene.index.AtomicReader;
import org.apache.lucene.index.AtomicReaderContext;
-import org.apache.lucene.index.FieldInfo;
-import org.apache.lucene.index.Norm;
-import org.apache.lucene.index.DocValues;
+import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.DocsAndPositionsEnum;
import org.apache.lucene.index.DocsEnum;
+import org.apache.lucene.index.FieldInfo.IndexOptions;
+import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.FieldInvertState;
import org.apache.lucene.index.Fields;
+import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.OrdTermState;
+import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.index.StoredFieldVisitor;
import org.apache.lucene.index.TermState;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
-import org.apache.lucene.index.FieldInfo.IndexOptions;
-import org.apache.lucene.index.memory.MemoryIndexNormDocValues.SingleValueSource;
import org.apache.lucene.search.Collector;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
@@ -60,13 +60,13 @@ import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.ByteBlockPool;
import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.BytesRefHash.DirectBytesStartArray;
import org.apache.lucene.util.BytesRefHash;
+import org.apache.lucene.util.Constants; // for javadocs
import org.apache.lucene.util.Counter;
-import org.apache.lucene.util.IntBlockPool;
-import org.apache.lucene.util.BytesRefHash.DirectBytesStartArray;
import org.apache.lucene.util.IntBlockPool.SliceReader;
import org.apache.lucene.util.IntBlockPool.SliceWriter;
-import org.apache.lucene.util.Constants; // for javadocs
+import org.apache.lucene.util.IntBlockPool;
import org.apache.lucene.util.RamUsageEstimator;
import org.apache.lucene.util.RecyclingByteBlockAllocator;
import org.apache.lucene.util.RecyclingIntBlockAllocator;
@@ -738,6 +738,21 @@ public class MemoryIndex {
return new FieldInfos(fieldInfos.values().toArray(new FieldInfo[fieldInfos.size()]));
}
+ @Override
+ public NumericDocValues getNumericDocValues(String field) {
+ return null;
+ }
+
+ @Override
+ public BinaryDocValues getBinaryDocValues(String field) {
+ return null;
+ }
+
+ @Override
+ public SortedDocValues getSortedDocValues(String field) {
+ return null;
+ }
+
private class MemoryFields extends Fields {
@Override
public Iterator<String> iterator() {
@@ -1127,23 +1142,18 @@ public class MemoryIndex {
protected void doClose() {
if (DEBUG) System.err.println("MemoryIndexReader.doClose");
}
-
- @Override
- public DocValues docValues(String field) {
- return null;
- }
/** performance hack: cache norms to avoid repeated expensive calculations */
- private DocValues cachedNormValues;
+ private NumericDocValues cachedNormValues;
private String cachedFieldName;
private Similarity cachedSimilarity;
@Override
- public DocValues normValues(String field) {
+ public NumericDocValues getNormValues(String field) {
FieldInfo fieldInfo = fieldInfos.get(field);
if (fieldInfo == null || fieldInfo.omitsNorms())
return null;
- DocValues norms = cachedNormValues;
+ NumericDocValues norms = cachedNormValues;
Similarity sim = getSimilarity();
if (!field.equals(cachedFieldName) || sim != cachedSimilarity) { // not cached?
Info info = getInfo(field);
@@ -1151,15 +1161,13 @@ public class MemoryIndex {
int numOverlapTokens = info != null ? info.numOverlapTokens : 0;
float boost = info != null ? info.getBoost() : 1.0f;
FieldInvertState invertState = new FieldInvertState(field, 0, numTokens, numOverlapTokens, 0, boost);
- Norm norm = new Norm();
- sim.computeNorm(invertState, norm);
- SingleValueSource singleByteSource = new SingleValueSource(norm);
- norms = new MemoryIndexNormDocValues(singleByteSource);
+ long value = sim.computeNorm(invertState);
+ norms = new MemoryIndexNormDocValues(value);
// cache it for future reuse
cachedNormValues = norms;
cachedFieldName = field;
cachedSimilarity = sim;
- if (DEBUG) System.err.println("MemoryIndexReader.norms: " + field + ":" + norm + ":" + numTokens);
+ if (DEBUG) System.err.println("MemoryIndexReader.norms: " + field + ":" + value + ":" + numTokens);
}
return norms;
}