You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by mi...@apache.org on 2016/06/11 15:49:31 UTC
[15/21] lucene-solr:branch_6x: LUCENE-6766: simplify test case
LUCENE-6766: simplify test case
Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/c26bb871
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/c26bb871
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/c26bb871
Branch: refs/heads/branch_6x
Commit: c26bb87140eacbcdfa6c083a10714af275fe4ab6
Parents: 365436e
Author: Mike McCandless <mi...@apache.org>
Authored: Sun May 8 06:20:59 2016 -0400
Committer: Mike McCandless <mi...@apache.org>
Committed: Sat Jun 11 11:48:40 2016 -0400
----------------------------------------------------------------------
.../lucene62/Lucene62SegmentInfoFormat.java | 2 -
.../apache/lucene/index/TestIndexSorting.java | 356 ++++---------------
2 files changed, 78 insertions(+), 280 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/c26bb871/lucene/core/src/java/org/apache/lucene/codecs/lucene62/Lucene62SegmentInfoFormat.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene62/Lucene62SegmentInfoFormat.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene62/Lucene62SegmentInfoFormat.java
index bb52eeb..762b2c0 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene62/Lucene62SegmentInfoFormat.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene62/Lucene62SegmentInfoFormat.java
@@ -264,7 +264,6 @@ public class Lucene62SegmentInfoFormat extends SegmentInfoFormat {
case FLOAT:
sortTypeID = 4;
break;
- // nocommit the rest:
default:
throw new IllegalStateException("Unexpected sort type: " + sortField.getType());
}
@@ -302,7 +301,6 @@ public class Lucene62SegmentInfoFormat extends SegmentInfoFormat {
output.writeByte((byte) 1);
output.writeLong(Float.floatToIntBits(((Float) missingValue).floatValue()));
break;
- // nocommit the rest:
default:
throw new IllegalStateException("Unexpected sort type: " + sortField.getType());
}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/c26bb871/lucene/core/src/test/org/apache/lucene/index/TestIndexSorting.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/test/org/apache/lucene/index/TestIndexSorting.java b/lucene/core/src/test/org/apache/lucene/index/TestIndexSorting.java
index 15d18fb..278aadc 100644
--- a/lucene/core/src/test/org/apache/lucene/index/TestIndexSorting.java
+++ b/lucene/core/src/test/org/apache/lucene/index/TestIndexSorting.java
@@ -22,9 +22,11 @@ import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
+import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Random;
+import java.util.Set;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.atomic.AtomicInteger;
@@ -610,7 +612,7 @@ public class TestIndexSorting extends LuceneTestCase {
@Override
public long computeNorm(FieldInvertState state) {
- if (state.getName().equals(NORMS_FIELD)) {
+ if (state.getName().equals("norms")) {
return Float.floatToIntBits(state.getBoost());
} else {
return in.computeNorm(state);
@@ -650,7 +652,7 @@ public class TestIndexSorting extends LuceneTestCase {
}
clearAttributes();
- term.append(DOC_POSITIONS_TERM);
+ term.append("#all#");
payload.setPayload(new BytesRef(Integer.toString(pos)));
offset.setOffset(off, off);
--pos;
@@ -664,296 +666,94 @@ public class TestIndexSorting extends LuceneTestCase {
}
}
- private static Directory dir;
- private static IndexReader sortedReader;
+ // nocommit testrandom1 with deletions
- private static final FieldType TERM_VECTORS_TYPE = new FieldType(TextField.TYPE_NOT_STORED);
- static {
- TERM_VECTORS_TYPE.setStoreTermVectors(true);
- TERM_VECTORS_TYPE.freeze();
- }
-
- private static final FieldType POSITIONS_TYPE = new FieldType(TextField.TYPE_NOT_STORED);
- static {
+ public void testRandom1() throws Exception {
+ int numDocs = atLeast(100);
+
+ FieldType POSITIONS_TYPE = new FieldType(TextField.TYPE_NOT_STORED);
POSITIONS_TYPE.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
POSITIONS_TYPE.freeze();
- }
-
- private static final String ID_FIELD = "id";
- private static final String DOCS_ENUM_FIELD = "docs";
- private static final String DOCS_ENUM_TERM = "$all$";
- private static final String DOC_POSITIONS_FIELD = "positions";
- private static final String DOC_POSITIONS_TERM = "$all$";
- private static final String NUMERIC_DV_FIELD = "numeric";
- private static final String SORTED_NUMERIC_DV_FIELD = "sorted_numeric";
- private static final String NORMS_FIELD = "norm";
- private static final String BINARY_DV_FIELD = "binary";
- private static final String SORTED_DV_FIELD = "sorted";
- private static final String SORTED_SET_DV_FIELD = "sorted_set";
- private static final String TERM_VECTORS_FIELD = "term_vectors";
- private static final String DIMENSIONAL_FIELD = "numeric1d";
-
- private static Document doc(final int id, PositionsTokenStream positions) {
- final Document doc = new Document();
- doc.add(new StringField(ID_FIELD, Integer.toString(id), Store.YES));
- doc.add(new StringField(DOCS_ENUM_FIELD, DOCS_ENUM_TERM, Store.NO));
- positions.setId(id);
- doc.add(new Field(DOC_POSITIONS_FIELD, positions, POSITIONS_TYPE));
- doc.add(new NumericDocValuesField(NUMERIC_DV_FIELD, id));
- TextField norms = new TextField(NORMS_FIELD, Integer.toString(id), Store.NO);
- norms.setBoost(Float.intBitsToFloat(id));
- doc.add(norms);
- doc.add(new BinaryDocValuesField(BINARY_DV_FIELD, new BytesRef(Integer.toString(id))));
- doc.add(new SortedDocValuesField(SORTED_DV_FIELD, new BytesRef(Integer.toString(id))));
- doc.add(new SortedSetDocValuesField(SORTED_SET_DV_FIELD, new BytesRef(Integer.toString(id))));
- doc.add(new SortedSetDocValuesField(SORTED_SET_DV_FIELD, new BytesRef(Integer.toString(id + 1))));
- doc.add(new SortedNumericDocValuesField(SORTED_NUMERIC_DV_FIELD, id));
- doc.add(new SortedNumericDocValuesField(SORTED_NUMERIC_DV_FIELD, id + 1));
- doc.add(new Field(TERM_VECTORS_FIELD, Integer.toString(id), TERM_VECTORS_TYPE));
- byte[] bytes = new byte[4];
- NumericUtils.intToSortableBytes(id, bytes, 0);
- doc.add(new BinaryPoint(DIMENSIONAL_FIELD, bytes));
- return doc;
- }
-
- @AfterClass
- public static void afterClass() throws Exception {
- if (sortedReader != null) {
- sortedReader.close();
- sortedReader = null;
- }
- if (dir != null) {
- dir.close();
- dir = null;
- }
- }
- @BeforeClass
- public static void createIndex() throws Exception {
- dir = newFSDirectory(createTempDir());
- int numDocs = atLeast(100);
+ FieldType TERM_VECTORS_TYPE = new FieldType(TextField.TYPE_NOT_STORED);
+ TERM_VECTORS_TYPE.setStoreTermVectors(true);
+ TERM_VECTORS_TYPE.freeze();
- List<Integer> ids = new ArrayList<>();
- for (int i = 0; i < numDocs; i++) {
- ids.add(Integer.valueOf(i * 10));
- }
- // shuffle them for indexing
- Collections.shuffle(ids, random());
- if (VERBOSE) {
- System.out.println("Shuffled IDs for indexing: " + Arrays.toString(ids.toArray()));
- }
+ List<Document> docs = new ArrayList<>();
+ for (int i=0;i<numDocs;i++) {
+ int id = i * 10;
+ Document doc = new Document();
+ doc.add(new StringField("id", Integer.toString(id), Store.YES));
+ doc.add(new StringField("docs", "#all#", Store.NO));
+ PositionsTokenStream positions = new PositionsTokenStream();
+ positions.setId(id);
+ doc.add(new Field("positions", positions, POSITIONS_TYPE));
+ doc.add(new NumericDocValuesField("numeric", id));
+ TextField norms = new TextField("norms", Integer.toString(id), Store.NO);
+ norms.setBoost(Float.intBitsToFloat(id));
+ doc.add(norms);
+ doc.add(new BinaryDocValuesField("binary", new BytesRef(Integer.toString(id))));
+ doc.add(new SortedDocValuesField("sorted", new BytesRef(Integer.toString(id))));
+ doc.add(new SortedSetDocValuesField("sorted_set", new BytesRef(Integer.toString(id))));
+ doc.add(new SortedSetDocValuesField("sorted_set", new BytesRef(Integer.toString(id + 1))));
+ doc.add(new SortedNumericDocValuesField("sorted_numeric", id));
+ doc.add(new SortedNumericDocValuesField("sorted_numeric", id + 1));
+ doc.add(new Field("term_vectors", Integer.toString(id), TERM_VECTORS_TYPE));
+ byte[] bytes = new byte[4];
+ NumericUtils.intToSortableBytes(id, bytes, 0);
+ doc.add(new BinaryPoint("points", bytes));
+ docs.add(doc);
+ }
+
+ // Must use the same seed for both RandomIndexWriters so they behave identically
+ long seed = random().nextLong();
+
+ // We add documents already in ID order for the first writer:
+ Directory dir1 = newFSDirectory(createTempDir());
- PositionsTokenStream positions = new PositionsTokenStream();
- IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random()));
- conf.setMaxBufferedDocs(4); // create some segments
- conf.setSimilarity(new NormsSimilarity(conf.getSimilarity())); // for testing norms field
- // nocommit
- conf.setMergeScheduler(new SerialMergeScheduler());
- // sort the index by id (as integer, in NUMERIC_DV_FIELD)
- conf.setIndexSort(new Sort(new SortField(NUMERIC_DV_FIELD, SortField.Type.INT)));
- RandomIndexWriter writer = new RandomIndexWriter(random(), dir, conf);
- writer.setDoRandomForceMerge(false);
- for (int id : ids) {
- writer.addDocument(doc(id, positions));
- }
- // delete some documents
- writer.commit();
- // nocommit need thread safety test too
- for (Integer id : ids) {
- if (random().nextDouble() < 0.2) {
- if (VERBOSE) {
- System.out.println("delete doc_id " + id);
- }
- writer.deleteDocuments(new Term(ID_FIELD, id.toString()));
- }
+ IndexWriterConfig iwc1 = newIndexWriterConfig(new MockAnalyzer(random()));
+ iwc1.setSimilarity(new NormsSimilarity(iwc1.getSimilarity())); // for testing norms field
+ // preserve docIDs
+ iwc1.setMergePolicy(newLogMergePolicy());
+ if (VERBOSE) {
+ System.out.println("TEST: now index pre-sorted");
}
-
- sortedReader = writer.getReader();
- writer.close();
-
- TestUtil.checkReader(sortedReader);
- }
-
- // nocommit just do assertReaderEquals, don't use @BeforeClass, etc.?
-
- public void testBinaryDocValuesField() throws Exception {
- for(LeafReaderContext ctx : sortedReader.leaves()) {
- LeafReader reader = ctx.reader();
- BinaryDocValues dv = reader.getBinaryDocValues(BINARY_DV_FIELD);
- boolean isSorted = reader.getIndexSort() != null;
- int lastID = Integer.MIN_VALUE;
- for (int docID = 0; docID < reader.maxDoc(); docID++) {
- BytesRef bytes = dv.get(docID);
- String idString = reader.document(docID).get(ID_FIELD);
- assertEquals("incorrect binary DocValues for doc " + docID, idString, bytes.utf8ToString());
- if (isSorted) {
- int id = Integer.parseInt(idString);
- assertTrue("lastID=" + lastID + " vs id=" + id, lastID < id);
- lastID = id;
- }
- }
+ RandomIndexWriter w1 = new RandomIndexWriter(new Random(seed), dir1, iwc1);
+ for(Document doc : docs) {
+ ((PositionsTokenStream) ((Field) doc.getField("positions")).tokenStreamValue()).setId(Integer.parseInt(doc.get("id")));
+ w1.addDocument(doc);
}
- }
- public void testPostings() throws Exception {
- for(LeafReaderContext ctx : sortedReader.leaves()) {
- LeafReader reader = ctx.reader();
- TermsEnum termsEnum = reader.terms(DOC_POSITIONS_FIELD).iterator();
- assertEquals(SeekStatus.FOUND, termsEnum.seekCeil(new BytesRef(DOC_POSITIONS_TERM)));
- PostingsEnum sortedPositions = termsEnum.postings(null, PostingsEnum.ALL);
- int doc;
-
- // test nextDoc()
- while ((doc = sortedPositions.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
- int freq = sortedPositions.freq();
- int id = Integer.parseInt(reader.document(doc).get(ID_FIELD));
- assertEquals("incorrect freq for doc=" + doc, id / 10 + 1, freq);
- for (int i = 0; i < freq; i++) {
- assertEquals("incorrect position for doc=" + doc, i, sortedPositions.nextPosition());
- assertEquals("incorrect startOffset for doc=" + doc, i, sortedPositions.startOffset());
- assertEquals("incorrect endOffset for doc=" + doc, i, sortedPositions.endOffset());
- assertEquals("incorrect payload for doc=" + doc, freq - i, Integer.parseInt(sortedPositions.getPayload().utf8ToString()));
- }
- }
+ // We shuffle documents, but set index sort, for the second writer:
+ Directory dir2 = newFSDirectory(createTempDir());
- // test advance()
- final PostingsEnum reuse = sortedPositions;
- sortedPositions = termsEnum.postings(reuse, PostingsEnum.ALL);
-
- doc = 0;
- while ((doc = sortedPositions.advance(doc + TestUtil.nextInt(random(), 1, 5))) != DocIdSetIterator.NO_MORE_DOCS) {
- int freq = sortedPositions.freq();
- int id = Integer.parseInt(reader.document(doc).get(ID_FIELD));
- assertEquals("incorrect freq for doc=" + doc, id / 10 + 1, freq);
- for (int i = 0; i < freq; i++) {
- assertEquals("incorrect position for doc=" + doc, i, sortedPositions.nextPosition());
- assertEquals("incorrect startOffset for doc=" + doc, i, sortedPositions.startOffset());
- assertEquals("incorrect endOffset for doc=" + doc, i, sortedPositions.endOffset());
- assertEquals("incorrect payload for doc=" + doc, freq - i, Integer.parseInt(sortedPositions.getPayload().utf8ToString()));
- }
- }
- }
- }
-
- public void testDocsAreSortedByID() throws Exception {
- for(LeafReaderContext ctx : sortedReader.leaves()) {
- LeafReader reader = ctx.reader();
- if (reader.getIndexSort() != null) {
- int maxDoc = reader.maxDoc();
- int lastID = Integer.MIN_VALUE;
- for(int doc=0;doc<maxDoc;doc++) {
- int id = Integer.parseInt(reader.document(doc).get(ID_FIELD));
- assertTrue(id > lastID);
- lastID = id;
- }
- }
- }
- }
+ IndexWriterConfig iwc2 = newIndexWriterConfig(new MockAnalyzer(random()));
+ iwc2.setSimilarity(new NormsSimilarity(iwc2.getSimilarity())); // for testing norms field
- public void testNormValues() throws Exception {
- for(LeafReaderContext ctx : sortedReader.leaves()) {
- LeafReader reader = ctx.reader();
- NumericDocValues dv = reader.getNormValues(NORMS_FIELD);
- int maxDoc = reader.maxDoc();
- for (int doc = 0; doc < maxDoc; doc++) {
- int id = Integer.parseInt(reader.document(doc).get(ID_FIELD));
- assertEquals("incorrect norm value for doc " + doc, id, dv.get(doc));
- }
- }
- }
-
- public void testNumericDocValuesField() throws Exception {
- for(LeafReaderContext ctx : sortedReader.leaves()) {
- LeafReader reader = ctx.reader();
- NumericDocValues dv = reader.getNumericDocValues(NUMERIC_DV_FIELD);
- int maxDoc = reader.maxDoc();
- for (int doc = 0; doc < maxDoc; doc++) {
- int id = Integer.parseInt(reader.document(doc).get(ID_FIELD));
- assertEquals("incorrect numeric DocValues for doc " + doc, id, dv.get(doc));
- }
- }
- }
-
- public void testSortedDocValuesField() throws Exception {
- for(LeafReaderContext ctx : sortedReader.leaves()) {
- LeafReader reader = ctx.reader();
- SortedDocValues dv = reader.getSortedDocValues(SORTED_DV_FIELD);
- int maxDoc = reader.maxDoc();
- for (int doc = 0; doc < maxDoc; doc++) {
- final BytesRef bytes = dv.get(doc);
- String id = reader.document(doc).get(ID_FIELD);
- assertEquals("incorrect sorted DocValues for doc " + doc, id, bytes.utf8ToString());
- }
- }
- }
-
- public void testSortedSetDocValuesField() throws Exception {
- for(LeafReaderContext ctx : sortedReader.leaves()) {
- LeafReader reader = ctx.reader();
- SortedSetDocValues dv = reader.getSortedSetDocValues(SORTED_SET_DV_FIELD);
- int maxDoc = reader.maxDoc();
- for (int doc = 0; doc < maxDoc; doc++) {
- dv.setDocument(doc);
- BytesRef bytes = dv.lookupOrd(dv.nextOrd());
- String id = reader.document(doc).get(ID_FIELD);
- assertEquals("incorrect sorted-set DocValues for doc " + doc, id, bytes.utf8ToString());
- bytes = dv.lookupOrd(dv.nextOrd());
- assertEquals("incorrect sorted-set DocValues for doc " + doc, Integer.valueOf(Integer.parseInt(id) + 1).toString(), bytes.utf8ToString());
- assertEquals(SortedSetDocValues.NO_MORE_ORDS, dv.nextOrd());
- }
- }
- }
+ Sort sort = new Sort(new SortField("numeric", SortField.Type.INT));
+ iwc2.setIndexSort(sort);
- public void testSortedNumericDocValuesField() throws Exception {
- for(LeafReaderContext ctx : sortedReader.leaves()) {
- LeafReader reader = ctx.reader();
- SortedNumericDocValues dv = reader.getSortedNumericDocValues(SORTED_NUMERIC_DV_FIELD);
- int maxDoc = reader.maxDoc();
- for (int doc = 0; doc < maxDoc; doc++) {
- dv.setDocument(doc);
- assertEquals(2, dv.count());
- int id = Integer.parseInt(reader.document(doc).get(ID_FIELD));
- assertEquals("incorrect sorted-numeric DocValues for doc " + doc, id, dv.valueAt(0));
- assertEquals("incorrect sorted-numeric DocValues for doc " + doc, id + 1, dv.valueAt(1));
- }
- }
- }
-
- public void testTermVectors() throws Exception {
- for(LeafReaderContext ctx : sortedReader.leaves()) {
- LeafReader reader = ctx.reader();
- int maxDoc = reader.maxDoc();
- for (int doc = 0; doc < maxDoc; doc++) {
- Terms terms = reader.getTermVector(doc, TERM_VECTORS_FIELD);
- assertNotNull("term vectors not found for doc " + doc + " field [" + TERM_VECTORS_FIELD + "]", terms);
- String id = reader.document(doc).get(ID_FIELD);
- assertEquals("incorrect term vector for doc " + doc, id, terms.iterator().next().utf8ToString());
+ Collections.shuffle(docs, random());
+ if (VERBOSE) {
+ System.out.println("TEST: now index with index-time sorting");
+ }
+ RandomIndexWriter w2 = new RandomIndexWriter(new Random(seed), dir2, iwc2);
+ int count = 0;
+ int commitAtCount = TestUtil.nextInt(random(), 1, numDocs-1);
+ for(Document doc : docs) {
+ ((PositionsTokenStream) ((Field) doc.getField("positions")).tokenStreamValue()).setId(Integer.parseInt(doc.get("id")));
+ if (count++ == commitAtCount) {
+ // Ensure forceMerge really does merge
+ w2.commit();
}
+ w2.addDocument(doc);
}
- }
+ w2.forceMerge(1);
- public void testPoints() throws Exception {
- for(LeafReaderContext ctx : sortedReader.leaves()) {
- final LeafReader reader = ctx.reader();
- PointValues values = reader.getPointValues();
- values.intersect(DIMENSIONAL_FIELD,
- new IntersectVisitor() {
- @Override
- public void visit(int docID) {
- throw new IllegalStateException();
- }
-
- @Override
- public void visit(int docID, byte[] packedValues) throws IOException {
- int id = Integer.parseInt(reader.document(docID).get(ID_FIELD));
- assertEquals(id, NumericUtils.sortableBytesToInt(packedValues, 0));
- }
-
- @Override
- public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
- return Relation.CELL_CROSSES_QUERY;
- }
- });
- }
+ DirectoryReader r1 = w1.getReader();
+ DirectoryReader r2 = w2.getReader();
+ assertEquals(sort, getOnlyLeafReader(r2).getIndexSort());
+ assertReaderEquals("left: sorted by hand; right: sorted by Lucene", r1, r2);
+ IOUtils.close(w1, w2, r1, r2, dir1, dir2);
}
}