You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by si...@apache.org on 2011/01/26 10:00:05 UTC
svn commit: r1063638 - in /lucene/dev/branches/bulkpostings/lucene:
contrib/memory/src/test/org/apache/lucene/index/memory/
src/test/org/apache/lucene/index/
Author: simonw
Date: Wed Jan 26 09:00:05 2011
New Revision: 1063638
URL: http://svn.apache.org/viewvc?rev=1063638&view=rev
Log:
LUCENE-2723: added testcases for BulkPostings
Added:
lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/BulkPostingsEnumWrapper.java (with props)
Modified:
lucene/dev/branches/bulkpostings/lucene/contrib/memory/src/test/org/apache/lucene/index/memory/MemoryIndexTest.java
lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/TestDocsAndPositions.java
Modified: lucene/dev/branches/bulkpostings/lucene/contrib/memory/src/test/org/apache/lucene/index/memory/MemoryIndexTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/contrib/memory/src/test/org/apache/lucene/index/memory/MemoryIndexTest.java?rev=1063638&r1=1063637&r2=1063638&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/contrib/memory/src/test/org/apache/lucene/index/memory/MemoryIndexTest.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/contrib/memory/src/test/org/apache/lucene/index/memory/MemoryIndexTest.java Wed Jan 26 09:00:05 2011
@@ -31,12 +31,18 @@ import org.apache.lucene.analysis.MockTo
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
+import org.apache.lucene.index.BulkPostingsEnumWrapper;
+import org.apache.lucene.index.DocsAndPositionsEnum;
+import org.apache.lucene.index.IndexReader.AtomicReaderContext;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.ReaderUtil;
import org.apache.lucene.util._TestUtil;
/**
@@ -80,6 +86,34 @@ public class MemoryIndexTest extends Bas
for (int i = 0; i < ITERATIONS; i++)
assertAgainstRAMDirectory();
}
+
+
+ public void testBulkPostings() throws IOException { // wraps the MemoryIndex bulk postings of term "1" and checks doc id, freq and positions
+ String fieldName = "field";
+ final int num = 1 + random.nextInt(1000); // at least one repetition: with 0 the content is empty and there is no term "1" to look up (the wrapper would presumably get a null enum -- confirm)
+ MemoryIndex index = new MemoryIndex();
+ StringBuilder builder = new StringBuilder();
+ String content = "1 2 3 4 5 6 7 8 9 10 "; // ten tokens per repetition, so "1" sits at positions 0, 10, 20, ...
+ for (int i = 0; i < num; i++) {
+ builder.append(content);
+ }
+ content = builder.toString();
+ index.addField(fieldName, content, new MockAnalyzer(
+ MockTokenizer.WHITESPACE, true, false));
+ IndexSearcher searcher = index.createSearcher();
+ AtomicReaderContext leaf = ReaderUtil
+ .leaves(searcher.getTopReaderContext())[0];
+ DocsAndPositionsEnum docsAndPos = new BulkPostingsEnumWrapper(
+ leaf.reader.bulkTermPostingsEnum(fieldName, new BytesRef("1"), true,
+ true), null, 1); // no skipDocs, docFreq 1: the MemoryIndex holds exactly one document
+ assertNotNull(docsAndPos);
+ assertEquals(0, docsAndPos.nextDoc());
+ for (int i = 0; i < num; i++) {
+ assertEquals(num, docsAndPos.freq()); // "1" occurs once per repetition
+ assertEquals(i * 10, docsAndPos.nextPosition());
+ }
+ assertEquals(Scorer.NO_MORE_DOCS, docsAndPos.nextDoc()); // expected value goes first so a failure message reads correctly
+ }
/**
* Build a randomish document for both RAMDirectory and MemoryIndex,
Added: lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/BulkPostingsEnumWrapper.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/BulkPostingsEnumWrapper.java?rev=1063638&view=auto
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/BulkPostingsEnumWrapper.java (added)
+++ lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/BulkPostingsEnumWrapper.java Wed Jan 26 09:00:05 2011
@@ -0,0 +1,275 @@
+package org.apache.lucene.index;
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with this
+ * work for additional information regarding copyright ownership. The ASF
+ * licenses this file to You under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+import java.io.IOException;
+
+import org.apache.lucene.index.BulkPostingsEnum.BlockReader;
+import org.apache.lucene.util.Bits;
+import org.apache.lucene.util.BytesRef;
+
+/**
+ * This class wraps a {@link BulkPostingsEnum} to use it as a {@link DocsAndPositionsEnum}
+ * <p>
+ * Note: Payloads are currently not supported with this wrapper
+ */
+public class BulkPostingsEnumWrapper extends DocsAndPositionsEnum {
+ private final BulkPostingsEnum docsEnum;
+
+ private final BlockReader freqsReader;
+ private final BlockReader docDeltasReader;
+ private BlockReader positionDeltaReader;
+
+ private final int[] docDeltas;
+ private int docPointer;
+ private int docPointerMax;
+ private boolean first = true;
+
+ private final int[] freqs;
+ private int freqPointer;
+ private int freqPointerMax;
+
+ private final int[] pos;
+ private int posPointer;
+ private int posPointerMax;
+ private int positionsPending;
+ private int currentPos;
+
+ private final Bits skipDocs;
+
+ private int doc;
+ private int docFreq;
+ private int count;
+
+ /**
+ * Creates a new {@link BulkPostingsEnumWrapper} over the given bulk enum; skipDoc may be null (then no doc is filtered) and docFreq bounds how many postings are consumed.
+ */
+ public BulkPostingsEnumWrapper(BulkPostingsEnum bulkPostingsEnum,
+ Bits skipDoc, int docFreq) throws IOException {
+ this.docsEnum = bulkPostingsEnum; // kept for jump() in advance()
+ this.docFreq = docFreq;
+
+ this.docDeltasReader = bulkPostingsEnum.getDocDeltasReader();
+ this.docDeltas = docDeltasReader.getBuffer();
+ this.freqsReader = bulkPostingsEnum.getFreqsReader(); // may be null; freq() then reports 1
+ this.freqs = freqsReader == null ? null : freqsReader.getBuffer();
+ this.positionDeltaReader = bulkPostingsEnum.getPositionDeltasReader(); // may be null; nextPosition() then returns -1
+ this.pos = positionDeltaReader == null ? null : positionDeltaReader
+ .getBuffer();
+ this.skipDocs = skipDoc;
+ reset(); // initialize the buffer pointers from the readers' current offsets
+
+ }
+
+ @Override
+ public int nextPosition() throws IOException { // next position of the current doc, or -1 when there is no positions reader or no position is pending
+ if (positionDeltaReader != null) {
+ if (--positionsPending >= 0) { // positionsPending is armed with the doc's freq in fillFreq()
+ if (++posPointer >= posPointerMax) { // buffer used up: refill and restart at slot 0
+ posPointerMax = positionDeltaReader.fill();
+ assert posPointerMax != 0;
+ posPointer = 0;
+ }
+ currentPos += pos[posPointer]; // the buffer holds deltas; accumulate into the running position
+ return currentPos;
+ }
+ currentPos = 0; // exhausted for this doc: clear the state so the counter does not go further negative
+ positionsPending = 0;
+ }
+ return -1;
+ }
+
+ @Override
+ public BytesRef getPayload() throws IOException {
+ // TODO payloads are not supported by this wrapper yet
+ return null;
+ }
+
+ @Override
+ public boolean hasPayload() {
+ // TODO payloads are not supported by this wrapper yet
+ return false;
+ }
+
+ @Override
+ public int freq() { // term frequency of the current doc
+ return freqsReader == null ? 1 : freqs[freqPointer]; // without a freqs reader every doc reports 1
+ }
+
+ @Override
+ public int docID() { // current doc id; nextDoc()/advance() store NO_MORE_DOCS here once exhausted
+ return doc;
+ }
+
+ @Override
+ public int nextDoc() throws IOException { // moves to the next doc not flagged in skipDocs; NO_MORE_DOCS once docFreq postings were consumed
+ while (count < docFreq) {
+ fillDeltas(); // step docPointer, refilling the doc-delta buffer when it runs out
+ fillFreq(); // step freqPointer and the position state along with the doc pointer
+ count++;
+ doc += docDeltas[docPointer]; // the buffer holds deltas; accumulate into the current doc id
+ first = false;
+ assert doc >= 0 && (skipDocs == null || doc < skipDocs.length())
+ && doc != NO_MORE_DOCS : "doc=" + doc + " skipDocs=" + skipDocs
+ + " skipDocs.length="
+ + (skipDocs == null ? "n/a" : skipDocs.length());
+ if (skipDocs == null || !skipDocs.get(doc)) { // a set bit in skipDocs filters the doc out
+ return doc;
+ }
+ }
+
+ return doc = NO_MORE_DOCS;
+ }
+
+ @Override
+ public int advance(int target) throws IOException { // scans the current block, then jumps or refills, then delegates to scan(); returns a doc >= target not flagged in skipDocs, or NO_MORE_DOCS
+ // nocommit: should we, here, optimize .advance(target that isn't
+ // too far away) into scan? seems like simple win?
+ // first scan current doc deltas block
+ for (docPointer++; docPointer < docPointerMax && count < docFreq; docPointer++) {
+ assert first || docDeltas[docPointer] > 0;
+ doc += docDeltas[docPointer];
+ first = false;
+ count++;
+ fillFreq(); // keep the freq/position pointers in step with every doc consumed
+ if (doc >= target && (skipDocs == null || !skipDocs.get(doc))) {
+ return doc;
+ }
+ }
+
+ if (count == docFreq) { // every posting was consumed without reaching target
+ return doc = NO_MORE_DOCS;
+ }
+
+ // not found in current block, seek underlying stream
+ final BulkPostingsEnum.JumpResult jumpResult;
+ if (target - doc > docDeltas.length && // avoid useless jumps
+ (jumpResult = docsEnum.jump(target, count)) != null) {
+ count = jumpResult.count;
+ doc = jumpResult.docID;
+ first = false;
+ reset(); // re-read the buffer pointers from the readers after the jump
+ } else {
+ // seek did not jump -- just fill next buffer
+ docPointerMax = docDeltasReader.fill();
+ if (docPointerMax != 0) {
+ docPointer = 0;
+ assert first || docDeltas[0] > 0;
+ doc += docDeltas[0];
+ count++;
+ first = false;
+ } else { // fill() delivered nothing
+ return doc = NO_MORE_DOCS;
+ }
+ fillFreq();
+ }
+
+ // now scan -- let the compiler inline this
+ return scan(target);
+ }
+
+ private int scan(final int target) throws IOException { // linear scan from the current doc; returns the first doc >= target not flagged in skipDocs, or NO_MORE_DOCS
+ while (true) {
+ assert doc >= 0 && doc != NO_MORE_DOCS;
+ if (doc >= target && (skipDocs == null || !skipDocs.get(doc))) { // the current doc is tested before anything is consumed
+ return doc;
+ }
+
+ if (count >= docFreq) { // all postings consumed
+ break;
+ }
+
+ if (++docPointer >= docPointerMax) { // block used up: refill and restart at slot 0
+ docPointerMax = docDeltasReader.fill();
+ if (docPointerMax != 0) {
+ docPointer = 0;
+ } else {
+ return doc = NO_MORE_DOCS;
+ }
+ }
+
+ fillFreq(); // keep the freq/position pointers in step with the doc pointer
+ assert first || docDeltas[docPointer] > 0;
+ doc += docDeltas[docPointer];
+ count++;
+ }
+ return doc = NO_MORE_DOCS;
+ }
+
+ private void fillDeltas() throws IOException { // steps docPointer by one, refilling the doc-delta buffer when the block is used up
+ if (++docPointer >= docPointerMax) {
+ docPointerMax = docDeltasReader.fill();
+ assert docPointerMax != 0; // the caller only gets here while count < docFreq, so data is expected
+ docPointer = 0;
+ }
+ }
+
+ private void fillFreq() throws IOException { // steps freqPointer to the new doc and re-arms the position state; no-op without a freqs reader
+ if (freqsReader != null) {
+ if (++freqPointer >= freqPointerMax) { // block used up: refill and restart at slot 0
+ freqPointerMax = freqsReader.fill();
+ assert freqPointerMax != 0;
+ freqPointer = 0;
+ }
+
+ if (positionDeltaReader != null) {
+ if (positionsPending > 0) { // the previous doc still has unread positions: skip over them
+ posPointer += positionsPending;
+ while (posPointer >= posPointerMax) { // a loop, because the skipped
+ // positions may span more than
+ // one buffer (numPos > buffersize)
+ posPointer -= posPointerMax; // carry the part still to be
+ // skipped over into the block
+ // that is filled next
+ posPointerMax = positionDeltaReader.fill();
+ assert posPointerMax != 0;
+ }
+ } else if (posPointer + 1 >= posPointerMax) { // nothing to skip, but the next read would run past the buffer
+ posPointerMax = positionDeltaReader.fill();
+ assert posPointerMax != 0;
+ posPointer = -1; // nextPosition() pre-increments, so park one slot before 0
+ }
+ currentPos = 0; // positions accumulate from 0 for each doc
+ positionsPending = freqs[freqPointer]; // one position per occurrence in the new doc
+ }
+ }
+ }
+
+ private final void reset() throws IOException { // re-reads all buffer pointers from the readers' current offset()/end(); used by the constructor and after a jump
+ docPointer = docDeltasReader.offset();
+ docPointerMax = docDeltasReader.end();
+ assert docPointerMax >= docPointer : "dP=" + docPointer + " dPMax="
+ + docPointerMax;
+ --docPointer; // consumers pre-increment before reading; must also happen without a freqs reader or the first delta is skipped
+ if (freqsReader != null) { // do we have freqs?
+ freqPointer = freqsReader.offset();
+ freqPointerMax = freqsReader.end();
+ assert freqPointerMax >= freqPointer : "fP=" + freqPointer + " fPMax="
+ + freqPointerMax;
+ --freqPointer; // fillFreq() pre-increments before reading
+
+ if (positionDeltaReader != null) { // compiler should optimize this away
+ currentPos = 0;
+ posPointer = positionDeltaReader.offset();
+ posPointerMax = positionDeltaReader.end();
+ assert posPointerMax >= posPointer : "pP=" + posPointer + " pPMax="
+ + posPointerMax;
+ --posPointer; // nextPosition() pre-increments before reading
+ positionsPending = 0;
+ }
+ }
+ }
+}
\ No newline at end of file
Modified: lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/TestDocsAndPositions.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/TestDocsAndPositions.java?rev=1063638&r1=1063637&r2=1063638&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/TestDocsAndPositions.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/TestDocsAndPositions.java Wed Jan 26 09:00:05 2011
@@ -92,17 +92,36 @@ public class TestDocsAndPositions extend
public DocsAndPositionsEnum getDocsAndPositions(IndexReader reader,
BytesRef bytes, Bits skipDocs) throws IOException {
+ if (random.nextInt(10) == 0) { // once in a while throw in a non-bulk reader
return reader.termPositionsEnum(null, fieldName, bytes);
+ } else {
+ BulkPostingsEnum bulkTermPostingsEnum = reader.bulkTermPostingsEnum(
+ fieldName, bytes, true, true);
+ if (bulkTermPostingsEnum == null){
+ return null;
+ }
+ return new BulkPostingsEnumWrapper(bulkTermPostingsEnum, null,
+ reader.docFreq(new Term(fieldName, bytes)));
+ }
}
public DocsEnum getDocsEnum(IndexReader reader, BytesRef bytes,
boolean freqs, Bits skipDocs) throws IOException {
int randInt = random.nextInt(10);
- if (randInt == 0) { // once in a while throw in a positions enum
+ if (randInt == 0) { // once in a while throw in a non-bulk reader
+ return reader.termDocsEnum(skipDocs, fieldName, bytes);
+ } else if (randInt == 5) {
+
return getDocsAndPositions(reader, bytes, skipDocs);
} else {
- return reader.termDocsEnum(skipDocs, fieldName, bytes);
- }
+ BulkPostingsEnum bulkTermPostingsEnum = reader.bulkTermPostingsEnum(
+ fieldName, bytes, freqs, false);
+ if (bulkTermPostingsEnum == null) {
+ return null;
+ }
+ return new BulkPostingsEnumWrapper(bulkTermPostingsEnum, skipDocs,
+ reader.docFreq(new Term(fieldName, bytes)));
+ }
}
/**