You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by si...@apache.org on 2011/01/26 10:00:05 UTC

svn commit: r1063638 - in /lucene/dev/branches/bulkpostings/lucene: contrib/memory/src/test/org/apache/lucene/index/memory/ src/test/org/apache/lucene/index/

Author: simonw
Date: Wed Jan 26 09:00:05 2011
New Revision: 1063638

URL: http://svn.apache.org/viewvc?rev=1063638&view=rev
Log:
LUCENE-2723: added testcases for BulkPostings 

Added:
    lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/BulkPostingsEnumWrapper.java   (with props)
Modified:
    lucene/dev/branches/bulkpostings/lucene/contrib/memory/src/test/org/apache/lucene/index/memory/MemoryIndexTest.java
    lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/TestDocsAndPositions.java

Modified: lucene/dev/branches/bulkpostings/lucene/contrib/memory/src/test/org/apache/lucene/index/memory/MemoryIndexTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/contrib/memory/src/test/org/apache/lucene/index/memory/MemoryIndexTest.java?rev=1063638&r1=1063637&r2=1063638&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/contrib/memory/src/test/org/apache/lucene/index/memory/MemoryIndexTest.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/contrib/memory/src/test/org/apache/lucene/index/memory/MemoryIndexTest.java Wed Jan 26 09:00:05 2011
@@ -31,12 +31,18 @@ import org.apache.lucene.analysis.MockTo
 import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
+import org.apache.lucene.index.BulkPostingsEnumWrapper;
+import org.apache.lucene.index.DocsAndPositionsEnum;
+import org.apache.lucene.index.IndexReader.AtomicReaderContext;
 import org.apache.lucene.index.IndexWriter;
 import org.apache.lucene.index.IndexWriterConfig;
 import org.apache.lucene.queryParser.QueryParser;
 import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.Scorer;
 import org.apache.lucene.search.TopDocs;
 import org.apache.lucene.store.Directory;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.ReaderUtil;
 import org.apache.lucene.util._TestUtil;
 
 /**
@@ -80,6 +86,34 @@ public class MemoryIndexTest extends Bas
     for (int i = 0; i < ITERATIONS; i++)
       assertAgainstRAMDirectory();
   }
+  
+  
+  // Indexes "1 2 ... 10" repeated num times and checks doc, freq and positions
+  // of term "1" when the MemoryIndex bulk postings are read through the wrapper.
+  public void testBulkPostings() throws IOException {
+    String fieldName = "field";
+    final int num = 1 + random.nextInt(1000); // never 0: term "1" must exist
+    MemoryIndex index = new MemoryIndex();
+    StringBuilder builder = new StringBuilder();
+    String content = "1 2 3 4 5 6 7 8 9 10 ";
+    for (int i = 0; i < num; i++) {
+      builder.append(content);
+    }
+    content = builder.toString();
+    index.addField(fieldName, content, new MockAnalyzer(MockTokenizer.WHITESPACE, true, false));
+    IndexSearcher searcher = index.createSearcher();
+    AtomicReaderContext leaf = ReaderUtil.leaves(searcher.getTopReaderContext())[0];
+    org.apache.lucene.index.BulkPostingsEnum bulk = leaf.reader.bulkTermPostingsEnum(
+        fieldName, new BytesRef("1"), true, true);
+    assertNotNull(bulk); // check the enum itself; a freshly constructed wrapper is never null
+    DocsAndPositionsEnum docsAndPos = new BulkPostingsEnumWrapper(bulk, null, 1);
+    assertEquals(0, docsAndPos.nextDoc());
+    for (int i = 0; i < num; i++) {
+      assertEquals(num, docsAndPos.freq());
+      assertEquals(i * 10, docsAndPos.nextPosition());
+    }
+    assertEquals(Scorer.NO_MORE_DOCS, docsAndPos.nextDoc());
+  }
 
   /**
    * Build a randomish document for both RAMDirectory and MemoryIndex,

Added: lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/BulkPostingsEnumWrapper.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/BulkPostingsEnumWrapper.java?rev=1063638&view=auto
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/BulkPostingsEnumWrapper.java (added)
+++ lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/BulkPostingsEnumWrapper.java Wed Jan 26 09:00:05 2011
@@ -0,0 +1,275 @@
+package org.apache.lucene.index;
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with this
+ * work for additional information regarding copyright ownership. The ASF
+ * licenses this file to You under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * 
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+import java.io.IOException;
+
+import org.apache.lucene.index.BulkPostingsEnum.BlockReader;
+import org.apache.lucene.util.Bits;
+import org.apache.lucene.util.BytesRef;
+
+/**
+ * Wraps a {@link BulkPostingsEnum} so that it can be consumed as a {@link DocsAndPositionsEnum}.
+ * <p>
+ * Note: payloads are currently not supported by this wrapper.
+ */
+public class BulkPostingsEnumWrapper extends DocsAndPositionsEnum {
+  private final BulkPostingsEnum docsEnum; // wrapped enum; used here only for jump()
+  private final BlockReader freqsReader; // may be null; freq() then reports 1
+  private final BlockReader docDeltasReader;
+  private final BlockReader positionDeltaReader; // may be null; nextPosition() then returns -1
+
+  // Each *Pointer indexes the buffer entry consumed last (one before the next
+  // unread entry); each *PointerMax is the limit reported by end() / fill().
+  private final int[] docDeltas;
+  private int docPointer;
+  private int docPointerMax;
+  private boolean first = true; // only read by assertions
+
+  private final int[] freqs;
+  private int freqPointer;
+  private int freqPointerMax;
+
+  private final int[] pos;
+  private int posPointer;
+  private int posPointerMax;
+  private int positionsPending; // positions of the current doc not yet returned
+  private int currentPos; // running sum of the current doc's position deltas
+
+  private final Bits skipDocs; // docs whose bit is set are never returned; may be null
+  private int doc;
+  private final int docFreq; // number of postings to consume before NO_MORE_DOCS
+  private int count; // postings consumed so far
+  
+  /**
+   * Creates a new {@link BulkPostingsEnumWrapper}.
+   *
+   * @param bulkPostingsEnum the bulk enum to expose as a {@link DocsAndPositionsEnum}
+   * @param skipDoc docs to leave out (set bit = skipped), or <code>null</code>
+   * @param docFreq number of postings to read from <code>bulkPostingsEnum</code>
+   */
+  public BulkPostingsEnumWrapper(BulkPostingsEnum bulkPostingsEnum,
+      Bits skipDoc, int docFreq) throws IOException {
+    this.docsEnum = bulkPostingsEnum;
+    this.docFreq = docFreq;
+    this.docDeltasReader = bulkPostingsEnum.getDocDeltasReader();
+    this.docDeltas = docDeltasReader.getBuffer();
+    this.freqsReader = bulkPostingsEnum.getFreqsReader();
+    this.freqs = freqsReader == null ? null : freqsReader.getBuffer();
+    this.positionDeltaReader = bulkPostingsEnum.getPositionDeltasReader();
+    this.pos = positionDeltaReader == null ? null : positionDeltaReader.getBuffer();
+    this.skipDocs = skipDoc;
+    reset();
+  }
+
+  // Returns the next position of the current doc; -1 once they are used up or
+  // when there is no position reader.
+  @Override
+  public int nextPosition() throws IOException {
+    if (positionDeltaReader != null) {
+      if (--positionsPending >= 0) {
+        if (++posPointer >= posPointerMax) {
+          posPointerMax = positionDeltaReader.fill();
+          assert posPointerMax != 0;
+          posPointer = 0;
+        }
+        currentPos += pos[posPointer];
+        return currentPos;
+      }
+      currentPos = 0;
+      positionsPending = 0;
+    }
+    return -1;
+  }
+
+  @Override
+  public BytesRef getPayload() throws IOException {
+    // TODO payloads are not supported by this wrapper yet
+    return null;
+  }
+
+  @Override
+  public boolean hasPayload() {
+    // TODO payloads are not supported by this wrapper yet
+    return false;
+  }
+
+  // Term frequency of the current doc; 1 when there is no freqs reader.
+  @Override
+  public int freq() {
+    return freqsReader == null ? 1 : freqs[freqPointer];
+  }
+
+  @Override
+  public int docID() {
+    return doc;
+  }
+
+  // Consumes postings one at a time, accumulating doc deltas, until one is not
+  // skipped or docFreq postings have been consumed.
+  @Override
+  public int nextDoc() throws IOException {
+    while (count < docFreq) {
+      fillDeltas();
+      fillFreq();
+      count++;
+      doc += docDeltas[docPointer];
+      first = false;
+      assert doc >= 0 && (skipDocs == null || doc < skipDocs.length())
+          && doc != NO_MORE_DOCS : "doc=" + doc + " skipDocs=" + skipDocs
+          + " skipDocs.length="
+          + (skipDocs == null ? "n/a" : skipDocs.length());
+      if (skipDocs == null || !skipDocs.get(doc)) {
+        return doc;
+      }
+    }
+    return doc = NO_MORE_DOCS;
+  }
+
+  // Moves to the first non-skipped doc >= target: scans the rest of the current
+  // doc-delta block, then either jumps in the wrapped enum or refills, then scans.
+  @Override
+  public int advance(int target) throws IOException {
+    // nocommit: should we, here, optimize .advance(target that isn't
+    // too far away) into scan? seems like simple win?
+    // first scan current doc deltas block
+    for (docPointer++; docPointer < docPointerMax && count < docFreq; docPointer++) {
+      assert first || docDeltas[docPointer] > 0;
+      doc += docDeltas[docPointer];
+      first = false;
+      count++;
+      fillFreq();
+      if (doc >= target && (skipDocs == null || !skipDocs.get(doc))) {
+        return doc;
+      }
+    }
+    if (count == docFreq) {
+      return doc = NO_MORE_DOCS;
+    }
+    // not found in current block, seek underlying stream
+    final BulkPostingsEnum.JumpResult jumpResult;
+    if (target - doc > docDeltas.length && // avoid useless jumps
+        (jumpResult = docsEnum.jump(target, count)) != null) {
+      count = jumpResult.count;
+      doc = jumpResult.docID;
+      first = false;
+      reset();
+    } else {
+      // seek did not jump -- just fill next buffer
+      docPointerMax = docDeltasReader.fill();
+      if (docPointerMax != 0) {
+        docPointer = 0;
+        assert first || docDeltas[0] > 0;
+        doc += docDeltas[0];
+        count++;
+        first = false;
+      } else {
+        return doc = NO_MORE_DOCS;
+      }
+      fillFreq();
+    }
+    // now scan -- let the compiler inline this
+    return scan(target);
+  }
+
+  // Walks forward from the current doc until a non-skipped doc >= target is found.
+  private int scan(final int target) throws IOException {
+    while (true) {
+      assert doc >= 0 && doc != NO_MORE_DOCS;
+      if (doc >= target && (skipDocs == null || !skipDocs.get(doc))) {
+        return doc;
+      }
+      if (count >= docFreq) {
+        break;
+      }
+      if (++docPointer >= docPointerMax) {
+        docPointerMax = docDeltasReader.fill();
+        if (docPointerMax != 0) {
+          docPointer = 0;
+        } else {
+          return doc = NO_MORE_DOCS;
+        }
+      }
+      fillFreq();
+      assert first || docDeltas[docPointer] > 0;
+      doc += docDeltas[docPointer];
+      count++;
+    }
+    return doc = NO_MORE_DOCS;
+  }
+
+  // Steps to the next doc delta, refilling the buffer when it is used up.
+  private void fillDeltas() throws IOException {
+    if (++docPointer >= docPointerMax) {
+      docPointerMax = docDeltasReader.fill();
+      assert docPointerMax != 0;
+      docPointer = 0;
+    }
+  }
+
+  // Steps to the next freq and, if positions are read, skips whatever positions
+  // of the previous doc were not consumed so the next doc starts at its own.
+  private void fillFreq() throws IOException {
+    if (freqsReader != null) {
+      if (++freqPointer >= freqPointerMax) {
+        freqPointerMax = freqsReader.fill();
+        assert freqPointerMax != 0;
+        freqPointer = 0;
+      }
+      if (positionDeltaReader != null) {
+        if (positionsPending > 0) {
+          posPointer += positionsPending;
+          // a loop because the pending positions may span several buffers
+          while (posPointer >= posPointerMax) {
+            posPointer -= posPointerMax; // carry the remaining skip over
+            posPointerMax = positionDeltaReader.fill();
+            assert posPointerMax != 0;
+          }
+        } else if (posPointer + 1 >= posPointerMax) {
+          posPointerMax = positionDeltaReader.fill();
+          assert posPointerMax != 0;
+          posPointer = -1;
+        }
+        currentPos = 0;
+        positionsPending = freqs[freqPointer];
+      }
+    }
+  }
+
+  // Re-reads offset/end from every reader and parks each pointer one entry before
+  // the first unread one, since all consumers pre-increment.
+  private final void reset() throws IOException {
+    docPointer = docDeltasReader.offset();
+    docPointerMax = docDeltasReader.end();
+    assert docPointerMax >= docPointer : "dP=" + docPointer + " dPMax=" + docPointerMax;
+    // must not depend on freqs being present: fillDeltas(), advance() and scan() pre-increment
+    --docPointer;
+    if (freqsReader != null) { // do we have freqs?
+      freqPointer = freqsReader.offset();
+      freqPointerMax = freqsReader.end();
+      assert freqPointerMax >= freqPointer : "fP=" + freqPointer + " fPMax=" + freqPointerMax;
+      --freqPointer;
+      if (positionDeltaReader != null) { // compiler should optimize this away
+        currentPos = 0;
+        posPointer = positionDeltaReader.offset();
+        posPointerMax = positionDeltaReader.end();
+        assert posPointerMax >= posPointer : "pP=" + posPointer + " pPMax=" + posPointerMax;
+        --posPointer;
+        positionsPending = 0;
+      }
+    }
+  }
+}
\ No newline at end of file

Modified: lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/TestDocsAndPositions.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/TestDocsAndPositions.java?rev=1063638&r1=1063637&r2=1063638&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/TestDocsAndPositions.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/TestDocsAndPositions.java Wed Jan 26 09:00:05 2011
@@ -92,17 +92,36 @@ public class TestDocsAndPositions extend
 
   public DocsAndPositionsEnum getDocsAndPositions(IndexReader reader,
       BytesRef bytes, Bits skipDocs) throws IOException {
+    if (random.nextInt(10) == 0) { // roughly 1 call in 10: use the non-bulk positions enum
       return reader.termPositionsEnum(null, fieldName, bytes);
+    } else { // otherwise go through the bulk API and adapt it with the wrapper
+      BulkPostingsEnum bulkTermPostingsEnum = reader.bulkTermPostingsEnum(
+          fieldName, bytes, true, true);
+      if (bulkTermPostingsEnum == null){ // reader returned no bulk enum: propagate null
+        return null;
+      }
+      return new BulkPostingsEnumWrapper(bulkTermPostingsEnum, null, // NOTE(review): skipDocs is unused on both paths (null is passed) - confirm intended
+          reader.docFreq(new Term(fieldName, bytes))); // docFreq bounds how many postings the wrapper reads
+    }
   }
 
   public DocsEnum getDocsEnum(IndexReader reader, BytesRef bytes,
       boolean freqs, Bits skipDocs) throws IOException {
     int randInt = random.nextInt(10);
-    if (randInt == 0) { // once in a while throw in a positions enum
+    if (randInt == 0) { // roughly 1 call in 10: plain (non-bulk) docs enum
+      return reader.termDocsEnum(skipDocs, fieldName, bytes);
+    } else if (randInt == 5) { // roughly 1 call in 10: hand out a positions enum as the DocsEnum
+
       return getDocsAndPositions(reader, bytes, skipDocs);
     } else {
-      return reader.termDocsEnum(skipDocs, fieldName, bytes);
-    } 
+      BulkPostingsEnum bulkTermPostingsEnum = reader.bulkTermPostingsEnum(
+          fieldName, bytes, freqs, false); // freqs as the caller asked; last flag presumably turns positions off - TODO confirm
+      if (bulkTermPostingsEnum == null) { // reader returned no bulk enum: propagate null
+        return null;
+      }
+      return new BulkPostingsEnumWrapper(bulkTermPostingsEnum, skipDocs,
+          reader.docFreq(new Term(fieldName, bytes))); // docFreq bounds how many postings the wrapper reads
+    }
   }
 
   /**