Posted to commits@lucene.apache.org by jp...@apache.org on 2012/08/09 11:16:09 UTC

svn commit: r1371114 - in /lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/codecs/blockpacked: BlockPackedPostingsFormat.java BlockPackedPostingsReader.java BlockPackedSkipWriter.java

Author: jpountz
Date: Thu Aug  9 09:16:09 2012
New Revision: 1371114

URL: http://svn.apache.org/viewvc?rev=1371114&view=rev
Log:
LUCENE-3892: backporting r1371010 and r1371011 to BlockPacked.

I changed the comment on the value of BLOCK_SIZE. Indeed, since PackedInts
encoding/decoding is long-aligned, any number of bits per value that is
coprime with 64 cannot encode/decode fewer than 64 values per iteration, so the
block size should be at least 64. Switching it to a lower value (e.g. 32) should
work but would perform many unnecessary operations in the encoding/decoding step.
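
(Not part of the commit: a minimal sketch of the arithmetic behind that comment,
assuming the usual long-aligned layout in which one encode/decode iteration
consumes lcm(64, bitsPerValue) bits, i.e. 64 / gcd(64, bitsPerValue) values.
The class and method names below are made up for illustration only.)

    // Sketch only: how many values a long-aligned packed codec must
    // process per iteration for a given number of bits per value.
    public class BlockSizeNote {
      static int gcd(int a, int b) {
        return b == 0 ? a : gcd(b, a % b);
      }

      // One iteration consumes lcm(64, bitsPerValue) bits, i.e. decodes
      // 64 / gcd(64, bitsPerValue) values.
      static int valuesPerIteration(int bitsPerValue) {
        return 64 / gcd(64, bitsPerValue);
      }

      public static void main(String[] args) {
        for (int bpv = 1; bpv <= 64; bpv++) {
          System.out.println(bpv + " bits/value -> "
              + valuesPerIteration(bpv) + " values/iteration");
        }
        // Any odd bitsPerValue is coprime with 64, so 64 values are decoded
        // per iteration; hence BLOCK_SIZE must be a multiple of 64 (here 128).
      }
    }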

Modified:
    lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/codecs/blockpacked/BlockPackedPostingsFormat.java
    lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/codecs/blockpacked/BlockPackedPostingsReader.java
    lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/codecs/blockpacked/BlockPackedSkipWriter.java

Modified: lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/codecs/blockpacked/BlockPackedPostingsFormat.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/codecs/blockpacked/BlockPackedPostingsFormat.java?rev=1371114&r1=1371113&r2=1371114&view=diff
==============================================================================
--- lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/codecs/blockpacked/BlockPackedPostingsFormat.java (original)
+++ lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/codecs/blockpacked/BlockPackedPostingsFormat.java Thu Aug  9 09:16:09 2012
@@ -41,6 +41,10 @@ public final class BlockPackedPostingsFo
 
   private final int minTermBlockSize;
   private final int maxTermBlockSize;
+
+  // nocommit is this right?:
+  // NOTE: should be at least 64 because of PackedInts long-aligned encoding/decoding
+  // NOTE: must be factor of ... 64?
   public final static int BLOCK_SIZE = 128;
 
   public BlockPackedPostingsFormat() {

Modified: lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/codecs/blockpacked/BlockPackedPostingsReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/codecs/blockpacked/BlockPackedPostingsReader.java?rev=1371114&r1=1371113&r2=1371114&view=diff
==============================================================================
--- lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/codecs/blockpacked/BlockPackedPostingsReader.java (original)
+++ lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/codecs/blockpacked/BlockPackedPostingsReader.java Thu Aug  9 09:16:09 2012
@@ -384,7 +384,6 @@ public final class BlockPackedPostingsRe
     }
     
     private void refillDocs() throws IOException {
-      //System.out.println("["+docFreq+"]"+" refillDoc");
       final int left = docFreq - docUpto;
       assert left > 0;
 
@@ -426,7 +425,6 @@ public final class BlockPackedPostingsRe
           }
           return doc = NO_MORE_DOCS;
         }
-        //System.out.println("["+docFreq+"]"+" nextDoc");
         if (docBufferUpto == BLOCK_SIZE) {
           refillDocs();
         }
@@ -486,15 +484,15 @@ public final class BlockPackedPostingsRe
           skipped = true;
         }
 
-        final int newDocUpto = skipper.skipTo(target); 
+        final int newDocUpto = skipper.skipTo(target) + 1; 
 
         if (newDocUpto > docUpto) {
           // Skipper moved
           if (DEBUG) {
             System.out.println("skipper moved to docUpto=" + newDocUpto + " vs current=" + docUpto + "; docID=" + skipper.getDoc() + " fp=" + skipper.getDocPointer());
           }
-          assert newDocUpto % BLOCK_SIZE == (BLOCK_SIZE - 1): "got " + newDocUpto;
-          docUpto = newDocUpto+1;
+          assert newDocUpto % BLOCK_SIZE == 0 : "got " + newDocUpto;
+          docUpto = newDocUpto;
 
           // Force to read next block
           docBufferUpto = BLOCK_SIZE;
@@ -503,6 +501,12 @@ public final class BlockPackedPostingsRe
         }
         nextSkipDoc = skipper.getNextSkipDoc();
       }
+      if (docUpto == docFreq) {
+        return doc = NO_MORE_DOCS;
+      }
+      if (docBufferUpto == BLOCK_SIZE) {
+        refillDocs();
+      }
 
       // Now scan... this is an inlined/pared down version
       // of nextDoc():
@@ -510,18 +514,6 @@ public final class BlockPackedPostingsRe
         if (DEBUG) {
           System.out.println("  scan doc=" + accum + " docBufferUpto=" + docBufferUpto);
         }
-        if (docUpto == docFreq) {
-          return doc = NO_MORE_DOCS;
-        }
-
-        // nocommit: in theory we should not hit this?  ie
-        // skipper should already have moved us to the block
-        // containing the doc?  yet assert false trips ... i
-        // think because if you advance w/o having done a
-        // nextDoc yet()... can we assert/remove this?
-        if (docBufferUpto == BLOCK_SIZE) {
-          refillDocs();
-        }
         accum += docDeltaBuffer[docBufferUpto];
         docUpto++;
 
@@ -529,6 +521,9 @@ public final class BlockPackedPostingsRe
           break;
         }
         docBufferUpto++;
+        if (docUpto == docFreq) {
+          return doc = NO_MORE_DOCS;
+        }
       }
 
       if (liveDocs == null || liveDocs.get(accum)) {
@@ -666,9 +661,9 @@ public final class BlockPackedPostingsRe
     }
 
     private void refillDocs() throws IOException {
-    //System.out.println("["+docFreq+"]"+" refillDoc");
       final int left = docFreq - docUpto;
       assert left > 0;
+
       if (left >= BLOCK_SIZE) {
         if (DEBUG) {
           System.out.println("    fill doc block from fp=" + docIn.getFilePointer());
@@ -797,7 +792,7 @@ public final class BlockPackedPostingsRe
           skipped = true;
         }
 
-        final int newDocUpto = skipper.skipTo(target); 
+        final int newDocUpto = skipper.skipTo(target) + 1; 
 
         if (newDocUpto > docUpto) {
           // Skipper moved
@@ -805,8 +800,8 @@ public final class BlockPackedPostingsRe
             System.out.println("    skipper moved to docUpto=" + newDocUpto + " vs current=" + docUpto + "; docID=" + skipper.getDoc() + " fp=" + skipper.getDocPointer() + " pos.fp=" + skipper.getPosPointer() + " pos.bufferUpto=" + skipper.getPosBufferUpto());
           }
 
-          assert newDocUpto % BLOCK_SIZE == (BLOCK_SIZE - 1): "got " + newDocUpto;
-          docUpto = newDocUpto+1;
+          assert newDocUpto % BLOCK_SIZE == 0 : "got " + newDocUpto;
+          docUpto = newDocUpto;
 
           // Force to read next block
           docBufferUpto = BLOCK_SIZE;
@@ -817,6 +812,12 @@ public final class BlockPackedPostingsRe
         }
         nextSkipDoc = skipper.getNextSkipDoc();
       }
+      if (docUpto == docFreq) {
+        return doc = NO_MORE_DOCS;
+      }
+      if (docBufferUpto == BLOCK_SIZE) {
+        refillDocs();
+      }
 
       // Now scan... this is an inlined/pared down version
       // of nextDoc():
@@ -827,16 +828,6 @@ public final class BlockPackedPostingsRe
         if (docUpto == docFreq) {
           return doc = NO_MORE_DOCS;
         }
-        // nocommit: in theory we should not hit this?  ie
-        // skipper should already have moved us to the block
-        // containing the doc?  yet assert false trips ... i
-        // think because if you advance w/o having done a
-        // nextDoc yet()... can we assert/remove this?
-        if (docBufferUpto == BLOCK_SIZE) {
-          // nocommit hmm skip freq?  but: we don't ever
-          // scan over more than one block?
-          refillDocs();
-        }
         accum += docDeltaBuffer[docBufferUpto];
         freq = freqBuffer[docBufferUpto];
         posPendingCount += freq;
@@ -846,6 +837,9 @@ public final class BlockPackedPostingsRe
         if (accum >= target) {
           break;
         }
+        if (docUpto == docFreq) {
+          return doc = NO_MORE_DOCS;
+        }
       }
 
       if (liveDocs == null || liveDocs.get(accum)) {
@@ -1110,7 +1104,6 @@ public final class BlockPackedPostingsRe
     }
 
     private void refillDocs() throws IOException {
-      //System.out.println("["+docFreq+"]"+" refillDoc");
       final int left = docFreq - docUpto;
       assert left > 0;
 
@@ -1226,7 +1219,6 @@ public final class BlockPackedPostingsRe
         if (docUpto == docFreq) {
           return doc = NO_MORE_DOCS;
         }
-        //System.out.println("["+docFreq+"]"+" nextDoc");
         if (docBufferUpto == BLOCK_SIZE) {
           refillDocs();
         }
@@ -1293,15 +1285,15 @@ public final class BlockPackedPostingsRe
           skipped = true;
         }
 
-        final int newDocUpto = skipper.skipTo(target); 
+        final int newDocUpto = skipper.skipTo(target) + 1; 
 
         if (newDocUpto > docUpto) {
           // Skipper moved
           if (DEBUG) {
             System.out.println("    skipper moved to docUpto=" + newDocUpto + " vs current=" + docUpto + "; docID=" + skipper.getDoc() + " fp=" + skipper.getDocPointer() + " pos.fp=" + skipper.getPosPointer() + " pos.bufferUpto=" + skipper.getPosBufferUpto() + " pay.fp=" + skipper.getPayPointer() + " lastStartOffset=" + lastStartOffset);
           }
-          assert newDocUpto % BLOCK_SIZE == (BLOCK_SIZE - 1): "got " + newDocUpto;
-          docUpto = newDocUpto+1;
+          assert newDocUpto % BLOCK_SIZE == 0 : "got " + newDocUpto;
+          docUpto = newDocUpto;
 
           // Force to read next block
           docBufferUpto = BLOCK_SIZE;
@@ -1315,24 +1307,51 @@ public final class BlockPackedPostingsRe
         }
         nextSkipDoc = skipper.getNextSkipDoc();
       }
-
-      // nocommit inline nextDoc here
+      if (docUpto == docFreq) {
+        return doc = NO_MORE_DOCS;
+      }
+      if (docBufferUpto == BLOCK_SIZE) {
+        refillDocs();
+      }
 
       // Now scan:
-      while (nextDoc() != NO_MORE_DOCS) {
-        if (doc >= target) {
-          if (DEBUG) {
-            System.out.println("  advance return doc=" + doc);
-          }
-          return doc;
+      // Now scan:
+      while (true) {
+        if (DEBUG) {
+          System.out.println("  scan doc=" + accum + " docBufferUpto=" + docBufferUpto);
         }
-      }
+        accum += docDeltaBuffer[docBufferUpto];
+        freq = freqBuffer[docBufferUpto];
+        posPendingCount += freq;
+        docBufferUpto++;
+        docUpto++;
 
-      if (DEBUG) {
-        System.out.println("  advance return doc=END");
+        if (accum >= target) {
+          break;
+        }
+        if (docUpto == docFreq) {
+          return doc = NO_MORE_DOCS;
+        }
       }
 
-      return NO_MORE_DOCS;
+      if (liveDocs == null || liveDocs.get(accum)) {
+        if (DEBUG) {
+          System.out.println("  return doc=" + accum);
+        }
+        if (indexHasPayloads) {
+          payloadByteUpto += payloadLength;
+          payloadLength = 0;
+        }
+        position = 0;
+        payloadLength = 0;
+        lastStartOffset = 0;
+        return doc = accum;
+      } else {
+        if (DEBUG) {
+          System.out.println("  now do nextDoc()");
+        }
+        return nextDoc();
+      }
     }
 
     // nocommit in theory we could avoid loading frq block

Modified: lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/codecs/blockpacked/BlockPackedSkipWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/codecs/blockpacked/BlockPackedSkipWriter.java?rev=1371114&r1=1371113&r2=1371114&view=diff
==============================================================================
--- lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/codecs/blockpacked/BlockPackedSkipWriter.java (original)
+++ lucene/dev/branches/pforcodec_3892/lucene/core/src/java/org/apache/lucene/codecs/blockpacked/BlockPackedSkipWriter.java Thu Aug  9 09:16:09 2012
@@ -37,7 +37,7 @@ import org.apache.lucene.codecs.MultiLev
  * block, only record skip data at the start its start point(if it exist).
  *
  * For each skip point, we will record: 
- * 1. lastDocID, 
+ * 1. docID in former position, i.e. for position 12, record docID[11], etc.
  * 2. its related file points(position, payload), 
  * 3. related numbers or uptos(position, payload).
  * 4. start offset.
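
(Not part of the commit: a rough sketch, with hypothetical local variables, of the
reader-side invariant that the skipTo() changes above rely on. With each skip point
recording the docID at the former position, skipTo(target) returns the 0-based index
of the last doc covered by that skip point, so adding 1 yields the number of docs
already consumed, which always falls on a block boundary.)

    // Sketch only: mirrors the advance() changes in this commit.
    int lastDocIndex = skipper.skipTo(target);   // e.g. 127 at the first skip point
    int newDocUpto = lastDocIndex + 1;           // 128 == BLOCK_SIZE
    assert newDocUpto % BLOCK_SIZE == 0 : "got " + newDocUpto;
    docUpto = newDocUpto;          // resume counting at the block boundary
    docBufferUpto = BLOCK_SIZE;    // forces refillDocs() before the next scan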