You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by mi...@apache.org on 2014/08/07 11:31:26 UTC

svn commit: r1616448 - in /lucene/dev/trunk/lucene/core/src: java/org/apache/lucene/codecs/blocktree/BlockTreeTermsWriter.java test/org/apache/lucene/codecs/lucene41/TestLucene41PostingsFormat.java

Author: mikemccand
Date: Thu Aug  7 09:31:26 2014
New Revision: 1616448

URL: http://svn.apache.org/r1616448
Log:
LUCENE-5841: make sure final term blocks are the right size

Added:
    lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/codecs/lucene41/TestLucene41PostingsFormat.java
      - copied, changed from r1615236, lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/index/TestPostingsFormat.java
Modified:
    lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/blocktree/BlockTreeTermsWriter.java

Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/blocktree/BlockTreeTermsWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/blocktree/BlockTreeTermsWriter.java?rev=1616448&r1=1616447&r2=1616448&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/blocktree/BlockTreeTermsWriter.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/blocktree/BlockTreeTermsWriter.java Thu Aug  7 09:31:26 2014
@@ -686,8 +686,6 @@ public final class BlockTreeTermsWriter 
 
       long startFP = out.getFilePointer();
 
-      // if (DEBUG) System.out.println("    writeBlock fp=" + startFP + " isFloor=" + isFloor + " floorLeadLabel=" + floorLeadLabel + " start=" + start + " end=" + end + " hasTerms=" + hasTerms + " hasSubBlocks=" + hasSubBlocks);
-
       boolean hasFloorLeadLabel = isFloor && floorLeadLabel != -1;
 
       final BytesRef prefix = new BytesRef(prefixLength + (hasFloorLeadLabel ? 1 : 0));
@@ -703,9 +701,11 @@ public final class BlockTreeTermsWriter 
       }
       out.writeVInt(code);
 
-      // if (DEBUG) {
-      //   System.out.println("  writeBlock " + (isFloor ? "(floor) " : "") + "seg=" + segment + " pending.size()=" + pending.size() + " prefixLength=" + prefixLength + " indexPrefix=" + brToString(prefix) + " entCount=" + length + " startFP=" + startFP + (isFloor ? (" floorLeadByte=" + Integer.toHexString(floorLeadByte&0xff)) : "") + " isLastInFloor=" + isLastInFloor);
-      // }
+      /*
+      if (DEBUG) {
+        System.out.println("  writeBlock " + (isFloor ? "(floor) " : "") + "seg=" + segment + " pending.size()=" + pending.size() + " prefixLength=" + prefixLength + " indexPrefix=" + brToString(prefix) + " entCount=" + (end-start+1) + " startFP=" + startFP + (isFloor ? (" floorLeadLabel=" + Integer.toHexString(floorLeadLabel)) : ""));
+      }
+      */
 
       // 1st pass: pack term suffix bytes into byte[] blob
       // TODO: cutover to bulk int codec... simple64?
@@ -732,9 +732,9 @@ public final class BlockTreeTermsWriter 
           /*
           if (DEBUG) {
             BytesRef suffixBytes = new BytesRef(suffix);
-            System.arraycopy(term.term.bytes, prefixLength, suffixBytes.bytes, 0, suffix);
+            System.arraycopy(term.termBytes, prefixLength, suffixBytes.bytes, 0, suffix);
             suffixBytes.length = suffix;
-            System.out.println("    write term suffix=" + suffixBytes);
+            System.out.println("    write term suffix=" + brToString(suffixBytes));
           }
           */
           // For leaf block we write suffix straight
@@ -772,9 +772,9 @@ public final class BlockTreeTermsWriter 
             /*
             if (DEBUG) {
               BytesRef suffixBytes = new BytesRef(suffix);
-              System.arraycopy(term.term.bytes, prefixLength, suffixBytes.bytes, 0, suffix);
+              System.arraycopy(term.termBytes, prefixLength, suffixBytes.bytes, 0, suffix);
               suffixBytes.length = suffix;
-              System.out.println("    write term suffix=" + suffixBytes);
+              System.out.println("    write term suffix=" + brToString(suffixBytes));
             }
             */
             // For non-leaf block we borrow 1 bit to record
@@ -949,6 +949,9 @@ public final class BlockTreeTermsWriter 
       if (numTerms > 0) {
         // if (DEBUG) System.out.println("BTTW: finish prefixStarts=" + Arrays.toString(prefixStarts));
 
+        // Add empty term to force closing of all final blocks:
+        pushTerm(new BytesRef());
+
         // TODO: if pending.size() is already 1 with a non-zero prefix length
         // we can save writing a "degenerate" root block, but we have to
         // fix all the places that assume the root block's prefix is the empty string:

Copied: lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/codecs/lucene41/TestLucene41PostingsFormat.java (from r1615236, lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/index/TestPostingsFormat.java)
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/codecs/lucene41/TestLucene41PostingsFormat.java?p2=lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/codecs/lucene41/TestLucene41PostingsFormat.java&p1=lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/index/TestPostingsFormat.java&r1=1615236&r2=1616448&rev=1616448&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/index/TestPostingsFormat.java (original)
+++ lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/codecs/lucene41/TestLucene41PostingsFormat.java Thu Aug  7 09:31:26 2014
@@ -1,4 +1,4 @@
-package org.apache.lucene.index;
+package org.apache.lucene.codecs.lucene41;
 
 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
@@ -17,16 +17,24 @@ package org.apache.lucene.index;
  * limitations under the License.
  */
 
+import org.apache.lucene.analysis.MockAnalyzer;
 import org.apache.lucene.codecs.Codec;
+import org.apache.lucene.codecs.blocktree.FieldReader;
+import org.apache.lucene.codecs.blocktree.Stats;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.index.BasePostingsFormatTestCase;
+import org.apache.lucene.index.DirectoryReader;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.IndexWriterConfig;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.util.TestUtil;
 
-/** Tests the codec configuration defined by LuceneTestCase randomly
- *  (typically a mix across different fields).
- */
-public class TestPostingsFormat extends BasePostingsFormatTestCase {
+public class TestLucene41PostingsFormat extends BasePostingsFormatTestCase {
 
   @Override
   protected Codec getCodec() {
-    return Codec.getDefault();
+    return TestUtil.alwaysPostingsFormat(new Lucene41PostingsFormat());
   }
 
   @Override
@@ -34,4 +42,27 @@ public class TestPostingsFormat extends 
     assumeTrue("The MockRandom PF randomizes content on the fly, so we can't check it", false);
   }
 
+  /** Regression test for LUCENE-5841: make sure the final sub-block(s) are not skipped, by checking the block counts reported for a small, fixed set of terms. */
+  public void testFinalBlock() throws Exception {
+    Directory d = newDirectory();
+    IndexWriter w = new IndexWriter(d, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())));
+    for(int i=0;i<25;i++) {
+      Document doc = new Document();
+      doc.add(newStringField("field", Character.toString((char) (97+i)), Field.Store.NO)); // single-character term: 'a' + i, i.e. "a".."y"
+      doc.add(newStringField("field", "z" + Character.toString((char) (97+i)), Field.Store.NO)); // same character prefixed with "z": "za".."zy"
+      w.addDocument(doc);
+    }
+    w.forceMerge(1); // merge down to one segment; the single-leaf assertion below depends on it
+
+    DirectoryReader r = DirectoryReader.open(w, true);
+    assertEquals(1, r.leaves().size());
+    FieldReader field = (FieldReader) r.leaves().get(0).reader().fields().terms("field"); // cast gives access to the block-tree specific computeStats()
+    // We should see exactly two blocks: one root block (prefix empty string) and one block for z* terms (prefix z):
+    Stats stats = field.computeStats();
+    assertEquals(0, stats.floorBlockCount);
+    assertEquals(2, stats.nonFloorBlockCount);
+    r.close();
+    w.close();
+    d.close();
+  }
 }