You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by mi...@apache.org on 2014/08/07 11:31:26 UTC
svn commit: r1616448 - in /lucene/dev/trunk/lucene/core/src:
java/org/apache/lucene/codecs/blocktree/BlockTreeTermsWriter.java
test/org/apache/lucene/codecs/lucene41/TestLucene41PostingsFormat.java
Author: mikemccand
Date: Thu Aug 7 09:31:26 2014
New Revision: 1616448
URL: http://svn.apache.org/r1616448
Log:
LUCENE-5841: make sure final term blocks are the right size
Added:
lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/codecs/lucene41/TestLucene41PostingsFormat.java
- copied, changed from r1615236, lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/index/TestPostingsFormat.java
Modified:
lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/blocktree/BlockTreeTermsWriter.java
Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/blocktree/BlockTreeTermsWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/blocktree/BlockTreeTermsWriter.java?rev=1616448&r1=1616447&r2=1616448&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/blocktree/BlockTreeTermsWriter.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/blocktree/BlockTreeTermsWriter.java Thu Aug 7 09:31:26 2014
@@ -686,8 +686,6 @@ public final class BlockTreeTermsWriter
long startFP = out.getFilePointer();
- // if (DEBUG) System.out.println(" writeBlock fp=" + startFP + " isFloor=" + isFloor + " floorLeadLabel=" + floorLeadLabel + " start=" + start + " end=" + end + " hasTerms=" + hasTerms + " hasSubBlocks=" + hasSubBlocks);
-
boolean hasFloorLeadLabel = isFloor && floorLeadLabel != -1;
final BytesRef prefix = new BytesRef(prefixLength + (hasFloorLeadLabel ? 1 : 0));
@@ -703,9 +701,11 @@ public final class BlockTreeTermsWriter
}
out.writeVInt(code);
- // if (DEBUG) {
- // System.out.println(" writeBlock " + (isFloor ? "(floor) " : "") + "seg=" + segment + " pending.size()=" + pending.size() + " prefixLength=" + prefixLength + " indexPrefix=" + brToString(prefix) + " entCount=" + length + " startFP=" + startFP + (isFloor ? (" floorLeadByte=" + Integer.toHexString(floorLeadByte&0xff)) : "") + " isLastInFloor=" + isLastInFloor);
- // }
+ /*
+ if (DEBUG) {
+ System.out.println(" writeBlock " + (isFloor ? "(floor) " : "") + "seg=" + segment + " pending.size()=" + pending.size() + " prefixLength=" + prefixLength + " indexPrefix=" + brToString(prefix) + " entCount=" + (end-start+1) + " startFP=" + startFP + (isFloor ? (" floorLeadLabel=" + Integer.toHexString(floorLeadLabel)) : ""));
+ }
+ */
// 1st pass: pack term suffix bytes into byte[] blob
// TODO: cutover to bulk int codec... simple64?
@@ -732,9 +732,9 @@ public final class BlockTreeTermsWriter
/*
if (DEBUG) {
BytesRef suffixBytes = new BytesRef(suffix);
- System.arraycopy(term.term.bytes, prefixLength, suffixBytes.bytes, 0, suffix);
+ System.arraycopy(term.termBytes, prefixLength, suffixBytes.bytes, 0, suffix);
suffixBytes.length = suffix;
- System.out.println(" write term suffix=" + suffixBytes);
+ System.out.println(" write term suffix=" + brToString(suffixBytes));
}
*/
// For leaf block we write suffix straight
@@ -772,9 +772,9 @@ public final class BlockTreeTermsWriter
/*
if (DEBUG) {
BytesRef suffixBytes = new BytesRef(suffix);
- System.arraycopy(term.term.bytes, prefixLength, suffixBytes.bytes, 0, suffix);
+ System.arraycopy(term.termBytes, prefixLength, suffixBytes.bytes, 0, suffix);
suffixBytes.length = suffix;
- System.out.println(" write term suffix=" + suffixBytes);
+ System.out.println(" write term suffix=" + brToString(suffixBytes));
}
*/
// For non-leaf block we borrow 1 bit to record
@@ -949,6 +949,9 @@ public final class BlockTreeTermsWriter
if (numTerms > 0) {
// if (DEBUG) System.out.println("BTTW: finish prefixStarts=" + Arrays.toString(prefixStarts));
+ // Add empty term to force closing of all final blocks:
+ pushTerm(new BytesRef());
+
// TODO: if pending.size() is already 1 with a non-zero prefix length
// we can save writing a "degenerate" root block, but we have to
// fix all the places that assume the root block's prefix is the empty string:
Copied: lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/codecs/lucene41/TestLucene41PostingsFormat.java (from r1615236, lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/index/TestPostingsFormat.java)
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/codecs/lucene41/TestLucene41PostingsFormat.java?p2=lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/codecs/lucene41/TestLucene41PostingsFormat.java&p1=lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/index/TestPostingsFormat.java&r1=1615236&r2=1616448&rev=1616448&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/index/TestPostingsFormat.java (original)
+++ lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/codecs/lucene41/TestLucene41PostingsFormat.java Thu Aug 7 09:31:26 2014
@@ -1,4 +1,4 @@
-package org.apache.lucene.index;
+package org.apache.lucene.codecs.lucene41;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
@@ -17,16 +17,24 @@ package org.apache.lucene.index;
* limitations under the License.
*/
+import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.codecs.Codec;
+import org.apache.lucene.codecs.blocktree.FieldReader;
+import org.apache.lucene.codecs.blocktree.Stats;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.index.BasePostingsFormatTestCase;
+import org.apache.lucene.index.DirectoryReader;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.IndexWriterConfig;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.util.TestUtil;
-/** Tests the codec configuration defined by LuceneTestCase randomly
- * (typically a mix across different fields).
- */
-public class TestPostingsFormat extends BasePostingsFormatTestCase {
+public class TestLucene41PostingsFormat extends BasePostingsFormatTestCase {
@Override
protected Codec getCodec() {
- return Codec.getDefault();
+ return TestUtil.alwaysPostingsFormat(new Lucene41PostingsFormat());
}
@Override
@@ -34,4 +42,27 @@ public class TestPostingsFormat extends
assumeTrue("The MockRandom PF randomizes content on the fly, so we can't check it", false);
}
+ /** Make sure the final sub-block(s) are not skipped. */
+ public void testFinalBlock() throws Exception {
+ Directory d = newDirectory();
+ IndexWriter w = new IndexWriter(d, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())));
+ for(int i=0;i<25;i++) {
+ Document doc = new Document();
+ doc.add(newStringField("field", Character.toString((char) (97+i)), Field.Store.NO));
+ doc.add(newStringField("field", "z" + Character.toString((char) (97+i)), Field.Store.NO));
+ w.addDocument(doc);
+ }
+ w.forceMerge(1);
+
+ DirectoryReader r = DirectoryReader.open(w, true);
+ assertEquals(1, r.leaves().size());
+ FieldReader field = (FieldReader) r.leaves().get(0).reader().fields().terms("field");
+ // We should see exactly two blocks: one root block (prefix empty string) and one block for z* terms (prefix z):
+ Stats stats = field.computeStats();
+ assertEquals(0, stats.floorBlockCount);
+ assertEquals(2, stats.nonFloorBlockCount);
+ r.close();
+ w.close();
+ d.close();
+ }
}