You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by mi...@apache.org on 2011/08/19 18:40:56 UTC

svn commit: r1159679 - in /lucene/dev/branches/blocktree_3030: TODO lucene/src/java/org/apache/lucene/index/codecs/BlockTreeTermsReader.java lucene/src/java/org/apache/lucene/index/codecs/BlockTreeTermsWriter.java

Author: mikemccand
Date: Fri Aug 19 16:40:56 2011
New Revision: 1159679

URL: http://svn.apache.org/viewvc?rev=1159679&view=rev
Log:
LUCENE-3030: add comments

Modified:
    lucene/dev/branches/blocktree_3030/TODO
    lucene/dev/branches/blocktree_3030/lucene/src/java/org/apache/lucene/index/codecs/BlockTreeTermsReader.java
    lucene/dev/branches/blocktree_3030/lucene/src/java/org/apache/lucene/index/codecs/BlockTreeTermsWriter.java

Modified: lucene/dev/branches/blocktree_3030/TODO
URL: http://svn.apache.org/viewvc/lucene/dev/branches/blocktree_3030/TODO?rev=1159679&r1=1159678&r2=1159679&view=diff
==============================================================================
--- lucene/dev/branches/blocktree_3030/TODO (original)
+++ lucene/dev/branches/blocktree_3030/TODO Fri Aug 19 16:40:56 2011
@@ -10,7 +10,6 @@ perf tests:
       Indexer: net bytes indexed 9635556306
       Indexer: 30.824156883707392 GB/hour plain text
 
-  - test perf on single seg index too!
   - try *larger* maxItemsInBlock: could give better net perf?  ie less seeking and more scanning
 
 what to do about short terms that "force" a block to mark itself as hasTerms!!??

Modified: lucene/dev/branches/blocktree_3030/lucene/src/java/org/apache/lucene/index/codecs/BlockTreeTermsReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/blocktree_3030/lucene/src/java/org/apache/lucene/index/codecs/BlockTreeTermsReader.java?rev=1159679&r1=1159678&r2=1159679&view=diff
==============================================================================
--- lucene/dev/branches/blocktree_3030/lucene/src/java/org/apache/lucene/index/codecs/BlockTreeTermsReader.java (original)
+++ lucene/dev/branches/blocktree_3030/lucene/src/java/org/apache/lucene/index/codecs/BlockTreeTermsReader.java Fri Aug 19 16:40:56 2011
@@ -765,6 +765,14 @@ public class BlockTreeTermsReader extend
         for(int arcIdx=0;arcIdx<arcs.length;arcIdx++) {
           arcs[arcIdx] = new FST.Arc<BytesRef>();
         }
+
+        // TODO: if the automaton is "smallish" we really
+        // should use the terms index to seek at least to
+        // the initial term and likely to subsequent terms
+        // (or, maybe just fallback to ATE for such cases).
+        // Else the seeks needed to load the frames will be
+        // too costly.
+
         final FST.Arc<BytesRef> arc = index.getFirstArc(arcs[0]);
         // Empty string prefix must have an output in the index!
         assert arc.isFinal();

Modified: lucene/dev/branches/blocktree_3030/lucene/src/java/org/apache/lucene/index/codecs/BlockTreeTermsWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/blocktree_3030/lucene/src/java/org/apache/lucene/index/codecs/BlockTreeTermsWriter.java?rev=1159679&r1=1159678&r2=1159679&view=diff
==============================================================================
--- lucene/dev/branches/blocktree_3030/lucene/src/java/org/apache/lucene/index/codecs/BlockTreeTermsWriter.java (original)
+++ lucene/dev/branches/blocktree_3030/lucene/src/java/org/apache/lucene/index/codecs/BlockTreeTermsWriter.java Fri Aug 19 16:40:56 2011
@@ -759,6 +759,9 @@ public class BlockTreeTermsWriter extend
             final int suffix = block.prefix.length - prefixLength;
 
             assert suffix > 0;
+
+            // For non-leaf blocks we borrow the low bit to record
+            // whether the entry is a term or a sub-block
             bytesWriter.writeVInt((suffix<<1)|1);
             bytesWriter.writeBytes(block.prefix.bytes, prefixLength, suffix);
             assert block.fp < startFP;