You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by mi...@apache.org on 2011/08/19 18:40:56 UTC
svn commit: r1159679 - in /lucene/dev/branches/blocktree_3030: TODO
lucene/src/java/org/apache/lucene/index/codecs/BlockTreeTermsReader.java
lucene/src/java/org/apache/lucene/index/codecs/BlockTreeTermsWriter.java
Author: mikemccand
Date: Fri Aug 19 16:40:56 2011
New Revision: 1159679
URL: http://svn.apache.org/viewvc?rev=1159679&view=rev
Log:
LUCENE-3030: add comments
Modified:
lucene/dev/branches/blocktree_3030/TODO
lucene/dev/branches/blocktree_3030/lucene/src/java/org/apache/lucene/index/codecs/BlockTreeTermsReader.java
lucene/dev/branches/blocktree_3030/lucene/src/java/org/apache/lucene/index/codecs/BlockTreeTermsWriter.java
Modified: lucene/dev/branches/blocktree_3030/TODO
URL: http://svn.apache.org/viewvc/lucene/dev/branches/blocktree_3030/TODO?rev=1159679&r1=1159678&r2=1159679&view=diff
==============================================================================
--- lucene/dev/branches/blocktree_3030/TODO (original)
+++ lucene/dev/branches/blocktree_3030/TODO Fri Aug 19 16:40:56 2011
@@ -10,7 +10,6 @@ perf tests:
Indexer: net bytes indexed 9635556306
Indexer: 30.824156883707392 GB/hour plain text
- - test perf on single seg index too!
- try *larger* maxItemsInBlock: could give better net perf? ie less seeking and more scanning
what to do about short terms that "force" a block to mark itself as hasTerms!!??
Modified: lucene/dev/branches/blocktree_3030/lucene/src/java/org/apache/lucene/index/codecs/BlockTreeTermsReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/blocktree_3030/lucene/src/java/org/apache/lucene/index/codecs/BlockTreeTermsReader.java?rev=1159679&r1=1159678&r2=1159679&view=diff
==============================================================================
--- lucene/dev/branches/blocktree_3030/lucene/src/java/org/apache/lucene/index/codecs/BlockTreeTermsReader.java (original)
+++ lucene/dev/branches/blocktree_3030/lucene/src/java/org/apache/lucene/index/codecs/BlockTreeTermsReader.java Fri Aug 19 16:40:56 2011
@@ -765,6 +765,14 @@ public class BlockTreeTermsReader extend
for(int arcIdx=0;arcIdx<arcs.length;arcIdx++) {
arcs[arcIdx] = new FST.Arc<BytesRef>();
}
+
+ // TODO: if the automaton is "smallish" we really
+ // should use the terms index to seek at least to
+ // the initial term and likely to subsequent terms
+ // (or, maybe just fall back to ATE for such cases).
+ // Else the seek cost of loading the frames will be
+ // too high.
+
final FST.Arc<BytesRef> arc = index.getFirstArc(arcs[0]);
// Empty string prefix must have an output in the index!
assert arc.isFinal();
Modified: lucene/dev/branches/blocktree_3030/lucene/src/java/org/apache/lucene/index/codecs/BlockTreeTermsWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/blocktree_3030/lucene/src/java/org/apache/lucene/index/codecs/BlockTreeTermsWriter.java?rev=1159679&r1=1159678&r2=1159679&view=diff
==============================================================================
--- lucene/dev/branches/blocktree_3030/lucene/src/java/org/apache/lucene/index/codecs/BlockTreeTermsWriter.java (original)
+++ lucene/dev/branches/blocktree_3030/lucene/src/java/org/apache/lucene/index/codecs/BlockTreeTermsWriter.java Fri Aug 19 16:40:56 2011
@@ -759,6 +759,9 @@ public class BlockTreeTermsWriter extend
final int suffix = block.prefix.length - prefixLength;
assert suffix > 0;
+
+ // For a non-leaf block we borrow 1 bit to record
+ // whether the entry is a term or a sub-block
bytesWriter.writeVInt((suffix<<1)|1);
bytesWriter.writeBytes(block.prefix.bytes, prefixLength, suffix);
assert block.fp < startFP;