You are viewing a plain text version of this content; the hyperlink to the canonical version was not preserved in this copy.
Posted to commits@lucene.apache.org by mi...@apache.org on 2011/01/21 00:01:29 UTC
svn commit: r1061561 [1/2] - in /lucene/dev/branches/bulkpostings: ./ lucene/
lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/
lucene/contrib/memory/src/java/org/apache/lucene/index/memory/
lucene/contrib/misc/src/java/org/apa...
Author: mikemccand
Date: Thu Jan 20 23:01:27 2011
New Revision: 1061561
URL: http://svn.apache.org/viewvc?rev=1061561&view=rev
Log:
LUCENE-2872: merge from trunk (rev 1061453)
Added:
lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/BlockTermState.java
- copied unchanged from r1061453, lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/BlockTermState.java
lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/BlockTermsReader.java
- copied, changed from r1061453, lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/BlockTermsReader.java
lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/BlockTermsWriter.java
- copied unchanged from r1061453, lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/BlockTermsWriter.java
Removed:
lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/DeltaBytesReader.java
lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/DeltaBytesWriter.java
lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/PrefixCodedTermState.java
lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/PrefixCodedTermsReader.java
lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/PrefixCodedTermsWriter.java
Modified:
lucene/dev/branches/bulkpostings/ (props changed)
lucene/dev/branches/bulkpostings/lucene/ (props changed)
lucene/dev/branches/bulkpostings/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedTermsEnum.java
lucene/dev/branches/bulkpostings/lucene/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java
lucene/dev/branches/bulkpostings/lucene/contrib/misc/src/java/org/apache/lucene/index/codecs/appending/AppendingCodec.java
lucene/dev/branches/bulkpostings/lucene/contrib/misc/src/java/org/apache/lucene/index/codecs/appending/AppendingTermsDictReader.java
lucene/dev/branches/bulkpostings/lucene/contrib/misc/src/java/org/apache/lucene/index/codecs/appending/AppendingTermsDictWriter.java
lucene/dev/branches/bulkpostings/lucene/contrib/xml-query-parser/src/test/org/apache/lucene/xmlparser/TestParser.java
lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/CheckIndex.java
lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/DocumentsWriter.java
lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/FilterIndexReader.java
lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/MultiTermsEnum.java
lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/Terms.java
lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/TermsEnum.java
lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/FixedGapTermsIndexReader.java
lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/FixedGapTermsIndexWriter.java
lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/PostingsReaderBase.java
lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/PostingsWriterBase.java
lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/TermsIndexWriterBase.java
lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/VariableGapTermsIndexReader.java
lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/VariableGapTermsIndexWriter.java
lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/bulkvint/BulkVIntCodec.java
lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/intblock/FixedIntBlockIndexInput.java
lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/intblock/VariableIntBlockIndexInput.java
lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/pfordelta/FrameOfRefCodec.java
lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/pfordelta/PatchedFrameOfRefCodec.java
lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/pfordelta2/PForDeltaFixedIntBlockCodec.java
lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/pulsing/PulsingCodec.java
lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/pulsing/PulsingPostingsReaderImpl.java
lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/pulsing/PulsingPostingsWriterImpl.java
lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/sep/IntIndexInput.java
lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/sep/SepPostingsReaderImpl.java
lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/sep/SepPostingsWriterImpl.java
lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/standard/StandardCodec.java
lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/standard/StandardPostingsReader.java
lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/standard/StandardPostingsWriter.java
lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/FilteredTermsEnum.java
lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/FuzzyTermsEnum.java
lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/search/cache/DocTermsIndexCreator.java
lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/store/ByteArrayDataInput.java
lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/store/DataInput.java
lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/TestExternalCodecs.java
lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/TestSearch.java
lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/TestSearchForDuplicates.java
lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/TestAddIndexes.java
lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/TestIndexReader.java
lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/TestIndexReaderReopen.java
lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/TestMultiFields.java
lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/codecs/mockintblock/MockFixedIntBlockCodec.java
lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/codecs/mockintblock/MockVariableIntBlockCodec.java
lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/codecs/mockrandom/MockRandomCodec.java
lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/codecs/mocksep/MockSepCodec.java
lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/index/codecs/mocksep/MockSingleIntIndexInput.java
lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/search/TestFieldCache.java
lucene/dev/branches/bulkpostings/lucene/src/test/org/apache/lucene/util/automaton/fst/TestFSTs.java
lucene/dev/branches/bulkpostings/solr/ (props changed)
lucene/dev/branches/bulkpostings/solr/CHANGES.txt (props changed)
lucene/dev/branches/bulkpostings/solr/KEYS (props changed)
lucene/dev/branches/bulkpostings/solr/LICENSE.txt (props changed)
lucene/dev/branches/bulkpostings/solr/NOTICE.txt (props changed)
lucene/dev/branches/bulkpostings/solr/README.txt (props changed)
lucene/dev/branches/bulkpostings/solr/build.xml (props changed)
lucene/dev/branches/bulkpostings/solr/client/ (props changed)
lucene/dev/branches/bulkpostings/solr/common-build.xml (props changed)
lucene/dev/branches/bulkpostings/solr/contrib/ (props changed)
lucene/dev/branches/bulkpostings/solr/example/ (props changed)
lucene/dev/branches/bulkpostings/solr/lib/ (props changed)
lucene/dev/branches/bulkpostings/solr/site/ (props changed)
lucene/dev/branches/bulkpostings/solr/src/ (props changed)
lucene/dev/branches/bulkpostings/solr/src/java/org/apache/solr/request/UnInvertedField.java
lucene/dev/branches/bulkpostings/solr/src/java/org/apache/solr/util/HighFrequencyDictionary.java
lucene/dev/branches/bulkpostings/solr/testlogging.properties (props changed)
Modified: lucene/dev/branches/bulkpostings/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedTermsEnum.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedTermsEnum.java?rev=1061561&r1=1061560&r2=1061561&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedTermsEnum.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedTermsEnum.java Thu Jan 20 23:01:27 2011
@@ -156,9 +156,9 @@ public class InstantiatedTermsEnum exten
}
@Override
- public SeekStatus seek(BytesRef term, TermState state) throws IOException {
+ public void seek(BytesRef term, TermState state) throws IOException {
assert state != null && state instanceof OrdTermState;
- return seek(((OrdTermState)state).ord); // just use the ord for simplicity
+ seek(((OrdTermState)state).ord); // just use the ord for simplicity
}
}
Modified: lucene/dev/branches/bulkpostings/lucene/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java?rev=1061561&r1=1061560&r2=1061561&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java Thu Jan 20 23:01:27 2011
@@ -950,9 +950,9 @@ public class MemoryIndex implements Seri
}
@Override
- public SeekStatus seek(BytesRef term, TermState state) throws IOException {
+ public void seek(BytesRef term, TermState state) throws IOException {
assert state != null;
- return this.seek(((OrdTermState)state).ord);
+ this.seek(((OrdTermState)state).ord);
}
@Override
Modified: lucene/dev/branches/bulkpostings/lucene/contrib/misc/src/java/org/apache/lucene/index/codecs/appending/AppendingCodec.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/contrib/misc/src/java/org/apache/lucene/index/codecs/appending/AppendingCodec.java?rev=1061561&r1=1061560&r2=1061561&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/contrib/misc/src/java/org/apache/lucene/index/codecs/appending/AppendingCodec.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/contrib/misc/src/java/org/apache/lucene/index/codecs/appending/AppendingCodec.java Thu Jan 20 23:01:27 2011
@@ -32,7 +32,7 @@ import org.apache.lucene.index.codecs.Po
import org.apache.lucene.index.codecs.standard.StandardPostingsReader;
import org.apache.lucene.index.codecs.PostingsWriterBase;
import org.apache.lucene.index.codecs.standard.StandardPostingsWriter;
-import org.apache.lucene.index.codecs.PrefixCodedTermsReader;
+import org.apache.lucene.index.codecs.BlockTermsReader;
import org.apache.lucene.index.codecs.TermsIndexReaderBase;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;
@@ -131,7 +131,7 @@ public class AppendingCodec extends Code
public void files(Directory dir, SegmentInfo segmentInfo, String codecId, Set<String> files)
throws IOException {
StandardPostingsReader.files(dir, segmentInfo, codecId, files);
- PrefixCodedTermsReader.files(dir, segmentInfo, codecId, files);
+ BlockTermsReader.files(dir, segmentInfo, codecId, files);
FixedGapTermsIndexReader.files(dir, segmentInfo, codecId, files);
}
Modified: lucene/dev/branches/bulkpostings/lucene/contrib/misc/src/java/org/apache/lucene/index/codecs/appending/AppendingTermsDictReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/contrib/misc/src/java/org/apache/lucene/index/codecs/appending/AppendingTermsDictReader.java?rev=1061561&r1=1061560&r2=1061561&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/contrib/misc/src/java/org/apache/lucene/index/codecs/appending/AppendingTermsDictReader.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/contrib/misc/src/java/org/apache/lucene/index/codecs/appending/AppendingTermsDictReader.java Thu Jan 20 23:01:27 2011
@@ -22,15 +22,15 @@ import java.util.Comparator;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.codecs.PostingsReaderBase;
-import org.apache.lucene.index.codecs.PrefixCodedTermsReader;
-import org.apache.lucene.index.codecs.PrefixCodedTermsWriter;
+import org.apache.lucene.index.codecs.BlockTermsReader;
+import org.apache.lucene.index.codecs.BlockTermsWriter;
import org.apache.lucene.index.codecs.TermsIndexReaderBase;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CodecUtil;
-public class AppendingTermsDictReader extends PrefixCodedTermsReader {
+public class AppendingTermsDictReader extends BlockTermsReader {
public AppendingTermsDictReader(TermsIndexReaderBase indexReader,
Directory dir, FieldInfos fieldInfos, String segment,
@@ -43,7 +43,7 @@ public class AppendingTermsDictReader ex
@Override
protected void readHeader(IndexInput in) throws IOException {
CodecUtil.checkHeader(in, AppendingTermsDictWriter.CODEC_NAME,
- PrefixCodedTermsWriter.VERSION_START, PrefixCodedTermsWriter.VERSION_CURRENT);
+ BlockTermsWriter.VERSION_START, BlockTermsWriter.VERSION_CURRENT);
}
@Override
Modified: lucene/dev/branches/bulkpostings/lucene/contrib/misc/src/java/org/apache/lucene/index/codecs/appending/AppendingTermsDictWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/contrib/misc/src/java/org/apache/lucene/index/codecs/appending/AppendingTermsDictWriter.java?rev=1061561&r1=1061560&r2=1061561&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/contrib/misc/src/java/org/apache/lucene/index/codecs/appending/AppendingTermsDictWriter.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/contrib/misc/src/java/org/apache/lucene/index/codecs/appending/AppendingTermsDictWriter.java Thu Jan 20 23:01:27 2011
@@ -22,13 +22,13 @@ import java.util.Comparator;
import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.index.codecs.PostingsWriterBase;
-import org.apache.lucene.index.codecs.PrefixCodedTermsWriter;
+import org.apache.lucene.index.codecs.BlockTermsWriter;
import org.apache.lucene.index.codecs.TermsIndexWriterBase;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CodecUtil;
-public class AppendingTermsDictWriter extends PrefixCodedTermsWriter {
+public class AppendingTermsDictWriter extends BlockTermsWriter {
final static String CODEC_NAME = "APPENDING_TERMS_DICT";
public AppendingTermsDictWriter(TermsIndexWriterBase indexWriter,
Modified: lucene/dev/branches/bulkpostings/lucene/contrib/xml-query-parser/src/test/org/apache/lucene/xmlparser/TestParser.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/contrib/xml-query-parser/src/test/org/apache/lucene/xmlparser/TestParser.java?rev=1061561&r1=1061560&r2=1061561&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/contrib/xml-query-parser/src/test/org/apache/lucene/xmlparser/TestParser.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/contrib/xml-query-parser/src/test/org/apache/lucene/xmlparser/TestParser.java Thu Jan 20 23:01:27 2011
@@ -215,7 +215,10 @@ public class TestParser extends LuceneTe
}
private void dumpResults(String qType,Query q, int numDocs) throws IOException
{
- TopDocs hits = searcher.search(q, null, numDocs);
+ if (VERBOSE) {
+ System.out.println("TEST: query=" + q);
+ }
+ TopDocs hits = searcher.search(q, null, numDocs);
assertTrue(qType +" should produce results ", hits.totalHits>0);
if(VERBOSE)
{
Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/CheckIndex.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/CheckIndex.java?rev=1061561&r1=1061560&r2=1061561&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/CheckIndex.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/CheckIndex.java Thu Jan 20 23:01:27 2011
@@ -802,7 +802,7 @@ public class CheckIndex {
msg("OK [" + status.termCount + " terms; " + status.totFreq + " terms/docs pairs; " + status.totPos + " tokens]");
} catch (Throwable e) {
- msg("ERROR [" + String.valueOf(e.getMessage()) + "]");
+ msg("ERROR: " + e);
status.error = e;
if (infoStream != null) {
e.printStackTrace(infoStream);
Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/DocumentsWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/DocumentsWriter.java?rev=1061561&r1=1061560&r2=1061561&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/DocumentsWriter.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/DocumentsWriter.java Thu Jan 20 23:01:27 2011
@@ -903,8 +903,7 @@ final class DocumentsWriter {
final static int BYTE_BLOCK_NOT_MASK = ~BYTE_BLOCK_MASK;
/* if you increase this, you must fix field cache impl for
- * getTerms/getTermsIndex requires <= 32768. Also fix
- * DeltaBytesWriter's TERM_EOF if necessary. */
+ * getTerms/getTermsIndex requires <= 32768. */
final static int MAX_TERM_LENGTH_UTF8 = BYTE_BLOCK_SIZE-2;
/* Initial chunks size of the shared int[] blocks used to
Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/FilterIndexReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/FilterIndexReader.java?rev=1061561&r1=1061560&r2=1061561&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/FilterIndexReader.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/FilterIndexReader.java Thu Jan 20 23:01:27 2011
@@ -156,12 +156,12 @@ public class FilterIndexReader extends I
}
@Override
- public int docFreq() {
+ public int docFreq() throws IOException {
return in.docFreq();
}
@Override
- public long totalTermFreq() {
+ public long totalTermFreq() throws IOException {
return in.totalTermFreq();
}
@@ -186,8 +186,8 @@ public class FilterIndexReader extends I
}
@Override
- public SeekStatus seek(BytesRef term, TermState state) throws IOException {
- return in.seek(term, state);
+ public void seek(BytesRef term, TermState state) throws IOException {
+ in.seek(term, state);
}
@Override
Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/MultiTermsEnum.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/MultiTermsEnum.java?rev=1061561&r1=1061560&r2=1061561&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/MultiTermsEnum.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/MultiTermsEnum.java Thu Jan 20 23:01:27 2011
@@ -261,7 +261,7 @@ public final class MultiTermsEnum extend
}
@Override
- public int docFreq() {
+ public int docFreq() throws IOException {
int sum = 0;
for(int i=0;i<numTop;i++) {
sum += top[i].terms.docFreq();
@@ -270,7 +270,7 @@ public final class MultiTermsEnum extend
}
@Override
- public long totalTermFreq() {
+ public long totalTermFreq() throws IOException {
long sum = 0;
for(int i=0;i<numTop;i++) {
final long v = top[i].terms.totalTermFreq();
Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/Terms.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/Terms.java?rev=1061561&r1=1061560&r2=1061561&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/Terms.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/Terms.java Thu Jan 20 23:01:27 2011
@@ -91,11 +91,8 @@ public abstract class Terms {
public BulkPostingsEnum bulkPostings(BytesRef text, TermState termState, BulkPostingsEnum reuse, boolean doFreqs, boolean doPositions) throws IOException {
final TermsEnum termsEnum = getThreadTermsEnum();
- if (termsEnum.seek(text, termState) == TermsEnum.SeekStatus.FOUND) {
- return termsEnum.bulkPostings(reuse, doFreqs, doPositions);
- } else {
- return null;
- }
+ termsEnum.seek(text, termState);
+ return termsEnum.bulkPostings(reuse, doFreqs, doPositions);
}
/** Get {@link DocsEnum} for the specified term. This
@@ -118,11 +115,8 @@ public abstract class Terms {
* @see TermsEnum#seek(BytesRef, TermState) */
public DocsEnum docs(Bits skipDocs, BytesRef term, TermState termState, DocsEnum reuse) throws IOException {
final TermsEnum termsEnum = getThreadTermsEnum();
- if (termsEnum.seek(term, termState) == TermsEnum.SeekStatus.FOUND) {
- return termsEnum.docs(skipDocs, reuse);
- } else {
- return null;
- }
+ termsEnum.seek(term, termState);
+ return termsEnum.docs(skipDocs, reuse);
}
/**
@@ -134,11 +128,8 @@ public abstract class Terms {
* @see TermsEnum#seek(BytesRef, TermState) */
public DocsAndPositionsEnum docsAndPositions(Bits skipDocs, BytesRef term, TermState termState, DocsAndPositionsEnum reuse) throws IOException {
final TermsEnum termsEnum = getThreadTermsEnum();
- if (termsEnum.seek(term, termState) == TermsEnum.SeekStatus.FOUND) {
- return termsEnum.docsAndPositions(skipDocs, reuse);
- } else {
- return null;
- }
+ termsEnum.seek(term, termState);
+ return termsEnum.docsAndPositions(skipDocs, reuse);
}
public long getUniqueTermCount() throws IOException {
Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/TermsEnum.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/TermsEnum.java?rev=1061561&r1=1061560&r2=1061561&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/TermsEnum.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/TermsEnum.java Thu Jan 20 23:01:27 2011
@@ -80,7 +80,7 @@ public abstract class TermsEnum {
* use this method. Low-level implementations may position the TermsEnum
* without re-seeking the term dictionary.
* <p>
- * Seeking by {@link TermState} should only be used iff the enu the state was
+ * Seeking by {@link TermState} should only be used iff the enum the state was
* obtained from and the enum the state is used for seeking are obtained from
* the same {@link IndexReader}, otherwise a {@link #seek(BytesRef, TermState)} call can
* leave the enum in undefined state.
@@ -97,8 +97,8 @@ public abstract class TermsEnum {
* @param term the term the TermState corresponds to
* @param state the {@link TermState}
* */
- public SeekStatus seek(BytesRef term, TermState state) throws IOException {
- return seek(term);
+ public void seek(BytesRef term, TermState state) throws IOException {
+ seek(term);
}
/** Increments the enumeration to the next element.
@@ -124,7 +124,7 @@ public abstract class TermsEnum {
* term. Do not call this before calling next() for the
* first time, after next() returns null or seek returns
* {@link SeekStatus#END}.*/
- public abstract int docFreq();
+ public abstract int docFreq() throws IOException;
/** Returns the total number of occurrences of this term
* across all documents (the sum of the freq() for each
@@ -132,7 +132,7 @@ public abstract class TermsEnum {
* codec doesn't support this measure. Note that, like
* other term measures, this measure does not take
* deleted documents into account. */
- public abstract long totalTermFreq();
+ public abstract long totalTermFreq() throws IOException;
/** Get {@link DocsEnum} for the current term. Do not
* call this before calling {@link #next} or {@link
@@ -247,7 +247,7 @@ public abstract class TermsEnum {
}
@Override
- public SeekStatus seek(BytesRef term, TermState state) throws IOException {
+ public void seek(BytesRef term, TermState state) throws IOException {
throw new IllegalStateException("this method should never be called");
}
};
Copied: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/BlockTermsReader.java (from r1061453, lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/BlockTermsReader.java)
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/BlockTermsReader.java?p2=lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/BlockTermsReader.java&p1=lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/BlockTermsReader.java&r1=1061453&r2=1061561&rev=1061561&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/codecs/BlockTermsReader.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/BlockTermsReader.java Thu Jan 20 23:01:27 2011
@@ -24,6 +24,7 @@ import java.util.Comparator;
import java.util.Iterator;
import java.util.TreeMap;
+import org.apache.lucene.index.BulkPostingsEnum;
import org.apache.lucene.index.DocsAndPositionsEnum;
import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.FieldInfo;
@@ -571,6 +572,13 @@ public class BlockTermsReader extends Fi
}
@Override
+ public BulkPostingsEnum bulkPostings(BulkPostingsEnum reuse, boolean doFreqs, boolean doPositions) throws IOException {
+ decodeMetaData();
+ BulkPostingsEnum postingsEnum = postingsReader.bulkPostings(fieldInfo, state, reuse, doFreqs, doPositions);
+ return postingsEnum;
+ }
+
+ @Override
public DocsAndPositionsEnum docsAndPositions(Bits skipDocs, DocsAndPositionsEnum reuse) throws IOException {
//System.out.println("BTR.d&p this=" + this);
decodeMetaData();
Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/FixedGapTermsIndexReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/FixedGapTermsIndexReader.java?rev=1061561&r1=1061560&r2=1061561&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/FixedGapTermsIndexReader.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/FixedGapTermsIndexReader.java Thu Jan 20 23:01:27 2011
@@ -94,6 +94,7 @@ public class FixedGapTermsIndexReader ex
// Read directory
final int numFields = in.readVInt();
+ //System.out.println("FGR: init seg=" + segment + " div=" + indexDivisor + " nF=" + numFields);
for(int i=0;i<numFields;i++) {
final int field = in.readVInt();
final int numIndexTerms = in.readVInt();
@@ -241,9 +242,6 @@ public class FixedGapTermsIndexReader ex
this.packedOffsetsStart = packedOffsetsStart;
this.numIndexTerms = numIndexTerms;
- // We still create the indexReader when indexDivisor
- // is -1, so that PrefixCodedTermsReader can call
- // isIndexTerm for each field:
if (indexDivisor > 0) {
loadTermsIndex();
}
Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/FixedGapTermsIndexWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/FixedGapTermsIndexWriter.java?rev=1061561&r1=1061560&r2=1061561&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/FixedGapTermsIndexWriter.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/FixedGapTermsIndexWriter.java Thu Jan 20 23:01:27 2011
@@ -53,7 +53,6 @@ public class FixedGapTermsIndexWriter ex
private final List<SimpleFieldWriter> fields = new ArrayList<SimpleFieldWriter>();
private final FieldInfos fieldInfos; // unread
- private IndexOutput termsOut;
public FixedGapTermsIndexWriter(SegmentWriteState state) throws IOException {
final String indexFileName = IndexFileNames.segmentFileName(state.segmentName, state.codecId, TERMS_INDEX_EXTENSION);
@@ -71,13 +70,9 @@ public class FixedGapTermsIndexWriter ex
}
@Override
- public void setTermsOutput(IndexOutput termsOut) {
- this.termsOut = termsOut;
- }
-
- @Override
- public FieldWriter addField(FieldInfo field) {
- SimpleFieldWriter writer = new SimpleFieldWriter(field);
+ public FieldWriter addField(FieldInfo field, long termsFilePointer) {
+ //System.out.println("FGW: addFfield=" + field.name);
+ SimpleFieldWriter writer = new SimpleFieldWriter(field, termsFilePointer);
fields.add(writer);
return writer;
}
@@ -119,10 +114,10 @@ public class FixedGapTermsIndexWriter ex
private final BytesRef lastTerm = new BytesRef();
- SimpleFieldWriter(FieldInfo fieldInfo) {
+ SimpleFieldWriter(FieldInfo fieldInfo, long termsFilePointer) {
this.fieldInfo = fieldInfo;
indexStart = out.getFilePointer();
- termsStart = lastTermsPointer = termsOut.getFilePointer();
+ termsStart = lastTermsPointer = termsFilePointer;
termLengths = new short[0];
termsPointerDeltas = new int[0];
}
@@ -130,33 +125,8 @@ public class FixedGapTermsIndexWriter ex
@Override
public boolean checkIndexTerm(BytesRef text, TermStats stats) throws IOException {
// First term is first indexed term:
+ //System.out.println("FGW: checkIndexTerm text=" + text.utf8ToString());
if (0 == (numTerms++ % termIndexInterval)) {
-
- final int indexedTermLength = indexedTermPrefixLength(lastTerm, text);
-
- // write only the min prefix that shows the diff
- // against prior term
- out.writeBytes(text.bytes, text.offset, indexedTermLength);
-
- if (termLengths.length == numIndexTerms) {
- termLengths = ArrayUtil.grow(termLengths);
- }
- if (termsPointerDeltas.length == numIndexTerms) {
- termsPointerDeltas = ArrayUtil.grow(termsPointerDeltas);
- }
-
- // save delta terms pointer
- final long fp = termsOut.getFilePointer();
- termsPointerDeltas[numIndexTerms] = (int) (fp - lastTermsPointer);
- lastTermsPointer = fp;
-
- // save term length (in bytes)
- assert indexedTermLength <= Short.MAX_VALUE;
- termLengths[numIndexTerms] = (short) indexedTermLength;
- totTermLength += indexedTermLength;
-
- lastTerm.copy(text);
- numIndexTerms++;
return true;
} else {
if (0 == numTerms % termIndexInterval) {
@@ -169,13 +139,41 @@ public class FixedGapTermsIndexWriter ex
}
@Override
- public void finish() throws IOException {
+ public void add(BytesRef text, TermStats stats, long termsFilePointer) throws IOException {
+ final int indexedTermLength = indexedTermPrefixLength(lastTerm, text);
+ //System.out.println("FGW: add text=" + text.utf8ToString() + " " + text + " fp=" + termsFilePointer);
+
+ // write only the min prefix that shows the diff
+ // against prior term
+ out.writeBytes(text.bytes, text.offset, indexedTermLength);
+
+ if (termLengths.length == numIndexTerms) {
+ termLengths = ArrayUtil.grow(termLengths);
+ }
+ if (termsPointerDeltas.length == numIndexTerms) {
+ termsPointerDeltas = ArrayUtil.grow(termsPointerDeltas);
+ }
+
+ // save delta terms pointer
+ termsPointerDeltas[numIndexTerms] = (int) (termsFilePointer - lastTermsPointer);
+ lastTermsPointer = termsFilePointer;
+
+ // save term length (in bytes)
+ assert indexedTermLength <= Short.MAX_VALUE;
+ termLengths[numIndexTerms] = (short) indexedTermLength;
+ totTermLength += indexedTermLength;
+
+ lastTerm.copy(text);
+ numIndexTerms++;
+ }
+
+ @Override
+ public void finish(long termsFilePointer) throws IOException {
// write primary terms dict offsets
packedIndexStart = out.getFilePointer();
- final long maxValue = termsOut.getFilePointer();
- PackedInts.Writer w = PackedInts.getWriter(out, numIndexTerms, PackedInts.bitsRequired(maxValue));
+ PackedInts.Writer w = PackedInts.getWriter(out, numIndexTerms, PackedInts.bitsRequired(termsFilePointer));
// relative to our indexStart
long upto = 0;
Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/PostingsReaderBase.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/PostingsReaderBase.java?rev=1061561&r1=1061560&r2=1061561&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/PostingsReaderBase.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/PostingsReaderBase.java Thu Jan 20 23:01:27 2011
@@ -30,12 +30,12 @@ import org.apache.lucene.util.Bits;
import org.apache.lucene.index.codecs.standard.StandardPostingsWriter; // javadocs
-/** PrefixCodedTermsReader interacts with a single instance
- * of this to manage creation of {@link DocsEnum} and
+/** BlockTermsReader interacts with a single instance
+ * of this class to manage creation of {@link DocsEnum} and
* {@link DocsAndPositionsEnum} instances. It provides an
* IndexInput (termsIn) where this class may read any
* previously stored data that it had written in its
- * corresponding {@link StandardPostingsWriter} at indexing
+ * corresponding {@link PostingsWriterBase} at indexing
* time.
* @lucene.experimental */
@@ -44,23 +44,29 @@ public abstract class PostingsReaderBase
public abstract void init(IndexInput termsIn) throws IOException;
/** Return a newly created empty TermState */
- public abstract PrefixCodedTermState newTermState() throws IOException;
+ public abstract BlockTermState newTermState() throws IOException;
- public abstract void readTerm(IndexInput termsIn, FieldInfo fieldInfo, PrefixCodedTermState state, boolean isIndexTerm) throws IOException;
+ /** Actually decode metadata for next term */
+ public abstract void nextTerm(FieldInfo fieldInfo, BlockTermState state) throws IOException;
/** Must fully consume state, since after this call that
* TermState may be reused. */
- public abstract DocsEnum docs(FieldInfo fieldInfo, PrefixCodedTermState state, Bits skipDocs, DocsEnum reuse) throws IOException;
+ public abstract DocsEnum docs(FieldInfo fieldInfo, BlockTermState state, Bits skipDocs, DocsEnum reuse) throws IOException;
// nocommit jdocs
// nocommit make abstract
- public BulkPostingsEnum bulkPostings(FieldInfo fieldInfo, TermState state, BulkPostingsEnum reuse, boolean doFreqs, boolean doPositions) throws IOException {
+ public BulkPostingsEnum bulkPostings(FieldInfo fieldInfo, BlockTermState state, BulkPostingsEnum reuse, boolean doFreqs, boolean doPositions) throws IOException {
throw new UnsupportedOperationException();
}
/** Must fully consume state, since after this call that
* TermState may be reused. */
- public abstract DocsAndPositionsEnum docsAndPositions(FieldInfo fieldInfo, PrefixCodedTermState state, Bits skipDocs, DocsAndPositionsEnum reuse) throws IOException;
+ public abstract DocsAndPositionsEnum docsAndPositions(FieldInfo fieldInfo, BlockTermState state, Bits skipDocs, DocsAndPositionsEnum reuse) throws IOException;
public abstract void close() throws IOException;
+
+ /** Reads data for all terms in the next block; this
+ * method should merely load the byte[] blob but not
+ * decode, which is done in {@link #nextTerm}. */
+ public abstract void readTermsBlock(IndexInput termsIn, FieldInfo fieldInfo, BlockTermState termState) throws IOException;
}
Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/PostingsWriterBase.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/PostingsWriterBase.java?rev=1061561&r1=1061560&r2=1061561&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/PostingsWriterBase.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/PostingsWriterBase.java Thu Jan 20 23:01:27 2011
@@ -33,8 +33,10 @@ public abstract class PostingsWriterBase
public abstract void startTerm() throws IOException;
+ public abstract void flushTermsBlock() throws IOException;
+
/** Finishes the current term */
- public abstract void finishTerm(TermStats stats, boolean isIndexTerm) throws IOException;
+ public abstract void finishTerm(TermStats stats) throws IOException;
public abstract void setField(FieldInfo fieldInfo);
Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/TermsIndexWriterBase.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/TermsIndexWriterBase.java?rev=1061561&r1=1061560&r2=1061561&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/TermsIndexWriterBase.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/TermsIndexWriterBase.java Thu Jan 20 23:01:27 2011
@@ -17,7 +17,6 @@ package org.apache.lucene.index.codecs;
* limitations under the License.
*/
-import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.util.BytesRef;
import java.io.IOException;
@@ -25,14 +24,13 @@ import java.io.IOException;
/** @lucene.experimental */
public abstract class TermsIndexWriterBase {
- public abstract void setTermsOutput(IndexOutput out);
-
public abstract class FieldWriter {
public abstract boolean checkIndexTerm(BytesRef text, TermStats stats) throws IOException;
- public abstract void finish() throws IOException;
+ public abstract void add(BytesRef text, TermStats stats, long termsFilePointer) throws IOException;
+ public abstract void finish(long termsFilePointer) throws IOException;
}
- public abstract FieldWriter addField(FieldInfo fieldInfo) throws IOException;
+ public abstract FieldWriter addField(FieldInfo fieldInfo, long termsFilePointer) throws IOException;
public abstract void close() throws IOException;
}
Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/VariableGapTermsIndexReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/VariableGapTermsIndexReader.java?rev=1061561&r1=1061560&r2=1061561&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/VariableGapTermsIndexReader.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/VariableGapTermsIndexReader.java Thu Jan 20 23:01:27 2011
@@ -164,9 +164,6 @@ public class VariableGapTermsIndexReader
this.fieldInfo = fieldInfo;
this.indexStart = indexStart;
- // We still create the indexReader when indexDivisor
- // is -1, so that PrefixCodedTermsReader can call
- // isIndexTerm for each field:
if (indexDivisor > 0) {
loadTermsIndex();
}
Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/VariableGapTermsIndexWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/VariableGapTermsIndexWriter.java?rev=1061561&r1=1061560&r2=1061561&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/VariableGapTermsIndexWriter.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/VariableGapTermsIndexWriter.java Thu Jan 20 23:01:27 2011
@@ -52,7 +52,6 @@ public class VariableGapTermsIndexWriter
private final List<FSTFieldWriter> fields = new ArrayList<FSTFieldWriter>();
private final FieldInfos fieldInfos; // unread
- private IndexOutput termsOut;
private final IndexTermSelector policy;
/** @lucene.experimental */
@@ -60,6 +59,7 @@ public class VariableGapTermsIndexWriter
// Called sequentially on every term being written,
// returning true if this term should be indexed
public abstract boolean isIndexTerm(BytesRef term, TermStats stats);
+ public abstract void newField(FieldInfo fieldInfo);
}
/** Same policy as {@link FixedGapTermsIndexWriter} */
@@ -83,6 +83,11 @@ public class VariableGapTermsIndexWriter
return false;
}
}
+
+ @Override
+ public void newField(FieldInfo fieldInfo) {
+ count = interval;
+ }
}
/** Sets an index term when docFreq >= docFreqThresh, or
@@ -96,6 +101,9 @@ public class VariableGapTermsIndexWriter
public EveryNOrDocFreqTermSelector(int docFreqThresh, int interval) {
this.interval = interval;
this.docFreqThresh = docFreqThresh;
+
+ // First term is first indexed term:
+ count = interval;
}
@Override
@@ -108,6 +116,11 @@ public class VariableGapTermsIndexWriter
return false;
}
}
+
+ @Override
+ public void newField(FieldInfo fieldInfo) {
+ count = interval;
+ }
}
// TODO: it'd be nice to let the FST builder prune based
@@ -158,14 +171,10 @@ public class VariableGapTermsIndexWriter
}
@Override
- public void setTermsOutput(IndexOutput termsOut) {
- this.termsOut = termsOut;
- }
-
- @Override
- public FieldWriter addField(FieldInfo field) throws IOException {
- //System.out.println("VGW: field=" + field.name);
- FSTFieldWriter writer = new FSTFieldWriter(field);
+ public FieldWriter addField(FieldInfo field, long termsFilePointer) throws IOException {
+ ////System.out.println("VGW: field=" + field.name);
+ policy.newField(field);
+ FSTFieldWriter writer = new FSTFieldWriter(field, termsFilePointer);
fields.add(writer);
return writer;
}
@@ -200,42 +209,48 @@ public class VariableGapTermsIndexWriter
private final BytesRef lastTerm = new BytesRef();
private boolean first = true;
- public FSTFieldWriter(FieldInfo fieldInfo) throws IOException {
+ public FSTFieldWriter(FieldInfo fieldInfo, long termsFilePointer) throws IOException {
this.fieldInfo = fieldInfo;
fstOutputs = PositiveIntOutputs.getSingleton(true);
fstBuilder = new Builder<Long>(FST.INPUT_TYPE.BYTE1,
0, 0, true,
fstOutputs);
indexStart = out.getFilePointer();
- //System.out.println("VGW: field=" + fieldInfo.name);
+ ////System.out.println("VGW: field=" + fieldInfo.name);
// Always put empty string in
- fstBuilder.add(new BytesRef(), fstOutputs.get(termsOut.getFilePointer()));
+ fstBuilder.add(new BytesRef(), fstOutputs.get(termsFilePointer));
}
@Override
public boolean checkIndexTerm(BytesRef text, TermStats stats) throws IOException {
+ //System.out.println("VGW: index term=" + text.utf8ToString());
+ // NOTE: we must force the first term per field to be
+ // indexed, in case policy doesn't:
if (policy.isIndexTerm(text, stats) || first) {
first = false;
- //System.out.println("VGW: index term=" + text.utf8ToString() + " fp=" + termsOut.getFilePointer());
- final int lengthSave = text.length;
- text.length = indexedTermPrefixLength(lastTerm, text);
- try {
- fstBuilder.add(text, fstOutputs.get(termsOut.getFilePointer()));
- } finally {
- text.length = lengthSave;
- }
- lastTerm.copy(text);
+ //System.out.println(" YES");
return true;
} else {
- //System.out.println("VGW: not index term=" + text.utf8ToString() + " fp=" + termsOut.getFilePointer());
lastTerm.copy(text);
return false;
}
}
@Override
- public void finish() throws IOException {
+ public void add(BytesRef text, TermStats stats, long termsFilePointer) throws IOException {
+ final int lengthSave = text.length;
+ text.length = indexedTermPrefixLength(lastTerm, text);
+ try {
+ fstBuilder.add(text, fstOutputs.get(termsFilePointer));
+ } finally {
+ text.length = lengthSave;
+ }
+ lastTerm.copy(text);
+ }
+
+ @Override
+ public void finish(long termsFilePointer) throws IOException {
fst = fstBuilder.finish();
if (fst != null) {
fst.save(out);
Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/bulkvint/BulkVIntCodec.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/bulkvint/BulkVIntCodec.java?rev=1061561&r1=1061560&r2=1061561&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/bulkvint/BulkVIntCodec.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/bulkvint/BulkVIntCodec.java Thu Jan 20 23:01:27 2011
@@ -36,10 +36,10 @@ import org.apache.lucene.index.codecs.in
import org.apache.lucene.index.codecs.intblock.FixedIntBlockIndexOutput;
import org.apache.lucene.index.codecs.PostingsWriterBase;
import org.apache.lucene.index.codecs.PostingsReaderBase;
-import org.apache.lucene.index.codecs.PrefixCodedTermsReader;
-import org.apache.lucene.index.codecs.PrefixCodedTermsWriter;
import org.apache.lucene.index.codecs.TermsIndexReaderBase;
import org.apache.lucene.index.codecs.TermsIndexWriterBase;
+import org.apache.lucene.index.codecs.BlockTermsWriter;
+import org.apache.lucene.index.codecs.BlockTermsReader;
import org.apache.lucene.index.codecs.VariableGapTermsIndexReader;
import org.apache.lucene.index.codecs.VariableGapTermsIndexWriter;
import org.apache.lucene.index.codecs.standard.StandardCodec;
@@ -164,7 +164,7 @@ public class BulkVIntCodec extends Codec
success = false;
try {
- FieldsConsumer ret = new PrefixCodedTermsWriter(indexWriter, state, postingsWriter, BytesRef.getUTF8SortedAsUnicodeComparator());
+ FieldsConsumer ret = new BlockTermsWriter(indexWriter, state, postingsWriter, BytesRef.getUTF8SortedAsUnicodeComparator());
success = true;
return ret;
} finally {
@@ -202,15 +202,15 @@ public class BulkVIntCodec extends Codec
success = false;
try {
- FieldsProducer ret = new PrefixCodedTermsReader(indexReader,
- state.dir,
- state.fieldInfos,
- state.segmentInfo.name,
- postingsReader,
- state.readBufferSize,
- BytesRef.getUTF8SortedAsUnicodeComparator(),
- StandardCodec.TERMS_CACHE_SIZE,
- state.codecId);
+ FieldsProducer ret = new BlockTermsReader(indexReader,
+ state.dir,
+ state.fieldInfos,
+ state.segmentInfo.name,
+ postingsReader,
+ state.readBufferSize,
+ BytesRef.getUTF8SortedAsUnicodeComparator(),
+ StandardCodec.TERMS_CACHE_SIZE,
+ state.codecId);
success = true;
return ret;
} finally {
@@ -227,14 +227,14 @@ public class BulkVIntCodec extends Codec
@Override
public void files(Directory dir, SegmentInfo segmentInfo, String codecId, Set<String> files) {
SepPostingsReaderImpl.files(segmentInfo, codecId, files);
- PrefixCodedTermsReader.files(dir, segmentInfo, codecId, files);
+ BlockTermsReader.files(dir, segmentInfo, codecId, files);
VariableGapTermsIndexReader.files(dir, segmentInfo, codecId, files);
}
@Override
public void getExtensions(Set<String> extensions) {
SepPostingsWriterImpl.getExtensions(extensions);
- PrefixCodedTermsReader.getExtensions(extensions);
+ BlockTermsReader.getExtensions(extensions);
VariableGapTermsIndexReader.getIndexExtensions(extensions);
}
}
Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/intblock/FixedIntBlockIndexInput.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/intblock/FixedIntBlockIndexInput.java?rev=1061561&r1=1061560&r2=1061561&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/intblock/FixedIntBlockIndexInput.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/intblock/FixedIntBlockIndexInput.java Thu Jan 20 23:01:27 2011
@@ -25,6 +25,7 @@ import java.io.IOException;
import org.apache.lucene.index.codecs.sep.IntIndexInput;
import org.apache.lucene.index.BulkPostingsEnum;
+import org.apache.lucene.store.DataInput;
import org.apache.lucene.store.IndexInput;
/** Abstract base class that reads fixed-size blocks of ints
@@ -144,7 +145,7 @@ public abstract class FixedIntBlockIndex
// This is used when reading skip data:
@Override
- public void read(final IndexInput indexIn, final boolean absolute) throws IOException {
+ public void read(final DataInput indexIn, final boolean absolute) throws IOException {
// nocommit -- somehow we should share the "upto" for
// doc & freq since they will always be "in sync"
if (absolute) {
Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/intblock/VariableIntBlockIndexInput.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/intblock/VariableIntBlockIndexInput.java?rev=1061561&r1=1061560&r2=1061561&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/intblock/VariableIntBlockIndexInput.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/intblock/VariableIntBlockIndexInput.java Thu Jan 20 23:01:27 2011
@@ -25,6 +25,7 @@ import java.io.IOException;
import org.apache.lucene.index.BulkPostingsEnum;
import org.apache.lucene.index.codecs.sep.IntIndexInput;
+import org.apache.lucene.store.DataInput;
import org.apache.lucene.store.IndexInput;
// TODO: much of this can be shared code w/ the fixed case
@@ -157,7 +158,7 @@ public abstract class VariableIntBlockIn
// This is used when reading skip data:
@Override
- public void read(final IndexInput indexIn, final boolean absolute) throws IOException {
+ public void read(final DataInput indexIn, final boolean absolute) throws IOException {
if (absolute) {
fp = indexIn.readVLong();
upto = indexIn.readByte()&0xFF;
Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/pfordelta/FrameOfRefCodec.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/pfordelta/FrameOfRefCodec.java?rev=1061561&r1=1061560&r2=1061561&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/pfordelta/FrameOfRefCodec.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/pfordelta/FrameOfRefCodec.java Thu Jan 20 23:01:27 2011
@@ -31,8 +31,8 @@ import org.apache.lucene.index.codecs.Fi
import org.apache.lucene.index.codecs.sep.SepPostingsWriterImpl;
import org.apache.lucene.index.codecs.sep.SepPostingsReaderImpl;
import org.apache.lucene.index.codecs.standard.StandardCodec;
-import org.apache.lucene.index.codecs.PrefixCodedTermsWriter;
-import org.apache.lucene.index.codecs.PrefixCodedTermsReader;
+import org.apache.lucene.index.codecs.BlockTermsWriter;
+import org.apache.lucene.index.codecs.BlockTermsReader;
import org.apache.lucene.index.codecs.TermsIndexWriterBase;
import org.apache.lucene.index.codecs.TermsIndexReaderBase;
import org.apache.lucene.index.codecs.PostingsReaderBase;
@@ -63,7 +63,7 @@ public class FrameOfRefCodec extends Cod
success = false;
try {
- FieldsConsumer ret = new PrefixCodedTermsWriter(indexWriter, state, postingsWriter, BytesRef.getUTF8SortedAsUnicodeComparator());
+ FieldsConsumer ret = new BlockTermsWriter(indexWriter, state, postingsWriter, BytesRef.getUTF8SortedAsUnicodeComparator());
success = true;
return ret;
} finally {
@@ -103,7 +103,7 @@ public class FrameOfRefCodec extends Cod
success = false;
try {
- FieldsProducer ret = new PrefixCodedTermsReader(indexReader,
+ FieldsProducer ret = new BlockTermsReader(indexReader,
state.dir,
state.fieldInfos,
state.segmentInfo.name,
@@ -128,14 +128,14 @@ public class FrameOfRefCodec extends Cod
@Override
public void files(Directory dir, SegmentInfo segmentInfo, String id, Set<String> files) {
SepPostingsReaderImpl.files(segmentInfo, id, files);
- PrefixCodedTermsReader.files(dir, segmentInfo, id, files);
+ BlockTermsReader.files(dir, segmentInfo, id, files);
FixedGapTermsIndexReader.files(dir, segmentInfo, id, files);
}
@Override
public void getExtensions(Set<String> extensions) {
SepPostingsWriterImpl.getExtensions(extensions);
- PrefixCodedTermsReader.getExtensions(extensions);
+ BlockTermsReader.getExtensions(extensions);
FixedGapTermsIndexReader.getIndexExtensions(extensions);
}
}
Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/pfordelta/PatchedFrameOfRefCodec.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/pfordelta/PatchedFrameOfRefCodec.java?rev=1061561&r1=1061560&r2=1061561&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/pfordelta/PatchedFrameOfRefCodec.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/pfordelta/PatchedFrameOfRefCodec.java Thu Jan 20 23:01:27 2011
@@ -31,8 +31,8 @@ import org.apache.lucene.index.codecs.Fi
import org.apache.lucene.index.codecs.sep.SepPostingsWriterImpl;
import org.apache.lucene.index.codecs.sep.SepPostingsReaderImpl;
import org.apache.lucene.index.codecs.standard.StandardCodec;
-import org.apache.lucene.index.codecs.PrefixCodedTermsWriter;
-import org.apache.lucene.index.codecs.PrefixCodedTermsReader;
+import org.apache.lucene.index.codecs.BlockTermsWriter;
+import org.apache.lucene.index.codecs.BlockTermsReader;
import org.apache.lucene.index.codecs.TermsIndexWriterBase;
import org.apache.lucene.index.codecs.TermsIndexReaderBase;
import org.apache.lucene.index.codecs.PostingsReaderBase;
@@ -63,7 +63,7 @@ public class PatchedFrameOfRefCodec exte
success = false;
try {
- FieldsConsumer ret = new PrefixCodedTermsWriter(indexWriter, state, postingsWriter, BytesRef.getUTF8SortedAsUnicodeComparator());
+ FieldsConsumer ret = new BlockTermsWriter(indexWriter, state, postingsWriter, BytesRef.getUTF8SortedAsUnicodeComparator());
success = true;
return ret;
} finally {
@@ -103,7 +103,7 @@ public class PatchedFrameOfRefCodec exte
success = false;
try {
- FieldsProducer ret = new PrefixCodedTermsReader(indexReader,
+ FieldsProducer ret = new BlockTermsReader(indexReader,
state.dir,
state.fieldInfos,
state.segmentInfo.name,
@@ -128,14 +128,14 @@ public class PatchedFrameOfRefCodec exte
@Override
public void files(Directory dir, SegmentInfo segmentInfo, String id, Set<String> files) {
SepPostingsReaderImpl.files(segmentInfo, id, files);
- PrefixCodedTermsReader.files(dir, segmentInfo, id, files);
+ BlockTermsReader.files(dir, segmentInfo, id, files);
FixedGapTermsIndexReader.files(dir, segmentInfo, id, files);
}
@Override
public void getExtensions(Set<String> extensions) {
SepPostingsWriterImpl.getExtensions(extensions);
- PrefixCodedTermsReader.getExtensions(extensions);
+ BlockTermsReader.getExtensions(extensions);
FixedGapTermsIndexReader.getIndexExtensions(extensions);
}
}
Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/pfordelta2/PForDeltaFixedIntBlockCodec.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/pfordelta2/PForDeltaFixedIntBlockCodec.java?rev=1061561&r1=1061560&r2=1061561&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/pfordelta2/PForDeltaFixedIntBlockCodec.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/pfordelta2/PForDeltaFixedIntBlockCodec.java Thu Jan 20 23:01:27 2011
@@ -38,8 +38,8 @@ import org.apache.lucene.index.codecs.Fi
import org.apache.lucene.index.codecs.FixedGapTermsIndexWriter;
import org.apache.lucene.index.codecs.PostingsWriterBase;
import org.apache.lucene.index.codecs.PostingsReaderBase;
-import org.apache.lucene.index.codecs.PrefixCodedTermsReader;
-import org.apache.lucene.index.codecs.PrefixCodedTermsWriter;
+import org.apache.lucene.index.codecs.BlockTermsReader;
+import org.apache.lucene.index.codecs.BlockTermsWriter;
import org.apache.lucene.index.codecs.TermsIndexReaderBase;
import org.apache.lucene.index.codecs.TermsIndexWriterBase;
import org.apache.lucene.index.codecs.standard.StandardCodec;
@@ -179,7 +179,7 @@ public class PForDeltaFixedIntBlockCodec
success = false;
try {
- FieldsConsumer ret = new PrefixCodedTermsWriter(indexWriter, state, postingsWriter, BytesRef.getUTF8SortedAsUnicodeComparator());
+ FieldsConsumer ret = new BlockTermsWriter(indexWriter, state, postingsWriter, BytesRef.getUTF8SortedAsUnicodeComparator());
success = true;
return ret;
} finally {
@@ -217,7 +217,7 @@ public class PForDeltaFixedIntBlockCodec
success = false;
try {
- FieldsProducer ret = new PrefixCodedTermsReader(indexReader,
+ FieldsProducer ret = new BlockTermsReader(indexReader,
state.dir,
state.fieldInfos,
state.segmentInfo.name,
@@ -242,14 +242,14 @@ public class PForDeltaFixedIntBlockCodec
@Override
public void files(Directory dir, SegmentInfo segmentInfo, String codecId, Set<String> files) {
SepPostingsReaderImpl.files(segmentInfo, codecId, files);
- PrefixCodedTermsReader.files(dir, segmentInfo, codecId, files);
+ BlockTermsReader.files(dir, segmentInfo, codecId, files);
FixedGapTermsIndexReader.files(dir, segmentInfo, codecId, files);
}
@Override
public void getExtensions(Set<String> extensions) {
SepPostingsWriterImpl.getExtensions(extensions);
- PrefixCodedTermsReader.getExtensions(extensions);
+ BlockTermsReader.getExtensions(extensions);
FixedGapTermsIndexReader.getIndexExtensions(extensions);
}
}
Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/pulsing/PulsingCodec.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/pulsing/PulsingCodec.java?rev=1061561&r1=1061560&r2=1061561&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/pulsing/PulsingCodec.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/pulsing/PulsingCodec.java Thu Jan 20 23:01:27 2011
@@ -32,8 +32,8 @@ import org.apache.lucene.index.codecs.Fi
import org.apache.lucene.index.codecs.FieldsProducer;
import org.apache.lucene.index.codecs.FixedGapTermsIndexReader;
import org.apache.lucene.index.codecs.FixedGapTermsIndexWriter;
-import org.apache.lucene.index.codecs.PrefixCodedTermsReader;
-import org.apache.lucene.index.codecs.PrefixCodedTermsWriter;
+import org.apache.lucene.index.codecs.BlockTermsReader;
+import org.apache.lucene.index.codecs.BlockTermsWriter;
import org.apache.lucene.index.codecs.TermsIndexReaderBase;
import org.apache.lucene.index.codecs.TermsIndexWriterBase;
import org.apache.lucene.index.codecs.standard.StandardCodec;
@@ -89,7 +89,7 @@ public class PulsingCodec extends Codec
// Terms dict
success = false;
try {
- FieldsConsumer ret = new PrefixCodedTermsWriter(indexWriter, state, pulsingWriter, BytesRef.getUTF8SortedAsUnicodeComparator());
+ FieldsConsumer ret = new BlockTermsWriter(indexWriter, state, pulsingWriter, BytesRef.getUTF8SortedAsUnicodeComparator());
success = true;
return ret;
} finally {
@@ -132,13 +132,13 @@ public class PulsingCodec extends Codec
// Terms dict reader
success = false;
try {
- FieldsProducer ret = new PrefixCodedTermsReader(indexReader,
- state.dir, state.fieldInfos, state.segmentInfo.name,
- pulsingReader,
- state.readBufferSize,
- BytesRef.getUTF8SortedAsUnicodeComparator(),
- StandardCodec.TERMS_CACHE_SIZE,
- state.codecId);
+ FieldsProducer ret = new BlockTermsReader(indexReader,
+ state.dir, state.fieldInfos, state.segmentInfo.name,
+ pulsingReader,
+ state.readBufferSize,
+ BytesRef.getUTF8SortedAsUnicodeComparator(),
+ StandardCodec.TERMS_CACHE_SIZE,
+ state.codecId);
success = true;
return ret;
} finally {
@@ -155,7 +155,7 @@ public class PulsingCodec extends Codec
@Override
public void files(Directory dir, SegmentInfo segmentInfo, String id, Set<String> files) throws IOException {
StandardPostingsReader.files(dir, segmentInfo, id, files);
- PrefixCodedTermsReader.files(dir, segmentInfo, id, files);
+ BlockTermsReader.files(dir, segmentInfo, id, files);
FixedGapTermsIndexReader.files(dir, segmentInfo, id, files);
}
Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/pulsing/PulsingPostingsReaderImpl.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/pulsing/PulsingPostingsReaderImpl.java?rev=1061561&r1=1061560&r2=1061561&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/pulsing/PulsingPostingsReaderImpl.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/pulsing/PulsingPostingsReaderImpl.java Thu Jan 20 23:01:27 2011
@@ -25,7 +25,7 @@ import org.apache.lucene.index.BulkPosti
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.TermState;
import org.apache.lucene.index.codecs.PostingsReaderBase;
-import org.apache.lucene.index.codecs.PrefixCodedTermState;
+import org.apache.lucene.index.codecs.BlockTermState;
import org.apache.lucene.store.ByteArrayDataInput;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.Bits;
@@ -59,11 +59,13 @@ public class PulsingPostingsReaderImpl e
wrappedPostingsReader.init(termsIn);
}
- private static class PulsingTermState extends PrefixCodedTermState {
+ private static class PulsingTermState extends BlockTermState {
private byte[] postings;
private int postingsSize; // -1 if this term was not inlined
- private PrefixCodedTermState wrappedTermState;
- private boolean pendingIndexTerm;
+ private BlockTermState wrappedTermState;
+
+ ByteArrayDataInput inlinedBytesReader;
+ private byte[] inlinedBytes;
@Override
public Object clone() {
@@ -74,7 +76,7 @@ public class PulsingPostingsReaderImpl e
System.arraycopy(postings, 0, clone.postings, 0, postingsSize);
} else {
assert wrappedTermState != null;
- clone.wrappedTermState = (PrefixCodedTermState) wrappedTermState.clone();
+ clone.wrappedTermState = (BlockTermState) wrappedTermState.clone();
}
return clone;
}
@@ -92,56 +94,86 @@ public class PulsingPostingsReaderImpl e
} else {
wrappedTermState.copyFrom(other.wrappedTermState);
}
+
+ // NOTE: we do not copy the
+ // inlinedBytes/inlinedBytesReader; these are only
+ // stored on the "primary" TermState. They are
+ // "transient" to cloned term states.
}
@Override
public String toString() {
if (postingsSize == -1) {
- return "PulsingTermState: not inlined";
+ return "PulsingTermState: not inlined: wrapped=" + wrappedTermState;
} else {
- return "PulsingTermState: inlined size=" + postingsSize;
+ return "PulsingTermState: inlined size=" + postingsSize + " " + super.toString();
}
}
}
@Override
- public PrefixCodedTermState newTermState() throws IOException {
+ public void readTermsBlock(IndexInput termsIn, FieldInfo fieldInfo, BlockTermState _termState) throws IOException {
+ final PulsingTermState termState = (PulsingTermState) _termState;
+ if (termState.inlinedBytes == null) {
+ termState.inlinedBytes = new byte[128];
+ termState.inlinedBytesReader = new ByteArrayDataInput(null);
+ }
+ int len = termsIn.readVInt();
+ if (termState.inlinedBytes.length < len) {
+ termState.inlinedBytes = new byte[ArrayUtil.oversize(len, 1)];
+ }
+ termsIn.readBytes(termState.inlinedBytes, 0, len);
+ termState.inlinedBytesReader.reset(termState.inlinedBytes);
+ termState.wrappedTermState.termCount = 0;
+ wrappedPostingsReader.readTermsBlock(termsIn, fieldInfo, termState.wrappedTermState);
+ }
+
+ @Override
+ public BlockTermState newTermState() throws IOException {
PulsingTermState state = new PulsingTermState();
state.wrappedTermState = wrappedPostingsReader.newTermState();
return state;
}
@Override
- public void readTerm(IndexInput termsIn, FieldInfo fieldInfo, PrefixCodedTermState _termState, boolean isIndexTerm) throws IOException {
+ public void nextTerm(FieldInfo fieldInfo, BlockTermState _termState) throws IOException {
+ //System.out.println("PR nextTerm");
PulsingTermState termState = (PulsingTermState) _termState;
- termState.pendingIndexTerm |= isIndexTerm;
-
// total TF, but in the omitTFAP case its computed based on docFreq.
long count = fieldInfo.omitTermFreqAndPositions ? termState.docFreq : termState.totalTermFreq;
-
+ //System.out.println(" count=" + count + " threshold=" + maxPositions);
+
if (count <= maxPositions) {
+ //System.out.println(" inlined");
// Inlined into terms dict -- just read the byte[] blob in,
// but don't decode it now (we only decode when a DocsEnum
// or D&PEnum is pulled):
- termState.postingsSize = termsIn.readVInt();
+ termState.postingsSize = termState.inlinedBytesReader.readVInt();
if (termState.postings == null || termState.postings.length < termState.postingsSize) {
termState.postings = new byte[ArrayUtil.oversize(termState.postingsSize, 1)];
}
- termsIn.readBytes(termState.postings, 0, termState.postingsSize);
+ // TODO: sort of silly to copy from one big byte[]
+ // (the blob holding all inlined terms' blobs for
+ // current term block) into another byte[] (just the
+ // blob for this term)...
+ termState.inlinedBytesReader.readBytes(termState.postings, 0, termState.postingsSize);
} else {
+ //System.out.println(" not inlined");
termState.postingsSize = -1;
+ // TODO: should we do full copyFrom? much heavier...?
termState.wrappedTermState.docFreq = termState.docFreq;
- wrappedPostingsReader.readTerm(termsIn, fieldInfo, termState.wrappedTermState, termState.pendingIndexTerm);
- termState.pendingIndexTerm = false;
+ termState.wrappedTermState.totalTermFreq = termState.totalTermFreq;
+ wrappedPostingsReader.nextTerm(fieldInfo, termState.wrappedTermState);
+ termState.wrappedTermState.termCount++;
}
}
// TODO: we could actually reuse, by having TL that
// holds the last wrapped reuse, and vice-versa
@Override
- public DocsEnum docs(FieldInfo field, PrefixCodedTermState _termState, Bits skipDocs, DocsEnum reuse) throws IOException {
+ public DocsEnum docs(FieldInfo field, BlockTermState _termState, Bits skipDocs, DocsEnum reuse) throws IOException {
PulsingTermState termState = (PulsingTermState) _termState;
if (termState.postingsSize != -1) {
PulsingDocsEnum postings;
@@ -167,7 +199,7 @@ public class PulsingPostingsReaderImpl e
// TODO: we could actually reuse, by having TL that
// holds the last wrapped reuse, and vice-versa
@Override
- public BulkPostingsEnum bulkPostings(FieldInfo field, TermState _termState, BulkPostingsEnum reuse, boolean doFreqs, boolean doPositions) throws IOException {
+ public BulkPostingsEnum bulkPostings(FieldInfo field, BlockTermState _termState, BulkPostingsEnum reuse, boolean doFreqs, boolean doPositions) throws IOException {
PulsingTermState termState = (PulsingTermState) _termState;
// nocommit -- other codecs should check too
if (doPositions && !doFreqs) {
@@ -191,7 +223,7 @@ public class PulsingPostingsReaderImpl e
// TODO: -- not great that we can't always reuse
@Override
- public DocsAndPositionsEnum docsAndPositions(FieldInfo field, PrefixCodedTermState _termState, Bits skipDocs, DocsAndPositionsEnum reuse) throws IOException {
+ public DocsAndPositionsEnum docsAndPositions(FieldInfo field, BlockTermState _termState, Bits skipDocs, DocsAndPositionsEnum reuse) throws IOException {
if (field.omitTermFreqAndPositions) {
return null;
}
Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/pulsing/PulsingPostingsWriterImpl.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/pulsing/PulsingPostingsWriterImpl.java?rev=1061561&r1=1061560&r2=1061561&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/pulsing/PulsingPostingsWriterImpl.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/pulsing/PulsingPostingsWriterImpl.java Thu Jan 20 23:01:27 2011
@@ -27,8 +27,8 @@ import org.apache.lucene.store.RAMOutput
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CodecUtil;
-// TODO: we pulse based on total TF of the term,
-// it might be better to eg pulse by "net bytes used"
+// TODO: we now inline based on total TF of the term,
+// but it might be better to inline by "net bytes used"
// so that a term that has only 1 posting but a huge
// payload would not be inlined. Though this is
// presumably rare in practice...
@@ -62,8 +62,9 @@ public final class PulsingPostingsWriter
}
// TODO: -- lazy init this? ie, if every single term
- // was pulsed then we never need to use this fallback?
- // Fallback writer for non-pulsed terms:
+ // was inlined (eg for a "primary key" field) then we
+ // never need to use this fallback? Fallback writer for
+ // non-inlined terms:
final PostingsWriterBase wrappedPostingsWriter;
/** If the total number of positions (summed across all docs
@@ -173,22 +174,18 @@ public final class PulsingPostingsWriter
}
}
- private boolean pendingIsIndexTerm;
-
private final RAMOutputStream buffer = new RAMOutputStream();
+ private final RAMOutputStream buffer2 = new RAMOutputStream();
/** Called when we are done adding docs to this term */
@Override
- public void finishTerm(TermStats stats, boolean isIndexTerm) throws IOException {
- //System.out.println("PW finishTerm docCount=" + docCount);
+ public void finishTerm(TermStats stats) throws IOException {
+ //System.out.println("PW finishTerm docCount=" + stats.docFreq);
assert pendingCount > 0 || pendingCount == -1;
- pendingIsIndexTerm |= isIndexTerm;
-
if (pendingCount == -1) {
- wrappedPostingsWriter.finishTerm(stats, pendingIsIndexTerm);
- pendingIsIndexTerm = false;
+ wrappedPostingsWriter.finishTerm(stats);
} else {
// There were few enough total occurrences for this
@@ -254,8 +251,8 @@ public final class PulsingPostingsWriter
}
//System.out.println(" bytes=" + buffer.getFilePointer());
- termsOut.writeVInt((int) buffer.getFilePointer());
- buffer.writeTo(termsOut);
+ buffer2.writeVInt((int) buffer.getFilePointer());
+ buffer.writeTo(buffer2);
buffer.reset();
}
@@ -267,6 +264,18 @@ public final class PulsingPostingsWriter
wrappedPostingsWriter.close();
}
+ @Override
+ public void flushTermsBlock() throws IOException {
+ termsOut.writeVInt((int) buffer2.getFilePointer());
+ buffer2.writeTo(termsOut);
+ buffer2.reset();
+
+ // TODO: can we avoid calling this if all terms
+ // were inlined...? Eg for a "primary key" field, the
+ // wrapped codec is never invoked...
+ wrappedPostingsWriter.flushTermsBlock();
+ }
+
// Pushes pending positions to the wrapped codec
private void push() throws IOException {
//System.out.println("PW now push @ " + pendingCount + " wrapped=" + wrappedPostingsWriter);
Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/sep/IntIndexInput.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/sep/IntIndexInput.java?rev=1061561&r1=1061560&r2=1061561&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/sep/IntIndexInput.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/codecs/sep/IntIndexInput.java Thu Jan 20 23:01:27 2011
@@ -17,11 +17,11 @@ package org.apache.lucene.index.codecs.s
* limitations under the License.
*/
-import org.apache.lucene.store.IndexInput;
-import org.apache.lucene.index.BulkPostingsEnum;
-
-import java.io.IOException;
import java.io.Closeable;
+import java.io.IOException;
+
+import org.apache.lucene.store.DataInput;
+import org.apache.lucene.index.BulkPostingsEnum;
// nocommit -- move to oal.store?
@@ -39,7 +39,7 @@ public abstract class IntIndexInput impl
public abstract static class Index {
- public abstract void read(IndexInput indexIn, boolean absolute) throws IOException;
+ public abstract void read(DataInput indexIn, boolean absolute) throws IOException;
public abstract void read(BulkPostingsEnum.BlockReader indexIn, boolean absolute) throws IOException;