You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by rm...@apache.org on 2011/05/09 17:24:23 UTC
svn commit: r1101062 [4/21] - in /lucene/dev/branches/bulkpostings: ./
dev-tools/ dev-tools/eclipse/ dev-tools/idea/.idea/
dev-tools/idea/lucene/contrib/ant/ dev-tools/idea/lucene/contrib/db/bdb-je/
dev-tools/idea/lucene/contrib/db/bdb/ dev-tools/idea/...
Modified: lucene/dev/branches/bulkpostings/lucene/contrib/wordnet/src/java/org/apache/lucene/wordnet/Syns2Index.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/contrib/wordnet/src/java/org/apache/lucene/wordnet/Syns2Index.java?rev=1101062&r1=1101061&r2=1101062&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/contrib/wordnet/src/java/org/apache/lucene/wordnet/Syns2Index.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/contrib/wordnet/src/java/org/apache/lucene/wordnet/Syns2Index.java Mon May 9 15:24:04 2011
@@ -36,7 +36,7 @@ import org.apache.lucene.document.Docume
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
-import org.apache.lucene.index.LogMergePolicy;
+import org.apache.lucene.index.TieredMergePolicy;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
@@ -250,7 +250,7 @@ public class Syns2Index
// override the specific index if it already exists
IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(
Version.LUCENE_CURRENT, ana).setOpenMode(OpenMode.CREATE));
- ((LogMergePolicy) writer.getConfig().getMergePolicy()).setUseCompoundFile(true); // why?
+ ((TieredMergePolicy) writer.getConfig().getMergePolicy()).setUseCompoundFile(true); // why?
Iterator<String> i1 = word2Nums.keySet().iterator();
while (i1.hasNext()) // for each word
{
Modified: lucene/dev/branches/bulkpostings/lucene/contrib/wordnet/src/test/org/apache/lucene/wordnet/TestWordnet.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/contrib/wordnet/src/test/org/apache/lucene/wordnet/TestWordnet.java?rev=1101062&r1=1101061&r2=1101062&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/contrib/wordnet/src/test/org/apache/lucene/wordnet/TestWordnet.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/contrib/wordnet/src/test/org/apache/lucene/wordnet/TestWordnet.java Mon May 9 15:24:04 2011
@@ -29,6 +29,7 @@ import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.util._TestUtil;
public class TestWordnet extends LuceneTestCase {
private IndexSearcher searcher;
@@ -42,6 +43,7 @@ public class TestWordnet extends LuceneT
// create a temporary synonym index
File testFile = getDataFile("testSynonyms.txt");
String commandLineArgs[] = { testFile.getAbsolutePath(), storePathName };
+ _TestUtil.rmDir(new File(storePathName));
try {
Syns2Index.main(commandLineArgs);
@@ -61,7 +63,7 @@ public class TestWordnet extends LuceneT
private void assertExpandsTo(String term, String expected[]) throws IOException {
Query expandedQuery = SynExpand.expand(term, searcher, new
- MockAnalyzer(), "field", 1F);
+ MockAnalyzer(random), "field", 1F);
BooleanQuery expectedQuery = new BooleanQuery();
for (String t : expected)
expectedQuery.add(new TermQuery(new Term("field", t)),
@@ -71,8 +73,12 @@ public class TestWordnet extends LuceneT
@Override
public void tearDown() throws Exception {
- searcher.close();
- dir.close();
+ if (searcher != null) {
+ searcher.close();
+ }
+ if (dir != null) {
+ dir.close();
+ }
rmDir(storePathName); // delete our temporary synonym index
super.tearDown();
}
Modified: lucene/dev/branches/bulkpostings/lucene/contrib/xml-query-parser/src/test/org/apache/lucene/xmlparser/TestParser.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/contrib/xml-query-parser/src/test/org/apache/lucene/xmlparser/TestParser.java?rev=1101062&r1=1101061&r2=1101062&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/contrib/xml-query-parser/src/test/org/apache/lucene/xmlparser/TestParser.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/contrib/xml-query-parser/src/test/org/apache/lucene/xmlparser/TestParser.java Mon May 9 15:24:04 2011
@@ -49,7 +49,7 @@ public class TestParser extends LuceneTe
@BeforeClass
public static void beforeClass() throws Exception {
// TODO: rewrite test (this needs to set QueryParser.enablePositionIncrements, too, for work with CURRENT):
- Analyzer analyzer=new MockAnalyzer(MockTokenizer.WHITESPACE, true, MockTokenFilter.ENGLISH_STOPSET, false);
+ Analyzer analyzer=new MockAnalyzer(random, MockTokenizer.WHITESPACE, true, MockTokenFilter.ENGLISH_STOPSET, false);
//initialize the parser
builder=new CorePlusExtensionsParser("contents",analyzer);
@@ -187,7 +187,8 @@ public class TestParser extends LuceneTe
}
public void testDuplicateFilterQueryXML() throws ParserException, IOException
{
- Assume.assumeTrue(searcher.getIndexReader().getSequentialSubReaders().length == 1);
+ Assume.assumeTrue(searcher.getIndexReader().getSequentialSubReaders() == null ||
+ searcher.getIndexReader().getSequentialSubReaders().length == 1);
Query q=parse("DuplicateFilterQuery.xml");
int h = searcher.search(q, null, 1000).totalHits;
assertEquals("DuplicateFilterQuery should produce 1 result ", 1,h);
Modified: lucene/dev/branches/bulkpostings/lucene/contrib/xml-query-parser/src/test/org/apache/lucene/xmlparser/TestQueryTemplateManager.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/contrib/xml-query-parser/src/test/org/apache/lucene/xmlparser/TestQueryTemplateManager.java?rev=1101062&r1=1101061&r2=1101062&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/contrib/xml-query-parser/src/test/org/apache/lucene/xmlparser/TestQueryTemplateManager.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/contrib/xml-query-parser/src/test/org/apache/lucene/xmlparser/TestQueryTemplateManager.java Mon May 9 15:24:04 2011
@@ -44,7 +44,7 @@ import org.xml.sax.SAXException;
public class TestQueryTemplateManager extends LuceneTestCase {
CoreParser builder;
- Analyzer analyzer=new MockAnalyzer();
+ Analyzer analyzer=new MockAnalyzer(random);
private IndexSearcher searcher;
private Directory dir;
Modified: lucene/dev/branches/bulkpostings/lucene/docs/contributions.html
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/docs/contributions.html?rev=1101062&r1=1101061&r2=1101062&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/docs/contributions.html (original)
+++ lucene/dev/branches/bulkpostings/lucene/docs/contributions.html Mon May 9 15:24:04 2011
@@ -3,7 +3,7 @@
<head>
<META http-equiv="Content-Type" content="text/html; charset=UTF-8">
<meta content="Apache Forrest" name="Generator">
-<meta name="Forrest-version" content="0.8">
+<meta name="Forrest-version" content="0.9">
<meta name="Forrest-skin-name" content="lucene">
<title>
Apache Lucene - Contributions
@@ -275,7 +275,7 @@ document.write("Last Published: " + docu
<a href="#PDFTextStream -- PDF text and metadata extraction">PDFTextStream -- PDF text and metadata extraction</a>
</li>
<li>
-<a href="#PJ Classic & PJ Professional - PDF Document Conversion">PJ Classic & PJ Professional - PDF Document Conversion</a>
+<a href="#PJ Classic & PJ Professional - PDF Document Conversion">PJ Classic & PJ Professional - PDF Document Conversion</a>
</li>
</ul>
</li>
@@ -403,7 +403,7 @@ document.write("Last Published: " + docu
URL
</th>
<td>
- <a href="http://marc.theaimsgroup.com/?l=lucene-dev&m=100723333506246&w=2">
+ <a href="http://marc.theaimsgroup.com/?l=lucene-dev&m=100723333506246&w=2">
http://marc.theaimsgroup.com/?l=lucene-dev&m=100723333506246&w=2
</a>
</td>
@@ -538,7 +538,7 @@ document.write("Last Published: " + docu
</tr>
</table>
-<a name="N10124"></a><a name="PJ Classic & PJ Professional - PDF Document Conversion"></a>
+<a name="N10124"></a><a name="PJ Classic & PJ Professional - PDF Document Conversion"></a>
<h3 class="boxed">PJ Classic & PJ Professional - PDF Document Conversion</h3>
<table class="ForrestTable" cellspacing="1" cellpadding="4">
Modified: lucene/dev/branches/bulkpostings/lucene/docs/contributions.pdf
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/docs/contributions.pdf?rev=1101062&r1=1101061&r2=1101062&view=diff
==============================================================================
Files lucene/dev/branches/bulkpostings/lucene/docs/contributions.pdf (original) and lucene/dev/branches/bulkpostings/lucene/docs/contributions.pdf Mon May 9 15:24:04 2011 differ
Modified: lucene/dev/branches/bulkpostings/lucene/docs/demo.html
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/docs/demo.html?rev=1101062&r1=1101061&r2=1101062&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/docs/demo.html (original)
+++ lucene/dev/branches/bulkpostings/lucene/docs/demo.html Mon May 9 15:24:04 2011
@@ -3,7 +3,7 @@
<head>
<META http-equiv="Content-Type" content="text/html; charset=UTF-8">
<meta content="Apache Forrest" name="Generator">
-<meta name="Forrest-version" content="0.8">
+<meta name="Forrest-version" content="0.9">
<meta name="Forrest-skin-name" content="lucene">
<title>
Apache Lucene - Building and Installing the Basic Demo
Modified: lucene/dev/branches/bulkpostings/lucene/docs/demo.pdf
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/docs/demo.pdf?rev=1101062&r1=1101061&r2=1101062&view=diff
==============================================================================
Files lucene/dev/branches/bulkpostings/lucene/docs/demo.pdf (original) and lucene/dev/branches/bulkpostings/lucene/docs/demo.pdf Mon May 9 15:24:04 2011 differ
Modified: lucene/dev/branches/bulkpostings/lucene/docs/demo2.html
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/docs/demo2.html?rev=1101062&r1=1101061&r2=1101062&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/docs/demo2.html (original)
+++ lucene/dev/branches/bulkpostings/lucene/docs/demo2.html Mon May 9 15:24:04 2011
@@ -3,7 +3,7 @@
<head>
<META http-equiv="Content-Type" content="text/html; charset=UTF-8">
<meta content="Apache Forrest" name="Generator">
-<meta name="Forrest-version" content="0.8">
+<meta name="Forrest-version" content="0.9">
<meta name="Forrest-skin-name" content="lucene">
<title>
Apache Lucene - Basic Demo Sources Walk-through
Modified: lucene/dev/branches/bulkpostings/lucene/docs/demo2.pdf
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/docs/demo2.pdf?rev=1101062&r1=1101061&r2=1101062&view=diff
==============================================================================
Files lucene/dev/branches/bulkpostings/lucene/docs/demo2.pdf (original) and lucene/dev/branches/bulkpostings/lucene/docs/demo2.pdf Mon May 9 15:24:04 2011 differ
Modified: lucene/dev/branches/bulkpostings/lucene/docs/fileformats.html
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/docs/fileformats.html?rev=1101062&r1=1101061&r2=1101062&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/docs/fileformats.html (original)
+++ lucene/dev/branches/bulkpostings/lucene/docs/fileformats.html Mon May 9 15:24:04 2011
@@ -3,7 +3,7 @@
<head>
<META http-equiv="Content-Type" content="text/html; charset=UTF-8">
<meta content="Apache Forrest" name="Generator">
-<meta name="Forrest-version" content="0.8">
+<meta name="Forrest-version" content="0.9">
<meta name="Forrest-skin-name" content="lucene">
<title>
Apache Lucene - Index File Formats
@@ -425,11 +425,19 @@ document.write("Last Published: " + docu
<p>
In version 3.1, segments record the code version
that created them. See LUCENE-2720 for details.
+
+ Additionally segments track explicitly whether or
+ not they have term vectors. See LUCENE-2811 for details.
+ </p>
+<p>
+ In version 3.2, numeric fields are written natively
+ to the stored fields file; previously they were stored in
+ text format only.
</p>
</div>
-<a name="N10037"></a><a name="Definitions"></a>
+<a name="N1003A"></a><a name="Definitions"></a>
<h2 class="boxed">Definitions</h2>
<div class="section">
<p>
@@ -470,7 +478,7 @@ document.write("Last Published: " + docu
strings, the first naming the field, and the second naming text
within the field.
</p>
-<a name="N10057"></a><a name="Inverted Indexing"></a>
+<a name="N1005A"></a><a name="Inverted Indexing"></a>
<h3 class="boxed">Inverted Indexing</h3>
<p>
The index stores statistics about terms in order
@@ -480,7 +488,7 @@ document.write("Last Published: " + docu
it. This is the inverse of the natural relationship, in which
documents list terms.
</p>
-<a name="N10063"></a><a name="Types of Fields"></a>
+<a name="N10066"></a><a name="Types of Fields"></a>
<h3 class="boxed">Types of Fields</h3>
<p>
In Lucene, fields may be <i>stored</i>, in which
@@ -494,7 +502,7 @@ document.write("Last Published: " + docu
to be indexed literally.
</p>
<p>See the <a href="api/core/org/apache/lucene/document/Field.html">Field</a> java docs for more information on Fields.</p>
-<a name="N10080"></a><a name="Segments"></a>
+<a name="N10083"></a><a name="Segments"></a>
<h3 class="boxed">Segments</h3>
<p>
Lucene indexes may be composed of multiple sub-indexes, or
@@ -520,7 +528,7 @@ document.write("Last Published: " + docu
Searches may involve multiple segments and/or multiple indexes, each
index potentially composed of a set of segments.
</p>
-<a name="N1009E"></a><a name="Document Numbers"></a>
+<a name="N100A1"></a><a name="Document Numbers"></a>
<h3 class="boxed">Document Numbers</h3>
<p>
Internally, Lucene refers to documents by an integer <i>document
@@ -575,7 +583,7 @@ document.write("Last Published: " + docu
</div>
-<a name="N100C5"></a><a name="Overview"></a>
+<a name="N100C8"></a><a name="Overview"></a>
<h2 class="boxed">Overview</h2>
<div class="section">
<p>
@@ -674,7 +682,7 @@ document.write("Last Published: " + docu
</div>
-<a name="N10108"></a><a name="File Naming"></a>
+<a name="N1010B"></a><a name="File Naming"></a>
<h2 class="boxed">File Naming</h2>
<div class="section">
<p>
@@ -701,7 +709,7 @@ document.write("Last Published: " + docu
</p>
</div>
-<a name="N10117"></a><a name="file-names"></a>
+<a name="N1011A"></a><a name="file-names"></a>
<h2 class="boxed">Summary of File Extensions</h2>
<div class="section">
<p>The following table summarizes the names and extensions of the files in Lucene:
@@ -843,10 +851,10 @@ document.write("Last Published: " + docu
</div>
-<a name="N10201"></a><a name="Primitive Types"></a>
+<a name="N10204"></a><a name="Primitive Types"></a>
<h2 class="boxed">Primitive Types</h2>
<div class="section">
-<a name="N10206"></a><a name="Byte"></a>
+<a name="N10209"></a><a name="Byte"></a>
<h3 class="boxed">Byte</h3>
<p>
The most primitive type
@@ -854,7 +862,7 @@ document.write("Last Published: " + docu
other data types are defined as sequences
of bytes, so file formats are byte-order independent.
</p>
-<a name="N1020F"></a><a name="UInt32"></a>
+<a name="N10212"></a><a name="UInt32"></a>
<h3 class="boxed">UInt32</h3>
<p>
32-bit unsigned integers are written as four
@@ -864,7 +872,7 @@ document.write("Last Published: " + docu
UInt32 --> <Byte><sup>4</sup>
</p>
-<a name="N1021E"></a><a name="Uint64"></a>
+<a name="N10221"></a><a name="Uint64"></a>
<h3 class="boxed">Uint64</h3>
<p>
64-bit unsigned integers are written as eight
@@ -873,7 +881,7 @@ document.write("Last Published: " + docu
<p>UInt64 --> <Byte><sup>8</sup>
</p>
-<a name="N1022D"></a><a name="VInt"></a>
+<a name="N10230"></a><a name="VInt"></a>
<h3 class="boxed">VInt</h3>
<p>
A variable-length format for positive integers is
@@ -1423,13 +1431,13 @@ document.write("Last Published: " + docu
This provides compression while still being
efficient to decode.
</p>
-<a name="N10512"></a><a name="Chars"></a>
+<a name="N10515"></a><a name="Chars"></a>
<h3 class="boxed">Chars</h3>
<p>
Lucene writes unicode
character sequences as UTF-8 encoded bytes.
</p>
-<a name="N1051B"></a><a name="String"></a>
+<a name="N1051E"></a><a name="String"></a>
<h3 class="boxed">String</h3>
<p>
Lucene writes strings as UTF-8 encoded bytes.
@@ -1442,10 +1450,10 @@ document.write("Last Published: " + docu
</div>
-<a name="N10528"></a><a name="Compound Types"></a>
+<a name="N1052B"></a><a name="Compound Types"></a>
<h2 class="boxed">Compound Types</h2>
<div class="section">
-<a name="N1052D"></a><a name="MapStringString"></a>
+<a name="N10530"></a><a name="MapStringString"></a>
<h3 class="boxed">Map<String,String></h3>
<p>
In a couple places Lucene stores a Map
@@ -1458,13 +1466,13 @@ document.write("Last Published: " + docu
</div>
-<a name="N1053D"></a><a name="Per-Index Files"></a>
+<a name="N10540"></a><a name="Per-Index Files"></a>
<h2 class="boxed">Per-Index Files</h2>
<div class="section">
<p>
The files in this section exist one-per-index.
</p>
-<a name="N10545"></a><a name="Segments File"></a>
+<a name="N10548"></a><a name="Segments File"></a>
<h3 class="boxed">Segments File</h3>
<p>
The active segments in the index are stored in the
@@ -1508,7 +1516,7 @@ document.write("Last Published: " + docu
<b>3.1</b>
Segments --> Format, Version, NameCounter, SegCount, <SegVersion, SegName, SegSize, DelGen, DocStoreOffset, [DocStoreSegment, DocStoreIsCompoundFile], HasSingleNormFile, NumField,
NormGen<sup>NumField</sup>,
- IsCompoundFile, DeletionCount, HasProx, Diagnostics><sup>SegCount</sup>, CommitUserData, Checksum
+ IsCompoundFile, DeletionCount, HasProx, Diagnostics, HasVectors><sup>SegCount</sup>, CommitUserData, Checksum
</p>
<p>
Format, NameCounter, SegCount, SegSize, NumField,
@@ -1525,7 +1533,7 @@ document.write("Last Published: " + docu
</p>
<p>
IsCompoundFile, HasSingleNormFile,
- DocStoreIsCompoundFile, HasProx --> Int8
+ DocStoreIsCompoundFile, HasProx, HasVectors --> Int8
</p>
<p>
CommitUserData --> Map<String,String>
@@ -1634,7 +1642,10 @@ document.write("Last Published: " + docu
Lucene version, OS, Java version, why the segment
was created (merge, flush, addIndexes), etc.
</p>
-<a name="N105CD"></a><a name="Lock File"></a>
+<p> HasVectors is 1 if this segment stores term vectors,
+ else it's 0.
+ </p>
+<a name="N105D3"></a><a name="Lock File"></a>
<h3 class="boxed">Lock File</h3>
<p>
The write lock, which is stored in the index
@@ -1648,14 +1659,14 @@ document.write("Last Published: " + docu
documents). This lock file ensures that only one
writer is modifying the index at a time.
</p>
-<a name="N105D6"></a><a name="Deletable File"></a>
+<a name="N105DC"></a><a name="Deletable File"></a>
<h3 class="boxed">Deletable File</h3>
<p>
A writer dynamically computes
the files that are deletable, instead, so no file
is written.
</p>
-<a name="N105DF"></a><a name="Compound Files"></a>
+<a name="N105E5"></a><a name="Compound Files"></a>
<h3 class="boxed">Compound Files</h3>
<p>Starting with Lucene 1.4 the compound file format became default. This
is simply a container for all files described in the next section
@@ -1682,14 +1693,14 @@ document.write("Last Published: " + docu
</div>
-<a name="N10607"></a><a name="Per-Segment Files"></a>
+<a name="N1060D"></a><a name="Per-Segment Files"></a>
<h2 class="boxed">Per-Segment Files</h2>
<div class="section">
<p>
The remaining files are all per-segment, and are
thus defined by suffix.
</p>
-<a name="N1060F"></a><a name="Fields"></a>
+<a name="N10615"></a><a name="Fields"></a>
<h3 class="boxed">Fields</h3>
<p>
@@ -1862,13 +1873,29 @@ document.write("Last Published: " + docu
<li>third bit is one for fields with compression option enabled
(if compression is enabled, the algorithm used is ZLIB),
only available for indexes until Lucene version 2.9.x</li>
+
+<li>4th to 6th bits (mask: 0x7<<3) define the type of a
+ numeric field: <ul>
+
+<li>all bits in mask are cleared if no numeric field at all</li>
+
+<li>1<<3: Value is Int</li>
+
+<li>2<<3: Value is Long</li>
+
+<li>3<<3: Value is Int as Float (as of Integer.intBitsToFloat)</li>
+
+<li>4<<3: Value is Long as Double (as of Double.longBitsToDouble)</li>
+
+</ul>
+</li>
</ul>
</p>
<p>Value -->
- String | BinaryValue (depending on Bits)
+ String | BinaryValue | Int | Long (depending on Bits)
</p>
<p>BinaryValue -->
@@ -1883,7 +1910,7 @@ document.write("Last Published: " + docu
</li>
</ol>
-<a name="N106B6"></a><a name="Term Dictionary"></a>
+<a name="N106D0"></a><a name="Term Dictionary"></a>
<h3 class="boxed">Term Dictionary</h3>
<p>
The term dictionary is represented as two files:
@@ -2075,7 +2102,7 @@ document.write("Last Published: " + docu
</li>
</ol>
-<a name="N1073A"></a><a name="Frequencies"></a>
+<a name="N10754"></a><a name="Frequencies"></a>
<h3 class="boxed">Frequencies</h3>
<p>
The .frq file contains the lists of documents
@@ -2203,7 +2230,7 @@ document.write("Last Published: " + docu
entry in level-1. In the example, entry 15 on level 1 has a pointer to entry 15 on level 0, and entry 31 on level 1 has a pointer
to entry 31 on level 0.
</p>
-<a name="N107C2"></a><a name="Positions"></a>
+<a name="N107DC"></a><a name="Positions"></a>
<h3 class="boxed">Positions</h3>
<p>
The .prx file contains the lists of positions that
@@ -2273,7 +2300,7 @@ document.write("Last Published: " + docu
Payload. If PayloadLength is not stored, then this Payload has the same
length as the Payload at the previous position.
</p>
-<a name="N107FE"></a><a name="Normalization Factors"></a>
+<a name="N10818"></a><a name="Normalization Factors"></a>
<h3 class="boxed">Normalization Factors</h3>
<p>There's a single .nrm file containing all norms:
</p>
@@ -2353,7 +2380,7 @@ document.write("Last Published: " + docu
</p>
<p>Separate norm files are created (when adequate) for both compound and non compound segments.
</p>
-<a name="N1084F"></a><a name="Term Vectors"></a>
+<a name="N10869"></a><a name="Term Vectors"></a>
<h3 class="boxed">Term Vectors</h3>
<p>
Term Vector support is optional on a field by
@@ -2489,7 +2516,7 @@ document.write("Last Published: " + docu
</li>
</ol>
-<a name="N108EB"></a><a name="Deleted Documents"></a>
+<a name="N10905"></a><a name="Deleted Documents"></a>
<h3 class="boxed">Deleted Documents</h3>
<p>The .del file is
optional, and only exists when a segment contains deletions.
@@ -2553,7 +2580,7 @@ document.write("Last Published: " + docu
</div>
-<a name="N10925"></a><a name="Limitations"></a>
+<a name="N1093F"></a><a name="Limitations"></a>
<h2 class="boxed">Limitations</h2>
<div class="section">
<p>
Modified: lucene/dev/branches/bulkpostings/lucene/docs/fileformats.pdf
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/docs/fileformats.pdf?rev=1101062&r1=1101061&r2=1101062&view=diff
==============================================================================
Files lucene/dev/branches/bulkpostings/lucene/docs/fileformats.pdf (original) and lucene/dev/branches/bulkpostings/lucene/docs/fileformats.pdf Mon May 9 15:24:04 2011 differ
Modified: lucene/dev/branches/bulkpostings/lucene/docs/gettingstarted.html
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/docs/gettingstarted.html?rev=1101062&r1=1101061&r2=1101062&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/docs/gettingstarted.html (original)
+++ lucene/dev/branches/bulkpostings/lucene/docs/gettingstarted.html Mon May 9 15:24:04 2011
@@ -3,7 +3,7 @@
<head>
<META http-equiv="Content-Type" content="text/html; charset=UTF-8">
<meta content="Apache Forrest" name="Generator">
-<meta name="Forrest-version" content="0.8">
+<meta name="Forrest-version" content="0.9">
<meta name="Forrest-skin-name" content="lucene">
<title>
Apache Lucene - Getting Started Guide
@@ -268,15 +268,13 @@ may wish to skip sections.
<li>
<a href="demo.html">About the command-line Lucene demo and its usage</a>. This section
- is intended for anyone who wants to use the command-line Lucene demo.</li>
-<p></p>
+ is intended for anyone who wants to use the command-line Lucene demo.</li>
<li>
<a href="demo2.html">About the sources and implementation for the command-line Lucene
demo</a>. This section walks through the implementation details (sources) of the
- command-line Lucene demo. This section is intended for developers.</li>
-<p></p>
+ command-line Lucene demo. This section is intended for developers.</li>
</ul>
</div>
Modified: lucene/dev/branches/bulkpostings/lucene/docs/gettingstarted.pdf
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/docs/gettingstarted.pdf?rev=1101062&r1=1101061&r2=1101062&view=diff
==============================================================================
Files lucene/dev/branches/bulkpostings/lucene/docs/gettingstarted.pdf (original) and lucene/dev/branches/bulkpostings/lucene/docs/gettingstarted.pdf Mon May 9 15:24:04 2011 differ
Modified: lucene/dev/branches/bulkpostings/lucene/docs/index.html
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/docs/index.html?rev=1101062&r1=1101061&r2=1101062&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/docs/index.html (original)
+++ lucene/dev/branches/bulkpostings/lucene/docs/index.html Mon May 9 15:24:04 2011
@@ -3,7 +3,7 @@
<head>
<META http-equiv="Content-Type" content="text/html; charset=UTF-8">
<meta content="Apache Forrest" name="Generator">
-<meta name="Forrest-version" content="0.8">
+<meta name="Forrest-version" content="0.9">
<meta name="Forrest-skin-name" content="lucene">
<title>Lucene Java Documentation</title>
<link type="text/css" href="skin/basic.css" rel="stylesheet">
Modified: lucene/dev/branches/bulkpostings/lucene/docs/index.pdf
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/docs/index.pdf?rev=1101062&r1=1101061&r2=1101062&view=diff
==============================================================================
Files lucene/dev/branches/bulkpostings/lucene/docs/index.pdf (original) and lucene/dev/branches/bulkpostings/lucene/docs/index.pdf Mon May 9 15:24:04 2011 differ
Modified: lucene/dev/branches/bulkpostings/lucene/docs/linkmap.html
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/docs/linkmap.html?rev=1101062&r1=1101061&r2=1101062&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/docs/linkmap.html (original)
+++ lucene/dev/branches/bulkpostings/lucene/docs/linkmap.html Mon May 9 15:24:04 2011
@@ -3,7 +3,7 @@
<head>
<META http-equiv="Content-Type" content="text/html; charset=UTF-8">
<meta content="Apache Forrest" name="Generator">
-<meta name="Forrest-version" content="0.8">
+<meta name="Forrest-version" content="0.9">
<meta name="Forrest-skin-name" content="lucene">
<title>Site Linkmap Table of Contents</title>
<link type="text/css" href="skin/basic.css" rel="stylesheet">
Modified: lucene/dev/branches/bulkpostings/lucene/docs/linkmap.pdf
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/docs/linkmap.pdf?rev=1101062&r1=1101061&r2=1101062&view=diff
==============================================================================
Files lucene/dev/branches/bulkpostings/lucene/docs/linkmap.pdf (original) and lucene/dev/branches/bulkpostings/lucene/docs/linkmap.pdf Mon May 9 15:24:04 2011 differ
Modified: lucene/dev/branches/bulkpostings/lucene/docs/lucene-contrib/index.html
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/docs/lucene-contrib/index.html?rev=1101062&r1=1101061&r2=1101062&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/docs/lucene-contrib/index.html (original)
+++ lucene/dev/branches/bulkpostings/lucene/docs/lucene-contrib/index.html Mon May 9 15:24:04 2011
@@ -3,7 +3,7 @@
<head>
<META http-equiv="Content-Type" content="text/html; charset=UTF-8">
<meta content="Apache Forrest" name="Generator">
-<meta name="Forrest-version" content="0.8">
+<meta name="Forrest-version" content="0.9">
<meta name="Forrest-skin-name" content="lucene">
<title>
Apache Lucene - Lucene Contrib
Modified: lucene/dev/branches/bulkpostings/lucene/docs/lucene-contrib/index.pdf
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/docs/lucene-contrib/index.pdf?rev=1101062&r1=1101061&r2=1101062&view=diff
==============================================================================
Files lucene/dev/branches/bulkpostings/lucene/docs/lucene-contrib/index.pdf (original) and lucene/dev/branches/bulkpostings/lucene/docs/lucene-contrib/index.pdf Mon May 9 15:24:04 2011 differ
Modified: lucene/dev/branches/bulkpostings/lucene/docs/queryparsersyntax.html
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/docs/queryparsersyntax.html?rev=1101062&r1=1101061&r2=1101062&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/docs/queryparsersyntax.html (original)
+++ lucene/dev/branches/bulkpostings/lucene/docs/queryparsersyntax.html Mon May 9 15:24:04 2011
@@ -3,7 +3,7 @@
<head>
<META http-equiv="Content-Type" content="text/html; charset=UTF-8">
<meta content="Apache Forrest" name="Generator">
-<meta name="Forrest-version" content="0.8">
+<meta name="Forrest-version" content="0.9">
<meta name="Forrest-skin-name" content="lucene">
<title>
Apache Lucene - Query Parser Syntax
Modified: lucene/dev/branches/bulkpostings/lucene/docs/queryparsersyntax.pdf
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/docs/queryparsersyntax.pdf?rev=1101062&r1=1101061&r2=1101062&view=diff
==============================================================================
Files lucene/dev/branches/bulkpostings/lucene/docs/queryparsersyntax.pdf (original) and lucene/dev/branches/bulkpostings/lucene/docs/queryparsersyntax.pdf Mon May 9 15:24:04 2011 differ
Modified: lucene/dev/branches/bulkpostings/lucene/docs/scoring.html
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/docs/scoring.html?rev=1101062&r1=1101061&r2=1101062&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/docs/scoring.html (original)
+++ lucene/dev/branches/bulkpostings/lucene/docs/scoring.html Mon May 9 15:24:04 2011
@@ -3,7 +3,7 @@
<head>
<META http-equiv="Content-Type" content="text/html; charset=UTF-8">
<meta content="Apache Forrest" name="Generator">
-<meta name="Forrest-version" content="0.8">
+<meta name="Forrest-version" content="0.9">
<meta name="Forrest-skin-name" content="lucene">
<title>
Apache Lucene - Scoring
Modified: lucene/dev/branches/bulkpostings/lucene/docs/scoring.pdf
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/docs/scoring.pdf?rev=1101062&r1=1101061&r2=1101062&view=diff
==============================================================================
Files lucene/dev/branches/bulkpostings/lucene/docs/scoring.pdf (original) and lucene/dev/branches/bulkpostings/lucene/docs/scoring.pdf Mon May 9 15:24:04 2011 differ
Modified: lucene/dev/branches/bulkpostings/lucene/docs/systemrequirements.html
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/docs/systemrequirements.html?rev=1101062&r1=1101061&r2=1101062&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/docs/systemrequirements.html (original)
+++ lucene/dev/branches/bulkpostings/lucene/docs/systemrequirements.html Mon May 9 15:24:04 2011
@@ -3,7 +3,7 @@
<head>
<META http-equiv="Content-Type" content="text/html; charset=UTF-8">
<meta content="Apache Forrest" name="Generator">
-<meta name="Forrest-version" content="0.8">
+<meta name="Forrest-version" content="0.9">
<meta name="Forrest-skin-name" content="lucene">
<title>Apache Lucene - System Requirements</title>
<link type="text/css" href="skin/basic.css" rel="stylesheet">
Modified: lucene/dev/branches/bulkpostings/lucene/docs/systemrequirements.pdf
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/docs/systemrequirements.pdf?rev=1101062&r1=1101061&r2=1101062&view=diff
==============================================================================
Binary files - no diff available.
Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/document/Document.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/document/Document.java?rev=1101062&r1=1101061&r2=1101062&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/document/Document.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/document/Document.java Mon May 9 15:24:04 2011
@@ -131,8 +131,13 @@ public final class Document {
/** Returns a field with the given name if any exist in this document, or
* null. If multiple fields exists with this name, this method returns the
* first value added.
- * Do not use this method with lazy loaded fields.
+ * Do not use this method with lazy loaded fields or {@link NumericField}.
+ * @deprecated use {@link #getFieldable} instead and cast depending on
+ * data type.
+ * @throws ClassCastException if you try to retrieve a numerical or
+ * lazy loaded field.
*/
+ @Deprecated
public final Field getField(String name) {
return (Field) getFieldable(name);
}
@@ -154,6 +159,8 @@ public final class Document {
* this document, or null. If multiple fields exist with this name, this
* method returns the first value added. If only binary fields with this name
* exist, returns null.
+ * For {@link NumericField} it returns the string value of the number. If you want
+ * the actual {@code NumericField} instance back, use {@link #getFieldable}.
*/
public final String get(String name) {
for (Fieldable field : fields) {
@@ -177,13 +184,18 @@ public final class Document {
/**
* Returns an array of {@link Field}s with the given name.
- * Do not use with lazy loaded fields.
* This method returns an empty array when there are no
* matching fields. It never returns null.
+ * Do not use this method with lazy loaded fields or {@link NumericField}.
*
* @param name the name of the field
* @return a <code>Field[]</code> array
+ * @deprecated use {@link #getFieldable} instead and cast depending on
+ * data type.
+ * @throws ClassCastException if you try to retrieve a numerical or
+ * lazy loaded field.
*/
+ @Deprecated
public final Field[] getFields(String name) {
List<Field> result = new ArrayList<Field>();
for (Fieldable field : fields) {
@@ -230,6 +242,8 @@ public final class Document {
* Returns an array of values of the field specified as the method parameter.
* This method returns an empty array when there are no
* matching fields. It never returns null.
+ * For {@link NumericField}s it returns the string value of the number. If you want
+ * the actual {@code NumericField} instances back, use {@link #getFieldables}.
* @param name the name of the field
* @return a <code>String[]</code> of field values
*/
Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/document/NumericField.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/document/NumericField.java?rev=1101062&r1=1101061&r2=1101062&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/document/NumericField.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/document/NumericField.java Mon May 9 15:24:04 2011
@@ -127,18 +127,18 @@ import org.apache.lucene.search.FieldCac
* class is a wrapper around this token stream type for
* easier, more intuitive usage.</p>
*
- * <p><b>NOTE:</b> This class is only used during
- * indexing. When retrieving the stored field value from a
- * {@link Document} instance after search, you will get a
- * conventional {@link Fieldable} instance where the numeric
- * values are returned as {@link String}s (according to
- * <code>toString(value)</code> of the used data type).
- *
* @since 2.9
*/
public final class NumericField extends AbstractField {
- private final NumericTokenStream numericTS;
+ /** Data type of the value in {@link NumericField}.
+ * @since 3.2
+ */
+ public static enum DataType { INT, LONG, FLOAT, DOUBLE }
+
+ private transient NumericTokenStream numericTS;
+ private DataType type;
+ private final int precisionStep;
/**
* Creates a field for numeric values using the default <code>precisionStep</code>
@@ -158,8 +158,8 @@ public final class NumericField extends
* a numeric value, before indexing a document containing this field,
* set a value using the various set<em>???</em>Value() methods.
* @param name the field name
- * @param store if the field should be stored in plain text form
- * (according to <code>toString(value)</code> of the used data type)
+ * @param store if the field should be stored, {@link Document#getFieldable}
+ * then returns {@code NumericField} instances on search results.
* @param index if the field should be indexed using {@link NumericTokenStream}
*/
public NumericField(String name, Field.Store store, boolean index) {
@@ -186,19 +186,43 @@ public final class NumericField extends
* set a value using the various set<em>???</em>Value() methods.
* @param name the field name
* @param precisionStep the used <a href="../search/NumericRangeQuery.html#precisionStepDesc">precision step</a>
- * @param store if the field should be stored in plain text form
- * (according to <code>toString(value)</code> of the used data type)
+ * @param store if the field should be stored, {@link Document#getFieldable}
+ * then returns {@code NumericField} instances on search results.
* @param index if the field should be indexed using {@link NumericTokenStream}
*/
public NumericField(String name, int precisionStep, Field.Store store, boolean index) {
super(name, store, index ? Field.Index.ANALYZED_NO_NORMS : Field.Index.NO, Field.TermVector.NO);
+ this.precisionStep = precisionStep;
setOmitTermFreqAndPositions(true);
- numericTS = new NumericTokenStream(precisionStep);
}
/** Returns a {@link NumericTokenStream} for indexing the numeric value. */
public TokenStream tokenStreamValue() {
- return isIndexed() ? numericTS : null;
+ if (!isIndexed())
+ return null;
+ if (numericTS == null) {
+ // lazy init the TokenStream as it is heavy to instantiate (attributes,...),
+ // if not needed (stored field loading)
+ numericTS = new NumericTokenStream(precisionStep);
+ // initialize value in TokenStream
+ if (fieldsData != null) {
+ assert type != null;
+ final Number val = (Number) fieldsData;
+ switch (type) {
+ case INT:
+ numericTS.setIntValue(val.intValue()); break;
+ case LONG:
+ numericTS.setLongValue(val.longValue()); break;
+ case FLOAT:
+ numericTS.setFloatValue(val.floatValue()); break;
+ case DOUBLE:
+ numericTS.setDoubleValue(val.doubleValue()); break;
+ default:
+ assert false : "Should never get here";
+ }
+ }
+ }
+ return numericTS;
}
/** Returns always <code>null</code> for numeric fields */
@@ -212,7 +236,10 @@ public final class NumericField extends
return null;
}
- /** Returns the numeric value as a string (how it is stored, when {@link Field.Store#YES} is chosen). */
+ /** Returns the numeric value as a string. This format is also returned if you call {@link Document#get(String)}
+ * on search results. It is recommended to use {@link Document#getFieldable} instead
+ * that returns {@code NumericField} instances. You can then use {@link #getNumericValue}
+ * to return the stored value. */
public String stringValue() {
return (fieldsData == null) ? null : fieldsData.toString();
}
@@ -224,7 +251,14 @@ public final class NumericField extends
/** Returns the precision step. */
public int getPrecisionStep() {
- return numericTS.getPrecisionStep();
+ return precisionStep;
+ }
+
+ /** Returns the data type of the current value, {@code null} if not yet set.
+ * @since 3.2
+ */
+ public DataType getDataType() {
+ return type;
}
/**
@@ -234,8 +268,9 @@ public final class NumericField extends
* <code>document.add(new NumericField(name, precisionStep).setLongValue(value))</code>
*/
public NumericField setLongValue(final long value) {
- numericTS.setLongValue(value);
+ if (numericTS != null) numericTS.setLongValue(value);
fieldsData = Long.valueOf(value);
+ type = DataType.LONG;
return this;
}
@@ -246,8 +281,9 @@ public final class NumericField extends
* <code>document.add(new NumericField(name, precisionStep).setIntValue(value))</code>
*/
public NumericField setIntValue(final int value) {
- numericTS.setIntValue(value);
+ if (numericTS != null) numericTS.setIntValue(value);
fieldsData = Integer.valueOf(value);
+ type = DataType.INT;
return this;
}
@@ -258,8 +294,9 @@ public final class NumericField extends
* <code>document.add(new NumericField(name, precisionStep).setDoubleValue(value))</code>
*/
public NumericField setDoubleValue(final double value) {
- numericTS.setDoubleValue(value);
+ if (numericTS != null) numericTS.setDoubleValue(value);
fieldsData = Double.valueOf(value);
+ type = DataType.DOUBLE;
return this;
}
@@ -270,8 +307,9 @@ public final class NumericField extends
* <code>document.add(new NumericField(name, precisionStep).setFloatValue(value))</code>
*/
public NumericField setFloatValue(final float value) {
- numericTS.setFloatValue(value);
+ if (numericTS != null) numericTS.setFloatValue(value);
fieldsData = Float.valueOf(value);
+ type = DataType.FLOAT;
return this;
}
Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/ByteSliceWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/ByteSliceWriter.java?rev=1101062&r1=1101061&r2=1101062&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/ByteSliceWriter.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/ByteSliceWriter.java Mon May 9 15:24:04 2011
@@ -81,6 +81,6 @@ final class ByteSliceWriter extends Data
}
public int getAddress() {
- return upto + (offset0 & DocumentsWriter.BYTE_BLOCK_NOT_MASK);
+ return upto + (offset0 & DocumentsWriterPerThread.BYTE_BLOCK_NOT_MASK);
}
}
\ No newline at end of file
Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/CheckIndex.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/CheckIndex.java?rev=1101062&r1=1101061&r2=1101062&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/CheckIndex.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/CheckIndex.java Mon May 9 15:24:04 2011
@@ -661,10 +661,13 @@ public class CheckIndex {
status.termCount++;
final DocsEnum docs2;
+ final boolean hasPositions;
if (postings != null) {
docs2 = postings;
+ hasPositions = true;
} else {
docs2 = docs;
+ hasPositions = false;
}
int lastDoc = -1;
@@ -736,22 +739,60 @@ public class CheckIndex {
// Test skipping
if (docFreq >= 16) {
- for(int idx=0;idx<7;idx++) {
- final int skipDocID = (int) (((idx+1)*(long) maxDoc)/8);
- docs = terms.docs(delDocs, docs);
- final int docID = docs.advance(skipDocID);
- if (docID == DocsEnum.NO_MORE_DOCS) {
- break;
- } else {
- if (docID < skipDocID) {
- throw new RuntimeException("term " + term + ": advance(docID=" + skipDocID + ") returned docID=" + docID);
- }
- final int nextDocID = docs.nextDoc();
- if (nextDocID == DocsEnum.NO_MORE_DOCS) {
+ if (hasPositions) {
+ for(int idx=0;idx<7;idx++) {
+ final int skipDocID = (int) (((idx+1)*(long) maxDoc)/8);
+ postings = terms.docsAndPositions(delDocs, postings);
+ final int docID = postings.advance(skipDocID);
+ if (docID == DocsEnum.NO_MORE_DOCS) {
break;
+ } else {
+ if (docID < skipDocID) {
+ throw new RuntimeException("term " + term + ": advance(docID=" + skipDocID + ") returned docID=" + docID);
+ }
+ final int freq = postings.freq();
+ if (freq <= 0) {
+ throw new RuntimeException("termFreq " + freq + " is out of bounds");
+ }
+ int lastPosition = -1;
+ for(int posUpto=0;posUpto<freq;posUpto++) {
+ final int pos = postings.nextPosition();
+ if (pos < 0) {
+ throw new RuntimeException("position " + pos + " is out of bounds");
+ }
+ if (pos <= lastPosition) {
+ throw new RuntimeException("position " + pos + " is <= lastPosition " + lastPosition);
+ }
+ lastPosition = pos;
+ }
+
+ final int nextDocID = postings.nextDoc();
+ if (nextDocID == DocsEnum.NO_MORE_DOCS) {
+ break;
+ }
+ if (nextDocID <= docID) {
+ throw new RuntimeException("term " + term + ": advance(docID=" + skipDocID + "), then .next() returned docID=" + nextDocID + " vs prev docID=" + docID);
+ }
}
- if (nextDocID <= docID) {
- throw new RuntimeException("term " + term + ": advance(docID=" + skipDocID + "), then .next() returned docID=" + nextDocID + " vs prev docID=" + docID);
+ }
+ } else {
+ for(int idx=0;idx<7;idx++) {
+ final int skipDocID = (int) (((idx+1)*(long) maxDoc)/8);
+ docs = terms.docs(delDocs, docs);
+ final int docID = docs.advance(skipDocID);
+ if (docID == DocsEnum.NO_MORE_DOCS) {
+ break;
+ } else {
+ if (docID < skipDocID) {
+ throw new RuntimeException("term " + term + ": advance(docID=" + skipDocID + ") returned docID=" + docID);
+ }
+ final int nextDocID = docs.nextDoc();
+ if (nextDocID == DocsEnum.NO_MORE_DOCS) {
+ break;
+ }
+ if (nextDocID <= docID) {
+ throw new RuntimeException("term " + term + ": advance(docID=" + skipDocID + "), then .next() returned docID=" + nextDocID + " vs prev docID=" + docID);
+ }
}
}
}
Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/CompoundFileWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/CompoundFileWriter.java?rev=1101062&r1=1101061&r2=1101062&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/CompoundFileWriter.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/CompoundFileWriter.java Mon May 9 15:24:04 2011
@@ -46,8 +46,10 @@ import org.apache.lucene.util.IOUtils;
* file. The {directory} that follows has that many entries. Each directory entry
* contains a long pointer to the start of this file's data section, and a String
* with that file's name.
+ *
+ * @lucene.internal
*/
-final class CompoundFileWriter {
+public final class CompoundFileWriter {
private static final class FileEntry {
/** source file */
@@ -136,8 +138,7 @@ final class CompoundFileWriter {
/** Merge files with the extensions added up to now.
* All files with these extensions are combined sequentially into the
- * compound stream. After successful merge, the source files
- * are deleted.
+ * compound stream.
* @throws IllegalStateException if close() had been called before or
* if no file has been added to this object
*/
Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/DocConsumer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/DocConsumer.java?rev=1101062&r1=1101061&r2=1101062&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/DocConsumer.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/DocConsumer.java Mon May 9 15:24:04 2011
@@ -18,11 +18,12 @@ package org.apache.lucene.index;
*/
import java.io.IOException;
-import java.util.Collection;
abstract class DocConsumer {
- abstract DocConsumerPerThread addThread(DocumentsWriterThreadState perThread) throws IOException;
- abstract void flush(final Collection<DocConsumerPerThread> threads, final SegmentWriteState state) throws IOException;
+ abstract void processDocument(FieldInfos fieldInfos) throws IOException;
+ abstract void finishDocument() throws IOException;
+ abstract void flush(final SegmentWriteState state) throws IOException;
abstract void abort();
abstract boolean freeRAM();
+ abstract void doAfterFlush();
}
Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/DocFieldConsumer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/DocFieldConsumer.java?rev=1101062&r1=1101061&r2=1101062&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/DocFieldConsumer.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/DocFieldConsumer.java Mon May 9 15:24:04 2011
@@ -18,22 +18,25 @@ package org.apache.lucene.index;
*/
import java.io.IOException;
-import java.util.Collection;
import java.util.Map;
abstract class DocFieldConsumer {
- /** Called when DocumentsWriter decides to create a new
+ /** Called when DocumentsWriterPerThread decides to create a new
* segment */
- abstract void flush(Map<DocFieldConsumerPerThread,Collection<DocFieldConsumerPerField>> threadsAndFields, SegmentWriteState state) throws IOException;
+ abstract void flush(Map<FieldInfo, DocFieldConsumerPerField> fieldsToFlush, SegmentWriteState state) throws IOException;
/** Called when an aborting exception is hit */
abstract void abort();
- /** Add a new thread */
- abstract DocFieldConsumerPerThread addThread(DocFieldProcessorPerThread docFieldProcessorPerThread) throws IOException;
-
- /** Called when DocumentsWriter is using too much RAM.
+ /** Called when DocumentsWriterPerThread is using too much RAM.
* The consumer should free RAM, if possible, returning
* true if any RAM was in fact freed. */
abstract boolean freeRAM();
- }
+
+ abstract void startDocument() throws IOException;
+
+ abstract DocFieldConsumerPerField addField(FieldInfo fi);
+
+ abstract void finishDocument() throws IOException;
+
+}
Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/DocFieldConsumerPerField.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/DocFieldConsumerPerField.java?rev=1101062&r1=1101061&r2=1101062&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/DocFieldConsumerPerField.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/DocFieldConsumerPerField.java Mon May 9 15:24:04 2011
@@ -24,4 +24,5 @@ abstract class DocFieldConsumerPerField
/** Processes all occurrences of a single field */
abstract void processFields(Fieldable[] fields, int count) throws IOException;
abstract void abort();
+ abstract FieldInfo getFieldInfo();
}
Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/DocFieldProcessor.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/DocFieldProcessor.java?rev=1101062&r1=1101061&r2=1101062&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/DocFieldProcessor.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/DocFieldProcessor.java Mon May 9 15:24:04 2011
@@ -19,8 +19,15 @@ package org.apache.lucene.index;
import java.io.IOException;
import java.util.Collection;
-import java.util.Map;
+import java.util.Comparator;
import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Fieldable;
+import org.apache.lucene.util.ArrayUtil;
/**
@@ -33,26 +40,39 @@ import java.util.HashMap;
final class DocFieldProcessor extends DocConsumer {
- final DocumentsWriter docWriter;
final DocFieldConsumer consumer;
final StoredFieldsWriter fieldsWriter;
- public DocFieldProcessor(DocumentsWriter docWriter, DocFieldConsumer consumer) {
- this.docWriter = docWriter;
+ // Holds all fields seen in current doc
+ DocFieldProcessorPerField[] fields = new DocFieldProcessorPerField[1];
+ int fieldCount;
+
+ // Hash table for all fields ever seen
+ DocFieldProcessorPerField[] fieldHash = new DocFieldProcessorPerField[2];
+ int hashMask = 1;
+ int totalFieldCount;
+
+ float docBoost;
+ int fieldGen;
+ final DocumentsWriterPerThread.DocState docState;
+
+ public DocFieldProcessor(DocumentsWriterPerThread docWriter, DocFieldConsumer consumer) {
+ this.docState = docWriter.docState;
this.consumer = consumer;
fieldsWriter = new StoredFieldsWriter(docWriter);
}
@Override
- public void flush(Collection<DocConsumerPerThread> threads, SegmentWriteState state) throws IOException {
+ public void flush(SegmentWriteState state) throws IOException {
- Map<DocFieldConsumerPerThread, Collection<DocFieldConsumerPerField>> childThreadsAndFields = new HashMap<DocFieldConsumerPerThread, Collection<DocFieldConsumerPerField>>();
- for ( DocConsumerPerThread thread : threads) {
- DocFieldProcessorPerThread perThread = (DocFieldProcessorPerThread) thread;
- childThreadsAndFields.put(perThread.consumer, perThread.fields());
+ Map<FieldInfo, DocFieldConsumerPerField> childFields = new HashMap<FieldInfo, DocFieldConsumerPerField>();
+ Collection<DocFieldConsumerPerField> fields = fields();
+ for (DocFieldConsumerPerField f : fields) {
+ childFields.put(f.getFieldInfo(), f);
}
+
fieldsWriter.flush(state);
- consumer.flush(childThreadsAndFields, state);
+ consumer.flush(childFields, state);
// Important to save after asking consumer to flush so
// consumer can alter the FieldInfo* if necessary. EG,
@@ -64,8 +84,20 @@ final class DocFieldProcessor extends Do
@Override
public void abort() {
- fieldsWriter.abort();
- consumer.abort();
+ for(int i=0;i<fieldHash.length;i++) {
+ DocFieldProcessorPerField field = fieldHash[i];
+ while(field != null) {
+ final DocFieldProcessorPerField next = field.next;
+ field.abort();
+ field = next;
+ }
+ }
+
+ try {
+ fieldsWriter.abort();
+ } finally {
+ consumer.abort();
+ }
}
@Override
@@ -73,8 +105,159 @@ final class DocFieldProcessor extends Do
return consumer.freeRAM();
}
+ public Collection<DocFieldConsumerPerField> fields() {
+ Collection<DocFieldConsumerPerField> fields = new HashSet<DocFieldConsumerPerField>();
+ for(int i=0;i<fieldHash.length;i++) {
+ DocFieldProcessorPerField field = fieldHash[i];
+ while(field != null) {
+ fields.add(field.consumer);
+ field = field.next;
+ }
+ }
+ assert fields.size() == totalFieldCount;
+ return fields;
+ }
+
+ /** In flush we reset the fieldHash to not maintain per-field state
+ * across segments */
@Override
- public DocConsumerPerThread addThread(DocumentsWriterThreadState threadState) throws IOException {
- return new DocFieldProcessorPerThread(threadState, this);
+ void doAfterFlush() {
+ fieldHash = new DocFieldProcessorPerField[2];
+ hashMask = 1;
+ totalFieldCount = 0;
+ }
+
+ private void rehash() {
+ final int newHashSize = (fieldHash.length*2);
+ assert newHashSize > fieldHash.length;
+
+ final DocFieldProcessorPerField newHashArray[] = new DocFieldProcessorPerField[newHashSize];
+
+ // Rehash
+ int newHashMask = newHashSize-1;
+ for(int j=0;j<fieldHash.length;j++) {
+ DocFieldProcessorPerField fp0 = fieldHash[j];
+ while(fp0 != null) {
+ final int hashPos2 = fp0.fieldInfo.name.hashCode() & newHashMask;
+ DocFieldProcessorPerField nextFP0 = fp0.next;
+ fp0.next = newHashArray[hashPos2];
+ newHashArray[hashPos2] = fp0;
+ fp0 = nextFP0;
+ }
+ }
+
+ fieldHash = newHashArray;
+ hashMask = newHashMask;
}
+
+ @Override
+ public void processDocument(FieldInfos fieldInfos) throws IOException {
+
+ consumer.startDocument();
+ fieldsWriter.startDocument();
+
+ final Document doc = docState.doc;
+
+ fieldCount = 0;
+
+ final int thisFieldGen = fieldGen++;
+
+ final List<Fieldable> docFields = doc.getFields();
+ final int numDocFields = docFields.size();
+
+ // Absorb any new fields first seen in this document.
+ // Also absorb any changes to fields we had already
+ // seen before (eg suddenly turning on norms or
+ // vectors, etc.):
+
+ for(int i=0;i<numDocFields;i++) {
+ Fieldable field = docFields.get(i);
+ final String fieldName = field.name();
+
+ // Make sure we have a PerField allocated
+ final int hashPos = fieldName.hashCode() & hashMask;
+ DocFieldProcessorPerField fp = fieldHash[hashPos];
+ while(fp != null && !fp.fieldInfo.name.equals(fieldName)) {
+ fp = fp.next;
+ }
+
+ if (fp == null) {
+
+ // TODO FI: we need to genericize the "flags" that a
+ // field holds, and, how these flags are merged; it
+ // needs to be more "pluggable" such that if I want
+ // to have a new "thing" my Fields can do, I can
+ // easily add it
+ FieldInfo fi = fieldInfos.addOrUpdate(fieldName, field.isIndexed(), field.isTermVectorStored(),
+ field.isStorePositionWithTermVector(), field.isStoreOffsetWithTermVector(),
+ field.getOmitNorms(), false, field.getOmitTermFreqAndPositions());
+
+ fp = new DocFieldProcessorPerField(this, fi);
+ fp.next = fieldHash[hashPos];
+ fieldHash[hashPos] = fp;
+ totalFieldCount++;
+
+ if (totalFieldCount >= fieldHash.length/2)
+ rehash();
+ } else {
+ fieldInfos.addOrUpdate(fp.fieldInfo.name, field.isIndexed(), field.isTermVectorStored(),
+ field.isStorePositionWithTermVector(), field.isStoreOffsetWithTermVector(),
+ field.getOmitNorms(), false, field.getOmitTermFreqAndPositions());
+ }
+
+ if (thisFieldGen != fp.lastGen) {
+
+ // First time we're seeing this field for this doc
+ fp.fieldCount = 0;
+
+ if (fieldCount == fields.length) {
+ final int newSize = fields.length*2;
+ DocFieldProcessorPerField newArray[] = new DocFieldProcessorPerField[newSize];
+ System.arraycopy(fields, 0, newArray, 0, fieldCount);
+ fields = newArray;
+ }
+
+ fields[fieldCount++] = fp;
+ fp.lastGen = thisFieldGen;
+ }
+
+ fp.addField(field);
+
+ if (field.isStored()) {
+ fieldsWriter.addField(field, fp.fieldInfo);
+ }
+ }
+
+ // If we are writing vectors then we must visit
+ // fields in sorted order so they are written in
+ // sorted order. TODO: we actually only need to
+ // sort the subset of fields that have vectors
+ // enabled; we could save [small amount of] CPU
+ // here.
+ ArrayUtil.quickSort(fields, 0, fieldCount, fieldsComp);
+
+ for(int i=0;i<fieldCount;i++)
+ fields[i].consumer.processFields(fields[i].fields, fields[i].fieldCount);
+
+ if (docState.maxTermPrefix != null && docState.infoStream != null) {
+ docState.infoStream.println("WARNING: document contains at least one immense term (whose UTF8 encoding is longer than the max length " + DocumentsWriterPerThread.MAX_TERM_LENGTH_UTF8 + "), all of which were skipped. Please correct the analyzer to not produce such terms. The prefix of the first immense term is: '" + docState.maxTermPrefix + "...'");
+ docState.maxTermPrefix = null;
+ }
+ }
+
+ private static final Comparator<DocFieldProcessorPerField> fieldsComp = new Comparator<DocFieldProcessorPerField>() {
+ public int compare(DocFieldProcessorPerField o1, DocFieldProcessorPerField o2) {
+ return o1.fieldInfo.name.compareTo(o2.fieldInfo.name);
+ }
+ };
+
+ @Override
+ void finishDocument() throws IOException {
+ try {
+ fieldsWriter.finishDocument();
+ } finally {
+ consumer.finishDocument();
+ }
+ }
+
}
Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/DocFieldProcessorPerField.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/DocFieldProcessorPerField.java?rev=1101062&r1=1101061&r2=1101062&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/DocFieldProcessorPerField.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/DocFieldProcessorPerField.java Mon May 9 15:24:04 2011
@@ -18,6 +18,8 @@ package org.apache.lucene.index;
*/
import org.apache.lucene.document.Fieldable;
+import org.apache.lucene.util.ArrayUtil;
+import org.apache.lucene.util.RamUsageEstimator;
/**
* Holds all per thread, per field state.
@@ -34,11 +36,22 @@ final class DocFieldProcessorPerField {
int fieldCount;
Fieldable[] fields = new Fieldable[1];
- public DocFieldProcessorPerField(final DocFieldProcessorPerThread perThread, final FieldInfo fieldInfo) {
- this.consumer = perThread.consumer.addField(fieldInfo);
+ public DocFieldProcessorPerField(final DocFieldProcessor docFieldProcessor, final FieldInfo fieldInfo) {
+ this.consumer = docFieldProcessor.consumer.addField(fieldInfo);
this.fieldInfo = fieldInfo;
}
+ public void addField(Fieldable field) {
+ if (fieldCount == fields.length) {
+ int newSize = ArrayUtil.oversize(fieldCount + 1, RamUsageEstimator.NUM_BYTES_OBJECT_REF);
+ Fieldable[] newArray = new Fieldable[newSize];
+ System.arraycopy(fields, 0, newArray, 0, fieldCount);
+ fields = newArray;
+ }
+
+ fields[fieldCount++] = field;
+ }
+
public void abort() {
consumer.abort();
}
Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/DocInverter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/DocInverter.java?rev=1101062&r1=1101061&r2=1101062&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/DocInverter.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/DocInverter.java Mon May 9 15:24:04 2011
@@ -18,12 +18,13 @@ package org.apache.lucene.index;
*/
import java.io.IOException;
-import java.util.Collection;
import java.util.HashMap;
-import java.util.HashSet;
-
import java.util.Map;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
+import org.apache.lucene.util.AttributeSource;
+
/** This is a DocFieldConsumer that inverts each field,
* separately, from a Document, and accepts a
@@ -34,42 +35,72 @@ final class DocInverter extends DocField
final InvertedDocConsumer consumer;
final InvertedDocEndConsumer endConsumer;
- public DocInverter(InvertedDocConsumer consumer, InvertedDocEndConsumer endConsumer) {
+ final DocumentsWriterPerThread.DocState docState;
+
+ final FieldInvertState fieldState = new FieldInvertState();
+
+ final SingleTokenAttributeSource singleToken = new SingleTokenAttributeSource();
+
+ static class SingleTokenAttributeSource extends AttributeSource {
+ final CharTermAttribute termAttribute;
+ final OffsetAttribute offsetAttribute;
+
+ private SingleTokenAttributeSource() {
+ termAttribute = addAttribute(CharTermAttribute.class);
+ offsetAttribute = addAttribute(OffsetAttribute.class);
+ }
+
+ public void reinit(String stringValue, int startOffset, int endOffset) {
+ termAttribute.setEmpty().append(stringValue);
+ offsetAttribute.setOffset(startOffset, endOffset);
+ }
+ }
+
+ // Used to read a string value for a field
+ final ReusableStringReader stringReader = new ReusableStringReader();
+
+ public DocInverter(DocumentsWriterPerThread.DocState docState, InvertedDocConsumer consumer, InvertedDocEndConsumer endConsumer) {
+ this.docState = docState;
this.consumer = consumer;
this.endConsumer = endConsumer;
}
@Override
- void flush(Map<DocFieldConsumerPerThread, Collection<DocFieldConsumerPerField>> threadsAndFields, SegmentWriteState state) throws IOException {
-
- Map<InvertedDocConsumerPerThread,Collection<InvertedDocConsumerPerField>> childThreadsAndFields = new HashMap<InvertedDocConsumerPerThread,Collection<InvertedDocConsumerPerField>>();
- Map<InvertedDocEndConsumerPerThread,Collection<InvertedDocEndConsumerPerField>> endChildThreadsAndFields = new HashMap<InvertedDocEndConsumerPerThread,Collection<InvertedDocEndConsumerPerField>>();
+ void flush(Map<FieldInfo, DocFieldConsumerPerField> fieldsToFlush, SegmentWriteState state) throws IOException {
- for (Map.Entry<DocFieldConsumerPerThread,Collection<DocFieldConsumerPerField>> entry : threadsAndFields.entrySet() ) {
+ Map<FieldInfo, InvertedDocConsumerPerField> childFieldsToFlush = new HashMap<FieldInfo, InvertedDocConsumerPerField>();
+ Map<FieldInfo, InvertedDocEndConsumerPerField> endChildFieldsToFlush = new HashMap<FieldInfo, InvertedDocEndConsumerPerField>();
+ for (Map.Entry<FieldInfo, DocFieldConsumerPerField> fieldToFlush : fieldsToFlush.entrySet()) {
+ DocInverterPerField perField = (DocInverterPerField) fieldToFlush.getValue();
+ childFieldsToFlush.put(fieldToFlush.getKey(), perField.consumer);
+ endChildFieldsToFlush.put(fieldToFlush.getKey(), perField.endConsumer);
+ }
- DocInverterPerThread perThread = (DocInverterPerThread) entry.getKey();
+ consumer.flush(childFieldsToFlush, state);
+ endConsumer.flush(endChildFieldsToFlush, state);
+ }
- Collection<InvertedDocConsumerPerField> childFields = new HashSet<InvertedDocConsumerPerField>();
- Collection<InvertedDocEndConsumerPerField> endChildFields = new HashSet<InvertedDocEndConsumerPerField>();
- for (final DocFieldConsumerPerField field: entry.getValue() ) {
- DocInverterPerField perField = (DocInverterPerField) field;
- childFields.add(perField.consumer);
- endChildFields.add(perField.endConsumer);
- }
+ @Override
+ public void startDocument() throws IOException {
+ consumer.startDocument();
+ endConsumer.startDocument();
+ }
- childThreadsAndFields.put(perThread.consumer, childFields);
- endChildThreadsAndFields.put(perThread.endConsumer, endChildFields);
- }
-
- consumer.flush(childThreadsAndFields, state);
- endConsumer.flush(endChildThreadsAndFields, state);
+ public void finishDocument() throws IOException {
+ // TODO: allow endConsumer.finishDocument to also return
+ // a DocWriter
+ endConsumer.finishDocument();
+ consumer.finishDocument();
}
@Override
void abort() {
- consumer.abort();
- endConsumer.abort();
+ try {
+ consumer.abort();
+ } finally {
+ endConsumer.abort();
+ }
}
@Override
@@ -78,7 +109,8 @@ final class DocInverter extends DocField
}
@Override
- public DocFieldConsumerPerThread addThread(DocFieldProcessorPerThread docFieldProcessorPerThread) {
- return new DocInverterPerThread(docFieldProcessorPerThread, this);
+ public DocFieldConsumerPerField addField(FieldInfo fi) {
+ return new DocInverterPerField(this, fi);
}
+
}
Modified: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/DocInverterPerField.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/DocInverterPerField.java?rev=1101062&r1=1101061&r2=1101062&view=diff
==============================================================================
--- lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/DocInverterPerField.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/DocInverterPerField.java Mon May 9 15:24:04 2011
@@ -35,20 +35,20 @@ import org.apache.lucene.analysis.tokena
final class DocInverterPerField extends DocFieldConsumerPerField {
- final private DocInverterPerThread perThread;
- final private FieldInfo fieldInfo;
+ final private DocInverter parent;
+ final FieldInfo fieldInfo;
final InvertedDocConsumerPerField consumer;
final InvertedDocEndConsumerPerField endConsumer;
- final DocumentsWriter.DocState docState;
+ final DocumentsWriterPerThread.DocState docState;
final FieldInvertState fieldState;
- public DocInverterPerField(DocInverterPerThread perThread, FieldInfo fieldInfo) {
- this.perThread = perThread;
+ public DocInverterPerField(DocInverter parent, FieldInfo fieldInfo) {
+ this.parent = parent;
this.fieldInfo = fieldInfo;
- docState = perThread.docState;
- fieldState = perThread.fieldState;
- this.consumer = perThread.consumer.addField(this, fieldInfo);
- this.endConsumer = perThread.endConsumer.addField(this, fieldInfo);
+ docState = parent.docState;
+ fieldState = parent.fieldState;
+ this.consumer = parent.consumer.addField(this, fieldInfo);
+ this.endConsumer = parent.endConsumer.addField(this, fieldInfo);
}
@Override
@@ -80,8 +80,8 @@ final class DocInverterPerField extends
if (!field.isTokenized()) { // un-tokenized field
String stringValue = field.stringValue();
final int valueLength = stringValue.length();
- perThread.singleToken.reinit(stringValue, 0, valueLength);
- fieldState.attributeSource = perThread.singleToken;
+ parent.singleToken.reinit(stringValue, 0, valueLength);
+ fieldState.attributeSource = parent.singleToken;
consumer.start(field);
boolean success = false;
@@ -89,8 +89,9 @@ final class DocInverterPerField extends
consumer.add();
success = true;
} finally {
- if (!success)
+ if (!success) {
docState.docWriter.setAborting();
+ }
}
fieldState.offset += valueLength;
fieldState.length++;
@@ -114,8 +115,8 @@ final class DocInverterPerField extends
if (stringValue == null) {
throw new IllegalArgumentException("field must have either TokenStream, String or Reader value");
}
- perThread.stringReader.init(stringValue);
- reader = perThread.stringReader;
+ parent.stringReader.init(stringValue);
+ reader = parent.stringReader;
}
// Tokenize field and add to postingTable
@@ -166,8 +167,9 @@ final class DocInverterPerField extends
consumer.add();
success = true;
} finally {
- if (!success)
+ if (!success) {
docState.docWriter.setAborting();
+ }
}
fieldState.length++;
fieldState.position++;
@@ -195,4 +197,9 @@ final class DocInverterPerField extends
consumer.finish();
endConsumer.finish();
}
+
+ @Override
+ FieldInfo getFieldInfo() {
+ return fieldInfo;
+ }
}
Copied: lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/DocTermOrds.java (from r1088049, lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/DocTermOrds.java)
URL: http://svn.apache.org/viewvc/lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/DocTermOrds.java?p2=lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/DocTermOrds.java&p1=lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/DocTermOrds.java&r1=1088049&r2=1101062&rev=1101062&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/DocTermOrds.java (original)
+++ lucene/dev/branches/bulkpostings/lucene/src/java/org/apache/lucene/index/DocTermOrds.java Mon May 9 15:24:04 2011
@@ -17,6 +17,7 @@
package org.apache.lucene.index;
+import org.apache.lucene.index.BulkPostingsEnum.BlockReader;
import org.apache.lucene.util.PagedBytes;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.Bits;
@@ -270,7 +271,7 @@ public class DocTermOrds {
// frequent terms ahead of time.
int termNum = 0;
- DocsEnum docsEnum = null;
+ BulkPostingsEnum bulkEnum = null;
// Loop begins with te positioned to first term (we call
// seek above):
@@ -310,25 +311,32 @@ public class DocTermOrds {
final int df = te.docFreq();
if (df <= maxTermDocFreq) {
- docsEnum = te.docs(delDocs, docsEnum);
-
- final DocsEnum.BulkReadResult bulkResult = docsEnum.getBulkResult();
-
+ // nocommit: please review this bulk impl, might not be the best
+ bulkEnum = te.bulkPostings(bulkEnum, false, false);
+ BlockReader blockReader = bulkEnum.getDocDeltasReader();
+ int buffer[] = blockReader.getBuffer();
+ int offset = blockReader.offset();
+ int end = blockReader.end();
+
+ int doc = 0;
+ int remaining = df;
// dF, but takes deletions into account
int actualDF = 0;
- for (;;) {
- int chunk = docsEnum.read();
- if (chunk <= 0) {
- break;
- }
- //System.out.println(" chunk=" + chunk + " docs");
-
- actualDF += chunk;
-
- for (int i=0; i<chunk; i++) {
+ while (remaining > 0) {
+ int chunk = Math.min(end - offset, remaining);
+ remaining -= chunk;
+
+ final int validEnd = offset + chunk;
+ for (int i=offset; i<validEnd; i++) {
+ doc += buffer[i];
+
+ if (delDocs != null && delDocs.get(doc)) {
+ continue; // deleted document
+ }
+ actualDF++;
termInstances++;
- int doc = bulkResult.docs.ints[i];
+
//System.out.println(" docID=" + doc);
// add TNUM_OFFSET to the term number to make room for special reserved values:
// 0 (end term) and 1 (index into byte array follows)
@@ -398,6 +406,10 @@ public class DocTermOrds {
}
}
}
+ if (remaining > 0) {
+ offset = 0;
+ end = blockReader.fill();
+ }
}
setActualDocFreq(termNum, actualDF);
}