You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by si...@apache.org on 2011/05/13 13:18:25 UTC
svn commit: r1102677 [2/6] - in /lucene/dev/branches/docvalues: ./
dev-tools/eclipse/ dev-tools/maven/
dev-tools/maven/solr/contrib/dataimporthandler/src/extras/
dev-tools/maven/solr/src/ dev-tools/maven/solr/src/solrj/
dev-tools/scripts/ lucene/ lucen...
Modified: lucene/dev/branches/docvalues/lucene/docs/fileformats.html
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/docs/fileformats.html?rev=1102677&r1=1102676&r2=1102677&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/docs/fileformats.html (original)
+++ lucene/dev/branches/docvalues/lucene/docs/fileformats.html Fri May 13 11:18:19 2011
@@ -3,7 +3,7 @@
<head>
<META http-equiv="Content-Type" content="text/html; charset=UTF-8">
<meta content="Apache Forrest" name="Generator">
-<meta name="Forrest-version" content="0.8">
+<meta name="Forrest-version" content="0.9">
<meta name="Forrest-skin-name" content="lucene">
<title>
Apache Lucene - Index File Formats
@@ -425,11 +425,19 @@ document.write("Last Published: " + docu
<p>
In version 3.1, segments records the code version
that created them. See LUCENE-2720 for details.
+
+ Additionally segments track explicitly whether or
+ not they have term vectors. See LUCENE-2811 for details.
+ </p>
+<p>
+ In version 3.2, numeric fields are written natively
+ to the stored fields file; previously they were stored in
+ text format only.
</p>
</div>
-<a name="N10037"></a><a name="Definitions"></a>
+<a name="N1003A"></a><a name="Definitions"></a>
<h2 class="boxed">Definitions</h2>
<div class="section">
<p>
@@ -470,7 +478,7 @@ document.write("Last Published: " + docu
strings, the first naming the field, and the second naming text
within the field.
</p>
-<a name="N10057"></a><a name="Inverted Indexing"></a>
+<a name="N1005A"></a><a name="Inverted Indexing"></a>
<h3 class="boxed">Inverted Indexing</h3>
<p>
The index stores statistics about terms in order
@@ -480,7 +488,7 @@ document.write("Last Published: " + docu
it. This is the inverse of the natural relationship, in which
documents list terms.
</p>
-<a name="N10063"></a><a name="Types of Fields"></a>
+<a name="N10066"></a><a name="Types of Fields"></a>
<h3 class="boxed">Types of Fields</h3>
<p>
In Lucene, fields may be <i>stored</i>, in which
@@ -494,7 +502,7 @@ document.write("Last Published: " + docu
to be indexed literally.
</p>
<p>See the <a href="api/core/org/apache/lucene/document/Field.html">Field</a> java docs for more information on Fields.</p>
-<a name="N10080"></a><a name="Segments"></a>
+<a name="N10083"></a><a name="Segments"></a>
<h3 class="boxed">Segments</h3>
<p>
Lucene indexes may be composed of multiple sub-indexes, or
@@ -520,7 +528,7 @@ document.write("Last Published: " + docu
Searches may involve multiple segments and/or multiple indexes, each
index potentially composed of a set of segments.
</p>
-<a name="N1009E"></a><a name="Document Numbers"></a>
+<a name="N100A1"></a><a name="Document Numbers"></a>
<h3 class="boxed">Document Numbers</h3>
<p>
Internally, Lucene refers to documents by an integer <i>document
@@ -575,7 +583,7 @@ document.write("Last Published: " + docu
</div>
-<a name="N100C5"></a><a name="Overview"></a>
+<a name="N100C8"></a><a name="Overview"></a>
<h2 class="boxed">Overview</h2>
<div class="section">
<p>
@@ -674,7 +682,7 @@ document.write("Last Published: " + docu
</div>
-<a name="N10108"></a><a name="File Naming"></a>
+<a name="N1010B"></a><a name="File Naming"></a>
<h2 class="boxed">File Naming</h2>
<div class="section">
<p>
@@ -701,7 +709,7 @@ document.write("Last Published: " + docu
</p>
</div>
-<a name="N10117"></a><a name="file-names"></a>
+<a name="N1011A"></a><a name="file-names"></a>
<h2 class="boxed">Summary of File Extensions</h2>
<div class="section">
<p>The following table summarizes the names and extensions of the files in Lucene:
@@ -843,10 +851,10 @@ document.write("Last Published: " + docu
</div>
-<a name="N10201"></a><a name="Primitive Types"></a>
+<a name="N10204"></a><a name="Primitive Types"></a>
<h2 class="boxed">Primitive Types</h2>
<div class="section">
-<a name="N10206"></a><a name="Byte"></a>
+<a name="N10209"></a><a name="Byte"></a>
<h3 class="boxed">Byte</h3>
<p>
The most primitive type
@@ -854,7 +862,7 @@ document.write("Last Published: " + docu
other data types are defined as sequences
of bytes, so file formats are byte-order independent.
</p>
-<a name="N1020F"></a><a name="UInt32"></a>
+<a name="N10212"></a><a name="UInt32"></a>
<h3 class="boxed">UInt32</h3>
<p>
32-bit unsigned integers are written as four
@@ -864,7 +872,7 @@ document.write("Last Published: " + docu
UInt32 --> <Byte><sup>4</sup>
</p>
-<a name="N1021E"></a><a name="Uint64"></a>
+<a name="N10221"></a><a name="Uint64"></a>
<h3 class="boxed">Uint64</h3>
<p>
64-bit unsigned integers are written as eight
@@ -873,7 +881,7 @@ document.write("Last Published: " + docu
<p>UInt64 --> <Byte><sup>8</sup>
</p>
-<a name="N1022D"></a><a name="VInt"></a>
+<a name="N10230"></a><a name="VInt"></a>
<h3 class="boxed">VInt</h3>
<p>
A variable-length format for positive integers is
@@ -1423,13 +1431,13 @@ document.write("Last Published: " + docu
This provides compression while still being
efficient to decode.
</p>
-<a name="N10512"></a><a name="Chars"></a>
+<a name="N10515"></a><a name="Chars"></a>
<h3 class="boxed">Chars</h3>
<p>
Lucene writes unicode
character sequences as UTF-8 encoded bytes.
</p>
-<a name="N1051B"></a><a name="String"></a>
+<a name="N1051E"></a><a name="String"></a>
<h3 class="boxed">String</h3>
<p>
Lucene writes strings as UTF-8 encoded bytes.
@@ -1442,10 +1450,10 @@ document.write("Last Published: " + docu
</div>
-<a name="N10528"></a><a name="Compound Types"></a>
+<a name="N1052B"></a><a name="Compound Types"></a>
<h2 class="boxed">Compound Types</h2>
<div class="section">
-<a name="N1052D"></a><a name="MapStringString"></a>
+<a name="N10530"></a><a name="MapStringString"></a>
<h3 class="boxed">Map<String,String></h3>
<p>
In a couple places Lucene stores a Map
@@ -1458,13 +1466,13 @@ document.write("Last Published: " + docu
</div>
-<a name="N1053D"></a><a name="Per-Index Files"></a>
+<a name="N10540"></a><a name="Per-Index Files"></a>
<h2 class="boxed">Per-Index Files</h2>
<div class="section">
<p>
The files in this section exist one-per-index.
</p>
-<a name="N10545"></a><a name="Segments File"></a>
+<a name="N10548"></a><a name="Segments File"></a>
<h3 class="boxed">Segments File</h3>
<p>
The active segments in the index are stored in the
@@ -1508,7 +1516,7 @@ document.write("Last Published: " + docu
<b>3.1</b>
Segments --> Format, Version, NameCounter, SegCount, <SegVersion, SegName, SegSize, DelGen, DocStoreOffset, [DocStoreSegment, DocStoreIsCompoundFile], HasSingleNormFile, NumField,
NormGen<sup>NumField</sup>,
- IsCompoundFile, DeletionCount, HasProx, Diagnostics><sup>SegCount</sup>, CommitUserData, Checksum
+ IsCompoundFile, DeletionCount, HasProx, Diagnostics, HasVectors><sup>SegCount</sup>, CommitUserData, Checksum
</p>
<p>
Format, NameCounter, SegCount, SegSize, NumField,
@@ -1525,7 +1533,7 @@ document.write("Last Published: " + docu
</p>
<p>
IsCompoundFile, HasSingleNormFile,
- DocStoreIsCompoundFile, HasProx --> Int8
+ DocStoreIsCompoundFile, HasProx, HasVectors --> Int8
</p>
<p>
CommitUserData --> Map<String,String>
@@ -1634,7 +1642,10 @@ document.write("Last Published: " + docu
Lucene version, OS, Java version, why the segment
was created (merge, flush, addIndexes), etc.
</p>
-<a name="N105CD"></a><a name="Lock File"></a>
+<p> HasVectors is 1 if this segment stores term vectors,
+ else it's 0.
+ </p>
+<a name="N105D3"></a><a name="Lock File"></a>
<h3 class="boxed">Lock File</h3>
<p>
The write lock, which is stored in the index
@@ -1648,14 +1659,14 @@ document.write("Last Published: " + docu
documents). This lock file ensures that only one
writer is modifying the index at a time.
</p>
-<a name="N105D6"></a><a name="Deletable File"></a>
+<a name="N105DC"></a><a name="Deletable File"></a>
<h3 class="boxed">Deletable File</h3>
<p>
A writer dynamically computes
the files that are deletable, instead, so no file
is written.
</p>
-<a name="N105DF"></a><a name="Compound Files"></a>
+<a name="N105E5"></a><a name="Compound Files"></a>
<h3 class="boxed">Compound Files</h3>
<p>Starting with Lucene 1.4 the compound file format became default. This
is simply a container for all files described in the next section
@@ -1682,14 +1693,14 @@ document.write("Last Published: " + docu
</div>
-<a name="N10607"></a><a name="Per-Segment Files"></a>
+<a name="N1060D"></a><a name="Per-Segment Files"></a>
<h2 class="boxed">Per-Segment Files</h2>
<div class="section">
<p>
The remaining files are all per-segment, and are
thus defined by suffix.
</p>
-<a name="N1060F"></a><a name="Fields"></a>
+<a name="N10615"></a><a name="Fields"></a>
<h3 class="boxed">Fields</h3>
<p>
@@ -1862,13 +1873,29 @@ document.write("Last Published: " + docu
<li>third bit is one for fields with compression option enabled
(if compression is enabled, the algorithm used is ZLIB),
only available for indexes until Lucene version 2.9.x</li>
+
+<li>4th to 6th bits (mask: 0x7<<3) define the type of a
+ numeric field: <ul>
+
+<li>all bits in mask are cleared if no numeric field at all</li>
+
+<li>1<<3: Value is Int</li>
+
+<li>2<<3: Value is Long</li>
+
+<li>3<<3: Value is Int as Float (as of Integer.intBitsToFloat)</li>
+
+<li>4<<3: Value is Long as Double (as of Double.longBitsToDouble)</li>
+
+</ul>
+</li>
</ul>
</p>
<p>Value -->
- String | BinaryValue (depending on Bits)
+ String | BinaryValue | Int | Long (depending on Bits)
</p>
<p>BinaryValue -->
@@ -1883,7 +1910,7 @@ document.write("Last Published: " + docu
</li>
</ol>
-<a name="N106B6"></a><a name="Term Dictionary"></a>
+<a name="N106D0"></a><a name="Term Dictionary"></a>
<h3 class="boxed">Term Dictionary</h3>
<p>
The term dictionary is represented as two files:
@@ -2075,7 +2102,7 @@ document.write("Last Published: " + docu
</li>
</ol>
-<a name="N1073A"></a><a name="Frequencies"></a>
+<a name="N10754"></a><a name="Frequencies"></a>
<h3 class="boxed">Frequencies</h3>
<p>
The .frq file contains the lists of documents
@@ -2203,7 +2230,7 @@ document.write("Last Published: " + docu
entry in level-1. In the example has entry 15 on level 1 a pointer to entry 15 on level 0 and entry 31 on level 1 a pointer
to entry 31 on level 0.
</p>
-<a name="N107C2"></a><a name="Positions"></a>
+<a name="N107DC"></a><a name="Positions"></a>
<h3 class="boxed">Positions</h3>
<p>
The .prx file contains the lists of positions that
@@ -2273,7 +2300,7 @@ document.write("Last Published: " + docu
Payload. If PayloadLength is not stored, then this Payload has the same
length as the Payload at the previous position.
</p>
-<a name="N107FE"></a><a name="Normalization Factors"></a>
+<a name="N10818"></a><a name="Normalization Factors"></a>
<h3 class="boxed">Normalization Factors</h3>
<p>There's a single .nrm file containing all norms:
</p>
@@ -2353,7 +2380,7 @@ document.write("Last Published: " + docu
</p>
<p>Separate norm files are created (when adequate) for both compound and non compound segments.
</p>
-<a name="N1084F"></a><a name="Term Vectors"></a>
+<a name="N10869"></a><a name="Term Vectors"></a>
<h3 class="boxed">Term Vectors</h3>
<p>
Term Vector support is optional on a field by
@@ -2489,7 +2516,7 @@ document.write("Last Published: " + docu
</li>
</ol>
-<a name="N108EB"></a><a name="Deleted Documents"></a>
+<a name="N10905"></a><a name="Deleted Documents"></a>
<h3 class="boxed">Deleted Documents</h3>
<p>The .del file is
optional, and only exists when a segment contains deletions.
@@ -2553,7 +2580,7 @@ document.write("Last Published: " + docu
</div>
-<a name="N10925"></a><a name="Limitations"></a>
+<a name="N1093F"></a><a name="Limitations"></a>
<h2 class="boxed">Limitations</h2>
<div class="section">
<p>
Modified: lucene/dev/branches/docvalues/lucene/docs/fileformats.pdf
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/docs/fileformats.pdf?rev=1102677&r1=1102676&r2=1102677&view=diff
==============================================================================
Files lucene/dev/branches/docvalues/lucene/docs/fileformats.pdf (original) and lucene/dev/branches/docvalues/lucene/docs/fileformats.pdf Fri May 13 11:18:19 2011 differ
Modified: lucene/dev/branches/docvalues/lucene/docs/gettingstarted.html
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/docs/gettingstarted.html?rev=1102677&r1=1102676&r2=1102677&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/docs/gettingstarted.html (original)
+++ lucene/dev/branches/docvalues/lucene/docs/gettingstarted.html Fri May 13 11:18:19 2011
@@ -3,7 +3,7 @@
<head>
<META http-equiv="Content-Type" content="text/html; charset=UTF-8">
<meta content="Apache Forrest" name="Generator">
-<meta name="Forrest-version" content="0.8">
+<meta name="Forrest-version" content="0.9">
<meta name="Forrest-skin-name" content="lucene">
<title>
Apache Lucene - Getting Started Guide
@@ -268,15 +268,13 @@ may wish to skip sections.
<li>
<a href="demo.html">About the command-line Lucene demo and its usage</a>. This section
- is intended for anyone who wants to use the command-line Lucene demo.</li>
-<p></p>
+ is intended for anyone who wants to use the command-line Lucene demo.</li>
<li>
<a href="demo2.html">About the sources and implementation for the command-line Lucene
demo</a>. This section walks through the implementation details (sources) of the
- command-line Lucene demo. This section is intended for developers.</li>
-<p></p>
+ command-line Lucene demo. This section is intended for developers.</li>
</ul>
</div>
Modified: lucene/dev/branches/docvalues/lucene/docs/gettingstarted.pdf
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/docs/gettingstarted.pdf?rev=1102677&r1=1102676&r2=1102677&view=diff
==============================================================================
Files lucene/dev/branches/docvalues/lucene/docs/gettingstarted.pdf (original) and lucene/dev/branches/docvalues/lucene/docs/gettingstarted.pdf Fri May 13 11:18:19 2011 differ
Modified: lucene/dev/branches/docvalues/lucene/docs/index.html
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/docs/index.html?rev=1102677&r1=1102676&r2=1102677&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/docs/index.html (original)
+++ lucene/dev/branches/docvalues/lucene/docs/index.html Fri May 13 11:18:19 2011
@@ -3,7 +3,7 @@
<head>
<META http-equiv="Content-Type" content="text/html; charset=UTF-8">
<meta content="Apache Forrest" name="Generator">
-<meta name="Forrest-version" content="0.8">
+<meta name="Forrest-version" content="0.9">
<meta name="Forrest-skin-name" content="lucene">
<title>Lucene Java Documentation</title>
<link type="text/css" href="skin/basic.css" rel="stylesheet">
Modified: lucene/dev/branches/docvalues/lucene/docs/index.pdf
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/docs/index.pdf?rev=1102677&r1=1102676&r2=1102677&view=diff
==============================================================================
Files lucene/dev/branches/docvalues/lucene/docs/index.pdf (original) and lucene/dev/branches/docvalues/lucene/docs/index.pdf Fri May 13 11:18:19 2011 differ
Modified: lucene/dev/branches/docvalues/lucene/docs/linkmap.html
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/docs/linkmap.html?rev=1102677&r1=1102676&r2=1102677&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/docs/linkmap.html (original)
+++ lucene/dev/branches/docvalues/lucene/docs/linkmap.html Fri May 13 11:18:19 2011
@@ -3,7 +3,7 @@
<head>
<META http-equiv="Content-Type" content="text/html; charset=UTF-8">
<meta content="Apache Forrest" name="Generator">
-<meta name="Forrest-version" content="0.8">
+<meta name="Forrest-version" content="0.9">
<meta name="Forrest-skin-name" content="lucene">
<title>Site Linkmap Table of Contents</title>
<link type="text/css" href="skin/basic.css" rel="stylesheet">
Modified: lucene/dev/branches/docvalues/lucene/docs/linkmap.pdf
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/docs/linkmap.pdf?rev=1102677&r1=1102676&r2=1102677&view=diff
==============================================================================
Files lucene/dev/branches/docvalues/lucene/docs/linkmap.pdf (original) and lucene/dev/branches/docvalues/lucene/docs/linkmap.pdf Fri May 13 11:18:19 2011 differ
Modified: lucene/dev/branches/docvalues/lucene/docs/lucene-contrib/index.html
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/docs/lucene-contrib/index.html?rev=1102677&r1=1102676&r2=1102677&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/docs/lucene-contrib/index.html (original)
+++ lucene/dev/branches/docvalues/lucene/docs/lucene-contrib/index.html Fri May 13 11:18:19 2011
@@ -3,7 +3,7 @@
<head>
<META http-equiv="Content-Type" content="text/html; charset=UTF-8">
<meta content="Apache Forrest" name="Generator">
-<meta name="Forrest-version" content="0.8">
+<meta name="Forrest-version" content="0.9">
<meta name="Forrest-skin-name" content="lucene">
<title>
Apache Lucene - Lucene Contrib
Modified: lucene/dev/branches/docvalues/lucene/docs/lucene-contrib/index.pdf
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/docs/lucene-contrib/index.pdf?rev=1102677&r1=1102676&r2=1102677&view=diff
==============================================================================
Files lucene/dev/branches/docvalues/lucene/docs/lucene-contrib/index.pdf (original) and lucene/dev/branches/docvalues/lucene/docs/lucene-contrib/index.pdf Fri May 13 11:18:19 2011 differ
Modified: lucene/dev/branches/docvalues/lucene/docs/queryparsersyntax.html
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/docs/queryparsersyntax.html?rev=1102677&r1=1102676&r2=1102677&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/docs/queryparsersyntax.html (original)
+++ lucene/dev/branches/docvalues/lucene/docs/queryparsersyntax.html Fri May 13 11:18:19 2011
@@ -3,7 +3,7 @@
<head>
<META http-equiv="Content-Type" content="text/html; charset=UTF-8">
<meta content="Apache Forrest" name="Generator">
-<meta name="Forrest-version" content="0.8">
+<meta name="Forrest-version" content="0.9">
<meta name="Forrest-skin-name" content="lucene">
<title>
Apache Lucene - Query Parser Syntax
Modified: lucene/dev/branches/docvalues/lucene/docs/queryparsersyntax.pdf
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/docs/queryparsersyntax.pdf?rev=1102677&r1=1102676&r2=1102677&view=diff
==============================================================================
Files lucene/dev/branches/docvalues/lucene/docs/queryparsersyntax.pdf (original) and lucene/dev/branches/docvalues/lucene/docs/queryparsersyntax.pdf Fri May 13 11:18:19 2011 differ
Modified: lucene/dev/branches/docvalues/lucene/docs/scoring.html
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/docs/scoring.html?rev=1102677&r1=1102676&r2=1102677&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/docs/scoring.html (original)
+++ lucene/dev/branches/docvalues/lucene/docs/scoring.html Fri May 13 11:18:19 2011
@@ -3,7 +3,7 @@
<head>
<META http-equiv="Content-Type" content="text/html; charset=UTF-8">
<meta content="Apache Forrest" name="Generator">
-<meta name="Forrest-version" content="0.8">
+<meta name="Forrest-version" content="0.9">
<meta name="Forrest-skin-name" content="lucene">
<title>
Apache Lucene - Scoring
Modified: lucene/dev/branches/docvalues/lucene/docs/scoring.pdf
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/docs/scoring.pdf?rev=1102677&r1=1102676&r2=1102677&view=diff
==============================================================================
Files lucene/dev/branches/docvalues/lucene/docs/scoring.pdf (original) and lucene/dev/branches/docvalues/lucene/docs/scoring.pdf Fri May 13 11:18:19 2011 differ
Modified: lucene/dev/branches/docvalues/lucene/docs/systemrequirements.html
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/docs/systemrequirements.html?rev=1102677&r1=1102676&r2=1102677&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/docs/systemrequirements.html (original)
+++ lucene/dev/branches/docvalues/lucene/docs/systemrequirements.html Fri May 13 11:18:19 2011
@@ -3,7 +3,7 @@
<head>
<META http-equiv="Content-Type" content="text/html; charset=UTF-8">
<meta content="Apache Forrest" name="Generator">
-<meta name="Forrest-version" content="0.8">
+<meta name="Forrest-version" content="0.9">
<meta name="Forrest-skin-name" content="lucene">
<title>Apache Lucene - System Requirements</title>
<link type="text/css" href="skin/basic.css" rel="stylesheet">
Modified: lucene/dev/branches/docvalues/lucene/docs/systemrequirements.pdf
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/docs/systemrequirements.pdf?rev=1102677&r1=1102676&r2=1102677&view=diff
==============================================================================
Binary files - no diff available.
Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/document/Document.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/document/Document.java?rev=1102677&r1=1102676&r2=1102677&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/document/Document.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/document/Document.java Fri May 13 11:18:19 2011
@@ -131,8 +131,13 @@ public final class Document {
/** Returns a field with the given name if any exist in this document, or
* null. If multiple fields exist with this name, this method returns the
* first value added.
- * Do not use this method with lazy loaded fields.
+ * Do not use this method with lazy loaded fields or {@link NumericField}.
+ * @deprecated use {@link #getFieldable} instead and cast depending on
+ * data type.
+ * @throws ClassCastException if you try to retrieve a numerical or
+ * lazy loaded field.
*/
+ @Deprecated
public final Field getField(String name) {
return (Field) getFieldable(name);
}
@@ -154,6 +159,8 @@ public final class Document {
* this document, or null. If multiple fields exist with this name, this
* method returns the first value added. If only binary fields with this name
* exist, returns null.
+ * For {@link NumericField} it returns the string value of the number. If you want
+ * the actual {@code NumericField} instance back, use {@link #getFieldable}.
*/
public final String get(String name) {
for (Fieldable field : fields) {
@@ -177,13 +184,18 @@ public final class Document {
/**
* Returns an array of {@link Field}s with the given name.
- * Do not use with lazy loaded fields.
* This method returns an empty array when there are no
* matching fields. It never returns null.
+ * Do not use this method with lazy loaded fields or {@link NumericField}.
*
* @param name the name of the field
* @return a <code>Field[]</code> array
+ * @deprecated use {@link #getFieldable} instead and cast depending on
+ * data type.
+ * @throws ClassCastException if you try to retrieve a numerical or
+ * lazy loaded field.
*/
+ @Deprecated
public final Field[] getFields(String name) {
List<Field> result = new ArrayList<Field>();
for (Fieldable field : fields) {
@@ -230,6 +242,8 @@ public final class Document {
* Returns an array of values of the field specified as the method parameter.
* This method returns an empty array when there are no
* matching fields. It never returns null.
+ * For {@link NumericField}s it returns the string value of the number. If you want
+ * the actual {@code NumericField} instances back, use {@link #getFieldables}.
* @param name the name of the field
* @return a <code>String[]</code> of field values
*/
Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/document/NumericField.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/document/NumericField.java?rev=1102677&r1=1102676&r2=1102677&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/document/NumericField.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/document/NumericField.java Fri May 13 11:18:19 2011
@@ -127,18 +127,18 @@ import org.apache.lucene.search.FieldCac
* class is a wrapper around this token stream type for
* easier, more intuitive usage.</p>
*
- * <p><b>NOTE:</b> This class is only used during
- * indexing. When retrieving the stored field value from a
- * {@link Document} instance after search, you will get a
- * conventional {@link Fieldable} instance where the numeric
- * values are returned as {@link String}s (according to
- * <code>toString(value)</code> of the used data type).
- *
* @since 2.9
*/
public final class NumericField extends AbstractField {
- private final NumericTokenStream numericTS;
+ /** Data type of the value in {@link NumericField}.
+ * @since 3.2
+ */
+ public static enum DataType { INT, LONG, FLOAT, DOUBLE }
+
+ private transient NumericTokenStream numericTS;
+ private DataType type;
+ private final int precisionStep;
/**
* Creates a field for numeric values using the default <code>precisionStep</code>
@@ -158,8 +158,8 @@ public final class NumericField extends
* a numeric value, before indexing a document containing this field,
* set a value using the various set<em>???</em>Value() methods.
* @param name the field name
- * @param store if the field should be stored in plain text form
- * (according to <code>toString(value)</code> of the used data type)
+ * @param store if the field should be stored, {@link Document#getFieldable}
+ * then returns {@code NumericField} instances on search results.
* @param index if the field should be indexed using {@link NumericTokenStream}
*/
public NumericField(String name, Field.Store store, boolean index) {
@@ -186,19 +186,43 @@ public final class NumericField extends
* set a value using the various set<em>???</em>Value() methods.
* @param name the field name
* @param precisionStep the used <a href="../search/NumericRangeQuery.html#precisionStepDesc">precision step</a>
- * @param store if the field should be stored in plain text form
- * (according to <code>toString(value)</code> of the used data type)
+ * @param store if the field should be stored, {@link Document#getFieldable}
+ * then returns {@code NumericField} instances on search results.
* @param index if the field should be indexed using {@link NumericTokenStream}
*/
public NumericField(String name, int precisionStep, Field.Store store, boolean index) {
super(name, store, index ? Field.Index.ANALYZED_NO_NORMS : Field.Index.NO, Field.TermVector.NO);
+ this.precisionStep = precisionStep;
setOmitTermFreqAndPositions(true);
- numericTS = new NumericTokenStream(precisionStep);
}
/** Returns a {@link NumericTokenStream} for indexing the numeric value. */
public TokenStream tokenStreamValue() {
- return isIndexed() ? numericTS : null;
+ if (!isIndexed())
+ return null;
+ if (numericTS == null) {
+ // lazy init the TokenStream as it is heavy to instantiate (attributes,...),
+ // if not needed (stored field loading)
+ numericTS = new NumericTokenStream(precisionStep);
+ // initialize value in TokenStream
+ if (fieldsData != null) {
+ assert type != null;
+ final Number val = (Number) fieldsData;
+ switch (type) {
+ case INT:
+ numericTS.setIntValue(val.intValue()); break;
+ case LONG:
+ numericTS.setLongValue(val.longValue()); break;
+ case FLOAT:
+ numericTS.setFloatValue(val.floatValue()); break;
+ case DOUBLE:
+ numericTS.setDoubleValue(val.doubleValue()); break;
+ default:
+ assert false : "Should never get here";
+ }
+ }
+ }
+ return numericTS;
}
/** Returns always <code>null</code> for numeric fields */
@@ -212,7 +236,10 @@ public final class NumericField extends
return null;
}
- /** Returns the numeric value as a string (how it is stored, when {@link Field.Store#YES} is chosen). */
+ /** Returns the numeric value as a string. This format is also returned if you call {@link Document#get(String)}
+ * on search results. It is recommended to use {@link Document#getFieldable} instead
+ * that returns {@code NumericField} instances. You can then use {@link #getNumericValue}
+ * to return the stored value. */
public String stringValue() {
return (fieldsData == null) ? null : fieldsData.toString();
}
@@ -224,7 +251,14 @@ public final class NumericField extends
/** Returns the precision step. */
public int getPrecisionStep() {
- return numericTS.getPrecisionStep();
+ return precisionStep;
+ }
+
+ /** Returns the data type of the current value, {@code null} if not yet set.
+ * @since 3.2
+ */
+ public DataType getDataType() {
+ return type;
}
/**
@@ -234,8 +268,9 @@ public final class NumericField extends
* <code>document.add(new NumericField(name, precisionStep).setLongValue(value))</code>
*/
public NumericField setLongValue(final long value) {
- numericTS.setLongValue(value);
+ if (numericTS != null) numericTS.setLongValue(value);
fieldsData = Long.valueOf(value);
+ type = DataType.LONG;
return this;
}
@@ -246,8 +281,9 @@ public final class NumericField extends
* <code>document.add(new NumericField(name, precisionStep).setIntValue(value))</code>
*/
public NumericField setIntValue(final int value) {
- numericTS.setIntValue(value);
+ if (numericTS != null) numericTS.setIntValue(value);
fieldsData = Integer.valueOf(value);
+ type = DataType.INT;
return this;
}
@@ -258,8 +294,9 @@ public final class NumericField extends
* <code>document.add(new NumericField(name, precisionStep).setDoubleValue(value))</code>
*/
public NumericField setDoubleValue(final double value) {
- numericTS.setDoubleValue(value);
+ if (numericTS != null) numericTS.setDoubleValue(value);
fieldsData = Double.valueOf(value);
+ type = DataType.DOUBLE;
return this;
}
@@ -270,8 +307,9 @@ public final class NumericField extends
* <code>document.add(new NumericField(name, precisionStep).setFloatValue(value))</code>
*/
public NumericField setFloatValue(final float value) {
- numericTS.setFloatValue(value);
+ if (numericTS != null) numericTS.setFloatValue(value);
fieldsData = Float.valueOf(value);
+ type = DataType.FLOAT;
return this;
}
Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/BufferedDeletesStream.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/BufferedDeletesStream.java?rev=1102677&r1=1102676&r2=1102677&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/BufferedDeletesStream.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/BufferedDeletesStream.java Fri May 13 11:18:19 2011
@@ -132,9 +132,9 @@ class BufferedDeletesStream {
public final long gen;
// If non-null, contains segments that are 100% deleted
- public final SegmentInfos allDeleted;
+ public final List<SegmentInfo> allDeleted;
- ApplyDeletesResult(boolean anyDeletes, long gen, SegmentInfos allDeleted) {
+ ApplyDeletesResult(boolean anyDeletes, long gen, List<SegmentInfo> allDeleted) {
this.anyDeletes = anyDeletes;
this.gen = gen;
this.allDeleted = allDeleted;
@@ -164,7 +164,7 @@ class BufferedDeletesStream {
/** Resolves the buffered deleted Term/Query/docIDs, into
* actual deleted docIDs in the deletedDocs BitVector for
* each SegmentReader. */
- public synchronized ApplyDeletesResult applyDeletes(IndexWriter.ReaderPool readerPool, SegmentInfos infos) throws IOException {
+ public synchronized ApplyDeletesResult applyDeletes(IndexWriter.ReaderPool readerPool, List<SegmentInfo> infos) throws IOException {
final long t0 = System.currentTimeMillis();
if (infos.size() == 0) {
@@ -182,7 +182,7 @@ class BufferedDeletesStream {
message("applyDeletes: infos=" + infos + " packetCount=" + deletes.size());
}
- SegmentInfos infos2 = new SegmentInfos();
+ List<SegmentInfo> infos2 = new ArrayList<SegmentInfo>();
infos2.addAll(infos);
Collections.sort(infos2, sortSegInfoByDelGen);
@@ -192,7 +192,7 @@ class BufferedDeletesStream {
int infosIDX = infos2.size()-1;
int delIDX = deletes.size()-1;
- SegmentInfos allDeleted = null;
+ List<SegmentInfo> allDeleted = null;
while (infosIDX >= 0) {
//System.out.println("BD: cycle delIDX=" + delIDX + " infoIDX=" + infosIDX);
@@ -245,7 +245,7 @@ class BufferedDeletesStream {
if (segAllDeletes) {
if (allDeleted == null) {
- allDeleted = new SegmentInfos();
+ allDeleted = new ArrayList<SegmentInfo>();
}
allDeleted.add(info);
}
@@ -287,7 +287,7 @@ class BufferedDeletesStream {
if (segAllDeletes) {
if (allDeleted == null) {
- allDeleted = new SegmentInfos();
+ allDeleted = new ArrayList<SegmentInfo>();
}
allDeleted.add(info);
}
Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/CompoundFileWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/CompoundFileWriter.java?rev=1102677&r1=1102676&r2=1102677&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/CompoundFileWriter.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/CompoundFileWriter.java Fri May 13 11:18:19 2011
@@ -46,8 +46,10 @@ import org.apache.lucene.util.IOUtils;
* file. The {directory} that follows has that many entries. Each directory entry
* contains a long pointer to the start of this file's data section, and a String
* with that file's name.
+ *
+ * @lucene.internal
*/
-final class CompoundFileWriter {
+public final class CompoundFileWriter {
static final class FileEntry {
@@ -137,8 +139,7 @@ final class CompoundFileWriter {
/** Merge files with the extensions added up to now.
* All files with these extensions are combined sequentially into the
- * compound stream. After successful merge, the source files
- * are deleted.
+ * compound stream.
* @throws IllegalStateException if close() had been called before or
* if no file has been added to this object
*/
Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/ConcurrentMergeScheduler.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/ConcurrentMergeScheduler.java?rev=1102677&r1=1102676&r2=1102677&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/ConcurrentMergeScheduler.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/ConcurrentMergeScheduler.java Fri May 13 11:18:19 2011
@@ -135,8 +135,8 @@ public class ConcurrentMergeScheduler ex
final MergePolicy.OneMerge m1 = t1.getCurrentMerge();
final MergePolicy.OneMerge m2 = t2.getCurrentMerge();
- final int c1 = m1 == null ? Integer.MAX_VALUE : m1.segments.totalDocCount();
- final int c2 = m2 == null ? Integer.MAX_VALUE : m2.segments.totalDocCount();
+ final int c1 = m1 == null ? Integer.MAX_VALUE : m1.totalDocCount;
+ final int c2 = m2 == null ? Integer.MAX_VALUE : m2.totalDocCount;
return c2 - c1;
}
Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/DocFieldProcessor.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/DocFieldProcessor.java?rev=1102677&r1=1102676&r2=1102677&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/DocFieldProcessor.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/DocFieldProcessor.java Fri May 13 11:18:19 2011
@@ -263,9 +263,10 @@ final class DocFieldProcessor extends Do
// enabled; we could save [small amount of] CPU
// here.
ArrayUtil.quickSort(fields, 0, fieldCount, fieldsComp);
-
- for(int i=0;i<fieldCount;i++)
- fields[i].consumer.processFields(fields[i].fields, fields[i].fieldCount);
+ for(int i=0;i<fieldCount;i++) {
+ final DocFieldProcessorPerField perField = fields[i];
+ perField.consumer.processFields(perField.fields, perField.fieldCount);
+ }
if (docState.maxTermPrefix != null && docState.infoStream != null) {
docState.infoStream.println("WARNING: document contains at least one immense term (whose UTF8 encoding is longer than the max length " + DocumentsWriterPerThread.MAX_TERM_LENGTH_UTF8 + "), all of which were skipped. Please correct the analyzer to not produce such terms. The prefix of the first immense term is: '" + docState.maxTermPrefix + "...'");
Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/DocumentsWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/DocumentsWriter.java?rev=1102677&r1=1102676&r2=1102677&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/DocumentsWriter.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/DocumentsWriter.java Fri May 13 11:18:19 2011
@@ -188,7 +188,7 @@ final class DocumentsWriter {
this.infoStream = infoStream;
final Iterator<ThreadState> it = perThreadPool.getAllPerThreadsIterator();
while (it.hasNext()) {
- it.next().perThread.docState.infoStream = infoStream;
+ it.next().perThread.setInfoStream(infoStream);
}
}
Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/DocumentsWriterDeleteQueue.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/DocumentsWriterDeleteQueue.java?rev=1102677&r1=1102676&r2=1102677&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/DocumentsWriterDeleteQueue.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/DocumentsWriterDeleteQueue.java Fri May 13 11:18:19 2011
@@ -63,9 +63,10 @@ import org.apache.lucene.search.Query;
*/
final class DocumentsWriterDeleteQueue {
- private volatile Node tail;
+ private volatile Node<?> tail;
- private static final AtomicReferenceFieldUpdater<DocumentsWriterDeleteQueue, Node> tailUpdater = AtomicReferenceFieldUpdater
+ @SuppressWarnings("rawtypes")
+ private static final AtomicReferenceFieldUpdater<DocumentsWriterDeleteQueue,Node> tailUpdater = AtomicReferenceFieldUpdater
.newUpdater(DocumentsWriterDeleteQueue.class, Node.class, "tail");
private final DeleteSlice globalSlice;
@@ -90,7 +91,7 @@ final class DocumentsWriterDeleteQueue {
* we use a sentinel instance as our initial tail. No slice will ever try to
* apply this tail since the head is always omitted.
*/
- tail = new Node(null); // sentinel
+ tail = new Node<Object>(null); // sentinel
globalSlice = new DeleteSlice(tail);
}
@@ -126,14 +127,14 @@ final class DocumentsWriterDeleteQueue {
// we can do it just every n times or so?
}
- void add(Node item) {
+ void add(Node<?> item) {
/*
* this non-blocking / 'wait-free' linked list add was inspired by Apache
* Harmony's ConcurrentLinkedQueue Implementation.
*/
while (true) {
- final Node currentTail = this.tail;
- final Node tailNext = currentTail.next;
+ final Node<?> currentTail = this.tail;
+ final Node<?> tailNext = currentTail.next;
if (tail == currentTail) {
if (tailNext != null) {
/*
@@ -196,7 +197,7 @@ final class DocumentsWriterDeleteQueue {
* deletes in the queue and reset the global slice to let the GC prune the
* queue.
*/
- final Node currentTail = tail; // take the current tail make this local any
+ final Node<?> currentTail = tail; // take the current tail make this local any
// Changes after this call are applied later
// and not relevant here
if (callerSlice != null) {
@@ -232,10 +233,10 @@ final class DocumentsWriterDeleteQueue {
static class DeleteSlice {
// No need to be volatile, slices are thread captive (only accessed by one thread)!
- Node sliceHead; // we don't apply this one
- Node sliceTail;
+ Node<?> sliceHead; // we don't apply this one
+ Node<?> sliceTail;
- DeleteSlice(Node currentTail) {
+ DeleteSlice(Node<?> currentTail) {
assert currentTail != null;
/*
* Initially this is a 0 length slice pointing to the 'current' tail of
@@ -256,7 +257,7 @@ final class DocumentsWriterDeleteQueue {
* tail in this slice are not equal then there will be at least one more
* non-null node in the slice!
*/
- Node current = sliceHead;
+ Node<?> current = sliceHead;
do {
current = current.next;
assert current != null : "slice property violated between the head on the tail must not be a null node";
@@ -290,7 +291,7 @@ final class DocumentsWriterDeleteQueue {
void clear() {
globalBufferLock.lock();
try {
- final Node currentTail = tail;
+ final Node<?> currentTail = tail;
globalSlice.sliceHead = globalSlice.sliceTail = currentTail;
globalBufferedDeletes.clear();
} finally {
@@ -298,27 +299,28 @@ final class DocumentsWriterDeleteQueue {
}
}
- private static class Node {
- volatile Node next;
- final Object item;
+ private static class Node<T> {
+ volatile Node<?> next;
+ final T item;
- private Node(Object item) {
+ Node(T item) {
this.item = item;
}
- static final AtomicReferenceFieldUpdater<Node, Node> nextUpdater = AtomicReferenceFieldUpdater
+ @SuppressWarnings("rawtypes")
+ static final AtomicReferenceFieldUpdater<Node,Node> nextUpdater = AtomicReferenceFieldUpdater
.newUpdater(Node.class, Node.class, "next");
void apply(BufferedDeletes bufferedDeletes, int docIDUpto) {
assert false : "sentinel item must never be applied";
}
- boolean casNext(Node cmp, Node val) {
+ boolean casNext(Node<?> cmp, Node<?> val) {
return nextUpdater.compareAndSet(this, cmp, val);
}
}
- private static final class TermNode extends Node {
+ private static final class TermNode extends Node<Term> {
TermNode(Term term) {
super(term);
@@ -326,33 +328,31 @@ final class DocumentsWriterDeleteQueue {
@Override
void apply(BufferedDeletes bufferedDeletes, int docIDUpto) {
- bufferedDeletes.addTerm((Term) item, docIDUpto);
+ bufferedDeletes.addTerm(item, docIDUpto);
}
}
- private static final class QueryArrayNode extends Node {
+ private static final class QueryArrayNode extends Node<Query[]> {
QueryArrayNode(Query[] query) {
super(query);
}
@Override
void apply(BufferedDeletes bufferedDeletes, int docIDUpto) {
- final Query[] queries = (Query[]) item;
- for (Query query : queries) {
+ for (Query query : item) {
bufferedDeletes.addQuery(query, docIDUpto);
}
}
}
- private static final class TermArrayNode extends Node {
+ private static final class TermArrayNode extends Node<Term[]> {
TermArrayNode(Term[] term) {
super(term);
}
@Override
void apply(BufferedDeletes bufferedDeletes, int docIDUpto) {
- final Term[] terms = (Term[]) item;
- for (Term term : terms) {
+ for (Term term : item) {
bufferedDeletes.addTerm(term, docIDUpto);
}
}
@@ -361,7 +361,7 @@ final class DocumentsWriterDeleteQueue {
private boolean forceApplyGlobalSlice() {
globalBufferLock.lock();
- final Node currentTail = tail;
+ final Node<?> currentTail = tail;
try {
if (globalSlice.sliceTail != currentTail) {
globalSlice.sliceTail = currentTail;
Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/DocumentsWriterFlushControl.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/DocumentsWriterFlushControl.java?rev=1102677&r1=1102676&r2=1102677&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/DocumentsWriterFlushControl.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/DocumentsWriterFlushControl.java Fri May 13 11:18:19 2011
@@ -122,13 +122,13 @@ public final class DocumentsWriterFlushC
// is super important since we can not address more than 2048 MB per DWPT
setFlushPending(perThread);
if (fullFlush) {
- DocumentsWriterPerThread toBlock = internalTryCheckOutForFlush(perThread, false);
+ DocumentsWriterPerThread toBlock = internalTryCheckOutForFlush(perThread);
assert toBlock != null;
blockedFlushes.add(toBlock);
}
}
}
- final DocumentsWriterPerThread flushingDWPT = tryCheckoutForFlush(perThread, false);
+ final DocumentsWriterPerThread flushingDWPT = tryCheckoutForFlush(perThread);
healthiness.updateStalled(this);
return flushingDWPT;
}
@@ -189,18 +189,15 @@ public final class DocumentsWriterFlushC
}
synchronized DocumentsWriterPerThread tryCheckoutForFlush(
- ThreadState perThread, boolean setPending) {
+ ThreadState perThread) {
if (fullFlush) {
return null;
}
- return internalTryCheckOutForFlush(perThread, setPending);
+ return internalTryCheckOutForFlush(perThread);
}
private DocumentsWriterPerThread internalTryCheckOutForFlush(
- ThreadState perThread, boolean setPending) {
- if (setPending && !perThread.flushPending) {
- setFlushPending(perThread);
- }
+ ThreadState perThread) {
if (perThread.flushPending) {
// We are pending so all memory is already moved to flushBytes
if (perThread.tryLock()) {
@@ -245,7 +242,7 @@ public final class DocumentsWriterFlushC
while (allActiveThreads.hasNext() && numPending > 0) {
ThreadState next = allActiveThreads.next();
if (next.flushPending) {
- final DocumentsWriterPerThread dwpt = tryCheckoutForFlush(next, false);
+ final DocumentsWriterPerThread dwpt = tryCheckoutForFlush(next);
if (dwpt != null) {
return dwpt;
}
@@ -330,7 +327,12 @@ public final class DocumentsWriterFlushC
}
if (next.perThread.getNumDocsInRAM() > 0 ) {
final DocumentsWriterPerThread dwpt = next.perThread; // just for assert
- final DocumentsWriterPerThread flushingDWPT = internalTryCheckOutForFlush(next, true);
+ synchronized (this) {
+ if (!next.flushPending) {
+ setFlushPending(next);
+ }
+ }
+ final DocumentsWriterPerThread flushingDWPT = internalTryCheckOutForFlush(next);
assert flushingDWPT != null : "DWPT must never be null here since we hold the lock and it holds documents";
assert dwpt == flushingDWPT : "flushControl returned different DWPT";
toFlush.add(flushingDWPT);
Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/DocumentsWriterPerThread.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/DocumentsWriterPerThread.java?rev=1102677&r1=1102676&r2=1102677&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/DocumentsWriterPerThread.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/DocumentsWriterPerThread.java Fri May 13 11:18:19 2011
@@ -163,7 +163,7 @@ public class DocumentsWriterPerThread {
boolean hasAborted = false; // True if the last exception throws by #updateDocument was aborting
private FieldInfos fieldInfos;
- private final PrintStream infoStream;
+ private PrintStream infoStream;
private int numDocsInRAM;
private int flushedDocCount;
DocumentsWriterDeleteQueue deleteQueue;
@@ -235,6 +235,7 @@ public class DocumentsWriterPerThread {
// mark document as deleted
deleteDocID(docState.docID);
numDocsInRAM++;
+ fieldInfos.revertUncommitted();
} else {
abort();
}
@@ -377,15 +378,12 @@ public class DocumentsWriterPerThread {
boolean success = false;
try {
-
- SegmentInfo newSegment = new SegmentInfo(segment, flushState.numDocs, directory, false, fieldInfos.hasProx(), flushState.segmentCodecs, false, fieldInfos);
consumer.flush(flushState);
pendingDeletes.terms.clear();
- newSegment.setHasVectors(flushState.hasVectors);
-
+ final SegmentInfo newSegment = new SegmentInfo(segment, flushState.numDocs, directory, false, flushState.segmentCodecs, fieldInfos.asReadOnly());
if (infoStream != null) {
message("new segment has " + (flushState.deletedDocs == null ? 0 : flushState.deletedDocs.count()) + " deleted docs");
- message("new segment has " + (flushState.hasVectors ? "vectors" : "no vectors"));
+ message("new segment has " + (newSegment.getHasVectors() ? "vectors" : "no vectors"));
message("flushedFiles=" + newSegment.files());
message("flushed codecs=" + newSegment.getSegmentCodecs());
}
@@ -435,10 +433,6 @@ public class DocumentsWriterPerThread {
return bytesUsed.get() + pendingDeletes.bytesUsed.get();
}
- FieldInfos getFieldInfos() {
- return fieldInfos;
- }
-
void message(String message) {
writer.message("DWPT: " + message);
}
@@ -498,4 +492,9 @@ public class DocumentsWriterPerThread {
assert segment != null;
return new PerDocWriteState(infoStream, directory, segment, fieldInfos, bytesUsed, codecId);
}
+
+ void setInfoStream(PrintStream infoStream) {
+ this.infoStream = infoStream;
+ docState.infoStream = infoStream;
+ }
}
Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/FieldInfo.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/FieldInfo.java?rev=1102677&r1=1102676&r2=1102677&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/FieldInfo.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/FieldInfo.java Fri May 13 11:18:19 2011
@@ -22,7 +22,6 @@ import org.apache.lucene.index.values.Ty
/** @lucene.experimental */
public final class FieldInfo {
public static final int UNASSIGNED_CODEC_ID = -1;
-
public final String name;
public final int number;
@@ -113,7 +112,6 @@ public final class FieldInfo {
}
assert !this.omitTermFreqAndPositions || !this.storePayloads;
}
-
void setDocValues(Type v) {
if (docValues == null) {
docValues = v;
@@ -127,4 +125,29 @@ public final class FieldInfo {
public Type getDocValues() {
return docValues;
}
+
+ private boolean vectorsCommitted;
+
+ /**
+ * Reverts all uncommitted changes on this {@link FieldInfo}
+ * @see #commitVectors()
+ */
+ void revertUncommitted() {
+ if (storeTermVector && !vectorsCommitted) {
+ storeOffsetWithTermVector = false;
+ storePositionWithTermVector = false;
+ storeTermVector = false;
+ }
+ }
+
+ /**
+ * Commits term vector modifications. Changes to term-vectors must be
+ * explicitly committed once the necessary files are created. If those changes
+ * are not committed subsequent {@link #revertUncommitted()} will reset the
+ * all term-vector flags before the next document.
+ */
+ void commitVectors() {
+ assert storeTermVector;
+ vectorsCommitted = true;
+ }
}
Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/FieldInfos.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/FieldInfos.java?rev=1102677&r1=1102676&r2=1102677&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/FieldInfos.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/FieldInfos.java Fri May 13 11:18:19 2011
@@ -220,6 +220,10 @@ public final class FieldInfos implements
static final byte OMIT_TERM_FREQ_AND_POSITIONS = 0x40;
private int format;
+ private boolean hasProx; // only set if readonly
+ private boolean hasVectors; // only set if readonly
+ private long version; // internal use to track changes
+
/**
* Creates a new {@link FieldInfos} instance with a private
@@ -267,7 +271,7 @@ public final class FieldInfos implements
*/
public FieldInfos(Directory d, String name) throws IOException {
this((FieldNumberBiMap)null, null); // use null here to make this FIs Read-Only
- IndexInput input = d.openInput(name);
+ final IndexInput input = d.openInput(name);
try {
read(input, name);
} finally {
@@ -303,6 +307,9 @@ public final class FieldInfos implements
@Override
synchronized public Object clone() {
FieldInfos fis = new FieldInfos(globalFieldNumbers, segmentCodecsBuilder);
+ fis.format = format;
+ fis.hasProx = hasProx;
+ fis.hasVectors = hasVectors;
for (FieldInfo fi : this) {
FieldInfo clone = (FieldInfo) (fi).clone();
fis.putInternal(clone);
@@ -312,6 +319,10 @@ public final class FieldInfos implements
/** Returns true if any fields do not omitTermFreqAndPositions */
public boolean hasProx() {
+ if (isReadOnly()) {
+ return hasProx;
+ }
+ // mutable FIs must check!
for (FieldInfo fi : this) {
if (fi.isIndexed && !fi.omitTermFreqAndPositions) {
return true;
@@ -445,6 +456,7 @@ public final class FieldInfos implements
if ((fi.isIndexed || fi.hasDocValues()) && fi.getCodecId() == FieldInfo.UNASSIGNED_CODEC_ID) {
segmentCodecsBuilder.tryAddAndSet(fi);
}
+ version++;
return fi;
}
@@ -514,6 +526,10 @@ public final class FieldInfos implements
}
public boolean hasVectors() {
+ if (isReadOnly()) {
+ return hasVectors;
+ }
+ // mutable FIs must check
for (FieldInfo fi : this) {
if (fi.storeTermVector) {
return true;
@@ -566,6 +582,10 @@ public final class FieldInfos implements
public final boolean isReadOnly() {
return globalFieldNumbers == null;
}
+
+ synchronized final long getVersion() {
+ return version;
+ }
public void write(IndexOutput output) throws IOException {
output.writeVInt(FORMAT_CURRENT);
@@ -658,7 +678,8 @@ public final class FieldInfos implements
if (omitTermFreqAndPositions) {
storePayloads = false;
}
-
+ hasVectors |= storeTermVector;
+ hasProx |= isIndexed && !omitTermFreqAndPositions;
Type docValuesType = null;
if (format <= FORMAT_INDEX_VALUES) {
final byte b = input.readByte();
@@ -705,5 +726,29 @@ public final class FieldInfos implements
throw new CorruptIndexException("did not read all bytes from file \"" + fileName + "\": read " + input.getFilePointer() + " vs size " + input.length());
}
}
+
+ /**
+ * Reverts all uncommitted changes
+ * @see FieldInfo#revertUncommitted()
+ */
+ void revertUncommitted() {
+ for (FieldInfo fieldInfo : this) {
+ fieldInfo.revertUncommitted();
+ }
+ }
+
+ final FieldInfos asReadOnly() {
+ if (isReadOnly()) {
+ return this;
+ }
+ final FieldInfos roFis = new FieldInfos((FieldNumberBiMap)null, null);
+ for (FieldInfo fieldInfo : this) {
+ FieldInfo clone = (FieldInfo) (fieldInfo).clone();
+ roFis.putInternal(clone);
+ roFis.hasVectors |= clone.storeTermVector;
+ roFis.hasProx |= clone.isIndexed && !clone.omitTermFreqAndPositions;
+ }
+ return roFis;
+ }
}
Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/FieldsReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/FieldsReader.java?rev=1102677&r1=1102676&r2=1102677&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/FieldsReader.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/FieldsReader.java Fri May 13 11:18:19 2011
@@ -24,10 +24,11 @@ import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldSelector;
import org.apache.lucene.document.FieldSelectorResult;
import org.apache.lucene.document.Fieldable;
-import org.apache.lucene.store.Directory;
-import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.document.NumericField;
import org.apache.lucene.store.AlreadyClosedException;
import org.apache.lucene.store.BufferedIndexInput;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.CloseableThreadLocal;
import java.io.IOException;
@@ -212,40 +213,39 @@ public final class FieldsReader implemen
Document doc = new Document();
int numFields = fieldsStream.readVInt();
- for (int i = 0; i < numFields; i++) {
+ out: for (int i = 0; i < numFields; i++) {
int fieldNumber = fieldsStream.readVInt();
FieldInfo fi = fieldInfos.fieldInfo(fieldNumber);
FieldSelectorResult acceptField = fieldSelector == null ? FieldSelectorResult.LOAD : fieldSelector.accept(fi.name);
- byte bits = fieldsStream.readByte();
- assert bits <= FieldsWriter.FIELD_IS_TOKENIZED + FieldsWriter.FIELD_IS_BINARY;
+ int bits = fieldsStream.readByte() & 0xFF;
+ assert bits <= (FieldsWriter.FIELD_IS_NUMERIC_MASK | FieldsWriter.FIELD_IS_TOKENIZED | FieldsWriter.FIELD_IS_BINARY): "bits=" + Integer.toHexString(bits);
boolean tokenize = (bits & FieldsWriter.FIELD_IS_TOKENIZED) != 0;
boolean binary = (bits & FieldsWriter.FIELD_IS_BINARY) != 0;
- //TODO: Find an alternative approach here if this list continues to grow beyond the
- //list of 5 or 6 currently here. See Lucene 762 for discussion
- if (acceptField.equals(FieldSelectorResult.LOAD)) {
- addField(doc, fi, binary, tokenize);
- }
- else if (acceptField.equals(FieldSelectorResult.LOAD_AND_BREAK)){
- addField(doc, fi, binary, tokenize);
- break;//Get out of this loop
- }
- else if (acceptField.equals(FieldSelectorResult.LAZY_LOAD)) {
- addFieldLazy(doc, fi, binary, tokenize, true);
- }
- else if (acceptField.equals(FieldSelectorResult.LATENT)) {
- addFieldLazy(doc, fi, binary, tokenize, false);
- }
- else if (acceptField.equals(FieldSelectorResult.SIZE)){
- skipField(addFieldSize(doc, fi, binary));
- }
- else if (acceptField.equals(FieldSelectorResult.SIZE_AND_BREAK)){
- addFieldSize(doc, fi, binary);
- break;
- }
- else {
- skipField();
+ final int numeric = bits & FieldsWriter.FIELD_IS_NUMERIC_MASK;
+
+ switch (acceptField) {
+ case LOAD:
+ addField(doc, fi, binary, tokenize, numeric);
+ break;
+ case LOAD_AND_BREAK:
+ addField(doc, fi, binary, tokenize, numeric);
+ break out; //Get out of this loop
+ case LAZY_LOAD:
+ addFieldLazy(doc, fi, binary, tokenize, true, numeric);
+ break;
+ case LATENT:
+ addFieldLazy(doc, fi, binary, tokenize, false, numeric);
+ break;
+ case SIZE:
+ skipFieldBytes(addFieldSize(doc, fi, binary, numeric));
+ break;
+ case SIZE_AND_BREAK:
+ addFieldSize(doc, fi, binary, numeric);
+ break out; //Get out of this loop
+ default:
+ skipField(numeric);
}
}
@@ -282,72 +282,121 @@ public final class FieldsReader implemen
* Skip the field. We still have to read some of the information about the field, but can skip past the actual content.
* This will have the most payoff on large fields.
*/
- private void skipField() throws IOException {
- skipField(fieldsStream.readVInt());
+ private void skipField(int numeric) throws IOException {
+ final int numBytes;
+ switch(numeric) {
+ case 0:
+ numBytes = fieldsStream.readVInt();
+ break;
+ case FieldsWriter.FIELD_IS_NUMERIC_INT:
+ case FieldsWriter.FIELD_IS_NUMERIC_FLOAT:
+ numBytes = 4;
+ break;
+ case FieldsWriter.FIELD_IS_NUMERIC_LONG:
+ case FieldsWriter.FIELD_IS_NUMERIC_DOUBLE:
+ numBytes = 8;
+ break;
+ default:
+ throw new FieldReaderException("Invalid numeric type: " + Integer.toHexString(numeric));
+ }
+
+ skipFieldBytes(numBytes);
}
- private void skipField(int toRead) throws IOException {
+ private void skipFieldBytes(int toRead) throws IOException {
fieldsStream.seek(fieldsStream.getFilePointer() + toRead);
}
- private void addFieldLazy(Document doc, FieldInfo fi, boolean binary, boolean tokenize, boolean cacheResult) throws IOException {
+ private NumericField loadNumericField(FieldInfo fi, int numeric) throws IOException {
+ assert numeric != 0;
+ switch(numeric) {
+ case FieldsWriter.FIELD_IS_NUMERIC_INT:
+ return new NumericField(fi.name, Field.Store.YES, fi.isIndexed).setIntValue(fieldsStream.readInt());
+ case FieldsWriter.FIELD_IS_NUMERIC_LONG:
+ return new NumericField(fi.name, Field.Store.YES, fi.isIndexed).setLongValue(fieldsStream.readLong());
+ case FieldsWriter.FIELD_IS_NUMERIC_FLOAT:
+ return new NumericField(fi.name, Field.Store.YES, fi.isIndexed).setFloatValue(Float.intBitsToFloat(fieldsStream.readInt()));
+ case FieldsWriter.FIELD_IS_NUMERIC_DOUBLE:
+ return new NumericField(fi.name, Field.Store.YES, fi.isIndexed).setDoubleValue(Double.longBitsToDouble(fieldsStream.readLong()));
+ default:
+ throw new FieldReaderException("Invalid numeric type: " + Integer.toHexString(numeric));
+ }
+ }
+
+ private void addFieldLazy(Document doc, FieldInfo fi, boolean binary, boolean tokenize, boolean cacheResult, int numeric) throws IOException {
+ final AbstractField f;
if (binary) {
int toRead = fieldsStream.readVInt();
long pointer = fieldsStream.getFilePointer();
- //was: doc.add(new Fieldable(fi.name, b, Fieldable.Store.YES));
- doc.add(new LazyField(fi.name, Field.Store.YES, toRead, pointer, binary, cacheResult));
+ f = new LazyField(fi.name, Field.Store.YES, toRead, pointer, binary, cacheResult);
//Need to move the pointer ahead by toRead positions
fieldsStream.seek(pointer + toRead);
+ } else if (numeric != 0) {
+ f = loadNumericField(fi, numeric);
} else {
Field.Store store = Field.Store.YES;
Field.Index index = Field.Index.toIndex(fi.isIndexed, tokenize);
Field.TermVector termVector = Field.TermVector.toTermVector(fi.storeTermVector, fi.storeOffsetWithTermVector, fi.storePositionWithTermVector);
- AbstractField f;
int length = fieldsStream.readVInt();
long pointer = fieldsStream.getFilePointer();
//Skip ahead of where we are by the length of what is stored
fieldsStream.seek(pointer+length);
f = new LazyField(fi.name, store, index, termVector, length, pointer, binary, cacheResult);
- f.setOmitNorms(fi.omitNorms);
- f.setOmitTermFreqAndPositions(fi.omitTermFreqAndPositions);
-
- doc.add(f);
}
-
+
+ f.setOmitNorms(fi.omitNorms);
+ f.setOmitTermFreqAndPositions(fi.omitTermFreqAndPositions);
+ doc.add(f);
}
- private void addField(Document doc, FieldInfo fi, boolean binary, boolean tokenize) throws CorruptIndexException, IOException {
+ private void addField(Document doc, FieldInfo fi, boolean binary, boolean tokenize, int numeric) throws CorruptIndexException, IOException {
+ final AbstractField f;
if (binary) {
int toRead = fieldsStream.readVInt();
final byte[] b = new byte[toRead];
fieldsStream.readBytes(b, 0, b.length);
- doc.add(new Field(fi.name, b));
+ f = new Field(fi.name, b);
+ } else if (numeric != 0) {
+ f = loadNumericField(fi, numeric);
} else {
- Field.Store store = Field.Store.YES;
Field.Index index = Field.Index.toIndex(fi.isIndexed, tokenize);
Field.TermVector termVector = Field.TermVector.toTermVector(fi.storeTermVector, fi.storeOffsetWithTermVector, fi.storePositionWithTermVector);
-
- AbstractField f;
f = new Field(fi.name, // name
- false,
- fieldsStream.readString(), // read value
- store,
- index,
- termVector);
- f.setOmitTermFreqAndPositions(fi.omitTermFreqAndPositions);
- f.setOmitNorms(fi.omitNorms);
-
- doc.add(f);
+ false,
+ fieldsStream.readString(), // read value
+ Field.Store.YES,
+ index,
+ termVector);
}
+
+ f.setOmitTermFreqAndPositions(fi.omitTermFreqAndPositions);
+ f.setOmitNorms(fi.omitNorms);
+ doc.add(f);
}
// Add the size of field as a byte[] containing the 4 bytes of the integer byte size (high order byte first; char = 2 bytes)
// Read just the size -- caller must skip the field content to continue reading fields
// Return the size in bytes or chars, depending on field type
- private int addFieldSize(Document doc, FieldInfo fi, boolean binary) throws IOException {
- int size = fieldsStream.readVInt(), bytesize = binary ? size : 2*size;
+ private int addFieldSize(Document doc, FieldInfo fi, boolean binary, int numeric) throws IOException {
+ final int bytesize, size;
+ switch(numeric) {
+ case 0:
+ size = fieldsStream.readVInt();
+ bytesize = binary ? size : 2*size;
+ break;
+ case FieldsWriter.FIELD_IS_NUMERIC_INT:
+ case FieldsWriter.FIELD_IS_NUMERIC_FLOAT:
+ size = bytesize = 4;
+ break;
+ case FieldsWriter.FIELD_IS_NUMERIC_LONG:
+ case FieldsWriter.FIELD_IS_NUMERIC_DOUBLE:
+ size = bytesize = 8;
+ break;
+ default:
+ throw new FieldReaderException("Invalid numeric type: " + Integer.toHexString(numeric));
+ }
byte[] sizebytes = new byte[4];
sizebytes[0] = (byte) (bytesize>>>24);
sizebytes[1] = (byte) (bytesize>>>16);
@@ -358,7 +407,7 @@ public final class FieldsReader implemen
}
/**
- * A Lazy implementation of Fieldable that differs loading of fields until asked for, instead of when the Document is
+ * A Lazy implementation of Fieldable that defers loading of fields until asked for, instead of when the Document is
* loaded.
*/
private class LazyField extends AbstractField implements Fieldable {
Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/FieldsWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/FieldsWriter.java?rev=1102677&r1=1102676&r2=1102677&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/FieldsWriter.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/FieldsWriter.java Fri May 13 11:18:19 2011
@@ -21,22 +21,40 @@ import java.util.List;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Fieldable;
+import org.apache.lucene.document.NumericField;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.IOUtils;
final class FieldsWriter {
- static final byte FIELD_IS_TOKENIZED = 0x1;
- static final byte FIELD_IS_BINARY = 0x2;
+ static final int FIELD_IS_TOKENIZED = 1 << 0;
+ static final int FIELD_IS_BINARY = 1 << 1;
+ // the old bit 1 << 2 was compressed, is now left out
+
+ private static final int _NUMERIC_BIT_SHIFT = 3;
+ static final int FIELD_IS_NUMERIC_MASK = 0x07 << _NUMERIC_BIT_SHIFT;
+
+ static final int FIELD_IS_NUMERIC_INT = 1 << _NUMERIC_BIT_SHIFT;
+ static final int FIELD_IS_NUMERIC_LONG = 2 << _NUMERIC_BIT_SHIFT;
+ static final int FIELD_IS_NUMERIC_FLOAT = 3 << _NUMERIC_BIT_SHIFT;
+ static final int FIELD_IS_NUMERIC_DOUBLE = 4 << _NUMERIC_BIT_SHIFT;
+ // currently unused: static final int FIELD_IS_NUMERIC_SHORT = 5 << _NUMERIC_BIT_SHIFT;
+ // currently unused: static final int FIELD_IS_NUMERIC_BYTE = 6 << _NUMERIC_BIT_SHIFT;
+
+ // the next possible bits are: 1 << 6; 1 << 7
+
// Lucene 3.0: Removal of compressed fields
static final int FORMAT_LUCENE_3_0_NO_COMPRESSED_FIELDS = 2;
+ // Lucene 3.2: NumericFields are stored in binary format
+ static final int FORMAT_LUCENE_3_2_NUMERIC_FIELDS = 3;
+
// NOTE: if you introduce a new format, make it 1 higher
// than the current one, and always change this if you
// switch to a new format!
- static final int FORMAT_CURRENT = FORMAT_LUCENE_3_0_NO_COMPRESSED_FIELDS;
+ static final int FORMAT_CURRENT = FORMAT_LUCENE_3_2_NUMERIC_FIELDS;
// when removing support for old versions, leave the last supported version here
static final int FORMAT_MINIMUM = FORMAT_LUCENE_3_0_NO_COMPRESSED_FIELDS;
@@ -121,13 +139,26 @@ final class FieldsWriter {
final void writeField(int fieldNumber, Fieldable field) throws IOException {
fieldsStream.writeVInt(fieldNumber);
- byte bits = 0;
+ int bits = 0;
if (field.isTokenized())
- bits |= FieldsWriter.FIELD_IS_TOKENIZED;
+ bits |= FIELD_IS_TOKENIZED;
if (field.isBinary())
- bits |= FieldsWriter.FIELD_IS_BINARY;
-
- fieldsStream.writeByte(bits);
+ bits |= FIELD_IS_BINARY;
+ if (field instanceof NumericField) {
+ switch (((NumericField) field).getDataType()) {
+ case INT:
+ bits |= FIELD_IS_NUMERIC_INT; break;
+ case LONG:
+ bits |= FIELD_IS_NUMERIC_LONG; break;
+ case FLOAT:
+ bits |= FIELD_IS_NUMERIC_FLOAT; break;
+ case DOUBLE:
+ bits |= FIELD_IS_NUMERIC_DOUBLE; break;
+ default:
+ assert false : "Should never get here";
+ }
+ }
+ fieldsStream.writeByte((byte) bits);
if (field.isBinary()) {
final byte[] data;
@@ -139,8 +170,22 @@ final class FieldsWriter {
fieldsStream.writeVInt(len);
fieldsStream.writeBytes(data, offset, len);
- }
- else {
+ } else if (field instanceof NumericField) {
+ final NumericField nf = (NumericField) field;
+ final Number n = nf.getNumericValue();
+ switch (nf.getDataType()) {
+ case INT:
+ fieldsStream.writeInt(n.intValue()); break;
+ case LONG:
+ fieldsStream.writeLong(n.longValue()); break;
+ case FLOAT:
+ fieldsStream.writeInt(Float.floatToIntBits(n.floatValue())); break;
+ case DOUBLE:
+ fieldsStream.writeLong(Double.doubleToLongBits(n.doubleValue())); break;
+ default:
+ assert false : "Should never get here";
+ }
+ } else {
fieldsStream.writeString(field.stringValue());
}
}
Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/IndexFileDeleter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/IndexFileDeleter.java?rev=1102677&r1=1102676&r2=1102677&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/IndexFileDeleter.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/IndexFileDeleter.java Fri May 13 11:18:19 2011
@@ -22,6 +22,7 @@ import java.io.FilenameFilter;
import java.io.IOException;
import java.io.PrintStream;
import java.util.ArrayList;
+import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.Date;
@@ -196,7 +197,31 @@ final class IndexFileDeleter {
}
}
if (sis != null) {
- CommitPoint commitPoint = new CommitPoint(commitsToDelete, directory, sis);
+ final SegmentInfos infos = sis;
+ for (SegmentInfo segmentInfo : infos) {
+ try {
+ /*
+ * Force FI to load for each segment since we could see a
+ * segments file and load successfully above if the files are
+ * still referenced when they are deleted and the os doesn't let
+ * you delete them. Yet it's likely that fnm files are removed
+ * while the seg file is still around. Since LUCENE-2984 we need FI
+ * to find out if a seg has vectors and prox, so we need those
+ * files to be opened for a commit point.
+ */
+ segmentInfo.getFieldInfos();
+ } catch (FileNotFoundException e) {
+ refresh(segmentInfo.name);
+ sis = null;
+ if (infoStream != null) {
+ message("init: hit FileNotFoundException when loading commit \"" + fileName + "\"; skipping this commit point");
+ }
+ }
+ }
+
+ }
+ if (sis != null) {
+ final CommitPoint commitPoint = new CommitPoint(commitsToDelete, directory, sis);
if (sis.getGeneration() == segmentInfos.getGeneration()) {
currentCommitPoint = commitPoint;
}
Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/IndexReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/IndexReader.java?rev=1102677&r1=1102676&r2=1102677&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/IndexReader.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/IndexReader.java Fri May 13 11:18:19 2011
@@ -1428,7 +1428,7 @@ public abstract class IndexReader implem
cfr = new CompoundFileReader(dir, filename);
String [] files = cfr.listAll();
- ArrayUtil.quickSort(files); // sort the array of filename so that the output is more readable
+ ArrayUtil.mergeSort(files); // sort the array of filename so that the output is more readable
for (int i = 0; i < files.length; ++i) {
long len = cfr.fileLength(files[i]);
Modified: lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/IndexWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/IndexWriter.java?rev=1102677&r1=1102676&r2=1102677&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/IndexWriter.java (original)
+++ lucene/dev/branches/docvalues/lucene/src/java/org/apache/lucene/index/IndexWriter.java Fri May 13 11:18:19 2011
@@ -421,7 +421,7 @@ public class IndexWriter implements Clos
private final Map<SegmentInfo,SegmentReader> readerMap = new HashMap<SegmentInfo,SegmentReader>();
/** Forcefully clear changes for the specified segments. This is called on successful merge. */
- synchronized void clear(SegmentInfos infos) throws IOException {
+ synchronized void clear(List<SegmentInfo> infos) throws IOException {
if (infos == null) {
for (Map.Entry<SegmentInfo,SegmentReader> ent: readerMap.entrySet()) {
ent.getValue().hasChanges = false;
@@ -511,7 +511,7 @@ public class IndexWriter implements Clos
return false;
}
- public synchronized void drop(SegmentInfos infos) throws IOException {
+ public synchronized void drop(List<SegmentInfo> infos) throws IOException {
for(SegmentInfo info : infos) {
drop(info);
}
@@ -2355,7 +2355,7 @@ public class IndexWriter implements Clos
String mergedName = newSegmentName();
SegmentMerger merger = new SegmentMerger(directory, config.getTermIndexInterval(),
- mergedName, null, codecs, payloadProcessorProvider,
+ mergedName, null, payloadProcessorProvider,
globalFieldNumberMap.newFieldInfos(SegmentCodecsBuilder.create(codecs)));
for (IndexReader reader : readers) // add new indexes
@@ -2365,8 +2365,7 @@ public class IndexWriter implements Clos
final FieldInfos fieldInfos = merger.fieldInfos();
SegmentInfo info = new SegmentInfo(mergedName, docCount, directory,
- false, fieldInfos.hasProx(), merger.getSegmentCodecs(),
- fieldInfos.hasVectors(),
+ false, merger.getSegmentCodecs(),
fieldInfos);
setDiagnostics(info, "addIndexes(IndexReader...)");
@@ -2729,7 +2728,7 @@ public class IndexWriter implements Clos
assert testPoint("startCommitMergeDeletes");
- final SegmentInfos sourceSegments = merge.segments;
+ final List<SegmentInfo> sourceSegments = merge.segments;
if (infoStream != null)
message("commitMergeDeletes " + merge.segString(directory));
@@ -2741,7 +2740,7 @@ public class IndexWriter implements Clos
long minGen = Long.MAX_VALUE;
for(int i=0; i < sourceSegments.size(); i++) {
- SegmentInfo info = sourceSegments.info(i);
+ SegmentInfo info = sourceSegments.get(i);
minGen = Math.min(info.getBufferedDeletesGen(), minGen);
int docCount = info.docCount;
final SegmentReader previousReader = merge.readerClones.get(i);
@@ -3041,7 +3040,16 @@ public class IndexWriter implements Clos
// is running (while synchronized) to avoid race
// condition where two conflicting merges from different
// threads, start
- message("registerMerge merging=" + mergingSegments);
+ if (infoStream != null) {
+ StringBuilder builder = new StringBuilder("registerMerge merging= [");
+ for (SegmentInfo info : mergingSegments) {
+ builder.append(info.name).append(", ");
+ }
+ builder.append("]");
+ // don't call mergingSegments.toString(): it could lead to a ConcurrentModificationException
+ // since merge updates the segments' FieldInfos
+ message(builder.toString());
+ }
for(SegmentInfo info : merge.segments) {
message("registerMerge info=" + info);
mergingSegments.add(info);
@@ -3094,7 +3102,7 @@ public class IndexWriter implements Clos
// Bind a new segment name here so even with
// ConcurrentMergePolicy we keep deterministic segment
// names.
- merge.info = new SegmentInfo(newSegmentName(), 0, directory, false, false, null, false, globalFieldNumberMap.newFieldInfos(SegmentCodecsBuilder.create(codecs)));
+ merge.info = new SegmentInfo(newSegmentName(), 0, directory, false, null, globalFieldNumberMap.newFieldInfos(SegmentCodecsBuilder.create(codecs)));
// Lock order: IW -> BD
final BufferedDeletesStream.ApplyDeletesResult result = bufferedDeletesStream.applyDeletes(readerPool, merge.segments);
@@ -3133,6 +3141,16 @@ public class IndexWriter implements Clos
message("merge seg=" + merge.info.name);
}
+ assert merge.estimatedMergeBytes == 0;
+ for(SegmentInfo info : merge.segments) {
+ if (info.docCount > 0) {
+ final int delCount = numDeletedDocs(info);
+ assert delCount <= info.docCount;
+ final double delRatio = ((double) delCount)/info.docCount;
+ merge.estimatedMergeBytes += info.sizeInBytes(true) * (1.0 - delRatio);
+ }
+ }
+
// TODO: I think this should no longer be needed (we
// now build CFS before adding segment to the infos);
// however, on removing it, tests fail for some reason!
@@ -3174,7 +3192,7 @@ public class IndexWriter implements Clos
// It's possible we are called twice, eg if there was an
// exception inside mergeInit
if (merge.registerDone) {
- final SegmentInfos sourceSegments = merge.segments;
+ final List<SegmentInfo> sourceSegments = merge.segments;
for(SegmentInfo info : sourceSegments) {
mergingSegments.remove(info);
}
@@ -3245,21 +3263,17 @@ public class IndexWriter implements Clos
int mergedDocCount = 0;
- SegmentInfos sourceSegments = merge.segments;
+ List<SegmentInfo> sourceSegments = merge.segments;
SegmentMerger merger = new SegmentMerger(directory, config.getTermIndexInterval(), mergedName, merge,
- codecs, payloadProcessorProvider,
- merge.info.getFieldInfos());
+ payloadProcessorProvider, merge.info.getFieldInfos());
if (infoStream != null) {
- message("merging " + merge.segString(directory) + " mergeVectors=" + merger.fieldInfos().hasVectors());
+ message("merging " + merge.segString(directory) + " mergeVectors=" + merge.info.getFieldInfos().hasVectors());
}
merge.readers = new ArrayList<SegmentReader>();
merge.readerClones = new ArrayList<SegmentReader>();
-
- merge.estimatedMergeBytes = 0;
-
// This is try/finally to make sure merger's readers are
// closed:
boolean success = false;
@@ -3268,7 +3282,7 @@ public class IndexWriter implements Clos
int segUpto = 0;
while(segUpto < sourceSegments.size()) {
- final SegmentInfo info = sourceSegments.info(segUpto);
+ final SegmentInfo info = sourceSegments.get(segUpto);
// Hold onto the "live" reader; we will use this to
// commit merged deletes
@@ -3277,13 +3291,6 @@ public class IndexWriter implements Clos
-config.getReaderTermsIndexDivisor());
merge.readers.add(reader);
- final int readerMaxDoc = reader.maxDoc();
- if (readerMaxDoc > 0) {
- final int delCount = reader.numDeletedDocs();
- final double delRatio = ((double) delCount)/readerMaxDoc;
- merge.estimatedMergeBytes += info.sizeInBytes(true) * (1.0 - delRatio);
- }
-
// We clone the segment readers because other
// deletes may come in while we're merging so we
// need readers that will not change
@@ -3308,8 +3315,6 @@ public class IndexWriter implements Clos
// Record which codec was used to write the segment
merge.info.setSegmentCodecs(merger.getSegmentCodecs());
- // Record if we have merged vectors
- merge.info.setHasVectors(merger.fieldInfos().hasVectors());
if (infoStream != null) {
message("merge segmentCodecs=" + merger.getSegmentCodecs());
@@ -3323,13 +3328,11 @@ public class IndexWriter implements Clos
// because codec must know if prox was written for
// this segment:
//System.out.println("merger set hasProx=" + merger.hasProx() + " seg=" + merge.info.name);
- merge.info.setHasProx(merger.fieldInfos().hasProx());
-
boolean useCompoundFile;
synchronized (this) { // Guard segmentInfos
useCompoundFile = mergePolicy.useCompoundFile(segmentInfos, merge.info);
}
-
+
if (useCompoundFile) {
success = false;
final String compoundFileName = IndexFileNames.segmentFileName(mergedName, "", IndexFileNames.COMPOUND_FILE_EXTENSION);
@@ -3469,14 +3472,14 @@ public class IndexWriter implements Clos
}
/** @lucene.internal */
- public synchronized String segString(SegmentInfos infos) throws IOException {
+ public synchronized String segString(List<SegmentInfo> infos) throws IOException {
StringBuilder buffer = new StringBuilder();
final int count = infos.size();
for(int i = 0; i < count; i++) {
if (i > 0) {
buffer.append(' ');
}
- buffer.append(segString(infos.info(i)));
+ buffer.append(segString(infos.get(i)));
}
return buffer.toString();
@@ -3531,6 +3534,7 @@ public class IndexWriter implements Clos
// called only from assert
private boolean filesExist(SegmentInfos toSync) throws IOException {
+
Collection<String> files = toSync.files(directory, false);
for(final String fileName: files) {
assert directory.fileExists(fileName): "file " + fileName + " does not exist";