You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by sa...@apache.org on 2011/05/22 23:45:45 UTC
svn commit: r1126234 [5/28] - in /lucene/dev/branches/solr2452: ./ dev-tools/eclipse/ dev-tools/idea/ dev-tools/idea/.idea/ dev-tools/idea/lucene/ dev-tools/idea/lucene/contrib/ant/ dev-tools/idea/lucene/contrib/db/bdb-je/ dev-tools/idea/lucene/contrib...

Modified: lucene/dev/branches/solr2452/lucene/docs/fileformats.html
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr2452/lucene/docs/fileformats.html?rev=1126234&r1=1126233&r2=1126234&view=diff
==============================================================================
--- lucene/dev/branches/solr2452/lucene/docs/fileformats.html (original)
+++ lucene/dev/branches/solr2452/lucene/docs/fileformats.html Sun May 22 21:45:19 2011
@@ -3,7 +3,7 @@
 <head>
 <META http-equiv="Content-Type" content="text/html; charset=UTF-8">
 <meta content="Apache Forrest" name="Generator">
-<meta name="Forrest-version" content="0.8">
+<meta name="Forrest-version" content="0.9">
 <meta name="Forrest-skin-name" content="lucene">
 <title>
             Apache Lucene - Index File Formats
@@ -425,11 +425,19 @@ document.write("Last Published: " + docu
 <p>
             In version 3.1, segments records the code version
             that created them. See LUCENE-2720 for details.
+            
+            Additionally segments track explicitly whether or
+            not they have term vectors. See LUCENE-2811 for details.
+           </p>
+<p>
+            In version 3.2, numeric fields are written as natively
+            to stored fields file, previously they were stored in
+            text format only.
            </p>
 </div>
 
         
-<a name="N10037"></a><a name="Definitions"></a>
+<a name="N1003A"></a><a name="Definitions"></a>
 <h2 class="boxed">Definitions</h2>
 <div class="section">
 <p>
@@ -470,7 +478,7 @@ document.write("Last Published: " + docu
                 strings, the first naming the field, and the second naming text
                 within the field.
             </p>
-<a name="N10057"></a><a name="Inverted Indexing"></a>
+<a name="N1005A"></a><a name="Inverted Indexing"></a>
 <h3 class="boxed">Inverted Indexing</h3>
 <p>
                     The index stores statistics about terms in order
@@ -480,7 +488,7 @@ document.write("Last Published: " + docu
                     it.  This is the inverse of the natural relationship, in which
                     documents list terms.
                 </p>
-<a name="N10063"></a><a name="Types of Fields"></a>
+<a name="N10066"></a><a name="Types of Fields"></a>
 <h3 class="boxed">Types of Fields</h3>
 <p>
                     In Lucene, fields may be <i>stored</i>, in which
@@ -494,7 +502,7 @@ document.write("Last Published: " + docu
                     to be indexed literally.
                 </p>
 <p>See the <a href="api/core/org/apache/lucene/document/Field.html">Field</a> java docs for more information on Fields.</p>
-<a name="N10080"></a><a name="Segments"></a>
+<a name="N10083"></a><a name="Segments"></a>
 <h3 class="boxed">Segments</h3>
 <p>
                     Lucene indexes may be composed of multiple sub-indexes, or
@@ -520,7 +528,7 @@ document.write("Last Published: " + docu
                     Searches may involve multiple segments and/or multiple indexes, each
                     index potentially composed of a set of segments.
                 </p>
-<a name="N1009E"></a><a name="Document Numbers"></a>
+<a name="N100A1"></a><a name="Document Numbers"></a>
 <h3 class="boxed">Document Numbers</h3>
 <p>
                     Internally, Lucene refers to documents by an integer <i>document
@@ -575,7 +583,7 @@ document.write("Last Published: " + docu
 </div>
 
         
-<a name="N100C5"></a><a name="Overview"></a>
+<a name="N100C8"></a><a name="Overview"></a>
 <h2 class="boxed">Overview</h2>
 <div class="section">
 <p>
@@ -674,7 +682,7 @@ document.write("Last Published: " + docu
 </div>
 
         
-<a name="N10108"></a><a name="File Naming"></a>
+<a name="N1010B"></a><a name="File Naming"></a>
 <h2 class="boxed">File Naming</h2>
 <div class="section">
 <p>
@@ -701,7 +709,7 @@ document.write("Last Published: " + docu
             </p>
 </div>
       
-<a name="N10117"></a><a name="file-names"></a>
+<a name="N1011A"></a><a name="file-names"></a>
 <h2 class="boxed">Summary of File Extensions</h2>
 <div class="section">
 <p>The following table summarizes the names and extensions of the files in Lucene:
@@ -843,10 +851,10 @@ document.write("Last Published: " + docu
 </div>
 
         
-<a name="N10201"></a><a name="Primitive Types"></a>
+<a name="N10204"></a><a name="Primitive Types"></a>
 <h2 class="boxed">Primitive Types</h2>
 <div class="section">
-<a name="N10206"></a><a name="Byte"></a>
+<a name="N10209"></a><a name="Byte"></a>
 <h3 class="boxed">Byte</h3>
 <p>
                     The most primitive type
@@ -854,7 +862,7 @@ document.write("Last Published: " + docu
                     other data types are defined as sequences
                     of bytes, so file formats are byte-order independent.
                 </p>
-<a name="N1020F"></a><a name="UInt32"></a>
+<a name="N10212"></a><a name="UInt32"></a>
 <h3 class="boxed">UInt32</h3>
 <p>
                     32-bit unsigned integers are written as four
@@ -864,7 +872,7 @@ document.write("Last Published: " + docu
                     UInt32    --&gt; &lt;Byte&gt;<sup>4</sup>
                 
 </p>
-<a name="N1021E"></a><a name="Uint64"></a>
+<a name="N10221"></a><a name="Uint64"></a>
 <h3 class="boxed">Uint64</h3>
 <p>
                     64-bit unsigned integers are written as eight
@@ -873,7 +881,7 @@ document.write("Last Published: " + docu
 <p>UInt64    --&gt; &lt;Byte&gt;<sup>8</sup>
                 
 </p>
-<a name="N1022D"></a><a name="VInt"></a>
+<a name="N10230"></a><a name="VInt"></a>
 <h3 class="boxed">VInt</h3>
 <p>
                     A variable-length format for positive integers is
@@ -1423,13 +1431,13 @@ document.write("Last Published: " + docu
                     This provides compression while still being
                     efficient to decode.
                 </p>
-<a name="N10512"></a><a name="Chars"></a>
+<a name="N10515"></a><a name="Chars"></a>
 <h3 class="boxed">Chars</h3>
 <p>
                     Lucene writes unicode
                     character sequences as UTF-8 encoded bytes.
                 </p>
-<a name="N1051B"></a><a name="String"></a>
+<a name="N1051E"></a><a name="String"></a>
 <h3 class="boxed">String</h3>
 <p>
 		    Lucene writes strings as UTF-8 encoded bytes.
@@ -1442,10 +1450,10 @@ document.write("Last Published: " + docu
 </div>
 
         
-<a name="N10528"></a><a name="Compound Types"></a>
+<a name="N1052B"></a><a name="Compound Types"></a>
 <h2 class="boxed">Compound Types</h2>
 <div class="section">
-<a name="N1052D"></a><a name="MapStringString"></a>
+<a name="N10530"></a><a name="MapStringString"></a>
 <h3 class="boxed">Map&lt;String,String&gt;</h3>
 <p>
 		    In a couple places Lucene stores a Map
@@ -1458,13 +1466,13 @@ document.write("Last Published: " + docu
 </div>
 
         
-<a name="N1053D"></a><a name="Per-Index Files"></a>
+<a name="N10540"></a><a name="Per-Index Files"></a>
 <h2 class="boxed">Per-Index Files</h2>
 <div class="section">
 <p>
                 The files in this section exist one-per-index.
             </p>
-<a name="N10545"></a><a name="Segments File"></a>
+<a name="N10548"></a><a name="Segments File"></a>
 <h3 class="boxed">Segments File</h3>
 <p>
                     The active segments in the index are stored in the
@@ -1508,7 +1516,7 @@ document.write("Last Published: " + docu
 <b>3.1</b>
                     Segments --&gt; Format, Version, NameCounter, SegCount, &lt;SegVersion, SegName, SegSize, DelGen, DocStoreOffset, [DocStoreSegment, DocStoreIsCompoundFile], HasSingleNormFile, NumField,
                     NormGen<sup>NumField</sup>,
-                    IsCompoundFile, DeletionCount, HasProx, Diagnostics&gt;<sup>SegCount</sup>, CommitUserData, Checksum
+                    IsCompoundFile, DeletionCount, HasProx, Diagnostics, HasVectors&gt;<sup>SegCount</sup>, CommitUserData, Checksum
                 </p>
 <p>
                     Format, NameCounter, SegCount, SegSize, NumField,
@@ -1525,7 +1533,7 @@ document.write("Last Published: " + docu
 		</p>
 <p>
                     IsCompoundFile, HasSingleNormFile,
-                    DocStoreIsCompoundFile, HasProx --&gt; Int8
+                    DocStoreIsCompoundFile, HasProx, HasVectors --&gt; Int8
                 </p>
 <p>
 		    CommitUserData --&gt; Map&lt;String,String&gt;
@@ -1634,7 +1642,10 @@ document.write("Last Published: " + docu
 		    Lucene version, OS, Java version, why the segment
 		    was created (merge, flush, addIndexes), etc.
                 </p>
-<a name="N105CD"></a><a name="Lock File"></a>
+<p> HasVectors is 1 if this segment stores term vectors,
+            else it's 0.
+                </p>
+<a name="N105D3"></a><a name="Lock File"></a>
 <h3 class="boxed">Lock File</h3>
 <p>
                     The write lock, which is stored in the index
@@ -1648,14 +1659,14 @@ document.write("Last Published: " + docu
                     documents).  This lock file ensures that only one
                     writer is modifying the index at a time.
                 </p>
-<a name="N105D6"></a><a name="Deletable File"></a>
+<a name="N105DC"></a><a name="Deletable File"></a>
 <h3 class="boxed">Deletable File</h3>
 <p>
                     A writer dynamically computes
                     the files that are deletable, instead, so no file
                     is written.
                 </p>
-<a name="N105DF"></a><a name="Compound Files"></a>
+<a name="N105E5"></a><a name="Compound Files"></a>
 <h3 class="boxed">Compound Files</h3>
 <p>Starting with Lucene 1.4 the compound file format became default. This
                     is simply a container for all files described in the next section
@@ -1682,14 +1693,14 @@ document.write("Last Published: " + docu
 </div>
 
         
-<a name="N10607"></a><a name="Per-Segment Files"></a>
+<a name="N1060D"></a><a name="Per-Segment Files"></a>
 <h2 class="boxed">Per-Segment Files</h2>
 <div class="section">
 <p>
                 The remaining files are all per-segment, and are
                 thus defined by suffix.
             </p>
-<a name="N1060F"></a><a name="Fields"></a>
+<a name="N10615"></a><a name="Fields"></a>
 <h3 class="boxed">Fields</h3>
 <p>
                     
@@ -1862,13 +1873,29 @@ document.write("Last Published: " + docu
 <li>third bit is one for fields with compression option enabled
                                     (if compression is enabled, the algorithm used is ZLIB),
                                     only available for indexes until Lucene version 2.9.x</li>
+                                
+<li>4th to 6th bits (mask: 0x7&lt;&lt;3) define the type of a
+                                numeric field: <ul>
+                                  
+<li>all bits in mask are cleared if no numeric field at all</li>
+                                  
+<li>1&lt;&lt;3: Value is Int</li>
+                                  
+<li>2&lt;&lt;3: Value is Long</li>
+                                  
+<li>3&lt;&lt;3: Value is Int as Float (as of Integer.intBitsToFloat)</li>
+                                  
+<li>4&lt;&lt;3: Value is Long as Double (as of Double.longBitsToDouble)</li>
+                                
+</ul>
+</li>
                             
 </ul>
                         
 </p>
                         
 <p>Value --&gt;
-                            String | BinaryValue (depending on Bits)
+                            String | BinaryValue | Int | Long (depending on Bits)
                         </p>
                         
 <p>BinaryValue --&gt;
@@ -1883,7 +1910,7 @@ document.write("Last Published: " + docu
 </li>
                 
 </ol>
-<a name="N106B6"></a><a name="Term Dictionary"></a>
+<a name="N106D0"></a><a name="Term Dictionary"></a>
 <h3 class="boxed">Term Dictionary</h3>
 <p>
                     The term dictionary is represented as two files:
@@ -2075,7 +2102,7 @@ document.write("Last Published: " + docu
 </li>
                 
 </ol>
-<a name="N1073A"></a><a name="Frequencies"></a>
+<a name="N10754"></a><a name="Frequencies"></a>
 <h3 class="boxed">Frequencies</h3>
 <p>
                     The .frq file contains the lists of documents
@@ -2203,7 +2230,7 @@ document.write("Last Published: " + docu
                    entry in level-1. In the example has entry 15 on level 1 a pointer to entry 15 on level 0 and entry 31 on level 1 a pointer
                    to entry 31 on level 0.                   
                 </p>
-<a name="N107C2"></a><a name="Positions"></a>
+<a name="N107DC"></a><a name="Positions"></a>
 <h3 class="boxed">Positions</h3>
 <p>
                     The .prx file contains the lists of positions that
@@ -2273,7 +2300,7 @@ document.write("Last Published: " + docu
                     Payload. If PayloadLength is not stored, then this Payload has the same
                     length as the Payload at the previous position.
                 </p>
-<a name="N107FE"></a><a name="Normalization Factors"></a>
+<a name="N10818"></a><a name="Normalization Factors"></a>
 <h3 class="boxed">Normalization Factors</h3>
 <p>There's a single .nrm file containing all norms:
                 </p>
@@ -2353,7 +2380,7 @@ document.write("Last Published: " + docu
                 </p>
 <p>Separate norm files are created (when adequate) for both compound and non compound segments.
                 </p>
-<a name="N1084F"></a><a name="Term Vectors"></a>
+<a name="N10869"></a><a name="Term Vectors"></a>
 <h3 class="boxed">Term Vectors</h3>
 <p>
 		  Term Vector support is an optional on a field by
@@ -2489,7 +2516,7 @@ document.write("Last Published: " + docu
 </li>
                 
 </ol>
-<a name="N108EB"></a><a name="Deleted Documents"></a>
+<a name="N10905"></a><a name="Deleted Documents"></a>
 <h3 class="boxed">Deleted Documents</h3>
 <p>The .del file is
                     optional, and only exists when a segment contains deletions.
@@ -2553,7 +2580,7 @@ document.write("Last Published: " + docu
 </div>
 
         
-<a name="N10925"></a><a name="Limitations"></a>
+<a name="N1093F"></a><a name="Limitations"></a>
 <h2 class="boxed">Limitations</h2>
 <div class="section">
 <p>

Modified: lucene/dev/branches/solr2452/lucene/docs/fileformats.pdf
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr2452/lucene/docs/fileformats.pdf?rev=1126234&r1=1126233&r2=1126234&view=diff
==============================================================================
Files lucene/dev/branches/solr2452/lucene/docs/fileformats.pdf (original) and lucene/dev/branches/solr2452/lucene/docs/fileformats.pdf Sun May 22 21:45:19 2011 differ

Modified: lucene/dev/branches/solr2452/lucene/docs/gettingstarted.html
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr2452/lucene/docs/gettingstarted.html?rev=1126234&r1=1126233&r2=1126234&view=diff
==============================================================================
--- lucene/dev/branches/solr2452/lucene/docs/gettingstarted.html (original)
+++ lucene/dev/branches/solr2452/lucene/docs/gettingstarted.html Sun May 22 21:45:19 2011
@@ -3,7 +3,7 @@
 <head>
 <META http-equiv="Content-Type" content="text/html; charset=UTF-8">
 <meta content="Apache Forrest" name="Generator">
-<meta name="Forrest-version" content="0.8">
+<meta name="Forrest-version" content="0.9">
 <meta name="Forrest-skin-name" content="lucene">
 <title>
 	Apache Lucene - Getting Started Guide
@@ -268,15 +268,13 @@ may wish to skip sections.
 	
 <li>
 <a href="demo.html">About the command-line Lucene demo and its usage</a>.  This section
-	is intended for anyone who wants to use the command-line Lucene demo.</li> 
-<p></p>
+	is intended for anyone who wants to use the command-line Lucene demo.</li>
 
 	
 <li>
 <a href="demo2.html">About the sources and implementation for the command-line Lucene
 	demo</a>.  This section walks through the implementation details (sources) of the
-	command-line Lucene demo.  This section is intended for developers.</li> 
-<p></p>
+	command-line Lucene demo.  This section is intended for developers.</li>
 
 </ul>
 </div>

Modified: lucene/dev/branches/solr2452/lucene/docs/gettingstarted.pdf
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr2452/lucene/docs/gettingstarted.pdf?rev=1126234&r1=1126233&r2=1126234&view=diff
==============================================================================
Files lucene/dev/branches/solr2452/lucene/docs/gettingstarted.pdf (original) and lucene/dev/branches/solr2452/lucene/docs/gettingstarted.pdf Sun May 22 21:45:19 2011 differ

Modified: lucene/dev/branches/solr2452/lucene/docs/index.html
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr2452/lucene/docs/index.html?rev=1126234&r1=1126233&r2=1126234&view=diff
==============================================================================
--- lucene/dev/branches/solr2452/lucene/docs/index.html (original)
+++ lucene/dev/branches/solr2452/lucene/docs/index.html Sun May 22 21:45:19 2011
@@ -3,7 +3,7 @@
 <head>
 <META http-equiv="Content-Type" content="text/html; charset=UTF-8">
 <meta content="Apache Forrest" name="Generator">
-<meta name="Forrest-version" content="0.8">
+<meta name="Forrest-version" content="0.9">
 <meta name="Forrest-skin-name" content="lucene">
 <title>Lucene Java Documentation</title>
 <link type="text/css" href="skin/basic.css" rel="stylesheet">

Modified: lucene/dev/branches/solr2452/lucene/docs/index.pdf
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr2452/lucene/docs/index.pdf?rev=1126234&r1=1126233&r2=1126234&view=diff
==============================================================================
Files lucene/dev/branches/solr2452/lucene/docs/index.pdf (original) and lucene/dev/branches/solr2452/lucene/docs/index.pdf Sun May 22 21:45:19 2011 differ

Modified: lucene/dev/branches/solr2452/lucene/docs/linkmap.html
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr2452/lucene/docs/linkmap.html?rev=1126234&r1=1126233&r2=1126234&view=diff
==============================================================================
--- lucene/dev/branches/solr2452/lucene/docs/linkmap.html (original)
+++ lucene/dev/branches/solr2452/lucene/docs/linkmap.html Sun May 22 21:45:19 2011
@@ -3,7 +3,7 @@
 <head>
 <META http-equiv="Content-Type" content="text/html; charset=UTF-8">
 <meta content="Apache Forrest" name="Generator">
-<meta name="Forrest-version" content="0.8">
+<meta name="Forrest-version" content="0.9">
 <meta name="Forrest-skin-name" content="lucene">
 <title>Site Linkmap Table of Contents</title>
 <link type="text/css" href="skin/basic.css" rel="stylesheet">

Modified: lucene/dev/branches/solr2452/lucene/docs/linkmap.pdf
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr2452/lucene/docs/linkmap.pdf?rev=1126234&r1=1126233&r2=1126234&view=diff
==============================================================================
Files lucene/dev/branches/solr2452/lucene/docs/linkmap.pdf (original) and lucene/dev/branches/solr2452/lucene/docs/linkmap.pdf Sun May 22 21:45:19 2011 differ

Modified: lucene/dev/branches/solr2452/lucene/docs/lucene-contrib/index.html
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr2452/lucene/docs/lucene-contrib/index.html?rev=1126234&r1=1126233&r2=1126234&view=diff
==============================================================================
--- lucene/dev/branches/solr2452/lucene/docs/lucene-contrib/index.html (original)
+++ lucene/dev/branches/solr2452/lucene/docs/lucene-contrib/index.html Sun May 22 21:45:19 2011
@@ -3,7 +3,7 @@
 <head>
 <META http-equiv="Content-Type" content="text/html; charset=UTF-8">
 <meta content="Apache Forrest" name="Generator">
-<meta name="Forrest-version" content="0.8">
+<meta name="Forrest-version" content="0.9">
 <meta name="Forrest-skin-name" content="lucene">
 <title>
 	        Apache Lucene - Lucene Contrib

Modified: lucene/dev/branches/solr2452/lucene/docs/lucene-contrib/index.pdf
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr2452/lucene/docs/lucene-contrib/index.pdf?rev=1126234&r1=1126233&r2=1126234&view=diff
==============================================================================
Files lucene/dev/branches/solr2452/lucene/docs/lucene-contrib/index.pdf (original) and lucene/dev/branches/solr2452/lucene/docs/lucene-contrib/index.pdf Sun May 22 21:45:19 2011 differ

Modified: lucene/dev/branches/solr2452/lucene/docs/queryparsersyntax.html
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr2452/lucene/docs/queryparsersyntax.html?rev=1126234&r1=1126233&r2=1126234&view=diff
==============================================================================
--- lucene/dev/branches/solr2452/lucene/docs/queryparsersyntax.html (original)
+++ lucene/dev/branches/solr2452/lucene/docs/queryparsersyntax.html Sun May 22 21:45:19 2011
@@ -3,7 +3,7 @@
 <head>
 <META http-equiv="Content-Type" content="text/html; charset=UTF-8">
 <meta content="Apache Forrest" name="Generator">
-<meta name="Forrest-version" content="0.8">
+<meta name="Forrest-version" content="0.9">
 <meta name="Forrest-skin-name" content="lucene">
 <title>
 	Apache Lucene - Query Parser Syntax

Modified: lucene/dev/branches/solr2452/lucene/docs/queryparsersyntax.pdf
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr2452/lucene/docs/queryparsersyntax.pdf?rev=1126234&r1=1126233&r2=1126234&view=diff
==============================================================================
Files lucene/dev/branches/solr2452/lucene/docs/queryparsersyntax.pdf (original) and lucene/dev/branches/solr2452/lucene/docs/queryparsersyntax.pdf Sun May 22 21:45:19 2011 differ

Modified: lucene/dev/branches/solr2452/lucene/docs/scoring.html
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr2452/lucene/docs/scoring.html?rev=1126234&r1=1126233&r2=1126234&view=diff
==============================================================================
--- lucene/dev/branches/solr2452/lucene/docs/scoring.html (original)
+++ lucene/dev/branches/solr2452/lucene/docs/scoring.html Sun May 22 21:45:19 2011
@@ -3,7 +3,7 @@
 <head>
 <META http-equiv="Content-Type" content="text/html; charset=UTF-8">
 <meta content="Apache Forrest" name="Generator">
-<meta name="Forrest-version" content="0.8">
+<meta name="Forrest-version" content="0.9">
 <meta name="Forrest-skin-name" content="lucene">
 <title>
 	Apache Lucene - Scoring

Modified: lucene/dev/branches/solr2452/lucene/docs/scoring.pdf
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr2452/lucene/docs/scoring.pdf?rev=1126234&r1=1126233&r2=1126234&view=diff
==============================================================================
Files lucene/dev/branches/solr2452/lucene/docs/scoring.pdf (original) and lucene/dev/branches/solr2452/lucene/docs/scoring.pdf Sun May 22 21:45:19 2011 differ

Modified: lucene/dev/branches/solr2452/lucene/docs/systemrequirements.html
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr2452/lucene/docs/systemrequirements.html?rev=1126234&r1=1126233&r2=1126234&view=diff
==============================================================================
--- lucene/dev/branches/solr2452/lucene/docs/systemrequirements.html (original)
+++ lucene/dev/branches/solr2452/lucene/docs/systemrequirements.html Sun May 22 21:45:19 2011
@@ -3,7 +3,7 @@
 <head>
 <META http-equiv="Content-Type" content="text/html; charset=UTF-8">
 <meta content="Apache Forrest" name="Generator">
-<meta name="Forrest-version" content="0.8">
+<meta name="Forrest-version" content="0.9">
 <meta name="Forrest-skin-name" content="lucene">
 <title>Apache Lucene - System Requirements</title>
 <link type="text/css" href="skin/basic.css" rel="stylesheet">

Modified: lucene/dev/branches/solr2452/lucene/docs/systemrequirements.pdf
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr2452/lucene/docs/systemrequirements.pdf?rev=1126234&r1=1126233&r2=1126234&view=diff
==============================================================================
Binary files - no diff available.

Modified: lucene/dev/branches/solr2452/lucene/src/java/org/apache/lucene/document/Document.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr2452/lucene/src/java/org/apache/lucene/document/Document.java?rev=1126234&r1=1126233&r2=1126234&view=diff
==============================================================================
--- lucene/dev/branches/solr2452/lucene/src/java/org/apache/lucene/document/Document.java (original)
+++ lucene/dev/branches/solr2452/lucene/src/java/org/apache/lucene/document/Document.java Sun May 22 21:45:19 2011
@@ -131,8 +131,13 @@ public final class Document {
   /** Returns a field with the given name if any exist in this document, or
    * null.  If multiple fields exists with this name, this method returns the
    * first value added.
-   * Do not use this method with lazy loaded fields.
+   * Do not use this method with lazy loaded fields or {@link NumericField}.
+   * @deprecated use {@link #getFieldable} instead and cast depending on
+   * data type.
+   * @throws ClassCastException if you try to retrieve a numerical or
+   * lazy loaded field.
    */
+  @Deprecated
   public final Field getField(String name) {
     return (Field) getFieldable(name);
   }
@@ -154,6 +159,8 @@ public final class Document {
    * this document, or null.  If multiple fields exist with this name, this
    * method returns the first value added. If only binary fields with this name
    * exist, returns null.
+   * For {@link NumericField} it returns the string value of the number. If you want
+   * the actual {@code NumericField} instance back, use {@link #getFieldable}.
    */
   public final String get(String name) {
    for (Fieldable field : fields) {
@@ -177,13 +184,18 @@ public final class Document {
   
   /**
    * Returns an array of {@link Field}s with the given name.
-   * Do not use with lazy loaded fields.
    * This method returns an empty array when there are no
    * matching fields.  It never returns null.
+   * Do not use this method with lazy loaded fields or {@link NumericField}.
    *
    * @param name the name of the field
    * @return a <code>Field[]</code> array
+   * @deprecated use {@link #getFieldable} instead and cast depending on
+   * data type.
+   * @throws ClassCastException if you try to retrieve a numerical or
+   * lazy loaded field.
    */
+   @Deprecated
    public final Field[] getFields(String name) {
      List<Field> result = new ArrayList<Field>();
      for (Fieldable field : fields) {
@@ -230,6 +242,8 @@ public final class Document {
    * Returns an array of values of the field specified as the method parameter.
    * This method returns an empty array when there are no
    * matching fields.  It never returns null.
+   * For {@link NumericField}s it returns the string value of the number. If you want
+   * the actual {@code NumericField} instances back, use {@link #getFieldables}.
    * @param name the name of the field
    * @return a <code>String[]</code> of field values
    */

Modified: lucene/dev/branches/solr2452/lucene/src/java/org/apache/lucene/document/NumericField.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr2452/lucene/src/java/org/apache/lucene/document/NumericField.java?rev=1126234&r1=1126233&r2=1126234&view=diff
==============================================================================
--- lucene/dev/branches/solr2452/lucene/src/java/org/apache/lucene/document/NumericField.java (original)
+++ lucene/dev/branches/solr2452/lucene/src/java/org/apache/lucene/document/NumericField.java Sun May 22 21:45:19 2011
@@ -127,18 +127,18 @@ import org.apache.lucene.search.FieldCac
  * class is a wrapper around this token stream type for
  * easier, more intuitive usage.</p>
  *
- * <p><b>NOTE:</b> This class is only used during
- * indexing. When retrieving the stored field value from a
- * {@link Document} instance after search, you will get a
- * conventional {@link Fieldable} instance where the numeric
- * values are returned as {@link String}s (according to
- * <code>toString(value)</code> of the used data type).
- *
  * @since 2.9
  */
 public final class NumericField extends AbstractField {
 
-  private final NumericTokenStream numericTS;
+  /** Data type of the value in {@link NumericField}.
+   * @since 3.2
+   */
+  public static enum DataType { INT, LONG, FLOAT, DOUBLE }
+
+  private transient NumericTokenStream numericTS;
+  private DataType type;
+  private final int precisionStep;
 
   /**
    * Creates a field for numeric values using the default <code>precisionStep</code>
@@ -158,8 +158,8 @@ public final class NumericField extends 
    * a numeric value, before indexing a document containing this field,
    * set a value using the various set<em>???</em>Value() methods.
    * @param name the field name
-   * @param store if the field should be stored in plain text form
-   *  (according to <code>toString(value)</code> of the used data type)
+   * @param store if the field should be stored, {@link Document#getFieldable}
+   * then returns {@code NumericField} instances on search results.
    * @param index if the field should be indexed using {@link NumericTokenStream}
    */
   public NumericField(String name, Field.Store store, boolean index) {
@@ -186,19 +186,43 @@ public final class NumericField extends 
    * set a value using the various set<em>???</em>Value() methods.
    * @param name the field name
    * @param precisionStep the used <a href="../search/NumericRangeQuery.html#precisionStepDesc">precision step</a>
-   * @param store if the field should be stored in plain text form
-   *  (according to <code>toString(value)</code> of the used data type)
+   * @param store if the field should be stored, {@link Document#getFieldable}
+   * then returns {@code NumericField} instances on search results.
    * @param index if the field should be indexed using {@link NumericTokenStream}
    */
   public NumericField(String name, int precisionStep, Field.Store store, boolean index) {
     super(name, store, index ? Field.Index.ANALYZED_NO_NORMS : Field.Index.NO, Field.TermVector.NO);
+    this.precisionStep = precisionStep;
     setOmitTermFreqAndPositions(true);
-    numericTS = new NumericTokenStream(precisionStep);
   }
 
   /** Returns a {@link NumericTokenStream} for indexing the numeric value. */
   public TokenStream tokenStreamValue()   {
-    return isIndexed() ? numericTS : null;
+    if (!isIndexed())
+      return null;
+    if (numericTS == null) {
+      // lazy init the TokenStream as it is heavy to instantiate (attributes,...),
+      // if not needed (stored field loading)
+      numericTS = new NumericTokenStream(precisionStep);
+      // initialize value in TokenStream
+      if (fieldsData != null) {
+        assert type != null;
+        final Number val = (Number) fieldsData;
+        switch (type) {
+          case INT:
+            numericTS.setIntValue(val.intValue()); break;
+          case LONG:
+            numericTS.setLongValue(val.longValue()); break;
+          case FLOAT:
+            numericTS.setFloatValue(val.floatValue()); break;
+          case DOUBLE:
+            numericTS.setDoubleValue(val.doubleValue()); break;
+          default:
+            assert false : "Should never get here";
+        }
+      }
+    }
+    return numericTS;
   }
   
   /** Returns always <code>null</code> for numeric fields */
@@ -212,7 +236,10 @@ public final class NumericField extends 
     return null;
   }
     
-  /** Returns the numeric value as a string (how it is stored, when {@link Field.Store#YES} is chosen). */
+  /** Returns the numeric value as a string. This format is also returned if you call {@link Document#get(String)}
+   * on search results. It is recommended to use {@link Document#getFieldable} instead
+   * that returns {@code NumericField} instances. You can then use {@link #getNumericValue}
+   * to return the stored value. */
   public String stringValue()   {
     return (fieldsData == null) ? null : fieldsData.toString();
   }
@@ -224,7 +251,14 @@ public final class NumericField extends 
   
   /** Returns the precision step. */
   public int getPrecisionStep() {
-    return numericTS.getPrecisionStep();
+    return precisionStep;
+  }
+  
+  /** Returns the data type of the current value, {@code null} if not yet set.
+   * @since 3.2
+   */
+  public DataType getDataType() {
+    return type;
   }
   
   /**
@@ -234,8 +268,9 @@ public final class NumericField extends 
    * <code>document.add(new NumericField(name, precisionStep).setLongValue(value))</code>
    */
   public NumericField setLongValue(final long value) {
-    numericTS.setLongValue(value);
+    if (numericTS != null) numericTS.setLongValue(value);
     fieldsData = Long.valueOf(value);
+    type = DataType.LONG;
     return this;
   }
   
@@ -246,8 +281,9 @@ public final class NumericField extends 
    * <code>document.add(new NumericField(name, precisionStep).setIntValue(value))</code>
    */
   public NumericField setIntValue(final int value) {
-    numericTS.setIntValue(value);
+    if (numericTS != null) numericTS.setIntValue(value);
     fieldsData = Integer.valueOf(value);
+    type = DataType.INT;
     return this;
   }
   
@@ -258,8 +294,9 @@ public final class NumericField extends 
    * <code>document.add(new NumericField(name, precisionStep).setDoubleValue(value))</code>
    */
   public NumericField setDoubleValue(final double value) {
-    numericTS.setDoubleValue(value);
+    if (numericTS != null) numericTS.setDoubleValue(value);
     fieldsData = Double.valueOf(value);
+    type = DataType.DOUBLE;
     return this;
   }
   
@@ -270,8 +307,9 @@ public final class NumericField extends 
    * <code>document.add(new NumericField(name, precisionStep).setFloatValue(value))</code>
    */
   public NumericField setFloatValue(final float value) {
-    numericTS.setFloatValue(value);
+    if (numericTS != null) numericTS.setFloatValue(value);
     fieldsData = Float.valueOf(value);
+    type = DataType.FLOAT;
     return this;
   }
 

Copied: lucene/dev/branches/solr2452/lucene/src/java/org/apache/lucene/index/BufferedDeletes.java (from r1125822, lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/BufferedDeletes.java)
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr2452/lucene/src/java/org/apache/lucene/index/BufferedDeletes.java?p2=lucene/dev/branches/solr2452/lucene/src/java/org/apache/lucene/index/BufferedDeletes.java&p1=lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/BufferedDeletes.java&r1=1125822&r2=1126234&rev=1126234&view=diff
==============================================================================
    (empty)

Copied: lucene/dev/branches/solr2452/lucene/src/java/org/apache/lucene/index/BufferedDeletesStream.java (from r1125822, lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/BufferedDeletesStream.java)
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr2452/lucene/src/java/org/apache/lucene/index/BufferedDeletesStream.java?p2=lucene/dev/branches/solr2452/lucene/src/java/org/apache/lucene/index/BufferedDeletesStream.java&p1=lucene/dev/trunk/lucene/src/java/org/apache/lucene/index/BufferedDeletesStream.java&r1=1125822&r2=1126234&rev=1126234&view=diff
==============================================================================
    (empty)

Modified: lucene/dev/branches/solr2452/lucene/src/java/org/apache/lucene/index/ByteSliceWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr2452/lucene/src/java/org/apache/lucene/index/ByteSliceWriter.java?rev=1126234&r1=1126233&r2=1126234&view=diff
==============================================================================
--- lucene/dev/branches/solr2452/lucene/src/java/org/apache/lucene/index/ByteSliceWriter.java (original)
+++ lucene/dev/branches/solr2452/lucene/src/java/org/apache/lucene/index/ByteSliceWriter.java Sun May 22 21:45:19 2011
@@ -81,6 +81,6 @@ final class ByteSliceWriter extends Data
   }
 
   public int getAddress() {
-    return upto + (offset0 & DocumentsWriter.BYTE_BLOCK_NOT_MASK);
+    return upto + (offset0 & DocumentsWriterPerThread.BYTE_BLOCK_NOT_MASK);
   }
 }
\ No newline at end of file

Modified: lucene/dev/branches/solr2452/lucene/src/java/org/apache/lucene/index/CheckIndex.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr2452/lucene/src/java/org/apache/lucene/index/CheckIndex.java?rev=1126234&r1=1126233&r2=1126234&view=diff
==============================================================================
--- lucene/dev/branches/solr2452/lucene/src/java/org/apache/lucene/index/CheckIndex.java (original)
+++ lucene/dev/branches/solr2452/lucene/src/java/org/apache/lucene/index/CheckIndex.java Sun May 22 21:45:19 2011
@@ -661,10 +661,13 @@ public class CheckIndex {
           status.termCount++;
 
           final DocsEnum docs2;
+          final boolean hasPositions;
           if (postings != null) {
             docs2 = postings;
+            hasPositions = true;
           } else {
             docs2 = docs;
+            hasPositions = false;
           }
 
           int lastDoc = -1;
@@ -733,6 +736,67 @@ public class CheckIndex {
               throw new RuntimeException("term " + term + " totalTermFreq=" + totalTermFreq2 + " != recomputed totalTermFreq=" + totalTermFreq);
             }
           }
+
+          // Test skipping
+          if (docFreq >= 16) {
+            if (hasPositions) {
+              for(int idx=0;idx<7;idx++) {
+                final int skipDocID = (int) (((idx+1)*(long) maxDoc)/8);
+                postings = terms.docsAndPositions(delDocs, postings);
+                final int docID = postings.advance(skipDocID);
+                if (docID == DocsEnum.NO_MORE_DOCS) {
+                  break;
+                } else {
+                  if (docID < skipDocID) {
+                    throw new RuntimeException("term " + term + ": advance(docID=" + skipDocID + ") returned docID=" + docID);
+                  }
+                  final int freq = postings.freq();
+                  if (freq <= 0) {
+                    throw new RuntimeException("termFreq " + freq + " is out of bounds");
+                  }
+                  int lastPosition = -1;
+                  for(int posUpto=0;posUpto<freq;posUpto++) {
+                    final int pos = postings.nextPosition();
+                    if (pos < 0) {
+                      throw new RuntimeException("position " + pos + " is out of bounds");
+                    }
+                    if (pos <= lastPosition) {
+                      throw new RuntimeException("position " + pos + " is <= lastPosition " + lastPosition);
+                    }
+                    lastPosition = pos;
+                  } 
+
+                  final int nextDocID = postings.nextDoc();
+                  if (nextDocID == DocsEnum.NO_MORE_DOCS) {
+                    break;
+                  }
+                  if (nextDocID <= docID) {
+                    throw new RuntimeException("term " + term + ": advance(docID=" + skipDocID + "), then .next() returned docID=" + nextDocID + " vs prev docID=" + docID);
+                  }
+                }
+              }
+            } else {
+              for(int idx=0;idx<7;idx++) {
+                final int skipDocID = (int) (((idx+1)*(long) maxDoc)/8);
+                docs = terms.docs(delDocs, docs);
+                final int docID = docs.advance(skipDocID);
+                if (docID == DocsEnum.NO_MORE_DOCS) {
+                  break;
+                } else {
+                  if (docID < skipDocID) {
+                    throw new RuntimeException("term " + term + ": advance(docID=" + skipDocID + ") returned docID=" + docID);
+                  }
+                  final int nextDocID = docs.nextDoc();
+                  if (nextDocID == DocsEnum.NO_MORE_DOCS) {
+                    break;
+                  }
+                  if (nextDocID <= docID) {
+                    throw new RuntimeException("term " + term + ": advance(docID=" + skipDocID + "), then .next() returned docID=" + nextDocID + " vs prev docID=" + docID);
+                  }
+                }
+              }
+            }
+          }
         }
 
         if (sumTotalTermFreq != 0) {

Modified: lucene/dev/branches/solr2452/lucene/src/java/org/apache/lucene/index/CompoundFileReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr2452/lucene/src/java/org/apache/lucene/index/CompoundFileReader.java?rev=1126234&r1=1126233&r2=1126234&view=diff
==============================================================================
--- lucene/dev/branches/solr2452/lucene/src/java/org/apache/lucene/index/CompoundFileReader.java (original)
+++ lucene/dev/branches/solr2452/lucene/src/java/org/apache/lucene/index/CompoundFileReader.java Sun May 22 21:45:19 2011
@@ -189,12 +189,6 @@ public class CompoundFileReader extends 
         return directory.fileModified(fileName);
     }
 
-    /** Set the modified time of the compound file to now. */
-    @Override
-    public void touchFile(String name) throws IOException {
-        directory.touchFile(fileName);
-    }
-
     /** Not implemented
      * @throws UnsupportedOperationException */
     @Override

Modified: lucene/dev/branches/solr2452/lucene/src/java/org/apache/lucene/index/CompoundFileWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr2452/lucene/src/java/org/apache/lucene/index/CompoundFileWriter.java?rev=1126234&r1=1126233&r2=1126234&view=diff
==============================================================================
--- lucene/dev/branches/solr2452/lucene/src/java/org/apache/lucene/index/CompoundFileWriter.java (original)
+++ lucene/dev/branches/solr2452/lucene/src/java/org/apache/lucene/index/CompoundFileWriter.java Sun May 22 21:45:19 2011
@@ -46,8 +46,10 @@ import org.apache.lucene.util.IOUtils;
  * file. The {directory} that follows has that many entries. Each directory entry
  * contains a long pointer to the start of this file's data section, and a String
  * with that file's name.
+ * 
+ * @lucene.internal
  */
-final class CompoundFileWriter {
+public final class CompoundFileWriter {
 
     private static final class FileEntry {
         /** source file */
@@ -136,8 +138,7 @@ final class CompoundFileWriter {
 
     /** Merge files with the extensions added up to now.
      *  All files with these extensions are combined sequentially into the
-     *  compound stream. After successful merge, the source files
-     *  are deleted.
+     *  compound stream.
      *  @throws IllegalStateException if close() had been called before or
      *   if no file has been added to this object
      */

Modified: lucene/dev/branches/solr2452/lucene/src/java/org/apache/lucene/index/ConcurrentMergeScheduler.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr2452/lucene/src/java/org/apache/lucene/index/ConcurrentMergeScheduler.java?rev=1126234&r1=1126233&r2=1126234&view=diff
==============================================================================
--- lucene/dev/branches/solr2452/lucene/src/java/org/apache/lucene/index/ConcurrentMergeScheduler.java (original)
+++ lucene/dev/branches/solr2452/lucene/src/java/org/apache/lucene/index/ConcurrentMergeScheduler.java Sun May 22 21:45:19 2011
@@ -135,8 +135,8 @@ public class ConcurrentMergeScheduler ex
       final MergePolicy.OneMerge m1 = t1.getCurrentMerge();
       final MergePolicy.OneMerge m2 = t2.getCurrentMerge();
       
-      final int c1 = m1 == null ? Integer.MAX_VALUE : m1.segments.totalDocCount();
-      final int c2 = m2 == null ? Integer.MAX_VALUE : m2.segments.totalDocCount();
+      final int c1 = m1 == null ? Integer.MAX_VALUE : m1.totalDocCount;
+      final int c2 = m2 == null ? Integer.MAX_VALUE : m2.totalDocCount;
 
       return c2 - c1;
     }

Modified: lucene/dev/branches/solr2452/lucene/src/java/org/apache/lucene/index/DirectoryReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr2452/lucene/src/java/org/apache/lucene/index/DirectoryReader.java?rev=1126234&r1=1126233&r2=1126234&view=diff
==============================================================================
--- lucene/dev/branches/solr2452/lucene/src/java/org/apache/lucene/index/DirectoryReader.java (original)
+++ lucene/dev/branches/solr2452/lucene/src/java/org/apache/lucene/index/DirectoryReader.java Sun May 22 21:45:19 2011
@@ -732,8 +732,7 @@ class DirectoryReader extends IndexReade
       // case we have to roll back:
       startCommit();
 
-      final SegmentInfos rollbackSegmentInfos = new SegmentInfos();
-      rollbackSegmentInfos.addAll(segmentInfos);
+      final List<SegmentInfo> rollbackSegments = segmentInfos.createBackupSegmentInfos(false);
 
       boolean success = false;
       try {
@@ -765,8 +764,7 @@ class DirectoryReader extends IndexReade
           deleter.refresh();
 
           // Restore all SegmentInfos (in case we pruned some)
-          segmentInfos.clear();
-          segmentInfos.addAll(rollbackSegmentInfos);
+          segmentInfos.rollbackSegmentInfos(rollbackSegments);
         }
       }
 

Modified: lucene/dev/branches/solr2452/lucene/src/java/org/apache/lucene/index/DocConsumer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr2452/lucene/src/java/org/apache/lucene/index/DocConsumer.java?rev=1126234&r1=1126233&r2=1126234&view=diff
==============================================================================
--- lucene/dev/branches/solr2452/lucene/src/java/org/apache/lucene/index/DocConsumer.java (original)
+++ lucene/dev/branches/solr2452/lucene/src/java/org/apache/lucene/index/DocConsumer.java Sun May 22 21:45:19 2011
@@ -18,11 +18,12 @@ package org.apache.lucene.index;
  */
 
 import java.io.IOException;
-import java.util.Collection;
 
 abstract class DocConsumer {
-  abstract DocConsumerPerThread addThread(DocumentsWriterThreadState perThread) throws IOException;
-  abstract void flush(final Collection<DocConsumerPerThread> threads, final SegmentWriteState state) throws IOException;
+  abstract void processDocument(FieldInfos fieldInfos) throws IOException;
+  abstract void finishDocument() throws IOException;
+  abstract void flush(final SegmentWriteState state) throws IOException;
   abstract void abort();
   abstract boolean freeRAM();
+  abstract void doAfterFlush();
 }

Modified: lucene/dev/branches/solr2452/lucene/src/java/org/apache/lucene/index/DocFieldConsumer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr2452/lucene/src/java/org/apache/lucene/index/DocFieldConsumer.java?rev=1126234&r1=1126233&r2=1126234&view=diff
==============================================================================
--- lucene/dev/branches/solr2452/lucene/src/java/org/apache/lucene/index/DocFieldConsumer.java (original)
+++ lucene/dev/branches/solr2452/lucene/src/java/org/apache/lucene/index/DocFieldConsumer.java Sun May 22 21:45:19 2011
@@ -18,22 +18,25 @@ package org.apache.lucene.index;
  */
 
 import java.io.IOException;
-import java.util.Collection;
 import java.util.Map;
 
 abstract class DocFieldConsumer {
-  /** Called when DocumentsWriter decides to create a new
+  /** Called when DocumentsWriterPerThread decides to create a new
    *  segment */
-  abstract void flush(Map<DocFieldConsumerPerThread,Collection<DocFieldConsumerPerField>> threadsAndFields, SegmentWriteState state) throws IOException;
+  abstract void flush(Map<FieldInfo, DocFieldConsumerPerField> fieldsToFlush, SegmentWriteState state) throws IOException;
 
   /** Called when an aborting exception is hit */
   abstract void abort();
 
-  /** Add a new thread */
-  abstract DocFieldConsumerPerThread addThread(DocFieldProcessorPerThread docFieldProcessorPerThread) throws IOException;
-
-  /** Called when DocumentsWriter is using too much RAM.
+  /** Called when DocumentsWriterPerThread is using too much RAM.
    *  The consumer should free RAM, if possible, returning
    *  true if any RAM was in fact freed. */
   abstract boolean freeRAM();
-  }
+
+  abstract void startDocument() throws IOException;
+
+  abstract DocFieldConsumerPerField addField(FieldInfo fi);
+
+  abstract void finishDocument() throws IOException;
+
+}

Modified: lucene/dev/branches/solr2452/lucene/src/java/org/apache/lucene/index/DocFieldConsumerPerField.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr2452/lucene/src/java/org/apache/lucene/index/DocFieldConsumerPerField.java?rev=1126234&r1=1126233&r2=1126234&view=diff
==============================================================================
--- lucene/dev/branches/solr2452/lucene/src/java/org/apache/lucene/index/DocFieldConsumerPerField.java (original)
+++ lucene/dev/branches/solr2452/lucene/src/java/org/apache/lucene/index/DocFieldConsumerPerField.java Sun May 22 21:45:19 2011
@@ -24,4 +24,5 @@ abstract class DocFieldConsumerPerField 
   /** Processes all occurrences of a single field */
   abstract void processFields(Fieldable[] fields, int count) throws IOException;
   abstract void abort();
+  abstract FieldInfo getFieldInfo();
 }

Modified: lucene/dev/branches/solr2452/lucene/src/java/org/apache/lucene/index/DocFieldProcessor.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr2452/lucene/src/java/org/apache/lucene/index/DocFieldProcessor.java?rev=1126234&r1=1126233&r2=1126234&view=diff
==============================================================================
--- lucene/dev/branches/solr2452/lucene/src/java/org/apache/lucene/index/DocFieldProcessor.java (original)
+++ lucene/dev/branches/solr2452/lucene/src/java/org/apache/lucene/index/DocFieldProcessor.java Sun May 22 21:45:19 2011
@@ -19,8 +19,15 @@ package org.apache.lucene.index;
 
 import java.io.IOException;
 import java.util.Collection;
-import java.util.Map;
+import java.util.Comparator;
 import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Fieldable;
+import org.apache.lucene.util.ArrayUtil;
 
 
 /**
@@ -33,26 +40,39 @@ import java.util.HashMap;
 
 final class DocFieldProcessor extends DocConsumer {
 
-  final DocumentsWriter docWriter;
   final DocFieldConsumer consumer;
   final StoredFieldsWriter fieldsWriter;
 
-  public DocFieldProcessor(DocumentsWriter docWriter, DocFieldConsumer consumer) {
-    this.docWriter = docWriter;
+  // Holds all fields seen in current doc
+  DocFieldProcessorPerField[] fields = new DocFieldProcessorPerField[1];
+  int fieldCount;
+
+  // Hash table for all fields ever seen
+  DocFieldProcessorPerField[] fieldHash = new DocFieldProcessorPerField[2];
+  int hashMask = 1;
+  int totalFieldCount;
+
+  float docBoost;
+  int fieldGen;
+  final DocumentsWriterPerThread.DocState docState;
+
+  public DocFieldProcessor(DocumentsWriterPerThread docWriter, DocFieldConsumer consumer) {
+    this.docState = docWriter.docState;
     this.consumer = consumer;
     fieldsWriter = new StoredFieldsWriter(docWriter);
   }
 
   @Override
-  public void flush(Collection<DocConsumerPerThread> threads, SegmentWriteState state) throws IOException {
+  public void flush(SegmentWriteState state) throws IOException {
 
-    Map<DocFieldConsumerPerThread, Collection<DocFieldConsumerPerField>> childThreadsAndFields = new HashMap<DocFieldConsumerPerThread, Collection<DocFieldConsumerPerField>>();
-    for ( DocConsumerPerThread thread : threads) {
-      DocFieldProcessorPerThread perThread = (DocFieldProcessorPerThread) thread;
-      childThreadsAndFields.put(perThread.consumer, perThread.fields());
+    Map<FieldInfo, DocFieldConsumerPerField> childFields = new HashMap<FieldInfo, DocFieldConsumerPerField>();
+    Collection<DocFieldConsumerPerField> fields = fields();
+    for (DocFieldConsumerPerField f : fields) {
+      childFields.put(f.getFieldInfo(), f);
     }
+
     fieldsWriter.flush(state);
-    consumer.flush(childThreadsAndFields, state);
+    consumer.flush(childFields, state);
 
     // Important to save after asking consumer to flush so
     // consumer can alter the FieldInfo* if necessary.  EG,
@@ -64,8 +84,20 @@ final class DocFieldProcessor extends Do
 
   @Override
   public void abort() {
-    fieldsWriter.abort();
-    consumer.abort();
+    for(int i=0;i<fieldHash.length;i++) {
+      DocFieldProcessorPerField field = fieldHash[i];
+      while(field != null) {
+        final DocFieldProcessorPerField next = field.next;
+        field.abort();
+        field = next;
+      }
+    }
+
+    try {
+      fieldsWriter.abort();
+    } finally {
+      consumer.abort();
+    }
   }
 
   @Override
@@ -73,8 +105,160 @@ final class DocFieldProcessor extends Do
     return consumer.freeRAM();
   }
 
+  public Collection<DocFieldConsumerPerField> fields() {
+    Collection<DocFieldConsumerPerField> fields = new HashSet<DocFieldConsumerPerField>();
+    for(int i=0;i<fieldHash.length;i++) {
+      DocFieldProcessorPerField field = fieldHash[i];
+      while(field != null) {
+        fields.add(field.consumer);
+        field = field.next;
+      }
+    }
+    assert fields.size() == totalFieldCount;
+    return fields;
+  }
+
+  /** In flush we reset the fieldHash to not maintain per-field state
+   *  across segments */
   @Override
-  public DocConsumerPerThread addThread(DocumentsWriterThreadState threadState) throws IOException {
-    return new DocFieldProcessorPerThread(threadState, this);
+  void doAfterFlush() {
+    fieldHash = new DocFieldProcessorPerField[2];
+    hashMask = 1;
+    totalFieldCount = 0;
+  }
+
+  private void rehash() {
+    final int newHashSize = (fieldHash.length*2);
+    assert newHashSize > fieldHash.length;
+
+    final DocFieldProcessorPerField newHashArray[] = new DocFieldProcessorPerField[newHashSize];
+
+    // Rehash
+    int newHashMask = newHashSize-1;
+    for(int j=0;j<fieldHash.length;j++) {
+      DocFieldProcessorPerField fp0 = fieldHash[j];
+      while(fp0 != null) {
+        final int hashPos2 = fp0.fieldInfo.name.hashCode() & newHashMask;
+        DocFieldProcessorPerField nextFP0 = fp0.next;
+        fp0.next = newHashArray[hashPos2];
+        newHashArray[hashPos2] = fp0;
+        fp0 = nextFP0;
+      }
+    }
+
+    fieldHash = newHashArray;
+    hashMask = newHashMask;
   }
+
+  @Override
+  public void processDocument(FieldInfos fieldInfos) throws IOException {
+
+    consumer.startDocument();
+    fieldsWriter.startDocument();
+
+    final Document doc = docState.doc;
+
+    fieldCount = 0;
+
+    final int thisFieldGen = fieldGen++;
+
+    final List<Fieldable> docFields = doc.getFields();
+    final int numDocFields = docFields.size();
+
+    // Absorb any new fields first seen in this document.
+    // Also absorb any changes to fields we had already
+    // seen before (eg suddenly turning on norms or
+    // vectors, etc.):
+
+    for(int i=0;i<numDocFields;i++) {
+      Fieldable field = docFields.get(i);
+      final String fieldName = field.name();
+
+      // Make sure we have a PerField allocated
+      final int hashPos = fieldName.hashCode() & hashMask;
+      DocFieldProcessorPerField fp = fieldHash[hashPos];
+      while(fp != null && !fp.fieldInfo.name.equals(fieldName)) {
+        fp = fp.next;
+      }
+
+      if (fp == null) {
+
+        // TODO FI: we need to genericize the "flags" that a
+        // field holds, and, how these flags are merged; it
+        // needs to be more "pluggable" such that if I want
+        // to have a new "thing" my Fields can do, I can
+        // easily add it
+        FieldInfo fi = fieldInfos.addOrUpdate(fieldName, field.isIndexed(), field.isTermVectorStored(),
+                                      field.isStorePositionWithTermVector(), field.isStoreOffsetWithTermVector(),
+                                      field.getOmitNorms(), false, field.getOmitTermFreqAndPositions());
+
+        fp = new DocFieldProcessorPerField(this, fi);
+        fp.next = fieldHash[hashPos];
+        fieldHash[hashPos] = fp;
+        totalFieldCount++;
+
+        if (totalFieldCount >= fieldHash.length/2)
+          rehash();
+      } else {
+        fieldInfos.addOrUpdate(fp.fieldInfo.name, field.isIndexed(), field.isTermVectorStored(),
+                            field.isStorePositionWithTermVector(), field.isStoreOffsetWithTermVector(),
+                            field.getOmitNorms(), false, field.getOmitTermFreqAndPositions());
+      }
+
+      if (thisFieldGen != fp.lastGen) {
+
+        // First time we're seeing this field for this doc
+        fp.fieldCount = 0;
+
+        if (fieldCount == fields.length) {
+          final int newSize = fields.length*2;
+          DocFieldProcessorPerField newArray[] = new DocFieldProcessorPerField[newSize];
+          System.arraycopy(fields, 0, newArray, 0, fieldCount);
+          fields = newArray;
+        }
+
+        fields[fieldCount++] = fp;
+        fp.lastGen = thisFieldGen;
+      }
+
+      fp.addField(field);
+
+      if (field.isStored()) {
+        fieldsWriter.addField(field, fp.fieldInfo);
+      }
+    }
+
+    // If we are writing vectors then we must visit
+    // fields in sorted order so they are written in
+    // sorted order.  TODO: we actually only need to
+    // sort the subset of fields that have vectors
+    // enabled; we could save [small amount of] CPU
+    // here.
+    ArrayUtil.quickSort(fields, 0, fieldCount, fieldsComp);
+    for(int i=0;i<fieldCount;i++) {
+      final DocFieldProcessorPerField perField = fields[i];
+      perField.consumer.processFields(perField.fields, perField.fieldCount);
+    }
+
+    if (docState.maxTermPrefix != null && docState.infoStream != null) {
+      docState.infoStream.println("WARNING: document contains at least one immense term (whose UTF8 encoding is longer than the max length " + DocumentsWriterPerThread.MAX_TERM_LENGTH_UTF8 + "), all of which were skipped.  Please correct the analyzer to not produce such terms.  The prefix of the first immense term is: '" + docState.maxTermPrefix + "...'");
+      docState.maxTermPrefix = null;
+    }
+  }
+
+  private static final Comparator<DocFieldProcessorPerField> fieldsComp = new Comparator<DocFieldProcessorPerField>() {
+    public int compare(DocFieldProcessorPerField o1, DocFieldProcessorPerField o2) {
+      return o1.fieldInfo.name.compareTo(o2.fieldInfo.name);
+    }
+  };
+  
+  @Override
+  void finishDocument() throws IOException {
+    try {
+      fieldsWriter.finishDocument();
+    } finally {
+      consumer.finishDocument();
+    }
+  }
+
 }

Modified: lucene/dev/branches/solr2452/lucene/src/java/org/apache/lucene/index/DocFieldProcessorPerField.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr2452/lucene/src/java/org/apache/lucene/index/DocFieldProcessorPerField.java?rev=1126234&r1=1126233&r2=1126234&view=diff
==============================================================================
--- lucene/dev/branches/solr2452/lucene/src/java/org/apache/lucene/index/DocFieldProcessorPerField.java (original)
+++ lucene/dev/branches/solr2452/lucene/src/java/org/apache/lucene/index/DocFieldProcessorPerField.java Sun May 22 21:45:19 2011
@@ -18,6 +18,8 @@ package org.apache.lucene.index;
  */
 
 import org.apache.lucene.document.Fieldable;
+import org.apache.lucene.util.ArrayUtil;
+import org.apache.lucene.util.RamUsageEstimator;
 
 /**
  * Holds all per thread, per field state.
@@ -34,11 +36,22 @@ final class DocFieldProcessorPerField {
   int fieldCount;
   Fieldable[] fields = new Fieldable[1];
 
-  public DocFieldProcessorPerField(final DocFieldProcessorPerThread perThread, final FieldInfo fieldInfo) {
-    this.consumer = perThread.consumer.addField(fieldInfo);
+  public DocFieldProcessorPerField(final DocFieldProcessor docFieldProcessor, final FieldInfo fieldInfo) {
+    this.consumer = docFieldProcessor.consumer.addField(fieldInfo);
     this.fieldInfo = fieldInfo;
   }
 
+  public void addField(Fieldable field) {
+    if (fieldCount == fields.length) {
+      int newSize = ArrayUtil.oversize(fieldCount + 1, RamUsageEstimator.NUM_BYTES_OBJECT_REF);
+      Fieldable[] newArray = new Fieldable[newSize];
+      System.arraycopy(fields, 0, newArray, 0, fieldCount);
+      fields = newArray;
+    }
+
+    fields[fieldCount++] = field;
+  }
+
   public void abort() {
     consumer.abort();
   }

Modified: lucene/dev/branches/solr2452/lucene/src/java/org/apache/lucene/index/DocInverter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr2452/lucene/src/java/org/apache/lucene/index/DocInverter.java?rev=1126234&r1=1126233&r2=1126234&view=diff
==============================================================================
--- lucene/dev/branches/solr2452/lucene/src/java/org/apache/lucene/index/DocInverter.java (original)
+++ lucene/dev/branches/solr2452/lucene/src/java/org/apache/lucene/index/DocInverter.java Sun May 22 21:45:19 2011
@@ -18,12 +18,13 @@ package org.apache.lucene.index;
  */
 
 import java.io.IOException;
-import java.util.Collection;
 import java.util.HashMap;
-import java.util.HashSet;
-
 import java.util.Map;
 
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
+import org.apache.lucene.util.AttributeSource;
+
 
 /** This is a DocFieldConsumer that inverts each field,
  *  separately, from a Document, and accepts a
@@ -34,42 +35,72 @@ final class DocInverter extends DocField
   final InvertedDocConsumer consumer;
   final InvertedDocEndConsumer endConsumer;
 
-  public DocInverter(InvertedDocConsumer consumer, InvertedDocEndConsumer endConsumer) {
+  final DocumentsWriterPerThread.DocState docState;
+
+  final FieldInvertState fieldState = new FieldInvertState();
+
+  final SingleTokenAttributeSource singleToken = new SingleTokenAttributeSource();
+
+  static class SingleTokenAttributeSource extends AttributeSource {
+    final CharTermAttribute termAttribute;
+    final OffsetAttribute offsetAttribute;
+
+    private SingleTokenAttributeSource() {
+      termAttribute = addAttribute(CharTermAttribute.class);
+      offsetAttribute = addAttribute(OffsetAttribute.class);
+    }
+
+    public void reinit(String stringValue, int startOffset,  int endOffset) {
+      termAttribute.setEmpty().append(stringValue);
+      offsetAttribute.setOffset(startOffset, endOffset);
+    }
+  }
+
+  // Used to read a string value for a field
+  final ReusableStringReader stringReader = new ReusableStringReader();
+
+  public DocInverter(DocumentsWriterPerThread.DocState docState, InvertedDocConsumer consumer, InvertedDocEndConsumer endConsumer) {
+    this.docState = docState;
     this.consumer = consumer;
     this.endConsumer = endConsumer;
   }
 
   @Override
-  void flush(Map<DocFieldConsumerPerThread, Collection<DocFieldConsumerPerField>> threadsAndFields, SegmentWriteState state) throws IOException {
-
-    Map<InvertedDocConsumerPerThread,Collection<InvertedDocConsumerPerField>> childThreadsAndFields = new HashMap<InvertedDocConsumerPerThread,Collection<InvertedDocConsumerPerField>>();
-    Map<InvertedDocEndConsumerPerThread,Collection<InvertedDocEndConsumerPerField>> endChildThreadsAndFields = new HashMap<InvertedDocEndConsumerPerThread,Collection<InvertedDocEndConsumerPerField>>();
+  void flush(Map<FieldInfo, DocFieldConsumerPerField> fieldsToFlush, SegmentWriteState state) throws IOException {
 
-    for (Map.Entry<DocFieldConsumerPerThread,Collection<DocFieldConsumerPerField>> entry : threadsAndFields.entrySet() ) {
+    Map<FieldInfo, InvertedDocConsumerPerField> childFieldsToFlush = new HashMap<FieldInfo, InvertedDocConsumerPerField>();
+    Map<FieldInfo, InvertedDocEndConsumerPerField> endChildFieldsToFlush = new HashMap<FieldInfo, InvertedDocEndConsumerPerField>();
 
+    for (Map.Entry<FieldInfo, DocFieldConsumerPerField> fieldToFlush : fieldsToFlush.entrySet()) {
+      DocInverterPerField perField = (DocInverterPerField) fieldToFlush.getValue();
+      childFieldsToFlush.put(fieldToFlush.getKey(), perField.consumer);
+      endChildFieldsToFlush.put(fieldToFlush.getKey(), perField.endConsumer);
+    }
 
-      DocInverterPerThread perThread = (DocInverterPerThread) entry.getKey();
+    consumer.flush(childFieldsToFlush, state);
+    endConsumer.flush(endChildFieldsToFlush, state);
+  }
 
-      Collection<InvertedDocConsumerPerField> childFields = new HashSet<InvertedDocConsumerPerField>();
-      Collection<InvertedDocEndConsumerPerField> endChildFields = new HashSet<InvertedDocEndConsumerPerField>();
-      for (final DocFieldConsumerPerField field: entry.getValue() ) {  
-        DocInverterPerField perField = (DocInverterPerField) field;
-        childFields.add(perField.consumer);
-        endChildFields.add(perField.endConsumer);
-      }
+  @Override
+  public void startDocument() throws IOException {
+    consumer.startDocument();
+    endConsumer.startDocument();
+  }
 
-      childThreadsAndFields.put(perThread.consumer, childFields);
-      endChildThreadsAndFields.put(perThread.endConsumer, endChildFields);
-    }
-    
-    consumer.flush(childThreadsAndFields, state);
-    endConsumer.flush(endChildThreadsAndFields, state);
+  public void finishDocument() throws IOException {
+    // TODO: allow endConsumer.finishDocument to also return
+    // a DocWriter
+    endConsumer.finishDocument();
+    consumer.finishDocument();
   }
 
   @Override
   void abort() {
-    consumer.abort();
-    endConsumer.abort();
+    try {
+      consumer.abort();
+    } finally {
+      endConsumer.abort();
+    }
   }
 
   @Override
@@ -78,7 +109,8 @@ final class DocInverter extends DocField
   }
 
   @Override
-  public DocFieldConsumerPerThread addThread(DocFieldProcessorPerThread docFieldProcessorPerThread) {
-    return new DocInverterPerThread(docFieldProcessorPerThread, this);
+  public DocFieldConsumerPerField addField(FieldInfo fi) {
+    return new DocInverterPerField(this, fi);
   }
+
 }

Modified: lucene/dev/branches/solr2452/lucene/src/java/org/apache/lucene/index/DocInverterPerField.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr2452/lucene/src/java/org/apache/lucene/index/DocInverterPerField.java?rev=1126234&r1=1126233&r2=1126234&view=diff
==============================================================================
--- lucene/dev/branches/solr2452/lucene/src/java/org/apache/lucene/index/DocInverterPerField.java (original)
+++ lucene/dev/branches/solr2452/lucene/src/java/org/apache/lucene/index/DocInverterPerField.java Sun May 22 21:45:19 2011
@@ -35,20 +35,20 @@ import org.apache.lucene.analysis.tokena
 
 final class DocInverterPerField extends DocFieldConsumerPerField {
 
-  final private DocInverterPerThread perThread;
-  final private FieldInfo fieldInfo;
+  final private DocInverter parent;
+  final FieldInfo fieldInfo;
   final InvertedDocConsumerPerField consumer;
   final InvertedDocEndConsumerPerField endConsumer;
-  final DocumentsWriter.DocState docState;
+  final DocumentsWriterPerThread.DocState docState;
   final FieldInvertState fieldState;
 
-  public DocInverterPerField(DocInverterPerThread perThread, FieldInfo fieldInfo) {
-    this.perThread = perThread;
+  public DocInverterPerField(DocInverter parent, FieldInfo fieldInfo) {
+    this.parent = parent;
     this.fieldInfo = fieldInfo;
-    docState = perThread.docState;
-    fieldState = perThread.fieldState;
-    this.consumer = perThread.consumer.addField(this, fieldInfo);
-    this.endConsumer = perThread.endConsumer.addField(this, fieldInfo);
+    docState = parent.docState;
+    fieldState = parent.fieldState;
+    this.consumer = parent.consumer.addField(this, fieldInfo);
+    this.endConsumer = parent.endConsumer.addField(this, fieldInfo);
   }
 
   @Override
@@ -80,8 +80,8 @@ final class DocInverterPerField extends 
         if (!field.isTokenized()) {		  // un-tokenized field
           String stringValue = field.stringValue();
           final int valueLength = stringValue.length();
-          perThread.singleToken.reinit(stringValue, 0, valueLength);
-          fieldState.attributeSource = perThread.singleToken;
+          parent.singleToken.reinit(stringValue, 0, valueLength);
+          fieldState.attributeSource = parent.singleToken;
           consumer.start(field);
 
           boolean success = false;
@@ -89,8 +89,9 @@ final class DocInverterPerField extends 
             consumer.add();
             success = true;
           } finally {
-            if (!success)
+            if (!success) {
               docState.docWriter.setAborting();
+            }
           }
           fieldState.offset += valueLength;
           fieldState.length++;
@@ -114,8 +115,8 @@ final class DocInverterPerField extends 
               if (stringValue == null) {
                 throw new IllegalArgumentException("field must have either TokenStream, String or Reader value");
               }
-              perThread.stringReader.init(stringValue);
-              reader = perThread.stringReader;
+              parent.stringReader.init(stringValue);
+              reader = parent.stringReader;
             }
           
             // Tokenize field and add to postingTable
@@ -166,8 +167,9 @@ final class DocInverterPerField extends 
                 consumer.add();
                 success = true;
               } finally {
-                if (!success)
+                if (!success) {
                   docState.docWriter.setAborting();
+                }
               }
               fieldState.length++;
               fieldState.position++;
@@ -195,4 +197,9 @@ final class DocInverterPerField extends 
     consumer.finish();
     endConsumer.finish();
   }
+
+  @Override
+  FieldInfo getFieldInfo() {
+    return fieldInfo;
+  }
 }