Posted to commits@lucene.apache.org by sh...@apache.org on 2013/05/07 13:21:14 UTC
svn commit: r1479862 [12/38] - in /lucene/dev/branches/lucene4258: ./
dev-tools/ dev-tools/idea/.idea/ dev-tools/idea/.idea/libraries/
dev-tools/maven/ dev-tools/maven/solr/ dev-tools/maven/solr/core/src/java/
dev-tools/maven/solr/solrj/src/java/ dev-t...
Modified: lucene/dev/branches/lucene4258/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4258/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsReader.java?rev=1479862&r1=1479861&r2=1479862&view=diff
==============================================================================
--- lucene/dev/branches/lucene4258/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsReader.java (original)
+++ lucene/dev/branches/lucene4258/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextFieldsReader.java Tue May 7 11:20:55 2013
@@ -315,8 +315,7 @@ class SimpleTextFieldsReader extends Fie
@Override
public int advance(int target) throws IOException {
// Naive -- better to index skip data
- while(nextDoc() < target);
- return docID;
+ return slowAdvance(target);
}
@Override
@@ -422,8 +421,7 @@ class SimpleTextFieldsReader extends Fie
@Override
public int advance(int target) throws IOException {
// Naive -- better to index skip data
- while(nextDoc() < target);
- return docID;
+ return slowAdvance(target);
}
@Override
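For context: slowAdvance is inherited from DocIdSetIterator and performs the same linear scan the removed code inlined, but with a correct stopping condition. A minimal sketch of its semantics (not the verbatim Lucene source):

    protected int slowAdvance(int target) throws IOException {
      int doc;
      do {
        doc = nextDoc();        // linear scan; no skip data needed
      } while (doc < target);   // stop at first doc >= target, or NO_MORE_DOCS
      return doc;
    }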
Modified: lucene/dev/branches/lucene4258/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextTermVectorsReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4258/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextTermVectorsReader.java?rev=1479862&r1=1479861&r2=1479862&view=diff
==============================================================================
--- lucene/dev/branches/lucene4258/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextTermVectorsReader.java (original)
+++ lucene/dev/branches/lucene4258/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextTermVectorsReader.java Tue May 7 11:20:55 2013
@@ -430,12 +430,8 @@ public class SimpleTextTermVectorsReader
}
@Override
- public int advance(int target) {
- if (!didNext && target == 0) {
- return nextDoc();
- } else {
- return (doc = NO_MORE_DOCS);
- }
+ public int advance(int target) throws IOException {
+ return slowAdvance(target);
}
public void reset(Bits liveDocs, int freq) {
@@ -487,12 +483,8 @@ public class SimpleTextTermVectorsReader
}
@Override
- public int advance(int target) {
- if (!didNext && target == 0) {
- return nextDoc();
- } else {
- return (doc = NO_MORE_DOCS);
- }
+ public int advance(int target) throws IOException {
+ return slowAdvance(target);
}
public void reset(Bits liveDocs, int[] positions, int[] startOffsets, int[] endOffsets, BytesRef payloads[]) {
Modified: lucene/dev/branches/lucene4258/lucene/codecs/src/test/org/apache/lucene/codecs/diskdv/TestDiskDocValuesFormat.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4258/lucene/codecs/src/test/org/apache/lucene/codecs/diskdv/TestDiskDocValuesFormat.java?rev=1479862&r1=1479861&r2=1479862&view=diff
==============================================================================
--- lucene/dev/branches/lucene4258/lucene/codecs/src/test/org/apache/lucene/codecs/diskdv/TestDiskDocValuesFormat.java (original)
+++ lucene/dev/branches/lucene4258/lucene/codecs/src/test/org/apache/lucene/codecs/diskdv/TestDiskDocValuesFormat.java Tue May 7 11:20:55 2013
@@ -18,13 +18,13 @@ package org.apache.lucene.codecs.diskdv;
*/
import org.apache.lucene.codecs.Codec;
-import org.apache.lucene.index.BaseDocValuesFormatTestCase;
+import org.apache.lucene.index.BaseCompressingDocValuesFormatTestCase;
import org.apache.lucene.util._TestUtil;
/**
* Tests DiskDocValuesFormat
*/
-public class TestDiskDocValuesFormat extends BaseDocValuesFormatTestCase {
+public class TestDiskDocValuesFormat extends BaseCompressingDocValuesFormatTestCase {
private final Codec codec = _TestUtil.alwaysDocValuesFormat(new DiskDocValuesFormat());
@Override
Modified: lucene/dev/branches/lucene4258/lucene/common-build.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4258/lucene/common-build.xml?rev=1479862&r1=1479861&r2=1479862&view=diff
==============================================================================
--- lucene/dev/branches/lucene4258/lucene/common-build.xml (original)
+++ lucene/dev/branches/lucene4258/lucene/common-build.xml Tue May 7 11:20:55 2013
@@ -47,16 +47,26 @@
<format property="dateversion" pattern="yyyy.MM.dd.HH.mm.ss" />
</tstamp>
- <property name="name" value="${ant.project.name}"/>
<property name="Name" value="Lucene"/>
- <property name="dev.version" value="5.0-SNAPSHOT"/>
+
+ <property name="name" value="${ant.project.name}"/>
<property name="tests.luceneMatchVersion" value="5.0"/>
+ <property name="dev.version.base" value="5.0"/>
+ <property name="dev.version.suffix" value="SNAPSHOT"/>
+ <property name="dev.version" value="${dev.version.base}-${dev.version.suffix}"/>
<property name="version" value="${dev.version}"/>
<property name="spec.version" value="${version}"/>
+
<property name="year" value="2000-${current.year}"/>
+
+ <!-- Lucene modules unfortunately don't have the "lucene-" prefix, so we add it if no prefix is given in $name: -->
+ <condition property="final.name" value="${name}-${version}">
+ <matches pattern="^(lucene|solr)\b" string="${name}"/>
+ </condition>
<property name="final.name" value="lucene-${name}-${version}"/>
- <property name="common.classpath.excludes" value="**/*.txt,**/*.template,**/*.sha1" />
+ <!-- we exclude ext/*.jar because we don't want example/lib/ext logging jars on the cp -->
+ <property name="common.classpath.excludes" value="**/*.txt,**/*.template,**/*.sha1,ext/*.jar" />
<property name="ivy.bootstrap.version" value="2.3.0" />
<property name="ivy.default.configuration" value="*"/>
@@ -106,6 +116,7 @@
<property name="tests.asserts.gracious" value="false"/>
<property name="tests.verbose" value="false"/>
<property name="tests.infostream" value="${tests.verbose}"/>
+ <property name="tests.filterstacks" value="true"/>
<condition property="tests.heapsize" value="768M">
<isset property="run.clover"/>
@@ -293,8 +304,7 @@
</or>
<or>
<equals arg1="${build.java.runtime}" arg2="1.7"/>
- <!-- TODO: Current Java 8 JDKs have broken Javadocs -->
- <!--<equals arg1="${build.java.runtime}" arg2="1.8"/>-->
+ <equals arg1="${build.java.runtime}" arg2="1.8"/>
</or>
<!-- TODO: Fix this! For now only run this on 64bit, because jTIDY OOMs with default heap size: -->
<contains string="${os.arch}" substring="64"/>
@@ -986,8 +996,25 @@
maxClassNameColumns="${tests.maxClassNameColumns}"
timestamps="${tests.timestamps}"
- showNumFailures="${tests.showNumFailures}"
- />
+ showNumFailures="${tests.showNumFailures}">
+
+ <!-- Filter stack traces. The default set of filters is similar to Ant's (reflection, assertions, junit's own stuff). -->
+ <junit4:filtertrace defaults="true" enabled="${tests.filterstacks}">
+ <!-- Lucene-specific stack frames (test rules mostly). -->
+ <containsstring contains="at com.carrotsearch.randomizedtesting.RandomizedRunner" />
+ <containsstring contains="at org.apache.lucene.util.AbstractBeforeAfterRule" />
+ <containsstring contains="at com.carrotsearch.randomizedtesting.rules." />
+ <containsstring contains="at org.apache.lucene.util.TestRule" />
+ <containsstring contains="at com.carrotsearch.randomizedtesting.rules.StatementAdapter" />
+ <containsstring contains="at com.carrotsearch.randomizedtesting.ThreadLeakControl" />
+
+ <!-- Add custom filters if you like. Lines that match these will be removed. -->
+ <!--
+ <containsstring contains=".." />
+ <containsregex pattern="^(\s+at )(org\.junit\.)" />
+ -->
+ </junit4:filtertrace>
+ </junit4:report-text>
<!-- Emits full status for all tests, their relative order on slaves. -->
<junit4:report-text
@@ -1184,6 +1211,9 @@ ant -Dtests.file.encoding=XXX ...
# the test passes.
ant -Dtests.leaveTemporary=true
+# Do *not* filter stack traces emitted to the console.
+ant -Dtests.filterstacks=false
+
# Output test files and reports.
${tests-output}/tests-report.txt - full ASCII tests report
${tests-output}/tests-failures.txt - failures only (if any)
@@ -1516,6 +1546,14 @@ ${tests-output}/junit4-*.suites - pe
<pattern substring="Permission is hereby granted, free of charge, to any person obtaining a copy"/>
</rat:substringMatcher>
+ <!-- apache -->
+ <rat:substringMatcher licenseFamilyCategory="AL "
+ licenseFamilyName="Apache">
+ <pattern substring="Licensed to the Apache Software Foundation (ASF) under"/>
+ <!-- this is the old-school one under some files -->
+ <pattern substring="Licensed under the Apache License, Version 2.0 (the "License")"/>
+ </rat:substringMatcher>
+
<rat:substringMatcher licenseFamilyCategory="GEN "
licenseFamilyName="Generated">
<!-- svg files generated by gnuplot -->
@@ -1527,7 +1565,7 @@ ${tests-output}/junit4-*.suites - pe
</rat:substringMatcher>
<!-- built in approved licenses -->
- <rat:approvedLicense familyName="Apache License Version 2.0"/>
+ <rat:approvedLicense familyName="Apache"/>
<rat:approvedLicense familyName="The MIT License"/>
<rat:approvedLicense familyName="Modified BSD License"/>
<rat:approvedLicense familyName="Generated"/>
@@ -1537,7 +1575,12 @@ ${tests-output}/junit4-*.suites - pe
<echo>${rat.output}</echo>
<delete>
<fileset file="${rat.sources.logfile}">
- <containsregexp expression="^0 Unknown Licenses"/>
+ <and>
+ <containsregexp expression="^0 Unknown Licenses"/>
+ <not>
+ <containsregexp expression="^\s+!AL"/>
+ </not>
+ </and>
</fileset>
</delete>
<!-- fail if we didn't find the pattern -->
@@ -1939,7 +1982,7 @@ ${tests-output}/junit4-*.suites - pe
<!-- Forbidden API Task -->
<target name="install-forbidden-apis" unless="forbidden-apis.loaded" depends="ivy-availability-check,ivy-configure">
- <ivy:cachepath organisation="de.thetaphi" module="forbiddenapis" revision="1.2"
+ <ivy:cachepath organisation="de.thetaphi" module="forbiddenapis" revision="1.3"
inline="true" conf="default" transitive="true" pathid="forbidden-apis.classpath"/>
<taskdef name="forbidden-apis" classname="de.thetaphi.forbiddenapis.AntTask" classpathref="forbidden-apis.classpath"/>
<property name="forbidden-apis.loaded" value="true"/>
Modified: lucene/dev/branches/lucene4258/lucene/core/src/java/org/apache/lucene/analysis/TokenStreamToAutomaton.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4258/lucene/core/src/java/org/apache/lucene/analysis/TokenStreamToAutomaton.java?rev=1479862&r1=1479861&r2=1479862&view=diff
==============================================================================
--- lucene/dev/branches/lucene4258/lucene/core/src/java/org/apache/lucene/analysis/TokenStreamToAutomaton.java (original)
+++ lucene/dev/branches/lucene4258/lucene/core/src/java/org/apache/lucene/analysis/TokenStreamToAutomaton.java Tue May 7 11:20:55 2013
@@ -17,12 +17,8 @@ package org.apache.lucene.analysis;
* limitations under the License.
*/
-import java.io.FileOutputStream;
import java.io.IOException;
-import java.io.OutputStreamWriter;
-import java.io.Writer;
-import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute;
@@ -43,8 +39,16 @@ import org.apache.lucene.util.automaton.
* @lucene.experimental */
public class TokenStreamToAutomaton {
+ private boolean preservePositionIncrements;
+
/** Sole constructor. */
public TokenStreamToAutomaton() {
+ this.preservePositionIncrements = true;
+ }
+
+ /** Whether to generate holes in the automaton for missing positions, <code>true</code> by default. */
+ public void setPreservePositionIncrements(boolean enablePositionIncrements) {
+ this.preservePositionIncrements = enablePositionIncrements;
}
private static class Position implements RollingBuffer.Resettable {
@@ -108,6 +112,9 @@ public class TokenStreamToAutomaton {
int maxOffset = 0;
while (in.incrementToken()) {
int posInc = posIncAtt.getPositionIncrement();
+ if (!preservePositionIncrements && posInc > 1) {
+ posInc = 1;
+ }
assert pos > -1 || posInc > 0;
if (posInc > 0) {
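A usage sketch for the new setter; toAutomaton(TokenStream) is the class's existing entry point, and tokenStream here is an assumed variable:

    TokenStreamToAutomaton ts2a = new TokenStreamToAutomaton();
    ts2a.setPreservePositionIncrements(false); // collapse stopword holes into single positions
    Automaton automaton = ts2a.toAutomaton(tokenStream);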
Modified: lucene/dev/branches/lucene4258/lucene/core/src/java/org/apache/lucene/analysis/package.html
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4258/lucene/core/src/java/org/apache/lucene/analysis/package.html?rev=1479862&r1=1479861&r2=1479862&view=diff
==============================================================================
--- lucene/dev/branches/lucene4258/lucene/core/src/java/org/apache/lucene/analysis/package.html (original)
+++ lucene/dev/branches/lucene4258/lucene/core/src/java/org/apache/lucene/analysis/package.html Tue May 7 11:20:55 2013
@@ -282,18 +282,18 @@ and proximity searches (though sentence
<p>
If the selected analyzer filters the stop words "is" and "the", then for a document
containing the string "blue is the sky", only the tokens "blue", "sky" are indexed,
- with position("sky") = 1 + position("blue"). Now, a phrase query "blue is the sky"
+ with position("sky") = 3 + position("blue"). Now, a phrase query "blue is the sky"
would find that document, because the same analyzer filters the same stop words from
- that query. But also the phrase query "blue sky" would find that document.
+ that query. But the phrase query "blue sky" would not find that document because the
+ position increment between "blue" and "sky" is only 1.
</p>
<p>
- If this behavior does not fit the application needs, a modified analyzer can
- be used, that would increment further the positions of tokens following a
- removed stop word, using
- {@link org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute#setPositionIncrement(int)}.
- This can be done with something like the following (note, however, that
- StopFilter natively includes this capability by subclassing
- FilteringTokenFilter}:
+ If this behavior does not fit the application needs, the query parser needs to be
+ configured to not take position increments into account when generating phrase queries.
+</p>
+<p>
+ Note that a StopFilter MUST increment the position increment of the token following a removed token in order
+ not to generate corrupt token stream graphs. Here is the logic used by StopFilter to increment positions when filtering out tokens:
</p>
<PRE class="prettyprint">
public TokenStream tokenStream(final String fieldName, Reader reader) {
@@ -308,7 +308,7 @@ and proximity searches (though sentence
boolean hasNext = ts.incrementToken();
if (hasNext) {
if (stopWords.contains(termAtt.toString())) {
- extraIncrement++; // filter this word
+ extraIncrement += posIncrAtt.getPositionIncrement(); // filter this word
continue;
}
if (extraIncrement>0) {
@@ -323,11 +323,6 @@ and proximity searches (though sentence
}
</PRE>
<p>
- Now, with this modified analyzer, the phrase query "blue sky" would find that document.
- But note that this is yet not a perfect solution, because any phrase query "blue w1 w2 sky"
- where both w1 and w2 are stop words would match that document.
-</p>
-<p>
A few more use cases for modifying position increments are:
</p>
<ol>
@@ -338,6 +333,72 @@ and proximity searches (though sentence
As a result, all synonyms of a token would be considered to appear in exactly the
same position as that token, and so would they be seen by phrase and proximity searches.</li>
</ol>
+
+<h3>Token Position Length</h3>
+<p>
+ By default, all tokens created by Analyzers and Tokenizers have a
+ {@link org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute#getPositionLength() position length} of one.
+ This means that the token occupies a single position. This attribute is not indexed
+ and thus not taken into account for positional queries, but is used by, e.g., suggesters.
+</p>
+<p>
+ The main use case for position lengths is multi-word synonyms. With single-word
+ synonyms, setting the position increment to 0 is enough to denote the fact that two
+ words are synonyms, for example:
+</p>
+<table>
+<tr><td>Term</td><td>red</td><td>magenta</td></tr>
+<tr><td>Position increment</td><td>1</td><td>0</td></tr>
+</table>
+<p>
+ Given that position(magenta) = 0 + position(red), they are at the same position, so anything
+ working with analyzers will return the exact same result if you replace "magenta" with "red"
+ in the input. However, multi-word synonyms are more tricky. Let's say that you want to build
+ a TokenStream where "IBM" is a synonym of "International Business Machines". Position increments
+ are not enough anymore:
+</p>
+<table>
+<tr><td>Term</td><td>IBM</td><td>International</td><td>Business</td><td>Machines</td></tr>
+<tr><td>Position increment</td><td>1</td><td>0</td><td>1</td><td>1</td></tr>
+</table>
+<p>
+ The problem with this token stream is that "IBM" is at the same position as "International"
+ although it is a synonym with "International Business Machines" as a whole. Setting
+ the position increment of "Business" and "Machines" to 0 wouldn't help as it would mean
+ than "International" is a synonym of "Business". The only way to solve this issue is to
+ make "IBM" span across 3 positions, this is where position lengths come to rescue.
+</p>
+<table>
+<tr><td>Term</td><td>IBM</td><td>International</td><td>Business</td><td>Machines</td></tr>
+<tr><td>Position increment</td><td>1</td><td>0</td><td>1</td><td>1</td></tr>
+<tr><td>Position length</td><td>3</td><td>1</td><td>1</td><td>1</td></tr>
+</table>
+<p>
+ This new attribute makes clear that "IBM" and "International Business Machines" start and end
+ at the same positions.
+</p>
+<a name="corrupt" />
+<h3>How to not write corrupt token streams</h3>
+<p>
+ There are a few rules to observe when writing custom Tokenizers and TokenFilters:
+</p>
+<ul>
+ <li>The first position increment must be > 0.</li>
+ <li>Positions must not go backward.</li>
+ <li>Tokens that have the same start position must have the same start offset.</li>
+ <li>Tokens that have the same end position (taking into account the position length) must have the same end offset.</li>
+</ul>
+<p>
+ Although these rules might seem easy to follow, problems can quickly happen when chaining
+ badly implemented filters that play with positions and offsets, such as synonym or n-grams
+ filters. Here are good practices for writing correct filters:
+</p>
+<ul>
+ <li>Token filters should not modify offsets. If you feel that your filter would need to modify offsets, then it should probably be implemented as a tokenizer.</li>
+ <li>Token filters should not insert positions. If a filter needs to add tokens, then they should all have a position increment of 0.</li>
+ <li>When they remove tokens, token filters should increment the position increment of the following token.</li>
+ <li>Token filters should preserve position lengths.</li>
+</ul>
<h2>TokenStream API</h2>
<p>
"Flexible Indexing" summarizes the effort of making the Lucene indexer
@@ -383,6 +444,10 @@ and proximity searches (though sentence
<td>See above for detailed information about position increment.</td>
</tr>
<tr>
+ <td>{@link org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute}</td>
+ <td>The number of positions occupied by a token.</td>
+ </tr>
+ <tr>
<td>{@link org.apache.lucene.analysis.tokenattributes.PayloadAttribute}</td>
<td>The payload that a Token can optionally have.</td>
</tr>
@@ -532,20 +597,26 @@ public final class LengthFilter extends
private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
/**
- * Build a filter that removes words that are too long or too
- * short from the text.
+ * Create a new LengthFilter. This will filter out tokens whose
+ * CharTermAttribute is either too short
+ * (< min) or too long (> max).
+ * @param version the Lucene match version
+ * @param in the TokenStream to consume
+ * @param min the minimum length
+ * @param max the maximum length
*/
- public LengthFilter(boolean enablePositionIncrements, TokenStream in, int min, int max) {
- super(enablePositionIncrements, in);
+ public LengthFilter(Version version, TokenStream in, int min, int max) {
+ super(version, in);
this.min = min;
this.max = max;
}
-
+
{@literal @Override}
- public boolean accept() throws IOException {
+ public boolean accept() {
final int len = termAtt.length();
- return (len >= min && len <= max);
+ return (len >= min && len <= max);
}
+
}
</pre>
<p>
@@ -573,66 +644,39 @@ public final class LengthFilter extends
public abstract class FilteringTokenFilter extends TokenFilter {
private final PositionIncrementAttribute posIncrAtt = addAttribute(PositionIncrementAttribute.class);
- private boolean enablePositionIncrements; // no init needed, as ctor enforces setting value!
- public FilteringTokenFilter(boolean enablePositionIncrements, TokenStream input){
- super(input);
- this.enablePositionIncrements = enablePositionIncrements;
+ /**
+ * Create a new FilteringTokenFilter.
+ * @param in the TokenStream to consume
+ */
+ public FilteringTokenFilter(Version version, TokenStream in) {
+ super(in);
}
- /** Override this method and return if the current input token should be returned by {@literal {@link #incrementToken}}. */
+ /** Override this method and return if the current input token should be returned by incrementToken. */
protected abstract boolean accept() throws IOException;
{@literal @Override}
public final boolean incrementToken() throws IOException {
- if (enablePositionIncrements) {
- int skippedPositions = 0;
- while (input.incrementToken()) {
- if (accept()) {
- if (skippedPositions != 0) {
- posIncrAtt.setPositionIncrement(posIncrAtt.getPositionIncrement() + skippedPositions);
- }
- return true;
- }
- skippedPositions += posIncrAtt.getPositionIncrement();
- }
- } else {
- while (input.incrementToken()) {
- if (accept()) {
- return true;
+ int skippedPositions = 0;
+ while (input.incrementToken()) {
+ if (accept()) {
+ if (skippedPositions != 0) {
+ posIncrAtt.setPositionIncrement(posIncrAtt.getPositionIncrement() + skippedPositions);
}
+ return true;
}
+ skippedPositions += posIncrAtt.getPositionIncrement();
}
// reached EOS -- return false
return false;
}
- /**
- * {@literal @see #setEnablePositionIncrements(boolean)}
- */
- public boolean getEnablePositionIncrements() {
- return enablePositionIncrements;
+ {@literal @Override}
+ public void reset() throws IOException {
+ super.reset();
}
- /**
- * If <code>true</code>, this TokenFilter will preserve
- * positions of the incoming tokens (ie, accumulate and
- * set position increments of the removed tokens).
- * Generally, <code>true</code> is best as it does not
- * lose information (positions of the original tokens)
- * during indexing.
- *
- * <p> When set, when a token is stopped
- * (omitted), the position increment of the following
- * token is incremented.
- *
- * <p> <b>NOTE</b>: be sure to also
- * set org.apache.lucene.queryparser.classic.QueryParser#setEnablePositionIncrements if
- * you use QueryParser to create queries.
- */
- public void setEnablePositionIncrements(boolean enable) {
- this.enablePositionIncrements = enable;
- }
}
</pre>
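To connect the position-length tables above to code, here is a hypothetical fragment of a synonym filter emitting the "IBM" token; the three attribute classes are real Lucene APIs, while the surrounding filter logic is illustrative only:

    // Fields of the (hypothetical) filter:
    private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
    private final PositionIncrementAttribute posIncAtt = addAttribute(PositionIncrementAttribute.class);
    private final PositionLengthAttribute posLenAtt = addAttribute(PositionLengthAttribute.class);

    // Inside incrementToken(), when emitting "IBM" before "International":
    termAtt.setEmpty().append("IBM");
    posIncAtt.setPositionIncrement(1); // starts a new position ("International" follows with increment 0)
    posLenAtt.setPositionLength(3);    // spans "International Business Machines"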
Modified: lucene/dev/branches/lucene4258/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsIndexReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4258/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsIndexReader.java?rev=1479862&r1=1479861&r2=1479862&view=diff
==============================================================================
--- lucene/dev/branches/lucene4258/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsIndexReader.java (original)
+++ lucene/dev/branches/lucene4258/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsIndexReader.java Tue May 7 11:20:55 2013
@@ -82,7 +82,7 @@ public final class CompressingStoredFiel
avgChunkDocs[blockCount] = fieldsIndexIn.readVInt();
final int bitsPerDocBase = fieldsIndexIn.readVInt();
if (bitsPerDocBase > 32) {
- throw new CorruptIndexException("Corrupted");
+ throw new CorruptIndexException("Corrupted bitsPerDocBase (resource=" + fieldsIndexIn + ")");
}
docBasesDeltas[blockCount] = PackedInts.getReaderNoHeader(fieldsIndexIn, PackedInts.Format.PACKED, packedIntsVersion, numChunks, bitsPerDocBase);
@@ -91,7 +91,7 @@ public final class CompressingStoredFiel
avgChunkSizes[blockCount] = fieldsIndexIn.readVLong();
final int bitsPerStartPointer = fieldsIndexIn.readVInt();
if (bitsPerStartPointer > 64) {
- throw new CorruptIndexException("Corrupted");
+ throw new CorruptIndexException("Corrupted bitsPerStartPointer (resource=" + fieldsIndexIn + ")");
}
startPointersDeltas[blockCount] = PackedInts.getReaderNoHeader(fieldsIndexIn, PackedInts.Format.PACKED, packedIntsVersion, numChunks, bitsPerStartPointer);
Modified: lucene/dev/branches/lucene4258/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4258/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsReader.java?rev=1479862&r1=1479861&r2=1479862&view=diff
==============================================================================
--- lucene/dev/branches/lucene4258/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsReader.java (original)
+++ lucene/dev/branches/lucene4258/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsReader.java Tue May 7 11:20:55 2013
@@ -203,7 +203,7 @@ public final class CompressingStoredFiel
|| docBase + chunkDocs > numDocs) {
throw new CorruptIndexException("Corrupted: docID=" + docID
+ ", docBase=" + docBase + ", chunkDocs=" + chunkDocs
- + ", numDocs=" + numDocs);
+ + ", numDocs=" + numDocs + " (resource=" + fieldsStream + ")");
}
final int numStoredFields, offset, length, totalLength;
@@ -217,7 +217,7 @@ public final class CompressingStoredFiel
if (bitsPerStoredFields == 0) {
numStoredFields = fieldsStream.readVInt();
} else if (bitsPerStoredFields > 31) {
- throw new CorruptIndexException("bitsPerStoredFields=" + bitsPerStoredFields);
+ throw new CorruptIndexException("bitsPerStoredFields=" + bitsPerStoredFields + " (resource=" + fieldsStream + ")");
} else {
final long filePointer = fieldsStream.getFilePointer();
final PackedInts.Reader reader = PackedInts.getDirectReaderNoHeader(fieldsStream, PackedInts.Format.PACKED, packedIntsVersion, chunkDocs, bitsPerStoredFields);
@@ -231,7 +231,7 @@ public final class CompressingStoredFiel
offset = (docID - docBase) * length;
totalLength = chunkDocs * length;
} else if (bitsPerStoredFields > 31) {
- throw new CorruptIndexException("bitsPerLength=" + bitsPerLength);
+ throw new CorruptIndexException("bitsPerLength=" + bitsPerLength + " (resource=" + fieldsStream + ")");
} else {
final PackedInts.ReaderIterator it = PackedInts.getReaderIteratorNoHeader(fieldsStream, PackedInts.Format.PACKED, packedIntsVersion, chunkDocs, bitsPerLength, 1);
int off = 0;
@@ -249,7 +249,7 @@ public final class CompressingStoredFiel
}
if ((length == 0) != (numStoredFields == 0)) {
- throw new CorruptIndexException("length=" + length + ", numStoredFields=" + numStoredFields);
+ throw new CorruptIndexException("length=" + length + ", numStoredFields=" + numStoredFields + " (resource=" + fieldsStream + ")");
}
if (numStoredFields == 0) {
// nothing to do
@@ -344,7 +344,7 @@ public final class CompressingStoredFiel
|| docBase + chunkDocs > numDocs) {
throw new CorruptIndexException("Corrupted: current docBase=" + this.docBase
+ ", current numDocs=" + this.chunkDocs + ", new docBase=" + docBase
- + ", new numDocs=" + chunkDocs);
+ + ", new numDocs=" + chunkDocs + " (resource=" + fieldsStream + ")");
}
this.docBase = docBase;
this.chunkDocs = chunkDocs;
@@ -363,7 +363,7 @@ public final class CompressingStoredFiel
if (bitsPerStoredFields == 0) {
Arrays.fill(numStoredFields, 0, chunkDocs, fieldsStream.readVInt());
} else if (bitsPerStoredFields > 31) {
- throw new CorruptIndexException("bitsPerStoredFields=" + bitsPerStoredFields);
+ throw new CorruptIndexException("bitsPerStoredFields=" + bitsPerStoredFields + " (resource=" + fieldsStream + ")");
} else {
final PackedInts.ReaderIterator it = PackedInts.getReaderIteratorNoHeader(fieldsStream, PackedInts.Format.PACKED, packedIntsVersion, chunkDocs, bitsPerStoredFields, 1);
for (int i = 0; i < chunkDocs; ++i) {
@@ -393,7 +393,7 @@ public final class CompressingStoredFiel
final int chunkSize = chunkSize();
decompressor.decompress(fieldsStream, chunkSize, 0, chunkSize, bytes);
if (bytes.length != chunkSize) {
- throw new CorruptIndexException("Corrupted: expected chunk size = " + chunkSize() + ", got " + bytes.length);
+ throw new CorruptIndexException("Corrupted: expected chunk size = " + chunkSize() + ", got " + bytes.length + " (resource=" + fieldsStream + ")");
}
}
Modified: lucene/dev/branches/lucene4258/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4258/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsWriter.java?rev=1479862&r1=1479861&r2=1479862&view=diff
==============================================================================
--- lucene/dev/branches/lucene4258/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsWriter.java (original)
+++ lucene/dev/branches/lucene4258/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingStoredFieldsWriter.java Tue May 7 11:20:55 2013
@@ -53,6 +53,9 @@ import org.apache.lucene.util.packed.Pac
*/
public final class CompressingStoredFieldsWriter extends StoredFieldsWriter {
+ // hard limit on the maximum number of documents per chunk
+ static final int MAX_DOCUMENTS_PER_CHUNK = 128;
+
static final int STRING = 0x00;
static final int BYTE_ARR = 0x01;
static final int NUMERIC_INT = 0x02;
@@ -200,7 +203,7 @@ public final class CompressingStoredFiel
private boolean triggerFlush() {
return bufferedDocs.length >= chunkSize || // chunks of at least chunkSize bytes
- numBufferedDocs >= chunkSize; // can be necessary if most docs are empty
+ numBufferedDocs >= MAX_DOCUMENTS_PER_CHUNK;
}
private void flush() throws IOException {
Modified: lucene/dev/branches/lucene4258/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingTermVectorsReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4258/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingTermVectorsReader.java?rev=1479862&r1=1479861&r2=1479862&view=diff
==============================================================================
--- lucene/dev/branches/lucene4258/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingTermVectorsReader.java (original)
+++ lucene/dev/branches/lucene4258/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingTermVectorsReader.java Tue May 7 11:20:55 2013
@@ -187,7 +187,7 @@ public final class CompressingTermVector
final int docBase = vectorsStream.readVInt();
final int chunkDocs = vectorsStream.readVInt();
if (doc < docBase || doc >= docBase + chunkDocs || docBase + chunkDocs > numDocs) {
- throw new CorruptIndexException("docBase=" + docBase + ",chunkDocs=" + chunkDocs + ",doc=" + doc);
+ throw new CorruptIndexException("docBase=" + docBase + ",chunkDocs=" + chunkDocs + ",doc=" + doc + " (resource=" + vectorsStream + ")");
}
final int skip; // number of fields to skip
@@ -1030,11 +1030,7 @@ public final class CompressingTermVector
@Override
public int advance(int target) throws IOException {
- if (doc == -1 && target == 0 && (liveDocs == null || liveDocs.get(0))) {
- return (doc = 0);
- } else {
- return (doc = NO_MORE_DOCS);
- }
+ return slowAdvance(target);
}
@Override
Modified: lucene/dev/branches/lucene4258/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingTermVectorsWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4258/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingTermVectorsWriter.java?rev=1479862&r1=1479861&r2=1479862&view=diff
==============================================================================
--- lucene/dev/branches/lucene4258/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingTermVectorsWriter.java (original)
+++ lucene/dev/branches/lucene4258/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressingTermVectorsWriter.java Tue May 7 11:20:55 2013
@@ -56,6 +56,9 @@ import org.apache.lucene.util.packed.Pac
*/
public final class CompressingTermVectorsWriter extends TermVectorsWriter {
+ // hard limit on the maximum number of documents per chunk
+ static final int MAX_DOCUMENTS_PER_CHUNK = 128;
+
static final String VECTORS_EXTENSION = "tvd";
static final String VECTORS_INDEX_EXTENSION = "tvx";
@@ -322,7 +325,8 @@ public final class CompressingTermVector
}
private boolean triggerFlush() {
- return termSuffixes.length >= chunkSize || pendingDocs.size() >= chunkSize;
+ return termSuffixes.length >= chunkSize
+ || pendingDocs.size() >= MAX_DOCUMENTS_PER_CHUNK;
}
private void flush() throws IOException {
Modified: lucene/dev/branches/lucene4258/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressionMode.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4258/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressionMode.java?rev=1479862&r1=1479861&r2=1479862&view=diff
==============================================================================
--- lucene/dev/branches/lucene4258/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressionMode.java (original)
+++ lucene/dev/branches/lucene4258/lucene/core/src/java/org/apache/lucene/codecs/compressing/CompressionMode.java Tue May 7 11:20:55 2013
@@ -134,7 +134,7 @@ public abstract class CompressionMode {
}
final int decompressedLength = LZ4.decompress(in, offset + length, bytes.bytes, 0);
if (decompressedLength > originalLength) {
- throw new CorruptIndexException("Corrupted: lengths mismatch: " + decompressedLength + " > " + originalLength);
+ throw new CorruptIndexException("Corrupted: lengths mismatch: " + decompressedLength + " > " + originalLength + " (resource=" + in + ")");
}
bytes.offset = offset;
bytes.length = length;
@@ -222,7 +222,7 @@ public abstract class CompressionMode {
}
}
if (bytes.length != originalLength) {
- throw new CorruptIndexException("Lengths mismatch: " + bytes.length + " != " + originalLength);
+ throw new CorruptIndexException("Lengths mismatch: " + bytes.length + " != " + originalLength + " (resource=" + in + ")");
}
bytes.offset = offset;
bytes.length = length;
Modified: lucene/dev/branches/lucene4258/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40FieldInfosReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4258/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40FieldInfosReader.java?rev=1479862&r1=1479861&r2=1479862&view=diff
==============================================================================
--- lucene/dev/branches/lucene4258/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40FieldInfosReader.java (original)
+++ lucene/dev/branches/lucene4258/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40FieldInfosReader.java Tue May 7 11:20:55 2013
@@ -99,7 +99,7 @@ class Lucene40FieldInfosReader extends F
}
if (oldNormsType.mapping != null) {
if (oldNormsType.mapping != DocValuesType.NUMERIC) {
- throw new CorruptIndexException("invalid norm type: " + oldNormsType);
+ throw new CorruptIndexException("invalid norm type: " + oldNormsType + " (resource=" + input + ")");
}
attributes.put(LEGACY_NORM_TYPE_KEY, oldNormsType.name());
}
Modified: lucene/dev/branches/lucene4258/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40TermVectorsReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4258/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40TermVectorsReader.java?rev=1479862&r1=1479861&r2=1479862&view=diff
==============================================================================
--- lucene/dev/branches/lucene4258/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40TermVectorsReader.java (original)
+++ lucene/dev/branches/lucene4258/lucene/core/src/java/org/apache/lucene/codecs/lucene40/Lucene40TermVectorsReader.java Tue May 7 11:20:55 2013
@@ -605,12 +605,8 @@ public class Lucene40TermVectorsReader e
}
@Override
- public int advance(int target) {
- if (!didNext && target == 0) {
- return nextDoc();
- } else {
- return (doc = NO_MORE_DOCS);
- }
+ public int advance(int target) throws IOException {
+ return slowAdvance(target);
}
public void reset(Bits liveDocs, int freq) {
@@ -664,12 +660,8 @@ public class Lucene40TermVectorsReader e
}
@Override
- public int advance(int target) {
- if (!didNext && target == 0) {
- return nextDoc();
- } else {
- return (doc = NO_MORE_DOCS);
- }
+ public int advance(int target) throws IOException {
+ return slowAdvance(target);
}
public void reset(Bits liveDocs, int[] positions, int[] startOffsets, int[] endOffsets, int[] payloadLengths, byte[] payloadBytes) {
Modified: lucene/dev/branches/lucene4258/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42DocValuesConsumer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4258/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42DocValuesConsumer.java?rev=1479862&r1=1479861&r2=1479862&view=diff
==============================================================================
--- lucene/dev/branches/lucene4258/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42DocValuesConsumer.java (original)
+++ lucene/dev/branches/lucene4258/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42DocValuesConsumer.java Tue May 7 11:20:55 2013
@@ -34,6 +34,7 @@ import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.IntsRef;
+import org.apache.lucene.util.MathUtil;
import org.apache.lucene.util.fst.Builder;
import org.apache.lucene.util.fst.FST;
import org.apache.lucene.util.fst.FST.INPUT_TYPE;
@@ -49,7 +50,8 @@ import org.apache.lucene.util.packed.Pac
*/
class Lucene42DocValuesConsumer extends DocValuesConsumer {
static final int VERSION_START = 0;
- static final int VERSION_CURRENT = VERSION_START;
+ static final int VERSION_GCD_COMPRESSION = 1;
+ static final int VERSION_CURRENT = VERSION_GCD_COMPRESSION;
static final byte NUMBER = 0;
static final byte BYTES = 1;
@@ -60,6 +62,7 @@ class Lucene42DocValuesConsumer extends
static final byte DELTA_COMPRESSED = 0;
static final byte TABLE_COMPRESSED = 1;
static final byte UNCOMPRESSED = 2;
+ static final byte GCD_COMPRESSED = 3;
final IndexOutput data, meta;
final int maxDoc;
@@ -83,27 +86,53 @@ class Lucene42DocValuesConsumer extends
}
}
}
-
+
@Override
public void addNumericField(FieldInfo field, Iterable<Number> values) throws IOException {
+ addNumericField(field, values, true);
+ }
+
+ void addNumericField(FieldInfo field, Iterable<Number> values, boolean optimizeStorage) throws IOException {
meta.writeVInt(field.number);
meta.writeByte(NUMBER);
meta.writeLong(data.getFilePointer());
long minValue = Long.MAX_VALUE;
long maxValue = Long.MIN_VALUE;
+ long gcd = 0;
// TODO: more efficient?
- HashSet<Long> uniqueValues = new HashSet<Long>();
- for(Number nv : values) {
- long v = nv.longValue();
- minValue = Math.min(minValue, v);
- maxValue = Math.max(maxValue, v);
- if (uniqueValues != null) {
- if (uniqueValues.add(v)) {
- if (uniqueValues.size() > 256) {
- uniqueValues = null;
+ HashSet<Long> uniqueValues = null;
+ if (optimizeStorage) {
+ uniqueValues = new HashSet<>();
+
+ long count = 0;
+ for (Number nv : values) {
+ final long v = nv.longValue();
+
+ if (gcd != 1) {
+ if (v < Long.MIN_VALUE / 2 || v > Long.MAX_VALUE / 2) {
+ // in that case v - minValue might overflow and make the GCD computation return
+ // wrong results. Since these extreme values are unlikely, we just discard
+ // GCD computation for them
+ gcd = 1;
+ } else if (count != 0) { // minValue needs to be set first
+ gcd = MathUtil.gcd(gcd, v - minValue);
}
}
+
+ minValue = Math.min(minValue, v);
+ maxValue = Math.max(maxValue, v);
+
+ if (uniqueValues != null) {
+ if (uniqueValues.add(v)) {
+ if (uniqueValues.size() > 256) {
+ uniqueValues = null;
+ }
+ }
+ }
+
+ ++count;
}
+ assert count == maxDoc;
}
if (uniqueValues != null) {
@@ -135,6 +164,18 @@ class Lucene42DocValuesConsumer extends
}
writer.finish();
}
+ } else if (gcd != 0 && gcd != 1) {
+ meta.writeByte(GCD_COMPRESSED);
+ meta.writeVInt(PackedInts.VERSION_CURRENT);
+ data.writeLong(minValue);
+ data.writeLong(gcd);
+ data.writeVInt(BLOCK_SIZE);
+
+ final BlockPackedWriter writer = new BlockPackedWriter(data, BLOCK_SIZE);
+ for (Number nv : values) {
+ writer.add((nv.longValue() - minValue) / gcd);
+ }
+ writer.finish();
} else {
meta.writeByte(DELTA_COMPRESSED); // delta-compressed
@@ -222,7 +263,7 @@ class Lucene42DocValuesConsumer extends
@Override
public void addSortedField(FieldInfo field, Iterable<BytesRef> values, Iterable<Number> docToOrd) throws IOException {
// write the ordinals as numerics
- addNumericField(field, docToOrd);
+ addNumericField(field, docToOrd, false);
// write the values as FST
writeFST(field, values);
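To illustrate the GCD path above: values that share a common divisor relative to the minimum (dates are the canonical case) need only a few bits per stored quotient. A self-contained sketch of the arithmetic, using BigInteger.gcd in place of the MathUtil.gcd the commit relies on:

    import java.math.BigInteger;

    public class GcdCompressionDemo {
      public static void main(String[] args) {
        long[] values = {86400000L, 259200000L, 432000000L}; // days 1, 3, 5 in millis
        long min = Long.MAX_VALUE;
        for (long v : values) min = Math.min(min, v);
        long gcd = 0;
        for (long v : values) {
          gcd = BigInteger.valueOf(gcd).gcd(BigInteger.valueOf(v - min)).longValue();
        }
        // The writer stores min, gcd and per-document quotients; the reader
        // reconstructs each value as min + gcd * quotient.
        for (long v : values) {
          long quotient = (v - min) / gcd; // 0, 1, 2 -- fits in 2 bits
          assert v == min + gcd * quotient;
        }
        System.out.println("min=" + min + ", gcd=" + gcd); // min=86400000, gcd=172800000
      }
    }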
Modified: lucene/dev/branches/lucene4258/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42DocValuesFormat.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4258/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42DocValuesFormat.java?rev=1479862&r1=1479861&r2=1479862&view=diff
==============================================================================
--- lucene/dev/branches/lucene4258/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42DocValuesFormat.java (original)
+++ lucene/dev/branches/lucene4258/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42DocValuesFormat.java Tue May 7 11:20:55 2013
@@ -44,6 +44,8 @@ import org.apache.lucene.util.packed.Blo
* <li>Uncompressed Numerics: when all values would fit into a single byte, and the
* <code>acceptableOverheadRatio</code> would pack values into 8 bits per value anyway, they
* are written as absolute values (with no indirection or packing) for performance.
+ * <li>GCD-compressed Numerics: when all numbers share a common divisor, such as dates, the greatest
+ * common divisor (GCD) is computed, and quotients are stored using Delta-compressed Numerics.
* <li>Fixed-width Binary: one large concatenated byte[] is written, along with the fixed length.
* Each document's value can be addressed by maxDoc*length.
* <li>Variable-width Binary: one large concatenated byte[] is written, along with end addresses
@@ -93,6 +95,8 @@ import org.apache.lucene.util.packed.Blo
* <li>2 --> uncompressed. When the <code>acceptableOverheadRatio</code> parameter would upgrade the number
* of bits required to 8, and all values fit in a byte, these are written as absolute binary values
* for performance.
+ * <li>3 --> gcd-compressed. When all integers share a common divisor, only quotients are stored
+ * using blocks of delta-encoded ints.
* </ul>
* <p>MinLength and MaxLength represent the min and max byte[] value lengths for Binary values.
* If they are equal, then all values are of a fixed size, and can be addressed as DataOffset + (docID * length).
@@ -103,7 +107,7 @@ import org.apache.lucene.util.packed.Blo
* <p>For DocValues field, this stores the actual per-document data (the heavy-lifting)</p>
* <p>DocValues data (.dvd) --> Header,<NumericData | BinaryData | SortedData><sup>NumFields</sup></p>
* <ul>
- * <li>NumericData --> DeltaCompressedNumerics | TableCompressedNumerics | UncompressedNumerics</li>
+ * <li>NumericData --> DeltaCompressedNumerics | TableCompressedNumerics | UncompressedNumerics | GCDCompressedNumerics</li>
* <li>BinaryData --> {@link DataOutput#writeByte Byte}<sup>DataLength</sup>,Addresses</li>
* <li>SortedData --> {@link FST FST<Int64>}</li>
* <li>DeltaCompressedNumerics --> {@link BlockPackedWriter BlockPackedInts(blockSize=4096)}</li>
Modified: lucene/dev/branches/lucene4258/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42DocValuesProducer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4258/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42DocValuesProducer.java?rev=1479862&r1=1479861&r2=1479862&view=diff
==============================================================================
--- lucene/dev/branches/lucene4258/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42DocValuesProducer.java (original)
+++ lucene/dev/branches/lucene4258/lucene/core/src/java/org/apache/lucene/codecs/lucene42/Lucene42DocValuesProducer.java Tue May 7 11:20:55 2013
@@ -17,6 +17,11 @@ package org.apache.lucene.codecs.lucene4
* limitations under the License.
*/
+import static org.apache.lucene.codecs.lucene42.Lucene42DocValuesConsumer.DELTA_COMPRESSED;
+import static org.apache.lucene.codecs.lucene42.Lucene42DocValuesConsumer.GCD_COMPRESSED;
+import static org.apache.lucene.codecs.lucene42.Lucene42DocValuesConsumer.TABLE_COMPRESSED;
+import static org.apache.lucene.codecs.lucene42.Lucene42DocValuesConsumer.UNCOMPRESSED;
+
import java.io.IOException;
import java.util.Comparator;
import java.util.HashMap;
@@ -80,14 +85,16 @@ class Lucene42DocValuesProducer extends
// read in the entries from the metadata file.
IndexInput in = state.directory.openInput(metaName, state.context);
boolean success = false;
+ final int version;
try {
- CodecUtil.checkHeader(in, metaCodec,
- Lucene42DocValuesConsumer.VERSION_START,
- Lucene42DocValuesConsumer.VERSION_START);
+ version = CodecUtil.checkHeader(in, metaCodec,
+ Lucene42DocValuesConsumer.VERSION_START,
+ Lucene42DocValuesConsumer.VERSION_CURRENT);
numerics = new HashMap<Integer,NumericEntry>();
binaries = new HashMap<Integer,BinaryEntry>();
fsts = new HashMap<Integer,FSTEntry>();
readFields(in, state.fieldInfos);
+
success = true;
} finally {
if (success) {
@@ -96,12 +103,24 @@ class Lucene42DocValuesProducer extends
IOUtils.closeWhileHandlingException(in);
}
}
-
- String dataName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, dataExtension);
- data = state.directory.openInput(dataName, state.context);
- CodecUtil.checkHeader(data, dataCodec,
- Lucene42DocValuesConsumer.VERSION_START,
- Lucene42DocValuesConsumer.VERSION_START);
+
+ success = false;
+ try {
+ String dataName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, dataExtension);
+ data = state.directory.openInput(dataName, state.context);
+ final int version2 = CodecUtil.checkHeader(data, dataCodec,
+ Lucene42DocValuesConsumer.VERSION_START,
+ Lucene42DocValuesConsumer.VERSION_CURRENT);
+ if (version != version2) {
+ throw new CorruptIndexException("Format versions mismatch");
+ }
+
+ success = true;
+ } finally {
+ if (!success) {
+ IOUtils.closeWhileHandlingException(this.data);
+ }
+ }
}
private void readFields(IndexInput meta, FieldInfos infos) throws IOException {
@@ -112,6 +131,15 @@ class Lucene42DocValuesProducer extends
NumericEntry entry = new NumericEntry();
entry.offset = meta.readLong();
entry.format = meta.readByte();
+ switch(entry.format) {
+ case DELTA_COMPRESSED:
+ case TABLE_COMPRESSED:
+ case GCD_COMPRESSED:
+ case UNCOMPRESSED:
+ break;
+ default:
+ throw new CorruptIndexException("Unknown format: " + entry.format + ", input=" + meta);
+ }
if (entry.format != Lucene42DocValuesConsumer.UNCOMPRESSED) {
entry.packedIntsVersion = meta.readVInt();
}
@@ -152,41 +180,56 @@ class Lucene42DocValuesProducer extends
private NumericDocValues loadNumeric(FieldInfo field) throws IOException {
NumericEntry entry = numerics.get(field.number);
data.seek(entry.offset);
- if (entry.format == Lucene42DocValuesConsumer.TABLE_COMPRESSED) {
- int size = data.readVInt();
- final long decode[] = new long[size];
- for (int i = 0; i < decode.length; i++) {
- decode[i] = data.readLong();
- }
- final int formatID = data.readVInt();
- final int bitsPerValue = data.readVInt();
- final PackedInts.Reader reader = PackedInts.getReaderNoHeader(data, PackedInts.Format.byId(formatID), entry.packedIntsVersion, maxDoc, bitsPerValue);
- return new NumericDocValues() {
- @Override
- public long get(int docID) {
- return decode[(int)reader.get(docID)];
- }
- };
- } else if (entry.format == Lucene42DocValuesConsumer.DELTA_COMPRESSED) {
- final int blockSize = data.readVInt();
- final BlockPackedReader reader = new BlockPackedReader(data, entry.packedIntsVersion, blockSize, maxDoc, false);
- return new NumericDocValues() {
- @Override
- public long get(int docID) {
- return reader.get(docID);
- }
- };
- } else if (entry.format == Lucene42DocValuesConsumer.UNCOMPRESSED) {
- final byte bytes[] = new byte[maxDoc];
- data.readBytes(bytes, 0, bytes.length);
- return new NumericDocValues() {
- @Override
- public long get(int docID) {
- return bytes[docID];
- }
- };
- } else {
- throw new IllegalStateException();
+ switch (entry.format) {
+ case TABLE_COMPRESSED:
+ int size = data.readVInt();
+ if (size > 256) {
+ throw new CorruptIndexException("TABLE_COMPRESSED cannot have more than 256 distinct values, input=" + data);
+ }
+ final long decode[] = new long[size];
+ for (int i = 0; i < decode.length; i++) {
+ decode[i] = data.readLong();
+ }
+ final int formatID = data.readVInt();
+ final int bitsPerValue = data.readVInt();
+ final PackedInts.Reader ordsReader = PackedInts.getReaderNoHeader(data, PackedInts.Format.byId(formatID), entry.packedIntsVersion, maxDoc, bitsPerValue);
+ return new NumericDocValues() {
+ @Override
+ public long get(int docID) {
+ return decode[(int)ordsReader.get(docID)];
+ }
+ };
+ case DELTA_COMPRESSED:
+ final int blockSize = data.readVInt();
+ final BlockPackedReader reader = new BlockPackedReader(data, entry.packedIntsVersion, blockSize, maxDoc, false);
+ return new NumericDocValues() {
+ @Override
+ public long get(int docID) {
+ return reader.get(docID);
+ }
+ };
+ case UNCOMPRESSED:
+ final byte bytes[] = new byte[maxDoc];
+ data.readBytes(bytes, 0, bytes.length);
+ return new NumericDocValues() {
+ @Override
+ public long get(int docID) {
+ return bytes[docID];
+ }
+ };
+ case GCD_COMPRESSED:
+ final long min = data.readLong();
+ final long mult = data.readLong();
+ final int quotientBlockSize = data.readVInt();
+ final BlockPackedReader quotientReader = new BlockPackedReader(data, entry.packedIntsVersion, quotientBlockSize, maxDoc, false);
+ return new NumericDocValues() {
+ @Override
+ public long get(int docID) {
+ return min + mult * quotientReader.get(docID);
+ }
+ };
+ default:
+ throw new AssertionError();
}
}
Modified: lucene/dev/branches/lucene4258/lucene/core/src/java/org/apache/lucene/index/BaseCompositeReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4258/lucene/core/src/java/org/apache/lucene/index/BaseCompositeReader.java?rev=1479862&r1=1479861&r2=1479862&view=diff
==============================================================================
--- lucene/dev/branches/lucene4258/lucene/core/src/java/org/apache/lucene/index/BaseCompositeReader.java (original)
+++ lucene/dev/branches/lucene4258/lucene/core/src/java/org/apache/lucene/index/BaseCompositeReader.java Tue May 7 11:20:55 2013
@@ -51,7 +51,6 @@ public abstract class BaseCompositeReade
private final int[] starts; // 1st docno for each reader
private final int maxDoc;
private final int numDocs;
- private final boolean hasDeletions;
/** List view solely for {@link #getSequentialSubReaders()},
* for effectiveness the array is used internally. */
@@ -70,7 +69,6 @@ public abstract class BaseCompositeReade
this.subReadersList = Collections.unmodifiableList(Arrays.asList(subReaders));
starts = new int[subReaders.length + 1]; // build starts array
int maxDoc = 0, numDocs = 0;
- boolean hasDeletions = false;
for (int i = 0; i < subReaders.length; i++) {
starts[i] = maxDoc;
final IndexReader r = subReaders[i];
@@ -79,15 +77,11 @@ public abstract class BaseCompositeReade
throw new IllegalArgumentException("Too many documents, composite IndexReaders cannot exceed " + Integer.MAX_VALUE);
}
numDocs += r.numDocs(); // compute numDocs
- if (r.hasDeletions()) {
- hasDeletions = true;
- }
r.registerParentReader(this);
}
starts[subReaders.length] = maxDoc;
this.maxDoc = maxDoc;
this.numDocs = numDocs;
- this.hasDeletions = hasDeletions;
}
@Override
@@ -117,12 +111,6 @@ public abstract class BaseCompositeReade
}
@Override
- public final boolean hasDeletions() {
- // Don't call ensureOpen() here (it could affect performance)
- return hasDeletions;
- }
-
- @Override
public final int docFreq(Term term) throws IOException {
ensureOpen();
int total = 0; // sum freqs in subreaders
Modified: lucene/dev/branches/lucene4258/lucene/core/src/java/org/apache/lucene/index/CompositeReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4258/lucene/core/src/java/org/apache/lucene/index/CompositeReader.java?rev=1479862&r1=1479861&r2=1479862&view=diff
==============================================================================
--- lucene/dev/branches/lucene4258/lucene/core/src/java/org/apache/lucene/index/CompositeReader.java (original)
+++ lucene/dev/branches/lucene4258/lucene/core/src/java/org/apache/lucene/index/CompositeReader.java Tue May 7 11:20:55 2013
@@ -66,7 +66,13 @@ public abstract class CompositeReader ex
@Override
public String toString() {
final StringBuilder buffer = new StringBuilder();
- buffer.append(getClass().getSimpleName());
+ // walk up through class hierarchy to get a non-empty simple name (anonymous classes have no name):
+ for (Class<?> clazz = getClass(); clazz != null; clazz = clazz.getSuperclass()) {
+ if (!clazz.isAnonymousClass()) {
+ buffer.append(clazz.getSimpleName());
+ break;
+ }
+ }
buffer.append('(');
final List<? extends IndexReader> subReaders = getSequentialSubReaders();
assert subReaders != null;
Modified: lucene/dev/branches/lucene4258/lucene/core/src/java/org/apache/lucene/index/ConcurrentMergeScheduler.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4258/lucene/core/src/java/org/apache/lucene/index/ConcurrentMergeScheduler.java?rev=1479862&r1=1479861&r2=1479862&view=diff
==============================================================================
--- lucene/dev/branches/lucene4258/lucene/core/src/java/org/apache/lucene/index/ConcurrentMergeScheduler.java (original)
+++ lucene/dev/branches/lucene4258/lucene/core/src/java/org/apache/lucene/index/ConcurrentMergeScheduler.java Tue May 7 11:20:55 2013
@@ -184,7 +184,7 @@ public class ConcurrentMergeScheduler ex
}
// Sort the merge threads in descending order.
- CollectionUtil.mergeSort(activeMerges, compareByMergeDocCount);
+ CollectionUtil.timSort(activeMerges, compareByMergeDocCount);
int pri = mergeThreadPriority;
final int activeMergeCount = activeMerges.size();
@@ -561,4 +561,13 @@ public class ConcurrentMergeScheduler ex
sb.append("mergeThreadPriority=").append(mergeThreadPriority);
return sb.toString();
}
+
+ @Override
+ public MergeScheduler clone() {
+ ConcurrentMergeScheduler clone = (ConcurrentMergeScheduler) super.clone();
+ clone.writer = null;
+ clone.dir = null;
+ clone.mergeThreads = new ArrayList<MergeThread>();
+ return clone;
+ }
}
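The clone() override matters because IndexWriterConfig.clone() deep-clones its components; a sketch of the intended usage (analyzer is an assumed variable):

    IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_50, analyzer);
    IndexWriterConfig conf2 = conf.clone(); // the ConcurrentMergeScheduler is cloned too,
                                            // with writer/dir/mergeThreads reset to fresh state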
Modified: lucene/dev/branches/lucene4258/lucene/core/src/java/org/apache/lucene/index/CorruptIndexException.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4258/lucene/core/src/java/org/apache/lucene/index/CorruptIndexException.java?rev=1479862&r1=1479861&r2=1479862&view=diff
==============================================================================
--- lucene/dev/branches/lucene4258/lucene/core/src/java/org/apache/lucene/index/CorruptIndexException.java (original)
+++ lucene/dev/branches/lucene4258/lucene/core/src/java/org/apache/lucene/index/CorruptIndexException.java Tue May 7 11:20:55 2013
@@ -1,3 +1,5 @@
+package org.apache.lucene.index;
+
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
@@ -15,8 +17,6 @@
* limitations under the License.
*/
-package org.apache.lucene.index;
-
import java.io.IOException;
/**
Modified: lucene/dev/branches/lucene4258/lucene/core/src/java/org/apache/lucene/index/DirectoryReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4258/lucene/core/src/java/org/apache/lucene/index/DirectoryReader.java?rev=1479862&r1=1479861&r2=1479862&view=diff
==============================================================================
--- lucene/dev/branches/lucene4258/lucene/core/src/java/org/apache/lucene/index/DirectoryReader.java (original)
+++ lucene/dev/branches/lucene4258/lucene/core/src/java/org/apache/lucene/index/DirectoryReader.java Tue May 7 11:20:55 2013
@@ -25,6 +25,7 @@ import java.util.List;
import org.apache.lucene.search.SearcherManager; // javadocs
import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.NoSuchDirectoryException;
/** DirectoryReader is an implementation of {@link CompositeReader}
that can read indexes in a {@link Directory}.
@@ -313,17 +314,45 @@ public abstract class DirectoryReader ex
}
/**
- * Returns <code>true</code> if an index exists at the specified directory.
+ * Returns <code>true</code> if an index likely exists at
+ * the specified directory. Note that a corrupt index, or an
+ * index still in the process of committing, may also be reported as existing.
* @param directory the directory to check for an index
* @return <code>true</code> if an index exists; <code>false</code> otherwise
*/
- public static boolean indexExists(Directory directory) {
+ public static boolean indexExists(Directory directory) throws IOException {
+ // LUCENE-2812, LUCENE-2727, LUCENE-4738: this logic will
+ // return true in cases that should arguably be false,
+ // such as only IW.prepareCommit has been called, or a
+ // corrupt first commit, but it's too deadly to make
+ // this logic "smarter" and risk accidentally returning
+ // false due to various cases like file descriptor
+ // exhaustion, access denied, etc., because in that
+ // case IndexWriter may delete the entire index. It's
+ // safer to err towards "index exists" than try to be
+ // smart about detecting not-yet-fully-committed or
+ // corrupt indices. This means that IndexWriter will
+ // throw an exception on such indices and the app must
+ // resolve the situation manually:
+ String[] files;
try {
- new SegmentInfos().read(directory);
- return true;
- } catch (IOException ioe) {
+ files = directory.listAll();
+ } catch (NoSuchDirectoryException nsde) {
+ // Directory does not exist --> no index exists
return false;
}
+
+ // Defensive: maybe a Directory impl returns null
+ // instead of throwing NoSuchDirectoryException:
+ if (files != null) {
+ String prefix = IndexFileNames.SEGMENTS + "_";
+ for(String file : files) {
+ if (file.startsWith(prefix) || file.equals(IndexFileNames.SEGMENTS_GEN)) {
+ return true;
+ }
+ }
+ }
+ return false;
}
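
For callers, the reworked indexExists now answers by listing the directory
for a segments_N or segments.gen file instead of fully reading SegmentInfos,
so it can return true for a corrupt or half-committed index. A hedged usage
sketch (the path is illustrative):

    import java.io.File;
    import java.io.IOException;
    import org.apache.lucene.index.DirectoryReader;
    import org.apache.lucene.store.Directory;
    import org.apache.lucene.store.FSDirectory;

    public class IndexExistsSketch {
      public static void main(String[] args) throws IOException {
        Directory dir = FSDirectory.open(new File("/tmp/test-index")); // illustrative path
        if (DirectoryReader.indexExists(dir)) {
          // Likely safe to open; a truly corrupt index still fails here:
          DirectoryReader reader = DirectoryReader.open(dir);
          reader.close();
        }
        dir.close();
      }
    }
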
/**
Modified: lucene/dev/branches/lucene4258/lucene/core/src/java/org/apache/lucene/index/DocFieldProcessor.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4258/lucene/core/src/java/org/apache/lucene/index/DocFieldProcessor.java?rev=1479862&r1=1479861&r2=1479862&view=diff
==============================================================================
--- lucene/dev/branches/lucene4258/lucene/core/src/java/org/apache/lucene/index/DocFieldProcessor.java (original)
+++ lucene/dev/branches/lucene4258/lucene/core/src/java/org/apache/lucene/index/DocFieldProcessor.java Tue May 7 11:20:55 2013
@@ -214,7 +214,7 @@ final class DocFieldProcessor extends Do
// sort the subset of fields that have vectors
// enabled; we could save [small amount of] CPU
// here.
- ArrayUtil.quickSort(fields, 0, fieldCount, fieldsComp);
+ ArrayUtil.introSort(fields, 0, fieldCount, fieldsComp);
for(int i=0;i<fieldCount;i++) {
final DocFieldProcessorPerField perField = fields[i];
perField.consumer.processFields(perField.fields, perField.fieldCount, segmentInfo, trackingDirectory);
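
Several hunks in this commit migrate off the removed quickSort/mergeSort
entry points: introSort replaces quickSort (O(n log n) worst case, not
stable), and timSort replaces mergeSort (stable, adaptive on partially
sorted input). A hedged sketch with illustrative data, not the actual
DocFieldProcessor state:

    import java.util.ArrayList;
    import java.util.List;
    import org.apache.lucene.util.ArrayUtil;
    import org.apache.lucene.util.CollectionUtil;

    public class SortMigrationSketch {
      public static void main(String[] args) {
        String[] fields = {"title", "body", "id"};
        // Unstable but worst-case O(n log n); fine when equal keys need no fixed order:
        ArrayUtil.introSort(fields, 0, fields.length, String.CASE_INSENSITIVE_ORDER);

        List<String> commits = new ArrayList<String>();
        commits.add("segments_2");
        commits.add("segments_1");
        // Stable and adaptive; preserves the prior order of equal elements:
        CollectionUtil.timSort(commits);
      }
    }
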
Modified: lucene/dev/branches/lucene4258/lucene/core/src/java/org/apache/lucene/index/DocumentsWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4258/lucene/core/src/java/org/apache/lucene/index/DocumentsWriter.java?rev=1479862&r1=1479861&r2=1479862&view=diff
==============================================================================
--- lucene/dev/branches/lucene4258/lucene/core/src/java/org/apache/lucene/index/DocumentsWriter.java (original)
+++ lucene/dev/branches/lucene4258/lucene/core/src/java/org/apache/lucene/index/DocumentsWriter.java Tue May 7 11:20:55 2013
@@ -18,6 +18,7 @@ package org.apache.lucene.index;
*/
import java.io.IOException;
+import java.util.ArrayList;
import java.util.Collection;
import java.util.HashSet;
import java.util.List;
@@ -477,21 +478,21 @@ final class DocumentsWriter {
// TODO: somehow we should fix this merge so it's
// abortable so that IW.close(false) is able to stop it
- TrackingDirectoryWrapper trackingDir = new TrackingDirectoryWrapper(
- directory);
+ TrackingDirectoryWrapper trackingDir = new TrackingDirectoryWrapper(directory);
- SegmentMerger merger = new SegmentMerger(info, infoStream, trackingDir,
+ final List<AtomicReader> mergeReaders = new ArrayList<AtomicReader>();
+ AtomicReader reader;
+ while ((reader = updates.nextReader()) != null) { // add new indexes
+ mergeReaders.add(reader);
+ }
+
+ SegmentMerger merger = new SegmentMerger(mergeReaders, info, infoStream, trackingDir,
interval, MergeState.CheckAbort.NONE, globalFieldNumberMap, context);
updates.startWriting(infoPerCommit.getNextUpdateGen(),
infoPerCommit.info.getDocCount(), indexWriter.getConfig()
.getReaderTermsIndexDivisor());
- AtomicReader reader;
- while ((reader = updates.nextReader()) != null) { // add new indexes
- merger.add(reader);
- }
-
Set<String> generationReplacementFilenames = null;
boolean success = false;
try {
Modified: lucene/dev/branches/lucene4258/lucene/core/src/java/org/apache/lucene/index/DocumentsWriterPerThread.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4258/lucene/core/src/java/org/apache/lucene/index/DocumentsWriterPerThread.java?rev=1479862&r1=1479861&r2=1479862&view=diff
==============================================================================
--- lucene/dev/branches/lucene4258/lucene/core/src/java/org/apache/lucene/index/DocumentsWriterPerThread.java (original)
+++ lucene/dev/branches/lucene4258/lucene/core/src/java/org/apache/lucene/index/DocumentsWriterPerThread.java Tue May 7 11:20:55 2013
@@ -645,7 +645,7 @@ class DocumentsWriterPerThread {
SegmentInfoPerCommit newSegment = flushedSegment.segmentInfo;
- IndexWriter.setDiagnostics(newSegment.info, "flush");
+ IndexWriter.setDiagnostics(newSegment.info, IndexWriter.SOURCE_FLUSH);
IOContext context = new IOContext(new FlushInfo(newSegment.info.getDocCount(), newSegment.sizeInBytes()));
Modified: lucene/dev/branches/lucene4258/lucene/core/src/java/org/apache/lucene/index/FilterAtomicReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4258/lucene/core/src/java/org/apache/lucene/index/FilterAtomicReader.java?rev=1479862&r1=1479861&r2=1479862&view=diff
==============================================================================
--- lucene/dev/branches/lucene4258/lucene/core/src/java/org/apache/lucene/index/FilterAtomicReader.java (original)
+++ lucene/dev/branches/lucene4258/lucene/core/src/java/org/apache/lucene/index/FilterAtomicReader.java Tue May 7 11:20:55 2013
@@ -17,15 +17,16 @@ package org.apache.lucene.index;
* limitations under the License.
*/
-import org.apache.lucene.util.AttributeSource;
-import org.apache.lucene.util.Bits;
-import org.apache.lucene.util.BytesRef;
-import org.apache.lucene.util.automaton.CompiledAutomaton;
-
import java.io.IOException;
import java.util.Comparator;
import java.util.Iterator;
+import org.apache.lucene.search.CachingWrapperFilter;
+import org.apache.lucene.search.FieldCache;
+import org.apache.lucene.util.AttributeSource;
+import org.apache.lucene.util.Bits;
+import org.apache.lucene.util.BytesRef;
+
/** A <code>FilterAtomicReader</code> contains another AtomicReader, which it
* uses as its basic source of data, possibly transforming the data along the
* way or providing additional functionality. The class
@@ -34,6 +35,15 @@ import java.util.Iterator;
* contained index reader. Subclasses of <code>FilterAtomicReader</code> may
* further override some of these methods and may also provide additional
* methods and fields.
+ * <p><b>NOTE</b>: If you override {@link #getLiveDocs()}, you will likely need
+ * to override {@link #numDocs()} as well and vice-versa.
+ * <p><b>NOTE</b>: If this {@link FilterAtomicReader} does not change the
+ * content of the contained reader, you could consider overriding
+ * {@link #getCoreCacheKey()} so that {@link FieldCache} and
+ * {@link CachingWrapperFilter} share the same entries for this atomic reader
+ * and the wrapped one. {@link #getCombinedCoreAndDeletesKey()} could be
+ * overridden as well if the {@link #getLiveDocs() live docs} are not changed
+ * either.
*/
public class FilterAtomicReader extends AtomicReader {
@@ -67,8 +77,11 @@ public class FilterAtomicReader extends
}
}
- /** Base class for filtering {@link Terms}
- * implementations. */
+ /** Base class for filtering {@link Terms} implementations.
+ * <p><b>NOTE</b>: If the order of terms and documents is not changed, and if
+ * these terms are going to be intersected with automata, you could consider
+ * overriding {@link #intersect} for better performance.
+ */
public static class FilterTerms extends Terms {
/** The underlying Terms instance. */
protected final Terms in;
@@ -85,7 +98,7 @@ public class FilterAtomicReader extends
public TermsEnum iterator(TermsEnum reuse) throws IOException {
return in.iterator(reuse);
}
-
+
@Override
public Comparator<BytesRef> getComparator() {
return in.getComparator();
@@ -110,11 +123,6 @@ public class FilterAtomicReader extends
public int getDocCount() throws IOException {
return in.getDocCount();
}
-
- @Override
- public TermsEnum intersect(CompiledAutomaton automaton, BytesRef bytes) throws java.io.IOException {
- return in.intersect(automaton, bytes);
- }
@Override
public boolean hasOffsets() {
@@ -144,8 +152,8 @@ public class FilterAtomicReader extends
public FilterTermsEnum(TermsEnum in) { this.in = in; }
@Override
- public boolean seekExact(BytesRef text, boolean useCache) throws IOException {
- return in.seekExact(text, useCache);
+ public AttributeSource attributes() {
+ return in.attributes();
}
@Override
@@ -197,21 +205,6 @@ public class FilterAtomicReader extends
public Comparator<BytesRef> getComparator() {
return in.getComparator();
}
-
- @Override
- public void seekExact(BytesRef term, TermState state) throws IOException {
- in.seekExact(term, state);
- }
-
- @Override
- public TermState termState() throws IOException {
- return in.termState();
- }
-
- @Override
- public AttributeSource attributes() {
- return in.attributes();
- }
}
/** Base class for filtering {@link DocsEnum} implementations. */
@@ -228,6 +221,11 @@ public class FilterAtomicReader extends
}
@Override
+ public AttributeSource attributes() {
+ return in.attributes();
+ }
+
+ @Override
public int docID() {
return in.docID();
}
@@ -246,11 +244,6 @@ public class FilterAtomicReader extends
public int advance(int target) throws IOException {
return in.advance(target);
}
-
- @Override
- public AttributeSource attributes() {
- return in.attributes();
- }
@Override
public long cost() {
@@ -272,6 +265,11 @@ public class FilterAtomicReader extends
}
@Override
+ public AttributeSource attributes() {
+ return in.attributes();
+ }
+
+ @Override
public int docID() {
return in.docID();
}
@@ -312,11 +310,6 @@ public class FilterAtomicReader extends
}
@Override
- public AttributeSource attributes() {
- return in.attributes();
- }
-
- @Override
public long cost() {
return in.cost();
}
@@ -373,12 +366,6 @@ public class FilterAtomicReader extends
}
@Override
- public boolean hasDeletions() {
- ensureOpen();
- return in.hasDeletions();
- }
-
- @Override
protected void doClose() throws IOException {
in.close();
}
@@ -389,24 +376,6 @@ public class FilterAtomicReader extends
return in.fields();
}
- /** {@inheritDoc}
- * <p>If the subclass of FilteredIndexReader modifies the
- * contents (but not liveDocs) of the index, you must override this
- * method to provide a different key. */
- @Override
- public Object getCoreCacheKey() {
- return in.getCoreCacheKey();
- }
-
- /** {@inheritDoc}
- * <p>If the subclass of FilteredIndexReader modifies the
- * liveDocs, you must override this
- * method to provide a different key. */
- @Override
- public Object getCombinedCoreAndDeletesKey() {
- return in.getCombinedCoreAndDeletesKey();
- }
-
@Override
public String toString() {
final StringBuilder buffer = new StringBuilder("FilterAtomicReader(");
@@ -444,4 +413,5 @@ public class FilterAtomicReader extends
ensureOpen();
return in.getNormValues(field);
}
+
}
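
Following the new class javadoc, a filter that changes neither content nor
live docs can forward both cache keys so FieldCache and CachingWrapperFilter
share entries with the wrapped reader. A minimal sketch under that
assumption ("in" is FilterAtomicReader's protected delegate field):

    import org.apache.lucene.index.AtomicReader;
    import org.apache.lucene.index.FilterAtomicReader;

    public class PassThroughReader extends FilterAtomicReader {
      public PassThroughReader(AtomicReader in) {
        super(in);
      }

      @Override
      public Object getCoreCacheKey() {
        return in.getCoreCacheKey(); // share FieldCache entries with the delegate
      }

      @Override
      public Object getCombinedCoreAndDeletesKey() {
        return in.getCombinedCoreAndDeletesKey(); // live docs are unchanged too
      }
    }
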
Modified: lucene/dev/branches/lucene4258/lucene/core/src/java/org/apache/lucene/index/FreqProxTermsWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4258/lucene/core/src/java/org/apache/lucene/index/FreqProxTermsWriter.java?rev=1479862&r1=1479861&r2=1479862&view=diff
==============================================================================
--- lucene/dev/branches/lucene4258/lucene/core/src/java/org/apache/lucene/index/FreqProxTermsWriter.java (original)
+++ lucene/dev/branches/lucene4258/lucene/core/src/java/org/apache/lucene/index/FreqProxTermsWriter.java Tue May 7 11:20:55 2013
@@ -55,7 +55,7 @@ final class FreqProxTermsWriter extends
final int numAllFields = allFields.size();
// Sort by field name
- CollectionUtil.quickSort(allFields);
+ CollectionUtil.introSort(allFields);
final FieldsConsumer consumer = state.segmentInfo.getCodec().postingsFormat().fieldsConsumer(state);
Modified: lucene/dev/branches/lucene4258/lucene/core/src/java/org/apache/lucene/index/FrozenBufferedDeletes.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4258/lucene/core/src/java/org/apache/lucene/index/FrozenBufferedDeletes.java?rev=1479862&r1=1479861&r2=1479862&view=diff
==============================================================================
--- lucene/dev/branches/lucene4258/lucene/core/src/java/org/apache/lucene/index/FrozenBufferedDeletes.java (original)
+++ lucene/dev/branches/lucene4258/lucene/core/src/java/org/apache/lucene/index/FrozenBufferedDeletes.java Tue May 7 11:20:55 2013
@@ -60,36 +60,23 @@ class FrozenBufferedDeletes {
public FrozenBufferedDeletes(BufferedDeletes deletes, BufferedUpdates updates, boolean isSegmentPrivate) {
this.isSegmentPrivate = isSegmentPrivate;
- int localBytesUsed = 0;
- if (deletes != null) {
- assert !isSegmentPrivate || deletes.terms.size() == 0 : "segment private package should only have del queries";
- Term termsArray[] = deletes.terms.keySet().toArray(
- new Term[deletes.terms.size()]);
- termCount = termsArray.length;
- ArrayUtil.mergeSort(termsArray);
- PrefixCodedTerms.Builder builder = new PrefixCodedTerms.Builder();
- for (Term term : termsArray) {
- builder.add(term);
- }
- terms = builder.finish();
- localBytesUsed += (int) terms.getSizeInBytes();
-
- queries = new Query[deletes.queries.size()];
- queryLimits = new int[deletes.queries.size()];
- int upto = 0;
- for (Map.Entry<Query,Integer> ent : deletes.queries.entrySet()) {
- queries[upto] = ent.getKey();
- queryLimits[upto] = ent.getValue();
- upto++;
- }
-
- localBytesUsed += queries.length * BYTES_PER_DEL_QUERY;
- numTermDeletes = deletes.numTermDeletes.get();
- } else {
- terms = null;
- numTermDeletes = 0;
- queries = null;
- queryLimits = null;
+ assert !isSegmentPrivate || deletes.terms.size() == 0 : "segment private packet should only have del queries";
+ Term termsArray[] = deletes.terms.keySet().toArray(new Term[deletes.terms.size()]);
+ termCount = termsArray.length;
+ ArrayUtil.timSort(termsArray);
+ PrefixCodedTerms.Builder builder = new PrefixCodedTerms.Builder();
+ for (Term term : termsArray) {
+ builder.add(term);
+ }
+ terms = builder.finish();
+
+ queries = new Query[deletes.queries.size()];
+ queryLimits = new int[deletes.queries.size()];
+ int upto = 0;
+ for(Map.Entry<Query,Integer> ent : deletes.queries.entrySet()) {
+ queries[upto] = ent.getKey();
+ queryLimits[upto] = ent.getValue();
+ upto++;
}
// freeze updates
@@ -100,10 +87,10 @@ class FrozenBufferedDeletes {
for (SortedSet<FieldsUpdate> list : updates.terms.values()) {
allUpdates.addAll(list);
}
- localBytesUsed += 100;
}
- bytesUsed = localBytesUsed;
+ bytesUsed = (int) terms.getSizeInBytes() + queries.length * BYTES_PER_DEL_QUERY + 100 /* updates */;
+ numTermDeletes = deletes.numTermDeletes.get();
}
public void setDelGen(long gen) {
Modified: lucene/dev/branches/lucene4258/lucene/core/src/java/org/apache/lucene/index/IndexDeletionPolicy.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4258/lucene/core/src/java/org/apache/lucene/index/IndexDeletionPolicy.java?rev=1479862&r1=1479861&r2=1479862&view=diff
==============================================================================
--- lucene/dev/branches/lucene4258/lucene/core/src/java/org/apache/lucene/index/IndexDeletionPolicy.java (original)
+++ lucene/dev/branches/lucene4258/lucene/core/src/java/org/apache/lucene/index/IndexDeletionPolicy.java Tue May 7 11:20:55 2013
@@ -48,7 +48,10 @@ import java.io.IOException;
* for details.</p>
*/
-public interface IndexDeletionPolicy {
+public abstract class IndexDeletionPolicy implements Cloneable {
+
+ /** Sole constructor, typically called by sub-classes constructors. */
+ protected IndexDeletionPolicy() {}
/**
* <p>This is called once when a writer is first
@@ -70,7 +73,7 @@ public interface IndexDeletionPolicy {
* {@link IndexCommit point-in-time commits},
* sorted by age (the 0th one is the oldest commit).
*/
- public void onInit(List<? extends IndexCommit> commits) throws IOException;
+ public abstract void onInit(List<? extends IndexCommit> commits) throws IOException;
/**
* <p>This is called each time the writer completed a commit.
@@ -94,5 +97,15 @@ public interface IndexDeletionPolicy {
* @param commits List of {@link IndexCommit},
* sorted by age (the 0th one is the oldest commit).
*/
- public void onCommit(List<? extends IndexCommit> commits) throws IOException;
+ public abstract void onCommit(List<? extends IndexCommit> commits) throws IOException;
+
+ @Override
+ public IndexDeletionPolicy clone() {
+ try {
+ return (IndexDeletionPolicy) super.clone();
+ } catch (CloneNotSupportedException e) {
+ throw new Error(e);
+ }
+ }
+
}
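
Since IndexDeletionPolicy is now an abstract class, custom policies extend
rather than implement it. A minimal sketch of a policy that keeps only the
newest commit (the commits list is sorted oldest to newest, per the javadoc
above):

    import java.io.IOException;
    import java.util.List;
    import org.apache.lucene.index.IndexCommit;
    import org.apache.lucene.index.IndexDeletionPolicy;

    public class KeepOnlyNewestPolicy extends IndexDeletionPolicy {
      @Override
      public void onInit(List<? extends IndexCommit> commits) throws IOException {
        onCommit(commits);
      }

      @Override
      public void onCommit(List<? extends IndexCommit> commits) throws IOException {
        // Delete every commit except the last (newest) one:
        for (int i = 0; i < commits.size() - 1; i++) {
          commits.get(i).delete();
        }
      }
    }
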
Modified: lucene/dev/branches/lucene4258/lucene/core/src/java/org/apache/lucene/index/IndexFileDeleter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4258/lucene/core/src/java/org/apache/lucene/index/IndexFileDeleter.java?rev=1479862&r1=1479861&r2=1479862&view=diff
==============================================================================
--- lucene/dev/branches/lucene4258/lucene/core/src/java/org/apache/lucene/index/IndexFileDeleter.java (original)
+++ lucene/dev/branches/lucene4258/lucene/core/src/java/org/apache/lucene/index/IndexFileDeleter.java Tue May 7 11:20:55 2013
@@ -123,7 +123,7 @@ final class IndexFileDeleter implements
* @throws IOException if there is a low-level IO error
*/
public IndexFileDeleter(Directory directory, IndexDeletionPolicy policy, SegmentInfos segmentInfos,
- InfoStream infoStream, IndexWriter writer) throws IOException {
+ InfoStream infoStream, IndexWriter writer, boolean initialIndexExists) throws IOException {
this.infoStream = infoStream;
this.writer = writer;
@@ -209,7 +209,7 @@ final class IndexFileDeleter implements
}
}
- if (currentCommitPoint == null && currentSegmentsFile != null) {
+ if (currentCommitPoint == null && currentSegmentsFile != null && initialIndexExists) {
// We did not in fact see the segments_N file
// corresponding to the segmentInfos that was passed
// in. Yet, it must exist, because our caller holds
@@ -221,7 +221,7 @@ final class IndexFileDeleter implements
try {
sis.read(directory, currentSegmentsFile);
} catch (IOException e) {
- throw new CorruptIndexException("failed to locate current segments_N file");
+ throw new CorruptIndexException("failed to locate current segments_N file \"" + currentSegmentsFile + "\"");
}
if (infoStream.isEnabled("IFD")) {
infoStream.message("IFD", "forced open of current segments file " + segmentInfos.getSegmentsFileName());
@@ -232,7 +232,7 @@ final class IndexFileDeleter implements
}
// We keep commits list in sorted order (oldest to newest):
- CollectionUtil.mergeSort(commits);
+ CollectionUtil.timSort(commits);
// Now delete anything with ref count at 0. These are
// presumably abandoned files eg due to crash of
@@ -250,9 +250,7 @@ final class IndexFileDeleter implements
// Finally, give policy a chance to remove things on
// startup:
- if (currentSegmentsFile != null) {
- policy.onInit(commits);
- }
+ policy.onInit(commits);
// Always protect the incoming segmentInfos since
// sometime it may not be the most recent commit
Modified: lucene/dev/branches/lucene4258/lucene/core/src/java/org/apache/lucene/index/IndexReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene4258/lucene/core/src/java/org/apache/lucene/index/IndexReader.java?rev=1479862&r1=1479861&r2=1479862&view=diff
==============================================================================
--- lucene/dev/branches/lucene4258/lucene/core/src/java/org/apache/lucene/index/IndexReader.java (original)
+++ lucene/dev/branches/lucene4258/lucene/core/src/java/org/apache/lucene/index/IndexReader.java Tue May 7 11:20:55 2013
@@ -22,15 +22,14 @@ import java.io.IOException;
import java.util.Collections;
import java.util.LinkedHashSet;
import java.util.List;
-import java.util.WeakHashMap;
import java.util.Set;
+import java.util.WeakHashMap;
import java.util.concurrent.atomic.AtomicInteger;
-import org.apache.lucene.document.Document;
import org.apache.lucene.document.DocumentStoredFieldVisitor;
-import org.apache.lucene.search.SearcherManager; // javadocs
import org.apache.lucene.store.AlreadyClosedException;
import org.apache.lucene.util.Bits;
+// javadocs
/** IndexReader is an abstract class, providing an interface for accessing an
index. Search of an index is done entirely through this abstract interface,
@@ -179,7 +178,7 @@ public abstract class IndexReader implem
* and returns <code>true</code> iff the refCount was
* successfully incremented, otherwise <code>false</code>.
* If this method returns <code>false</code> the reader is either
- * already closed or is currently been closed. Either way this
+ * already closed or is currently being closed. Either way this
* reader instance shouldn't be used by an application unless
* <code>true</code> is returned.
* <p>
@@ -361,8 +360,12 @@ public abstract class IndexReader implem
return visitor.getDocument();
}
- /** Returns true if any documents have been deleted */
- public abstract boolean hasDeletions();
+ /** Returns true if any documents have been deleted. Implementers should
+ * consider overriding this method if {@link #maxDoc()} or {@link #numDocs()}
+ * are not constant-time operations. */
+ public boolean hasDeletions() {
+ return numDeletedDocs() > 0;
+ }
/**
* Closes files associated with this index.
@@ -415,7 +418,7 @@ public abstract class IndexReader implem
* it again.
* This key must not have equals()/hashCode() methods, so "equals" means "identical". */
public Object getCoreCacheKey() {
- // Don't can ensureOpen since FC calls this (to evict)
+ // Don't call ensureOpen since FC calls this (to evict)
// on close
return this;
}
@@ -424,7 +427,7 @@ public abstract class IndexReader implem
* so FieldCache/CachingWrapperFilter can find it again.
* This key must not have equals()/hashCode() methods, so "equals" means "identical". */
public Object getCombinedCoreAndDeletesKey() {
- // Don't can ensureOpen since FC calls this (to evict)
+ // Don't call ensureOpen since FC calls this (to evict)
// on close
return this;
}
@@ -438,12 +441,11 @@ public abstract class IndexReader implem
*/
public abstract int docFreq(Term term) throws IOException;
- /** Returns the number of documents containing the term
- * <code>term</code>. This method returns 0 if the term or
- * field does not exists, or -1 if the Codec does not support
- * the measure. This method does not take into account deleted
- * documents that have not yet been merged away.
- * @see TermsEnum#totalTermFreq()
+ /**
+ * Returns the total number of occurrences of {@code term} across all
+ * documents (the sum of the freq() for each doc that has this term). This
+ * will be -1 if the codec doesn't support this measure. Note that, like other
+ * term measures, this measure does not take deleted documents into account.
*/
public abstract long totalTermFreq(Term term) throws IOException;
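
A hedged usage sketch contrasting the two term-level statistics documented
above; the field and term values are illustrative, and neither measure
accounts for deleted documents:

    import java.io.IOException;
    import org.apache.lucene.index.IndexReader;
    import org.apache.lucene.index.Term;

    public class TermStatsSketch {
      static void printStats(IndexReader reader) throws IOException {
        Term term = new Term("body", "lucene"); // illustrative field/value
        int df = reader.docFreq(term);          // number of docs containing the term
        long ttf = reader.totalTermFreq(term);  // total occurrences, or -1 if unsupported
        System.out.println("docFreq=" + df + ", totalTermFreq=" + ttf);
      }
    }
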