You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by mi...@apache.org on 2015/04/14 21:12:10 UTC
svn commit: r1673526 - in /lucene/dev/branches/branch_5x: ./ lucene/ lucene/backward-codecs/ lucene/backward-codecs/src/test/org/apache/lucene/index/ lucene/core/ lucene/core/src/java/org/apache/lucene/codecs/blocktree/ lucene/core/src/java/org/apache/...

Author: mikemccand
Date: Tue Apr 14 19:12:09 2015
New Revision: 1673526

URL: http://svn.apache.org/r1673526
Log:
LUCENE-6382: enforce max allowed indexed position

Added:
    lucene/dev/branches/branch_5x/lucene/backward-codecs/src/test/org/apache/lucene/index/TestMaxPositionInOldIndex.java
      - copied unchanged from r1673508, lucene/dev/trunk/lucene/backward-codecs/src/test/org/apache/lucene/index/TestMaxPositionInOldIndex.java
    lucene/dev/branches/branch_5x/lucene/backward-codecs/src/test/org/apache/lucene/index/maxposindex.zip
      - copied unchanged from r1673508, lucene/dev/trunk/lucene/backward-codecs/src/test/org/apache/lucene/index/maxposindex.zip
    lucene/dev/branches/branch_5x/lucene/core/src/test/org/apache/lucene/index/TestMaxPosition.java
      - copied unchanged from r1673508, lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/index/TestMaxPosition.java
Modified:
    lucene/dev/branches/branch_5x/   (props changed)
    lucene/dev/branches/branch_5x/lucene/   (props changed)
    lucene/dev/branches/branch_5x/lucene/CHANGES.txt   (contents, props changed)
    lucene/dev/branches/branch_5x/lucene/backward-codecs/   (props changed)
    lucene/dev/branches/branch_5x/lucene/core/   (props changed)
    lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/codecs/blocktree/AutoPrefixTermsWriter.java
    lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50PostingsWriter.java
    lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/codecs/perfield/PerFieldPostingsFormat.java
    lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java
    lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/index/DefaultIndexingChain.java
    lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java
    lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/index/MappedMultiFields.java
    lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/index/MappingMultiPostingsEnum.java
    lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/util/Version.java
    lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/util/automaton/Automata.java
    lucene/dev/branches/branch_5x/lucene/core/src/test/org/apache/lucene/index/Test2BPositions.java
    lucene/dev/branches/branch_5x/lucene/core/src/test/org/apache/lucene/index/TestIndexWriterExceptions.java
    lucene/dev/branches/branch_5x/lucene/core/src/test/org/apache/lucene/util/automaton/TestAutomaton.java
    lucene/dev/branches/branch_5x/lucene/test-framework/   (props changed)
    lucene/dev/branches/branch_5x/lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingPostingsFormat.java

Modified: lucene/dev/branches/branch_5x/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/CHANGES.txt?rev=1673526&r1=1673525&r2=1673526&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/CHANGES.txt (original)
+++ lucene/dev/branches/branch_5x/lucene/CHANGES.txt Tue Apr 14 19:12:09 2015
@@ -84,6 +84,9 @@ Other
 * LUCENE-6399: Benchmark module's QueryMaker.resetInputs should call setConfig
   so queries can react to property changes in new rounds. (David Smiley)
 
+* LUCENE-6382: Lucene now enforces that positions never exceed the
+  maximum value IndexWriter.MAX_POSITION.  (Robert Muir, Mike McCandless)
+
 Build
 
 * LUCENE-6420: Update forbiddenapis to v1.8  (Uwe Schindler)

Modified: lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/codecs/blocktree/AutoPrefixTermsWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/codecs/blocktree/AutoPrefixTermsWriter.java?rev=1673526&r1=1673525&r2=1673526&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/codecs/blocktree/AutoPrefixTermsWriter.java (original)
+++ lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/codecs/blocktree/AutoPrefixTermsWriter.java Tue Apr 14 19:12:09 2015
@@ -211,12 +211,17 @@ class AutoPrefixTermsWriter {
       }
     }
 
+    // Even though we visited terms in already-sorted order, the prefixes
+    // can be slightly unsorted, e.g. aaaaa will be before aaa, so we
+    // must sort here so our caller can do merge sort into actual terms
+    // when writing.  Probably we should use CollectionUtil.timSort here?
     Collections.sort(prefixes);
   }
 
   /** Pushes the new term to the top of the stack, and writes new blocks. */
   private void pushTerm(BytesRef text) throws IOException {
     int limit = Math.min(lastTerm.length(), text.length);
+    //if (DEBUG) System.out.println("\nterm: " + text.utf8ToString());
 
     // Find common prefix between last term and current term:
     int pos = 0;
@@ -234,10 +239,10 @@ class AutoPrefixTermsWriter {
       int prefixTopSize = pending.size() - prefixStarts[i];
 
       while (prefixTopSize >= minItemsInPrefix) {       
-        //if (DEBUG) System.out.println("pushTerm i=" + i + " prefixTopSize=" + prefixTopSize + " minItemsInBlock=" + minItemsInPrefix);
+        //if (DEBUG) System.out.println("  pop: i=" + i + " prefixTopSize=" + prefixTopSize + " minItemsInBlock=" + minItemsInPrefix);
         savePrefixes(i+1, prefixTopSize);
         //prefixStarts[i] -= prefixTopSize;
-        //System.out.println("    after savePrefixes: " + (pending.size() - prefixStarts[i]) + " pending.size()=" + pending.size() + " start=" + prefixStarts[i]);
+        //if (DEBUG) System.out.println("    after savePrefixes: " + (pending.size() - prefixStarts[i]) + " pending.size()=" + pending.size() + " start=" + prefixStarts[i]);
 
         // For large floor blocks, it's possible we should now re-run on the new prefix terms we just created:
         prefixTopSize = pending.size() - prefixStarts[i];
@@ -267,27 +272,52 @@ class AutoPrefixTermsWriter {
 
     assert count > 0;
 
-    //if (DEBUG2) {
-    //  BytesRef br = new BytesRef(lastTerm.bytes());
-    //  br.length = prefixLength;
-    //  System.out.println("  savePrefixes: seg=" + segment + " " + brToString(br) + " count=" + count + " pending.size()=" + pending.size());
-    //}
+    /*
+    if (DEBUG2) {
+      BytesRef br = new BytesRef(lastTerm.bytes());
+      br.length = prefixLength;
+      //System.out.println("  savePrefixes: seg=" + segment + " " + brToString(br) + " count=" + count + " pending.size()=" + pending.size());
+      System.out.println("  savePrefixes: " + brToString(br) + " count=" + count + " pending.size()=" + pending.size());
+    }
+    */
 
     int lastSuffixLeadLabel = -2;
 
     int start = pending.size()-count;
     assert start >=0;
 
+    // Special-case the empty-string suffix: we are being asked to build prefix terms for all aaa* terms, but
+    // the exact term aaa is also pending here, and we must skip it (it is handled "higher", under the aa* terms):
+    Object o = pending.get(start);
+    boolean skippedEmptyStringSuffix = false;
+    if (o instanceof byte[]) {
+      if (((byte[]) o).length == prefixLength) {
+        start++;
+        count--;
+        //if (DEBUG) System.out.println("  skip empty-string term suffix");
+        skippedEmptyStringSuffix = true;
+      }
+    } else {
+      PrefixTerm prefix = (PrefixTerm) o;
+      if (prefix.term.bytes.length == prefixLength) {
+        start++;
+        count--;
+        //if (DEBUG) System.out.println("  skip empty-string PT suffix");
+        skippedEmptyStringSuffix = true;
+      }
+    }
+
     int end = pending.size();
     int nextBlockStart = start;
     int nextFloorLeadLabel = -1;
     int prefixCount = 0;
-    int pendingCount = 0;
+
     PrefixTerm lastPTEntry = null;
+
     for (int i=start; i<end; i++) {
 
       byte[] termBytes;
-      Object o = pending.get(i);
+      o = pending.get(i);
       PrefixTerm ptEntry;
       if (o instanceof byte[]) {
         ptEntry = null;
@@ -300,23 +330,15 @@ class AutoPrefixTermsWriter {
           ptEntry = null;
         }
       }
-      pendingCount++;
 
-      //if (DEBUG) System.out.println("    check term=" + brToString(new BytesRef(termBytes)));
+      //if (DEBUG) System.out.println("    check term=" + brToString(new BytesRef(termBytes)) + " o=" + o);
 
-      int suffixLeadLabel;
+      // We handled the empty-string suffix case up front:
+      assert termBytes.length > prefixLength;
 
-      if (termBytes.length == prefixLength) {
-        // Suffix is 0, i.e. prefix 'foo' and term is
-        // 'foo' so the term has empty string suffix
-        // in this block
-        assert lastSuffixLeadLabel == -2;
-        suffixLeadLabel = -2;
-      } else {
-        suffixLeadLabel = termBytes[prefixLength] & 0xff;
-      }
+      int suffixLeadLabel = termBytes[prefixLength] & 0xff;
 
-      // if (DEBUG) System.out.println("  i=" + i + " ent=" + ent + " suffixLeadLabel=" + suffixLeadLabel);
+      //if (DEBUG) System.out.println("  i=" + i + " o=" + o + " suffixLeadLabel=" + Integer.toHexString(suffixLeadLabel) + " pendingCount=" + (i - nextBlockStart) + " min=" + minItemsInPrefix);
 
       if (suffixLeadLabel != lastSuffixLeadLabel) {
         // This is a boundary, a chance to make an auto-prefix term if we want:
@@ -327,8 +349,9 @@ class AutoPrefixTermsWriter {
         // than the lead start of the current entry:
         assert suffixLeadLabel > lastSuffixLeadLabel: "suffixLeadLabel=" + suffixLeadLabel + " vs lastSuffixLeadLabel=" + lastSuffixLeadLabel;
 
-        // NOTE: must check nextFloorLeadLabel in case minItemsInPrefix is 2 and prefix is 'a' and we've seen 'a' and then 'aa'
-        if (pendingCount >= minItemsInPrefix && end-nextBlockStart > maxItemsInPrefix && nextFloorLeadLabel != -1) {
+        int itemsInBlock = i - nextBlockStart;
+
+        if (itemsInBlock >= minItemsInPrefix && end-nextBlockStart > maxItemsInPrefix) {
           // The count is too large for one block, so we must break it into "floor" blocks, where we record
           // the leading label of the suffix of the first term in each floor block, so at search time we can
           // jump to the right floor block.  We just use a naive greedy segmenter here: make a new floor
@@ -338,11 +361,10 @@ class AutoPrefixTermsWriter {
           // If the last entry was another prefix term of the same length, then it represents a range of terms, so we must use its ending
           // prefix label as our ending label:
           if (lastPTEntry != null) {
+            //if (DEBUG) System.out.println("  use last");
             lastSuffixLeadLabel = lastPTEntry.floorLeadEnd;
           }
-
           savePrefix(prefixLength, nextFloorLeadLabel, lastSuffixLeadLabel);
-          pendingCount = 0;
 
           prefixCount++;
           nextFloorLeadLabel = suffixLeadLabel;
@@ -356,6 +378,7 @@ class AutoPrefixTermsWriter {
 
         lastSuffixLeadLabel = suffixLeadLabel;
       }
+
       lastPTEntry = ptEntry;
     }
 
@@ -370,6 +393,12 @@ class AutoPrefixTermsWriter {
         if (prefixLength > 0) {
           savePrefix(prefixLength, -2, 0xff);
           prefixCount++;
+          
+          // If we skipped empty string suffix, e.g. term aaa for prefix aaa*, since we
+          // are now writing the full aaa* prefix term, we include it here:
+          if (skippedEmptyStringSuffix) {
+            count++;
+          }
         } else {
           // Don't add a prefix term for all terms in the index!
         }
@@ -384,16 +413,8 @@ class AutoPrefixTermsWriter {
     }
 
     // Remove slice from the top of the pending stack, that we just wrote:
-    int sizeToClear = count;
-    if (prefixCount > 1) {
-      Object o = pending.get(pending.size()-count);
-      if (o instanceof byte[] && ((byte[]) o).length == prefixLength) {
-        // If we were just asked to write all f* terms, but there were too many and so we made floor blocks, the exact term 'f' will remain
-        // as its own item, followed by floor block terms like f[a-m]*, f[n-z]*, so in this case we leave 3 (not 2) items on the pending stack:
-        sizeToClear--;
-      }
-    }
-    pending.subList(pending.size()-sizeToClear, pending.size()).clear();
+
+    pending.subList(pending.size()-count, pending.size()).clear();
 
     // Append prefix terms for each prefix, since these count like real terms that also need to be "rolled up":
     for(int i=0;i<prefixCount;i++) {
@@ -410,6 +431,8 @@ class AutoPrefixTermsWriter {
 
     PrefixTerm pt = new PrefixTerm(prefix, floorLeadStart, floorLeadEnd); 
     //if (DEBUG2) System.out.println("    savePrefix: seg=" + segment + " " + pt + " count=" + count);
+    //if (DEBUG) System.out.println("    savePrefix: " + pt);
+
     prefixes.add(pt);
   }
 }

Modified: lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50PostingsWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50PostingsWriter.java?rev=1673526&r1=1673525&r2=1673526&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50PostingsWriter.java (original)
+++ lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50PostingsWriter.java Tue Apr 14 19:12:09 2015
@@ -36,6 +36,7 @@ import org.apache.lucene.codecs.lucene50
 import org.apache.lucene.index.CorruptIndexException;
 import org.apache.lucene.index.FieldInfo;
 import org.apache.lucene.index.IndexFileNames;
+import org.apache.lucene.index.IndexWriter;
 import org.apache.lucene.index.SegmentWriteState;
 import org.apache.lucene.store.DataOutput;
 import org.apache.lucene.store.IndexOutput;
@@ -250,6 +251,12 @@ public final class Lucene50PostingsWrite
 
   @Override
   public void addPosition(int position, BytesRef payload, int startOffset, int endOffset) throws IOException {
+    if (position > IndexWriter.MAX_POSITION) {
+      throw new CorruptIndexException("position=" + position + " is too large (> IndexWriter.MAX_POSITION=" + IndexWriter.MAX_POSITION + ")", docOut);
+    }
+    if (position < 0) {
+      throw new CorruptIndexException("position=" + position + " is < 0", docOut);
+    }
     posDeltaBuffer[posBufferUpto] = position - lastPosition;
     if (writePayloads) {
       if (payload == null || payload.length == 0) {

Modified: lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/codecs/perfield/PerFieldPostingsFormat.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/codecs/perfield/PerFieldPostingsFormat.java?rev=1673526&r1=1673525&r2=1673526&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/codecs/perfield/PerFieldPostingsFormat.java (original)
+++ lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/codecs/perfield/PerFieldPostingsFormat.java Tue Apr 14 19:12:09 2015
@@ -217,6 +217,7 @@ public abstract class PerFieldPostingsFo
 
     private final Map<String,FieldsProducer> fields = new TreeMap<>();
     private final Map<String,FieldsProducer> formats = new HashMap<>();
+    private final String segment;
     
     // clone for merge
     FieldsReader(FieldsReader other) throws IOException {
@@ -234,6 +235,8 @@ public abstract class PerFieldPostingsFo
         assert producer != null;
         fields.put(ent.getKey(), producer);
       }
+
+      segment = other.segment;
     }
 
     public FieldsReader(final SegmentReadState readState) throws IOException {
@@ -267,6 +270,8 @@ public abstract class PerFieldPostingsFo
           IOUtils.closeWhileHandlingException(formats.values());
         }
       }
+
+      this.segment = readState.segmentInfo.name;
     }
 
     @Override
@@ -320,7 +325,7 @@ public abstract class PerFieldPostingsFo
 
     @Override
     public String toString() {
-      return "PerFieldPostings(formats=" + formats.size() + ")";
+      return "PerFieldPostings(segment=" + segment + " formats=" + formats.size() + ")";
     }
   }
 

Modified: lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java?rev=1673526&r1=1673525&r2=1673526&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java (original)
+++ lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java Tue Apr 14 19:12:09 2015
@@ -1341,6 +1341,9 @@ public class CheckIndex implements Close
               if (pos < 0) {
                 throw new RuntimeException("term " + term + ": doc " + doc + ": pos " + pos + " is out of bounds");
               }
+              if (pos > IndexWriter.MAX_POSITION) {
+                throw new RuntimeException("term " + term + ": doc " + doc + ": pos " + pos + " > IndexWriter.MAX_POSITION=" + IndexWriter.MAX_POSITION);
+              }
               if (pos < lastPos) {
                 throw new RuntimeException("term " + term + ": doc " + doc + ": pos " + pos + " < lastPos " + lastPos);
               }

Modified: lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/index/DefaultIndexingChain.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/index/DefaultIndexingChain.java?rev=1673526&r1=1673525&r2=1673526&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/index/DefaultIndexingChain.java (original)
+++ lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/index/DefaultIndexingChain.java Tue Apr 14 19:12:09 2015
@@ -623,8 +623,11 @@ final class DefaultIndexingChain extends
           if (invertState.position < invertState.lastPosition) {
             if (posIncr == 0) {
               throw new IllegalArgumentException("first position increment must be > 0 (got 0) for field '" + field.name() + "'");
+            } else {
+              throw new IllegalArgumentException("position increments (and gaps) must be >= 0 (got " + posIncr + ") for field '" + field.name() + "'");
             }
-            throw new IllegalArgumentException("position increments (and gaps) must be >= 0 (got " + posIncr + ") for field '" + field.name() + "'");
+          } else if (invertState.position > IndexWriter.MAX_POSITION) {
+            throw new IllegalArgumentException("position " + invertState.position + " is too large for field '" + field.name() + "': max allowed position is " + IndexWriter.MAX_POSITION);
           }
           invertState.lastPosition = invertState.position;
           if (posIncr == 0) {

Modified: lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java?rev=1673526&r1=1673525&r2=1673526&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java (original)
+++ lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java Tue Apr 14 19:12:09 2015
@@ -203,6 +203,9 @@ public class IndexWriter implements Clos
   // ArrayUtil.MAX_ARRAY_LENGTH here because this can vary across JVMs:
   public static final int MAX_DOCS = Integer.MAX_VALUE - 128;
 
+  /** Maximum value of the token position in an indexed field. */
+  public static final int MAX_POSITION = Integer.MAX_VALUE - 128;
+
   // Use package-private instance var to enforce the limit so testing
   // can use less electricity:
   private static int actualMaxDocs = MAX_DOCS;

Modified: lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/index/MappedMultiFields.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/index/MappedMultiFields.java?rev=1673526&r1=1673525&r2=1673526&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/index/MappedMultiFields.java (original)
+++ lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/index/MappedMultiFields.java Tue Apr 14 19:12:09 2015
@@ -46,21 +46,23 @@ public class MappedMultiFields extends F
     if (terms == null) {
       return null;
     } else {
-      return new MappedMultiTerms(mergeState, terms);
+      return new MappedMultiTerms(field, mergeState, terms);
     }
   }
 
   private static class MappedMultiTerms extends FilterTerms {
     final MergeState mergeState;
+    final String field;
 
-    public MappedMultiTerms(MergeState mergeState, MultiTerms multiTerms) {
+    public MappedMultiTerms(String field, MergeState mergeState, MultiTerms multiTerms) {
       super(multiTerms);
+      this.field = field;
       this.mergeState = mergeState;
     }
 
     @Override
     public TermsEnum iterator() throws IOException {
-      return new MappedMultiTermsEnum(mergeState, (MultiTermsEnum) in.iterator());
+      return new MappedMultiTermsEnum(field, mergeState, (MultiTermsEnum) in.iterator());
     }
 
     @Override
@@ -86,9 +88,11 @@ public class MappedMultiFields extends F
 
   private static class MappedMultiTermsEnum extends FilterTermsEnum {
     final MergeState mergeState;
+    final String field;
 
-    public MappedMultiTermsEnum(MergeState mergeState, MultiTermsEnum multiTermsEnum) {
+    public MappedMultiTermsEnum(String field, MergeState mergeState, MultiTermsEnum multiTermsEnum) {
       super(multiTermsEnum);
+      this.field = field;
       this.mergeState = mergeState;
     }
 
@@ -110,26 +114,19 @@ public class MappedMultiFields extends F
 
       MappingMultiPostingsEnum mappingDocsAndPositionsEnum;
       if (reuse instanceof MappingMultiPostingsEnum) {
-        mappingDocsAndPositionsEnum = (MappingMultiPostingsEnum) reuse;
+        MappingMultiPostingsEnum postings = (MappingMultiPostingsEnum) reuse;
+        if (postings.field.equals(this.field)) {
+          mappingDocsAndPositionsEnum = postings;
+        } else {
+          mappingDocsAndPositionsEnum = new MappingMultiPostingsEnum(field, mergeState);
+        }
       } else {
-        mappingDocsAndPositionsEnum = new MappingMultiPostingsEnum(mergeState);
+        mappingDocsAndPositionsEnum = new MappingMultiPostingsEnum(field, mergeState);
       }
 
       MultiPostingsEnum docsAndPositionsEnum = (MultiPostingsEnum) in.postings(liveDocs, mappingDocsAndPositionsEnum.multiDocsAndPositionsEnum, flags);
       mappingDocsAndPositionsEnum.reset(docsAndPositionsEnum);
       return mappingDocsAndPositionsEnum;
-
-/*
-      MappingMultiDocsEnum mappingDocsEnum;
-      if (reuse instanceof MappingMultiDocsEnum) {
-        mappingDocsEnum = (MappingMultiDocsEnum) reuse;
-      } else {
-        mappingDocsEnum = new MappingMultiDocsEnum(mergeState);
-      }
-      
-      MultiDocsEnum docsEnum = (MultiDocsEnum) in.docs(liveDocs, mappingDocsEnum.multiDocsEnum, flags);
-      mappingDocsEnum.reset(docsEnum);
-      return mappingDocsEnum;*/
     }
   }
 }

Modified: lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/index/MappingMultiPostingsEnum.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/index/MappingMultiPostingsEnum.java?rev=1673526&r1=1673525&r2=1673526&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/index/MappingMultiPostingsEnum.java (original)
+++ lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/index/MappingMultiPostingsEnum.java Tue Apr 14 19:12:09 2015
@@ -39,9 +39,11 @@ final class MappingMultiPostingsEnum ext
   int doc = -1;
   private MergeState mergeState;
   MultiPostingsEnum multiDocsAndPositionsEnum;
+  final String field;
 
   /** Sole constructor. */
-  public MappingMultiPostingsEnum(MergeState mergeState) {
+  public MappingMultiPostingsEnum(String field, MergeState mergeState) {
+    this.field = field;
     this.mergeState = mergeState;
   }
 
@@ -112,9 +114,17 @@ final class MappingMultiPostingsEnum ext
 
   @Override
   public int nextPosition() throws IOException {
-    return current.nextPosition();
+    int pos = current.nextPosition();
+    if (pos < 0) {
+      throw new CorruptIndexException("position=" + pos + " is negative, field=\"" + field + "\" doc=" + doc,
+                                      mergeState.fieldsProducers[upto].toString());
+    } else if (pos > IndexWriter.MAX_POSITION) {
+      throw new CorruptIndexException("position=" + pos + " is too large (> IndexWriter.MAX_POSITION=" + IndexWriter.MAX_POSITION + "), field=\"" + field + "\" doc=" + doc,
+                                      mergeState.fieldsProducers[upto].toString());
+    }
+    return pos;
   }
-
+  
   @Override
   public int startOffset() throws IOException {
     return current.startOffset();

Modified: lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/util/Version.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/util/Version.java?rev=1673526&r1=1673525&r2=1673526&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/util/Version.java (original)
+++ lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/util/Version.java Tue Apr 14 19:12:09 2015
@@ -417,7 +417,7 @@ public final class Version {
   /** Returns a new version based on raw numbers
    *
    *  @lucene.internal */
-  public static final Version fromBits(int major, int minor, int bugfix) {
+  public static Version fromBits(int major, int minor, int bugfix) {
     return new Version(major, minor, bugfix);
   }
 

Modified: lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/util/automaton/Automata.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/util/automaton/Automata.java?rev=1673526&r1=1673525&r2=1673526&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/util/automaton/Automata.java (original)
+++ lucene/dev/branches/branch_5x/lucene/core/src/java/org/apache/lucene/util/automaton/Automata.java Tue Apr 14 19:12:09 2015
@@ -32,6 +32,7 @@ package org.apache.lucene.util.automaton
 import java.util.*;
 
 import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.StringHelper;
 
 /**
  * Construction of basic automata.
@@ -216,6 +217,16 @@ final public class Automata {
     return s;
   }
 
+  private static boolean suffixIsZeros(BytesRef br, int len) {
+    for(int i=len;i<br.length;i++) {
+      if (br.bytes[br.offset+i] != 0) {
+        return false;
+      }
+    }
+
+    return true;
+  }
+
   /** Creates a new deterministic, minimal automaton accepting
    *  all binary terms in the specified interval.  Note that unlike
    *  {@link #makeDecimalInterval}, the returned automaton is infinite,
@@ -238,17 +249,6 @@ final public class Automata {
       minInclusive = true;
     }
 
-    // Empty string corner cases:
-    if (max != null && maxInclusive == false && max.length == 1 && max.bytes[max.offset] == 0) {
-      max = new BytesRef();
-      maxInclusive = true;
-    }
-
-    if (min != null && minInclusive == false && min.length == 0) {
-      min = new BytesRef(new byte[1]);
-      minInclusive = true;
-    }
-
     int cmp;
     if (max != null) {
       cmp = min.compareTo(max);
@@ -270,8 +270,56 @@ final public class Automata {
       return makeEmpty();
     }
 
+    if (max != null &&
+        StringHelper.startsWith(max, min) &&
+        suffixIsZeros(max, min.length)) {
+
+      // Finite case: no sink state!
+
+      int maxLength = max.length;
+
+      // the == case was handled above
+      assert maxLength > min.length;
+
+      //  bar -> bar\0+
+      if (maxInclusive == false) {
+        maxLength--;
+      }
+
+      if (maxLength == min.length) {
+        if (minInclusive == false) {
+          return makeEmpty();
+        } else {
+          return makeBinary(min);
+        }
+      }
+
+      Automaton a = new Automaton();
+      int lastState = a.createState();
+      for (int i=0;i<min.length;i++) {
+        int state = a.createState();
+        int label = min.bytes[min.offset+i] & 0xff;
+        a.addTransition(lastState, state, label);
+        lastState = state;
+      }
+
+      if (minInclusive) {
+        a.setAccept(lastState, true);
+      }
+
+      for(int i=min.length;i<maxLength;i++) {
+        int state = a.createState();
+        a.addTransition(lastState, state, 0);
+        a.setAccept(state, true);
+        lastState = state;
+      }
+      a.finishState();
+      return a;
+    }
+
     Automaton a = new Automaton();
     int startState = a.createState();
+
     int sinkState = a.createState();
     a.setAccept(sinkState, true);
 

Modified: lucene/dev/branches/branch_5x/lucene/core/src/test/org/apache/lucene/index/Test2BPositions.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/core/src/test/org/apache/lucene/index/Test2BPositions.java?rev=1673526&r1=1673525&r2=1673526&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/core/src/test/org/apache/lucene/index/Test2BPositions.java (original)
+++ lucene/dev/branches/branch_5x/lucene/core/src/test/org/apache/lucene/index/Test2BPositions.java Tue Apr 14 19:12:09 2015
@@ -29,12 +29,9 @@ import org.apache.lucene.store.BaseDirec
 import org.apache.lucene.store.MockDirectoryWrapper;
 import org.apache.lucene.util.LuceneTestCase;
 import org.apache.lucene.util.TestUtil;
-import org.apache.lucene.util.TimeUnits;
 import org.apache.lucene.util.LuceneTestCase.Monster;
 import org.apache.lucene.util.LuceneTestCase.SuppressCodecs;
 
-import com.carrotsearch.randomizedtesting.annotations.TimeoutSuite;
-
 /**
  * Test indexes ~82M docs with 52 positions each, so you get &gt; Integer.MAX_VALUE positions
  * @lucene.experimental
@@ -60,8 +57,8 @@ public class Test2BPositions extends Luc
 
     MergePolicy mp = w.getConfig().getMergePolicy();
     if (mp instanceof LogByteSizeMergePolicy) {
-     // 1 petabyte:
-     ((LogByteSizeMergePolicy) mp).setMaxMergeMB(1024*1024*1024);
+      // 1 petabyte:
+      ((LogByteSizeMergePolicy) mp).setMaxMergeMB(1024*1024*1024);
     }
 
     Document doc = new Document();

Modified: lucene/dev/branches/branch_5x/lucene/core/src/test/org/apache/lucene/index/TestIndexWriterExceptions.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/core/src/test/org/apache/lucene/index/TestIndexWriterExceptions.java?rev=1673526&r1=1673525&r2=1673526&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/core/src/test/org/apache/lucene/index/TestIndexWriterExceptions.java (original)
+++ lucene/dev/branches/branch_5x/lucene/core/src/test/org/apache/lucene/index/TestIndexWriterExceptions.java Tue Apr 14 19:12:09 2015
@@ -31,10 +31,8 @@ import java.util.Random;
 import java.util.concurrent.atomic.AtomicBoolean;
 
 import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.CannedTokenStream;
 import org.apache.lucene.analysis.MockAnalyzer;
 import org.apache.lucene.analysis.MockTokenizer;
-import org.apache.lucene.analysis.Token;
 import org.apache.lucene.analysis.TokenFilter;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
@@ -1728,49 +1726,7 @@ public class TestIndexWriterExceptions e
     uoe.doFail = false;
     d.close();
   }
-  
-  public void testIllegalPositions() throws Exception {
-    Directory dir = newDirectory();
-    IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(null));
-    Document doc = new Document();
-    Token t1 = new Token("foo", 0, 3);
-    t1.setPositionIncrement(Integer.MAX_VALUE);
-    Token t2 = new Token("bar", 4, 7);
-    t2.setPositionIncrement(200);
-    TokenStream overflowingTokenStream = new CannedTokenStream(
-        new Token[] { t1, t2 }
-    );
-    Field field = new TextField("foo", overflowingTokenStream);
-    doc.add(field);
-    try {
-      iw.addDocument(doc);
-      fail();
-    } catch (IllegalArgumentException expected) {
-      // expected exception
-    }
-    iw.close();
-    dir.close();
-  }
-  
-  public void testLegalbutVeryLargePositions() throws Exception {
-    Directory dir = newDirectory();
-    IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(null));
-    Document doc = new Document();
-    Token t1 = new Token("foo", 0, 3);
-    t1.setPositionIncrement(Integer.MAX_VALUE-500);
-    if (random().nextBoolean()) {
-      t1.setPayload(new BytesRef(new byte[] { 0x1 } ));
-    }
-    TokenStream overflowingTokenStream = new CannedTokenStream(
-        new Token[] { t1 }
-    );
-    Field field = new TextField("foo", overflowingTokenStream);
-    doc.add(field);
-    iw.addDocument(doc);
-    iw.close();
-    dir.close();
-  }
-  
+
   public void testBoostOmitNorms() throws Exception {
     Directory dir = newDirectory();
     IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));

Modified: lucene/dev/branches/branch_5x/lucene/core/src/test/org/apache/lucene/util/automaton/TestAutomaton.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/core/src/test/org/apache/lucene/util/automaton/TestAutomaton.java?rev=1673526&r1=1673525&r2=1673526&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/core/src/test/org/apache/lucene/util/automaton/TestAutomaton.java (original)
+++ lucene/dev/branches/branch_5x/lucene/core/src/test/org/apache/lucene/util/automaton/TestAutomaton.java Tue Apr 14 19:12:09 2015
@@ -1105,34 +1105,154 @@ public class TestAutomaton extends Lucen
     }
   }
 
-  public void testMakeBinaryIntervalRandom() throws Exception {
+  private boolean accepts(Automaton a, BytesRef b) {
+    IntsRefBuilder intsBuilder = new IntsRefBuilder();
+    Util.toIntsRef(b, intsBuilder);    
+    return Operations.run(a, intsBuilder.toIntsRef());
+  }
+
+  private Automaton makeBinaryInterval(BytesRef minTerm, boolean minInclusive,
+                                       BytesRef maxTerm, boolean maxInclusive) {
+    
+    if (VERBOSE) {
+      System.out.println("TEST: minTerm=" + minTerm + " minInclusive=" + minInclusive + " maxTerm=" + maxTerm + " maxInclusive=" + maxInclusive);
+    }
+
+    Automaton a = Automata.makeBinaryInterval(minTerm, minInclusive,
+                                              maxTerm, maxInclusive);
+
+    Automaton minA = MinimizationOperations.minimize(a, Integer.MAX_VALUE);
+    if (minA.getNumStates() != a.getNumStates()) {
+      assertTrue(minA.getNumStates() < a.getNumStates());
+      System.out.println("Original was not minimal:");
+      System.out.println("Original:\n" + a.toDot());
+      System.out.println("Minimized:\n" + minA.toDot());
+      System.out.println("minTerm=" + minTerm + " minInclusive=" + minInclusive);
+      System.out.println("maxTerm=" + maxTerm + " maxInclusive=" + maxInclusive);
+      fail("automaton was not minimal");
+    }
+
+    if (VERBOSE) {
+      System.out.println(a.toDot());
+    }
+
+    return a;
+  }
+
+  public void testMakeBinaryIntervalFiniteCasesBasic() throws Exception {
+    // 0 (incl) - 00 (incl)
+    byte[] zeros = new byte[3];
+    Automaton a = makeBinaryInterval(new BytesRef(zeros, 0, 1), true, new BytesRef(zeros, 0, 2), true);
+    assertTrue(Operations.isFinite(a));
+    assertFalse(accepts(a, new BytesRef()));
+    assertTrue(accepts(a, new BytesRef(zeros, 0, 1)));
+    assertTrue(accepts(a, new BytesRef(zeros, 0, 2)));
+    assertFalse(accepts(a, new BytesRef(zeros, 0, 3)));
+
+    // '' (incl) - 00 (incl)
+    a = makeBinaryInterval(new BytesRef(), true, new BytesRef(zeros, 0, 2), true);
+    assertTrue(Operations.isFinite(a));
+    assertTrue(accepts(a, new BytesRef()));
+    assertTrue(accepts(a, new BytesRef(zeros, 0, 1)));
+    assertTrue(accepts(a, new BytesRef(zeros, 0, 2)));
+    assertFalse(accepts(a, new BytesRef(zeros, 0, 3)));
+
+    // '' (excl) - 00 (incl)
+    a = makeBinaryInterval(new BytesRef(), false, new BytesRef(zeros, 0, 2), true);
+    assertTrue(Operations.isFinite(a));
+    assertFalse(accepts(a, new BytesRef()));
+    assertTrue(accepts(a, new BytesRef(zeros, 0, 1)));
+    assertTrue(accepts(a, new BytesRef(zeros, 0, 2)));
+    assertFalse(accepts(a, new BytesRef(zeros, 0, 3)));
+
+    // 0 (excl) - 00 (incl)
+    a = makeBinaryInterval(new BytesRef(zeros, 0, 1), false, new BytesRef(zeros, 0, 2), true);
+    assertTrue(Operations.isFinite(a));
+    assertFalse(accepts(a, new BytesRef()));
+    assertFalse(accepts(a, new BytesRef(zeros, 0, 1)));
+    assertTrue(accepts(a, new BytesRef(zeros, 0, 2)));
+    assertFalse(accepts(a, new BytesRef(zeros, 0, 3)));
+
+    // 0 (excl) - 00 (excl)
+    a = makeBinaryInterval(new BytesRef(zeros, 0, 1), false, new BytesRef(zeros, 0, 2), false);
+    assertTrue(Operations.isFinite(a));
+    assertFalse(accepts(a, new BytesRef()));
+    assertFalse(accepts(a, new BytesRef(zeros, 0, 1)));
+    assertFalse(accepts(a, new BytesRef(zeros, 0, 2)));
+    assertFalse(accepts(a, new BytesRef(zeros, 0, 3)));
+  }
+
+  public void testMakeBinaryIntervalFiniteCasesRandom() throws Exception {
     int iters = atLeast(100);
     for(int iter=0;iter<iters;iter++) {
-      BytesRef minTerm = TestUtil.randomBinaryTerm(random());
+      BytesRef prefix = new BytesRef(TestUtil.randomRealisticUnicodeString(random()));
+
+      BytesRefBuilder b = new BytesRefBuilder();
+      b.append(prefix);
+      int numZeros = random().nextInt(10);
+      for(int i=0;i<numZeros;i++) {
+        b.append((byte) 0);
+      }
+      BytesRef minTerm = b.get();
+
+      b = new BytesRefBuilder();
+      b.append(minTerm);
+      numZeros = random().nextInt(10);
+      for(int i=0;i<numZeros;i++) {
+        b.append((byte) 0);
+      }
+      BytesRef maxTerm = b.get();
+      
       boolean minInclusive = random().nextBoolean();
-      BytesRef maxTerm = TestUtil.randomBinaryTerm(random());
       boolean maxInclusive = random().nextBoolean();
-
-      if (VERBOSE) {
-        System.out.println("TEST: iter=" + iter + " minTerm=" + minTerm + " minInclusive=" + minInclusive + " maxTerm=" + maxTerm + " maxInclusive=" + maxInclusive);
+      Automaton a = makeBinaryInterval(minTerm, minInclusive,
+                                       maxTerm, maxInclusive);
+      assertTrue(Operations.isFinite(a));
+      int expectedCount = maxTerm.length - minTerm.length + 1;
+      if (minInclusive == false) {
+        expectedCount--;
+      }
+      if (maxInclusive == false) {
+        expectedCount--;
       }
 
-      Automaton a = Automata.makeBinaryInterval(minTerm, minInclusive, maxTerm, maxInclusive);
+      if (expectedCount <= 0) {
+        assertTrue(Operations.isEmpty(a));
+        continue;
+      } else {
+        // Enumerate all finite strings and verify the count matches what we expect:
+        assertEquals(expectedCount, Operations.getFiniteStrings(a, expectedCount).size());
+      }
 
-      Automaton minA = MinimizationOperations.minimize(a, Integer.MAX_VALUE);
-      if (minA.getNumStates() != a.getNumStates()) {
-        assertTrue(minA.getNumStates() < a.getNumStates());
-        System.out.println("Original was not minimal:");
-        System.out.println("Original:\n" + a.toDot());
-        System.out.println("Minimized:\n" + minA.toDot());
-        System.out.println("minTerm=" + minTerm + " minInclusive=" + minInclusive);
-        System.out.println("maxTerm=" + maxTerm + " maxInclusive=" + maxInclusive);
-        fail("automaton was not minimal");
+      b = new BytesRefBuilder();
+      b.append(minTerm);
+      if (minInclusive == false) {
+        assertFalse(accepts(a, b.get()));
+        b.append((byte) 0);
       }
+      while (b.length() < maxTerm.length) {
+        b.append((byte) 0);
 
-      if (VERBOSE) {
-        System.out.println(a.toDot());
+        boolean expected;
+        if (b.length() == maxTerm.length) {
+          expected = maxInclusive;
+        } else {
+          expected = true;
+        }
+        assertEquals(expected, accepts(a, b.get()));
       }
+    }
+  }
+
+  public void testMakeBinaryIntervalRandom() throws Exception {
+    int iters = atLeast(100);
+    for(int iter=0;iter<iters;iter++) {
+      BytesRef minTerm = TestUtil.randomBinaryTerm(random());
+      boolean minInclusive = random().nextBoolean();
+      BytesRef maxTerm = TestUtil.randomBinaryTerm(random());
+      boolean maxInclusive = random().nextBoolean();
+
+      Automaton a = makeBinaryInterval(minTerm, minInclusive, maxTerm, maxInclusive);
 
       for(int iter2=0;iter2<500;iter2++) {
         BytesRef term = TestUtil.randomBinaryTerm(random());

Modified: lucene/dev/branches/branch_5x/lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingPostingsFormat.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_5x/lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingPostingsFormat.java?rev=1673526&r1=1673525&r2=1673526&view=diff
==============================================================================
--- lucene/dev/branches/branch_5x/lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingPostingsFormat.java (original)
+++ lucene/dev/branches/branch_5x/lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingPostingsFormat.java Tue Apr 14 19:12:09 2015
@@ -25,10 +25,11 @@ import org.apache.lucene.codecs.FieldsCo
 import org.apache.lucene.codecs.FieldsProducer;
 import org.apache.lucene.codecs.PostingsFormat;
 import org.apache.lucene.index.AssertingLeafReader;
-import org.apache.lucene.index.PostingsEnum;
 import org.apache.lucene.index.FieldInfo;
 import org.apache.lucene.index.Fields;
 import org.apache.lucene.index.IndexOptions;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.PostingsEnum;
 import org.apache.lucene.index.SegmentReadState;
 import org.apache.lucene.index.SegmentWriteState;
 import org.apache.lucene.index.Terms;
@@ -219,6 +220,7 @@ public final class AssertingPostingsForm
                 for(int i=0;i<freq;i++) {
                   int pos = postingsEnum.nextPosition();
                   assert pos >= lastPos: "pos=" + pos + " vs lastPos=" + lastPos + " i=" + i + " freq=" + freq;
+                  assert pos <= IndexWriter.MAX_POSITION: "pos=" + pos + " is > IndexWriter.MAX_POSITION=" + IndexWriter.MAX_POSITION;
                   lastPos = pos;
 
                   if (hasOffsets) {