You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by mi...@apache.org on 2014/04/24 15:57:48 UTC

svn commit: r1589729 - in /lucene/dev/trunk/lucene: ./ codecs/src/java/org/apache/lucene/codecs/bloom/ core/src/java/org/apache/lucene/codecs/ core/src/java/org/apache/lucene/index/ core/src/java/org/apache/lucene/util/ core/src/test/org/apache/lucene/...

Author: mikemccand
Date: Thu Apr 24 13:57:47 2014
New Revision: 1589729

URL: http://svn.apache.org/r1589729
Log:
LUCENE-5610: add Terms.getMin/Max

Added:
    lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/index/TestTerms.java   (with props)
Modified:
    lucene/dev/trunk/lucene/CHANGES.txt
    lucene/dev/trunk/lucene/codecs/src/java/org/apache/lucene/codecs/bloom/BloomFilteringPostingsFormat.java
    lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/BlockTreeTermsReader.java
    lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/BlockTreeTermsWriter.java
    lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java
    lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/FilteredTermsEnum.java
    lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/MultiTerms.java
    lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/Terms.java
    lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/NumericUtils.java
    lucene/dev/trunk/lucene/test-framework/src/java/org/apache/lucene/index/AssertingAtomicReader.java

Modified: lucene/dev/trunk/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/CHANGES.txt?rev=1589729&r1=1589728&r2=1589729&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/CHANGES.txt (original)
+++ lucene/dev/trunk/lucene/CHANGES.txt Thu Apr 24 13:57:47 2014
@@ -77,6 +77,13 @@ Other
 
 ======================= Lucene 4.9.0 =======================
 
+New Features
+
+* LUCENE-5610: Add Terms.getMin and Terms.getMax to get the lowest and
+  highest terms, and NumericUtils.get{Min/Max}{Int/Long} to get the
+  minimum numeric values from the provided Terms.  (Robert Muir, Mike
+  McCandless)
+
 API Changes
 
 * LUCENE-5582: Deprecate IndexOutput.length (just use
@@ -93,7 +100,7 @@ Optimizations
   
 * LUCENE-5599: HttpReplicator did not properly delegate bulk read() to wrapped
   InputStream. (Christoph Kaser via Shai Erera)
-  
+
 Bug fixes
 
 * LUCENE-5600: HttpClientBase did not properly consume a connection if a server

Modified: lucene/dev/trunk/lucene/codecs/src/java/org/apache/lucene/codecs/bloom/BloomFilteringPostingsFormat.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/codecs/src/java/org/apache/lucene/codecs/bloom/BloomFilteringPostingsFormat.java?rev=1589729&r1=1589728&r2=1589729&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/codecs/src/java/org/apache/lucene/codecs/bloom/BloomFilteringPostingsFormat.java (original)
+++ lucene/dev/trunk/lucene/codecs/src/java/org/apache/lucene/codecs/bloom/BloomFilteringPostingsFormat.java Thu Apr 24 13:57:47 2014
@@ -293,6 +293,16 @@ public final class BloomFilteringPosting
       public boolean hasPayloads() {
         return delegateTerms.hasPayloads();
       }
+
+      @Override
+      public BytesRef getMin() throws IOException {
+        return delegateTerms.getMin();
+      }
+
+      @Override
+      public BytesRef getMax() throws IOException {
+        return delegateTerms.getMax();
+      }
     }
     
     final class BloomFilteredTermsEnum extends TermsEnum {

Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/BlockTreeTermsReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/BlockTreeTermsReader.java?rev=1589729&r1=1589728&r2=1589729&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/BlockTreeTermsReader.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/BlockTreeTermsReader.java Thu Apr 24 13:57:47 2014
@@ -163,6 +163,14 @@ public class BlockTreeTermsReader extend
         final long sumDocFreq = in.readVLong();
         final int docCount = in.readVInt();
         final int longsSize = version >= BlockTreeTermsWriter.VERSION_META_ARRAY ? in.readVInt() : 0;
+
+        BytesRef minTerm, maxTerm;
+        if (version >= BlockTreeTermsWriter.VERSION_MIN_MAX_TERMS) {
+          minTerm = readBytesRef(in);
+          maxTerm = readBytesRef(in);
+        } else {
+          minTerm = maxTerm = null;
+        }
         if (docCount < 0 || docCount > info.getDocCount()) { // #docs with field must be <= #docs
           throw new CorruptIndexException("invalid docCount: " + docCount + " maxDoc: " + info.getDocCount() + " (resource=" + in + ")");
         }
@@ -173,7 +181,9 @@ public class BlockTreeTermsReader extend
           throw new CorruptIndexException("invalid sumTotalTermFreq: " + sumTotalTermFreq + " sumDocFreq: " + sumDocFreq + " (resource=" + in + ")");
         }
         final long indexStartFP = indexIn.readVLong();
-        FieldReader previous = fields.put(fieldInfo.name, new FieldReader(fieldInfo, numTerms, rootCode, sumTotalTermFreq, sumDocFreq, docCount, indexStartFP, longsSize, indexIn));
+        FieldReader previous = fields.put(fieldInfo.name,       
+                                          new FieldReader(fieldInfo, numTerms, rootCode, sumTotalTermFreq, sumDocFreq, docCount,
+                                                          indexStartFP, longsSize, indexIn, minTerm, maxTerm));
         if (previous != null) {
           throw new CorruptIndexException("duplicate field: " + fieldInfo.name + " (resource=" + in + ")");
         }
@@ -189,6 +199,14 @@ public class BlockTreeTermsReader extend
     }
   }
 
+  private static BytesRef readBytesRef(IndexInput in) throws IOException {
+    BytesRef bytes = new BytesRef();
+    bytes.length = in.readVInt();
+    bytes.bytes = new byte[bytes.length];
+    in.readBytes(bytes.bytes, 0, bytes.length);
+    return bytes;
+  }
+
   /** Reads terms file header. */
   private int readHeader(IndexInput input) throws IOException {
     int version = CodecUtil.checkHeader(input, BlockTreeTermsWriter.TERMS_CODEC_NAME,
@@ -456,12 +474,15 @@ public class BlockTreeTermsReader extend
     final long indexStartFP;
     final long rootBlockFP;
     final BytesRef rootCode;
+    final BytesRef minTerm;
+    final BytesRef maxTerm;
     final int longsSize;
 
     private final FST<BytesRef> index;
     //private boolean DEBUG;
 
-    FieldReader(FieldInfo fieldInfo, long numTerms, BytesRef rootCode, long sumTotalTermFreq, long sumDocFreq, int docCount, long indexStartFP, int longsSize, IndexInput indexIn) throws IOException {
+    FieldReader(FieldInfo fieldInfo, long numTerms, BytesRef rootCode, long sumTotalTermFreq, long sumDocFreq, int docCount,
+                long indexStartFP, int longsSize, IndexInput indexIn, BytesRef minTerm, BytesRef maxTerm) throws IOException {
       assert numTerms > 0;
       this.fieldInfo = fieldInfo;
       //DEBUG = BlockTreeTermsReader.DEBUG && fieldInfo.name.equals("id");
@@ -472,6 +493,8 @@ public class BlockTreeTermsReader extend
       this.indexStartFP = indexStartFP;
       this.rootCode = rootCode;
       this.longsSize = longsSize;
+      this.minTerm = minTerm;
+      this.maxTerm = maxTerm;
       // if (DEBUG) {
       //   System.out.println("BTTR: seg=" + segment + " field=" + fieldInfo.name + " rootBlockCode=" + rootCode + " divisor=" + indexDivisor);
       // }
@@ -498,6 +521,26 @@ public class BlockTreeTermsReader extend
       }
     }
 
+    @Override
+    public BytesRef getMin() throws IOException {
+      if (minTerm == null) {
+        // Older index that didn't store min/maxTerm
+        return super.getMin();
+      } else {
+        return minTerm;
+      }
+    }
+
+    @Override
+    public BytesRef getMax() throws IOException {
+      if (maxTerm == null) {
+        // Older index that didn't store min/maxTerm
+        return super.getMax();
+      } else {
+        return maxTerm;
+      }
+    }
+
     /** For debugging -- used by CheckIndex too*/
     // TODO: maybe push this into Terms?
     public Stats computeStats() throws IOException {

Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/BlockTreeTermsWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/BlockTreeTermsWriter.java?rev=1589729&r1=1589728&r2=1589729&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/BlockTreeTermsWriter.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/BlockTreeTermsWriter.java Thu Apr 24 13:57:47 2014
@@ -115,11 +115,12 @@ import org.apache.lucene.util.packed.Pac
  *    <li>InnerNode --&gt; EntryCount, SuffixLength[,Sub?], Byte<sup>SuffixLength</sup>, StatsLength, &lt; TermStats ? &gt;<sup>EntryCount</sup>, MetaLength, &lt;<i>TermMetadata ? </i>&gt;<sup>EntryCount</sup></li>
  *    <li>TermStats --&gt; DocFreq, TotalTermFreq </li>
  *    <li>FieldSummary --&gt; NumFields, &lt;FieldNumber, NumTerms, RootCodeLength, Byte<sup>RootCodeLength</sup>,
- *                            SumTotalTermFreq?, SumDocFreq, DocCount&gt;<sup>NumFields</sup></li>
+ *                            SumTotalTermFreq?, SumDocFreq, DocCount, LongsSize, MinTerm, MaxTerm&gt;<sup>NumFields</sup></li>
  *    <li>Header --&gt; {@link CodecUtil#writeHeader CodecHeader}</li>
  *    <li>DirOffset --&gt; {@link DataOutput#writeLong Uint64}</li>
+ *    <li>MinTerm,MaxTerm --&gt; {@link DataOutput#writeVInt VInt} length followed by the byte[]</li>
  *    <li>EntryCount,SuffixLength,StatsLength,DocFreq,MetaLength,NumFields,
- *        FieldNumber,RootCodeLength,DocCount --&gt; {@link DataOutput#writeVInt VInt}</li>
+ *        FieldNumber,RootCodeLength,DocCount,LongsSize --&gt; {@link DataOutput#writeVInt VInt}</li>
  *    <li>TotalTermFreq,NumTerms,SumTotalTermFreq,SumDocFreq --&gt; 
  *        {@link DataOutput#writeVLong VLong}</li>
  *    <li>Footer --&gt; {@link CodecUtil#writeFooter CodecFooter}</li>
@@ -138,6 +139,9 @@ import org.apache.lucene.util.packed.Pac
  *    <li>SumDocFreq is the total number of postings, the number of term-document pairs across
  *        the entire field.</li>
  *    <li>DocCount is the number of documents that have at least one posting for this field.</li>
+ *    <li>LongsSize records how many long values the postings writer/reader record per term
+ *        (e.g., to hold freq/prox/doc file offsets).
+ *    <li>MinTerm, MaxTerm are the lowest and highest term in this field.</li>
  *    <li>PostingsHeader and TermMetadata are plugged into by the specific postings implementation:
  *        these contain arbitrary per-file data (such as parameters or versioning information) 
  *        and per-term data (such as pointers to inverted files).</li>
@@ -216,8 +220,11 @@ public class BlockTreeTermsWriter extend
   /** checksums */
   public static final int VERSION_CHECKSUM = 3;
 
+  /** min/max term */
+  public static final int VERSION_MIN_MAX_TERMS = 4;
+
   /** Current terms format. */
-  public static final int VERSION_CURRENT = VERSION_CHECKSUM;
+  public static final int VERSION_CURRENT = VERSION_MIN_MAX_TERMS;
 
   /** Extension of terms index file */
   static final String TERMS_INDEX_EXTENSION = "tip";
@@ -241,8 +248,11 @@ public class BlockTreeTermsWriter extend
     public final long sumDocFreq;
     public final int docCount;
     private final int longsSize;
+    public final BytesRef minTerm;
+    public final BytesRef maxTerm;
 
-    public FieldMetaData(FieldInfo fieldInfo, BytesRef rootCode, long numTerms, long indexStartFP, long sumTotalTermFreq, long sumDocFreq, int docCount, int longsSize) {
+    public FieldMetaData(FieldInfo fieldInfo, BytesRef rootCode, long numTerms, long indexStartFP, long sumTotalTermFreq, long sumDocFreq, int docCount, int longsSize,
+                         BytesRef minTerm, BytesRef maxTerm) {
       assert numTerms > 0;
       this.fieldInfo = fieldInfo;
       assert rootCode != null: "field=" + fieldInfo.name + " numTerms=" + numTerms;
@@ -253,6 +263,8 @@ public class BlockTreeTermsWriter extend
       this.sumDocFreq = sumDocFreq;
       this.docCount = docCount;
       this.longsSize = longsSize;
+      this.minTerm = minTerm;
+      this.maxTerm = maxTerm;
     }
   }
 
@@ -354,16 +366,21 @@ public class BlockTreeTermsWriter extend
         TermsEnum termsEnum = terms.iterator(null);
 
         TermsWriter termsWriter = new TermsWriter(fieldInfos.fieldInfo(field));
-
+        BytesRef minTerm = null;
+        BytesRef maxTerm = new BytesRef();
         while (true) {
           BytesRef term = termsEnum.next();
           if (term == null) {
             break;
           }
+          if (minTerm == null) {
+            minTerm = BytesRef.deepCopyOf(term);
+          }
+          maxTerm.copyBytes(term);
           termsWriter.write(term, termsEnum);
         }
 
-        termsWriter.finish();
+        termsWriter.finish(minTerm, minTerm == null ? null : maxTerm);
       }
       success = true;
     } finally {
@@ -1065,7 +1082,7 @@ public class BlockTreeTermsWriter extend
     }
 
     // Finishes all terms in this field
-    public void finish() throws IOException {
+    public void finish(BytesRef minTerm, BytesRef maxTerm) throws IOException {
       if (numTerms > 0) {
         blockBuilder.finish();
 
@@ -1095,7 +1112,8 @@ public class BlockTreeTermsWriter extend
                                      sumTotalTermFreq,
                                      sumDocFreq,
                                      docsSeen.cardinality(),
-                                     longsSize));
+                                     longsSize,
+                                     minTerm, maxTerm));
       } else {
         assert sumTotalTermFreq == 0 || fieldInfo.getIndexOptions() == IndexOptions.DOCS_ONLY && sumTotalTermFreq == -1;
         assert sumDocFreq == 0;
@@ -1123,6 +1141,7 @@ public class BlockTreeTermsWriter extend
       for(FieldMetaData field : fields) {
         //System.out.println("  field " + field.fieldInfo.name + " " + field.numTerms + " terms");
         out.writeVInt(field.fieldInfo.number);
+        assert field.numTerms > 0;
         out.writeVLong(field.numTerms);
         out.writeVInt(field.rootCode.length);
         out.writeBytes(field.rootCode.bytes, field.rootCode.offset, field.rootCode.length);
@@ -1133,6 +1152,8 @@ public class BlockTreeTermsWriter extend
         out.writeVInt(field.docCount);
         out.writeVInt(field.longsSize);
         indexOut.writeVLong(field.indexStartFP);
+        writeBytesRef(out, field.minTerm);
+        writeBytesRef(out, field.maxTerm);
       }
       writeTrailer(out, dirStart);
       CodecUtil.writeFooter(out);
@@ -1144,4 +1165,9 @@ public class BlockTreeTermsWriter extend
       IOUtils.closeWhileHandlingException(ioe, out, indexOut, postingsWriter);
     }
   }
+
+  private static void writeBytesRef(IndexOutput out, BytesRef bytes) throws IOException {
+    out.writeVInt(bytes.length);
+    out.writeBytes(bytes.bytes, bytes.offset, bytes.length);
+  }
 }

Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java?rev=1589729&r1=1589728&r2=1589729&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java Thu Apr 24 13:57:47 2014
@@ -756,6 +756,14 @@ public class CheckIndex {
       final boolean hasPositions = terms.hasPositions();
       final boolean hasPayloads = terms.hasPayloads();
       final boolean hasOffsets = terms.hasOffsets();
+      
+      BytesRef bb = terms.getMin();
+      assert bb.isValid();
+      final BytesRef minTerm = bb == null ? null : BytesRef.deepCopyOf(bb);
+      
+      bb = terms.getMax();
+      assert bb.isValid();
+      final BytesRef maxTerm = bb == null ? null : BytesRef.deepCopyOf(bb);
 
       // term vectors cannot omit TF:
       final boolean expectedHasFreqs = (isVectors || fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS) >= 0);
@@ -817,6 +825,14 @@ public class CheckIndex {
           lastTerm.copyBytes(term);
         }
         
+        if (term.compareTo(minTerm) < 0) {
+          throw new RuntimeException("invalid term: term=" + term + ", minTerm=" + minTerm);
+        }
+        
+        if (term.compareTo(maxTerm) > 0) {
+          throw new RuntimeException("invalid term: term=" + term + ", maxTerm=" + maxTerm);
+        }
+        
         final int docFreq = termsEnum.docFreq();
         if (docFreq <= 0) {
           throw new RuntimeException("docfreq: " + docFreq + " is out of bounds");

Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/FilteredTermsEnum.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/FilteredTermsEnum.java?rev=1589729&r1=1589728&r2=1589729&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/FilteredTermsEnum.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/FilteredTermsEnum.java Thu Apr 24 13:57:47 2014
@@ -36,11 +36,14 @@ import org.apache.lucene.util.Bits;
  */
 public abstract class FilteredTermsEnum extends TermsEnum {
 
-  private BytesRef initialSeekTerm = null;
+  private BytesRef initialSeekTerm;
   private boolean doSeek;
-  private BytesRef actualTerm = null;
 
-  private final TermsEnum tenum;
+  /** Which term the enum is currently positioned to. */
+  protected BytesRef actualTerm;
+
+  /** The delegate {@link TermsEnum}. */
+  protected final TermsEnum tenum;
 
   /** Return value, if term should be accepted or the iteration should
    * {@code END}. The {@code *_SEEK} values denote, that after handling the current term
@@ -246,6 +249,7 @@ public abstract class FilteredTermsEnum 
         case END:
           // we are supposed to end the enum
           return null;
+        // NO: we just fall through and iterate again
       }
     }
   }

Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/MultiTerms.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/MultiTerms.java?rev=1589729&r1=1589728&r2=1589729&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/MultiTerms.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/MultiTerms.java Thu Apr 24 13:57:47 2014
@@ -84,6 +84,32 @@ public final class MultiTerms extends Te
       return TermsEnum.EMPTY;
     }
   }
+  
+  @Override
+  public BytesRef getMin() throws IOException {
+    BytesRef minTerm = null;
+    for(Terms terms : subs) {
+      BytesRef term = terms.getMin();
+      if (minTerm == null || term.compareTo(minTerm) < 0) {
+        minTerm = term;
+      }
+    }
+
+    return minTerm;
+  }
+
+  @Override
+  public BytesRef getMax() throws IOException {
+    BytesRef maxTerm = null;
+    for(Terms terms : subs) {
+      BytesRef term = terms.getMax();
+      if (maxTerm == null || term.compareTo(maxTerm) > 0) {
+        maxTerm = term;
+      }
+    }
+
+    return maxTerm;
+  }
 
   @Override
   public TermsEnum iterator(TermsEnum reuse) throws IOException {

Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/Terms.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/Terms.java?rev=1589729&r1=1589728&r2=1589729&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/Terms.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/index/Terms.java Thu Apr 24 13:57:47 2014
@@ -117,4 +117,78 @@ public abstract class Terms {
 
   /** Zero-length array of {@link Terms}. */
   public final static Terms[] EMPTY_ARRAY = new Terms[0];
+  
+  /** Returns the smallest term (in lexicographic order) in the field. 
+   *  Note that, just like other term measures, this measure does not 
+   *  take deleted documents into account. */
+  public BytesRef getMin() throws IOException {
+    return iterator(null).next();
+  }
+
+  /** Returns the largest term (in lexicographic order) in the field. 
+   *  Note that, just like other term measures, this measure does not 
+   *  take deleted documents into account. */
+  @SuppressWarnings("fallthrough")
+  public BytesRef getMax() throws IOException {
+    long size = size();
+    
+    if (size == 0) {
+      // empty: only possible from a FilteredTermsEnum...
+      return null;
+    } else if (size >= 0) {
+      // try to seek-by-ord
+      try {
+        TermsEnum iterator = iterator(null);
+        iterator.seekExact(size - 1);
+        return iterator.term();
+      } catch (UnsupportedOperationException e) {
+        // ok
+      }
+    }
+    
+    // otherwise: binary search
+    TermsEnum iterator = iterator(null);
+    BytesRef v = iterator.next();
+    if (v == null) {
+      // empty: only possible from a FilteredTermsEnum...
+      return v;
+    }
+
+    BytesRef scratch = new BytesRef(1);
+
+    scratch.length = 1;
+
+    // Iterates over digits:
+    while (true) {
+
+      int low = 0;
+      int high = 256;
+
+      // Binary search current digit to find the highest
+      // digit before END:
+      while (low != high) {
+        int mid = (low+high) >>> 1;
+        scratch.bytes[scratch.length-1] = (byte) mid;
+        if (iterator.seekCeil(scratch) == TermsEnum.SeekStatus.END) {
+          // Scratch was too high
+          if (mid == 0) {
+            scratch.length--;
+            return scratch;
+          }
+          high = mid;
+        } else {
+          // Scratch was too low; there is at least one term
+          // still after it:
+          if (low == mid) {
+            break;
+          }
+          low = mid;
+        }
+      }
+
+      // Recurse to next digit:
+      scratch.length++;
+      scratch.grow(scratch.length);
+    }
+  }
 }

Modified: lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/NumericUtils.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/NumericUtils.java?rev=1589729&r1=1589728&r2=1589729&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/NumericUtils.java (original)
+++ lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/util/NumericUtils.java Thu Apr 24 13:57:47 2014
@@ -17,12 +17,16 @@ package org.apache.lucene.util;
  * limitations under the License.
  */
 
+import java.io.IOException;
+
 import org.apache.lucene.analysis.NumericTokenStream;
 import org.apache.lucene.document.DoubleField; // javadocs
 import org.apache.lucene.document.FloatField; // javadocs
 import org.apache.lucene.document.IntField; // javadocs
 import org.apache.lucene.document.LongField; // javadocs
+import org.apache.lucene.index.FilterAtomicReader;
 import org.apache.lucene.index.FilteredTermsEnum;
+import org.apache.lucene.index.Terms;
 import org.apache.lucene.index.TermsEnum;
 import org.apache.lucene.search.NumericRangeFilter;
 import org.apache.lucene.search.NumericRangeQuery; // for javadocs
@@ -464,14 +468,15 @@ public final class NumericUtils {
    *         terms with a shift value of <tt>0</tt>.
    */
   public static TermsEnum filterPrefixCodedLongs(TermsEnum termsEnum) {
-    return new FilteredTermsEnum(termsEnum, false) {
+    return new SeekingNumericFilteredTermsEnum(termsEnum) {
+
       @Override
       protected AcceptStatus accept(BytesRef term) {
         return NumericUtils.getPrefixCodedLongShift(term) == 0 ? AcceptStatus.YES : AcceptStatus.END;
       }
     };
   }
-  
+
   /**
    * Filters the given {@link TermsEnum} by accepting only prefix coded 32 bit
    * terms with a shift value of <tt>0</tt>.
@@ -482,7 +487,7 @@ public final class NumericUtils {
    *         terms with a shift value of <tt>0</tt>.
    */
   public static TermsEnum filterPrefixCodedInts(TermsEnum termsEnum) {
-    return new FilteredTermsEnum(termsEnum, false) {
+    return new SeekingNumericFilteredTermsEnum(termsEnum) {
       
       @Override
       protected AcceptStatus accept(BytesRef term) {
@@ -490,5 +495,85 @@ public final class NumericUtils {
       }
     };
   }
+
+  /** Just like FilteredTermsEnum, except it adds a limited
+   *  seekCeil implementation that only works with {@link
+   *  #filterPrefixCodedInts} and {@link
+   *  #filterPrefixCodedLongs}. */
+  private static abstract class SeekingNumericFilteredTermsEnum extends FilteredTermsEnum {
+    public SeekingNumericFilteredTermsEnum(final TermsEnum tenum) {
+      super(tenum, false);
+    }
+
+    @Override
+    @SuppressWarnings("fallthrough")
+    public SeekStatus seekCeil(BytesRef term) throws IOException {
+
+      // NOTE: This is not general!!  It only handles YES
+      // and END, because that's all we need for the numeric
+      // case here
+
+      SeekStatus status = tenum.seekCeil(term);
+      if (status == SeekStatus.END) {
+        return SeekStatus.END;
+      }
+
+      actualTerm = tenum.term();
+
+      if (accept(actualTerm) == AcceptStatus.YES) {
+        return status;
+      } else {
+        return SeekStatus.END;
+      }
+    }
+  }
+
+  private static Terms intTerms(Terms terms) {
+    return new FilterAtomicReader.FilterTerms(terms) {
+        @Override
+        public TermsEnum iterator(TermsEnum reuse) throws IOException {
+          return filterPrefixCodedInts(in.iterator(reuse));
+        }
+      };
+  }
+
+  private static Terms longTerms(Terms terms) {
+    return new FilterAtomicReader.FilterTerms(terms) {
+        @Override
+        public TermsEnum iterator(TermsEnum reuse) throws IOException {
+          return filterPrefixCodedLongs(in.iterator(reuse));
+        }
+      };
+  }
+    
+  /** Returns the minimum int value indexed into this
+   *  numeric field. */
+  public static int getMinInt(Terms terms) throws IOException {
+    // All shift=0 terms are sorted first, so we don't need
+    // to filter the incoming terms; we can just get the
+    // min: 
+    return NumericUtils.prefixCodedToInt(terms.getMin());
+  }
+
+  /** Returns the maximum int value indexed into this
+   *  numeric field. */
+  public static int getMaxInt(Terms terms) throws IOException {
+    return NumericUtils.prefixCodedToInt(intTerms(terms).getMax());
+  }
+
+  /** Returns the minimum long value indexed into this
+   *  numeric field. */
+  public static long getMinLong(Terms terms) throws IOException {
+    // All shift=0 terms are sorted first, so we don't need
+    // to filter the incoming terms; we can just get the
+    // min: 
+    return NumericUtils.prefixCodedToLong(terms.getMin());
+  }
+
+  /** Returns the maximum long value indexed into this
+   *  numeric field. */
+  public static long getMaxLong(Terms terms) throws IOException {
+    return NumericUtils.prefixCodedToLong(longTerms(terms).getMax());
+  }
   
 }

Added: lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/index/TestTerms.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/index/TestTerms.java?rev=1589729&view=auto
==============================================================================
--- lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/index/TestTerms.java (added)
+++ lucene/dev/trunk/lucene/core/src/test/org/apache/lucene/index/TestTerms.java Thu Apr 24 13:57:47 2014
@@ -0,0 +1,196 @@
+package org.apache.lucene.index;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.util.*;
+
+import org.apache.lucene.analysis.CannedBinaryTokenStream;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.DoubleField;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.document.FloatField;
+import org.apache.lucene.document.IntField;
+import org.apache.lucene.document.LongField;
+import org.apache.lucene.document.TextField;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.util.NumericUtils;
+import org.apache.lucene.util.TestUtil;
+
+public class TestTerms extends LuceneTestCase {
+
+  public void testTermMinMaxBasic() throws Exception {
+    Directory dir = newDirectory();
+    RandomIndexWriter w = new RandomIndexWriter(random(), dir);
+    Document doc = new Document();
+    doc.add(newTextField("field", "a b c cc ddd", Field.Store.NO));
+    w.addDocument(doc);
+    IndexReader r = w.getReader();
+    Terms terms = MultiFields.getTerms(r, "field");
+    assertEquals(new BytesRef("a"), terms.getMin());
+    assertEquals(new BytesRef("ddd"), terms.getMax());
+    r.close();
+    w.close();
+    dir.close();
+  }
+
+  public void testTermMinMaxRandom() throws Exception {
+    Directory dir = newDirectory();
+    RandomIndexWriter w = new RandomIndexWriter(random(), dir);
+    int numDocs = atLeast(100);
+    BytesRef minTerm = null;
+    BytesRef maxTerm = null;
+    for(int i=0;i<numDocs;i++ ){
+      Document doc = new Document();
+      Field field = new TextField("field", "", Field.Store.NO);
+      doc.add(field);
+      //System.out.println("  doc " + i);
+      CannedBinaryTokenStream.BinaryToken[] tokens = new CannedBinaryTokenStream.BinaryToken[atLeast(10)];
+      for(int j=0;j<tokens.length;j++) {
+        byte[] bytes = new byte[TestUtil.nextInt(random(), 1, 20)];
+        random().nextBytes(bytes);
+        BytesRef tokenBytes = new BytesRef(bytes);
+        //System.out.println("    token " + tokenBytes);
+        if (minTerm == null || tokenBytes.compareTo(minTerm) < 0) {
+          //System.out.println("      ** new min");
+          minTerm = tokenBytes;
+        }
+        if (maxTerm == null || tokenBytes.compareTo(maxTerm) > 0) {
+          //System.out.println("      ** new max");
+          maxTerm = tokenBytes;
+        }
+        tokens[j] = new CannedBinaryTokenStream.BinaryToken(tokenBytes);
+      }
+      field.setTokenStream(new CannedBinaryTokenStream(tokens));
+      w.addDocument(doc);
+    }
+
+    IndexReader r = w.getReader();
+    Terms terms = MultiFields.getTerms(r, "field");
+    assertEquals(minTerm, terms.getMin());
+    assertEquals(maxTerm, terms.getMax());
+    
+    r.close();
+    w.close();
+    dir.close();
+  }
+
+  public void testIntFieldMinMax() throws Exception {
+    Directory dir = newDirectory();
+    RandomIndexWriter w = new RandomIndexWriter(random(), dir);
+    int numDocs = atLeast(100);
+    int minValue = Integer.MAX_VALUE;
+    int maxValue = Integer.MIN_VALUE;
+    for(int i=0;i<numDocs;i++ ){
+      Document doc = new Document();
+      int num = random().nextInt();
+      minValue = Math.min(num, minValue);
+      maxValue = Math.max(num, maxValue);
+      doc.add(new IntField("field", num, Field.Store.NO));
+      w.addDocument(doc);
+    }
+    
+    IndexReader r = w.getReader();
+    Terms terms = MultiFields.getTerms(r, "field");
+    assertEquals(minValue, NumericUtils.getMinInt(terms));
+    assertEquals(maxValue, NumericUtils.getMaxInt(terms));
+
+    r.close();
+    w.close();
+    dir.close();
+  }
+
+  public void testLongFieldMinMax() throws Exception {
+    Directory dir = newDirectory();
+    RandomIndexWriter w = new RandomIndexWriter(random(), dir);
+    int numDocs = atLeast(100);
+    long minValue = Long.MAX_VALUE;
+    long maxValue = Long.MIN_VALUE;
+    for(int i=0;i<numDocs;i++ ){
+      Document doc = new Document();
+      long num = random().nextLong();
+      minValue = Math.min(num, minValue);
+      maxValue = Math.max(num, maxValue);
+      doc.add(new LongField("field", num, Field.Store.NO));
+      w.addDocument(doc);
+    }
+    
+    IndexReader r = w.getReader();
+
+    Terms terms = MultiFields.getTerms(r, "field");
+    assertEquals(minValue, NumericUtils.getMinLong(terms));
+    assertEquals(maxValue, NumericUtils.getMaxLong(terms));
+
+    r.close();
+    w.close();
+    dir.close();
+  }
+
+  public void testFloatFieldMinMax() throws Exception {
+    Directory dir = newDirectory();
+    RandomIndexWriter w = new RandomIndexWriter(random(), dir);
+    int numDocs = atLeast(100);
+    float minValue = Float.POSITIVE_INFINITY;
+    float maxValue = Float.NEGATIVE_INFINITY;
+    for(int i=0;i<numDocs;i++ ){
+      Document doc = new Document();
+      float num = random().nextFloat();
+      minValue = Math.min(num, minValue);
+      maxValue = Math.max(num, maxValue);
+      doc.add(new FloatField("field", num, Field.Store.NO));
+      w.addDocument(doc);
+    }
+    
+    IndexReader r = w.getReader();
+    Terms terms = MultiFields.getTerms(r, "field");
+    assertEquals(minValue, NumericUtils.sortableIntToFloat(NumericUtils.getMinInt(terms)), 0.0f);
+    assertEquals(maxValue, NumericUtils.sortableIntToFloat(NumericUtils.getMaxInt(terms)), 0.0f);
+
+    r.close();
+    w.close();
+    dir.close();
+  }
+
+  public void testDoubleFieldMinMax() throws Exception {
+    Directory dir = newDirectory();
+    RandomIndexWriter w = new RandomIndexWriter(random(), dir);
+    int numDocs = atLeast(100);
+    double minValue = Double.POSITIVE_INFINITY;
+    double maxValue = Double.NEGATIVE_INFINITY;
+    for(int i=0;i<numDocs;i++ ){
+      Document doc = new Document();
+      double num = random().nextDouble();
+      minValue = Math.min(num, minValue);
+      maxValue = Math.max(num, maxValue);
+      doc.add(new DoubleField("field", num, Field.Store.NO));
+      w.addDocument(doc);
+    }
+    
+    IndexReader r = w.getReader();
+
+    Terms terms = MultiFields.getTerms(r, "field");
+
+    assertEquals(minValue, NumericUtils.sortableLongToDouble(NumericUtils.getMinLong(terms)), 0.0);
+    assertEquals(maxValue, NumericUtils.sortableLongToDouble(NumericUtils.getMaxLong(terms)), 0.0);
+
+    r.close();
+    w.close();
+    dir.close();
+  }
+}

Modified: lucene/dev/trunk/lucene/test-framework/src/java/org/apache/lucene/index/AssertingAtomicReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/test-framework/src/java/org/apache/lucene/index/AssertingAtomicReader.java?rev=1589729&r1=1589728&r2=1589729&view=diff
==============================================================================
--- lucene/dev/trunk/lucene/test-framework/src/java/org/apache/lucene/index/AssertingAtomicReader.java (original)
+++ lucene/dev/trunk/lucene/test-framework/src/java/org/apache/lucene/index/AssertingAtomicReader.java Thu Apr 24 13:57:47 2014
@@ -91,6 +91,20 @@ public class AssertingAtomicReader exten
     }
 
     @Override
+    public BytesRef getMin() throws IOException {
+      BytesRef v = in.getMin();
+      assert v == null || v.isValid();
+      return v;
+    }
+
+    @Override
+    public BytesRef getMax() throws IOException {
+      BytesRef v = in.getMax();
+      assert v == null || v.isValid();
+      return v;
+    }
+
+    @Override
     public TermsEnum iterator(TermsEnum reuse) throws IOException {
       // TODO: should we give this thing a random to be super-evil,
       // and randomly *not* unwrap?