Posted to commits@lucene.apache.org by rm...@apache.org on 2015/04/02 17:37:41 UTC

svn commit: r1670929 [1/5] - in /lucene/dev/branches/lucene6271: ./ lucene/ lucene/codecs/ lucene/codecs/src/java/org/apache/lucene/codecs/autoprefix/ lucene/codecs/src/resources/META-INF/services/ lucene/codecs/src/test/org/apache/lucene/codecs/autopr...

Author: rmuir
Date: Thu Apr  2 15:37:39 2015
New Revision: 1670929

URL: http://svn.apache.org/r1670929
Log:
merge trunk up to r1670923

Added:
    lucene/dev/branches/lucene6271/lucene/codecs/src/java/org/apache/lucene/codecs/autoprefix/
      - copied from r1670920, lucene/dev/trunk/lucene/codecs/src/java/org/apache/lucene/codecs/autoprefix/
    lucene/dev/branches/lucene6271/lucene/codecs/src/test/org/apache/lucene/codecs/autoprefix/
      - copied from r1670920, lucene/dev/trunk/lucene/codecs/src/test/org/apache/lucene/codecs/autoprefix/
    lucene/dev/branches/lucene6271/lucene/core/src/java/org/apache/lucene/codecs/blocktree/AutoPrefixTermsWriter.java
      - copied unchanged from r1670920, lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/blocktree/AutoPrefixTermsWriter.java
    lucene/dev/branches/lucene6271/lucene/core/src/java/org/apache/lucene/codecs/blocktree/BitSetPostingsEnum.java
      - copied unchanged from r1670920, lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/blocktree/BitSetPostingsEnum.java
    lucene/dev/branches/lucene6271/lucene/core/src/java/org/apache/lucene/codecs/blocktree/BitSetTermsEnum.java
      - copied unchanged from r1670920, lucene/dev/trunk/lucene/core/src/java/org/apache/lucene/codecs/blocktree/BitSetTermsEnum.java
    lucene/dev/branches/lucene6271/lucene/test-framework/src/java/org/apache/lucene/index/RandomPostingsTester.java
      - copied unchanged from r1670920, lucene/dev/trunk/lucene/test-framework/src/java/org/apache/lucene/index/RandomPostingsTester.java
Removed:
    lucene/dev/branches/lucene6271/lucene/core/src/java/org/apache/lucene/search/TermRangeTermsEnum.java
Modified:
    lucene/dev/branches/lucene6271/   (props changed)
    lucene/dev/branches/lucene6271/lucene/   (props changed)
    lucene/dev/branches/lucene6271/lucene/CHANGES.txt   (contents, props changed)
    lucene/dev/branches/lucene6271/lucene/codecs/   (props changed)
    lucene/dev/branches/lucene6271/lucene/codecs/src/resources/META-INF/services/org.apache.lucene.codecs.PostingsFormat
    lucene/dev/branches/lucene6271/lucene/core/   (props changed)
    lucene/dev/branches/lucene6271/lucene/core/src/java/org/apache/lucene/codecs/BlockTermState.java
    lucene/dev/branches/lucene6271/lucene/core/src/java/org/apache/lucene/codecs/PostingsFormat.java
    lucene/dev/branches/lucene6271/lucene/core/src/java/org/apache/lucene/codecs/blocktree/BlockTreeTermsReader.java
    lucene/dev/branches/lucene6271/lucene/core/src/java/org/apache/lucene/codecs/blocktree/BlockTreeTermsWriter.java
    lucene/dev/branches/lucene6271/lucene/core/src/java/org/apache/lucene/codecs/blocktree/FieldReader.java
    lucene/dev/branches/lucene6271/lucene/core/src/java/org/apache/lucene/codecs/blocktree/IntersectTermsEnum.java
    lucene/dev/branches/lucene6271/lucene/core/src/java/org/apache/lucene/codecs/blocktree/IntersectTermsEnumFrame.java
    lucene/dev/branches/lucene6271/lucene/core/src/java/org/apache/lucene/codecs/blocktree/SegmentTermsEnum.java
    lucene/dev/branches/lucene6271/lucene/core/src/java/org/apache/lucene/codecs/blocktree/SegmentTermsEnumFrame.java
    lucene/dev/branches/lucene6271/lucene/core/src/java/org/apache/lucene/codecs/blocktree/Stats.java
    lucene/dev/branches/lucene6271/lucene/core/src/java/org/apache/lucene/index/AutomatonTermsEnum.java
    lucene/dev/branches/lucene6271/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java
    lucene/dev/branches/lucene6271/lucene/core/src/java/org/apache/lucene/index/FreqProxFields.java
    lucene/dev/branches/lucene6271/lucene/core/src/java/org/apache/lucene/index/MappingMultiPostingsEnum.java
    lucene/dev/branches/lucene6271/lucene/core/src/java/org/apache/lucene/index/TermContext.java
    lucene/dev/branches/lucene6271/lucene/core/src/java/org/apache/lucene/index/Terms.java
    lucene/dev/branches/lucene6271/lucene/core/src/java/org/apache/lucene/search/AutomatonQuery.java
    lucene/dev/branches/lucene6271/lucene/core/src/java/org/apache/lucene/search/PrefixQuery.java
    lucene/dev/branches/lucene6271/lucene/core/src/java/org/apache/lucene/search/ScoringRewrite.java
    lucene/dev/branches/lucene6271/lucene/core/src/java/org/apache/lucene/search/TermRangeQuery.java
    lucene/dev/branches/lucene6271/lucene/core/src/java/org/apache/lucene/util/automaton/Automata.java
    lucene/dev/branches/lucene6271/lucene/core/src/java/org/apache/lucene/util/automaton/Automaton.java
    lucene/dev/branches/lucene6271/lucene/core/src/java/org/apache/lucene/util/automaton/ByteRunAutomaton.java
    lucene/dev/branches/lucene6271/lucene/core/src/java/org/apache/lucene/util/automaton/CompiledAutomaton.java
    lucene/dev/branches/lucene6271/lucene/core/src/java/org/apache/lucene/util/automaton/RegExp.java
    lucene/dev/branches/lucene6271/lucene/core/src/test/org/apache/lucene/search/TestAutomatonQuery.java
    lucene/dev/branches/lucene6271/lucene/core/src/test/org/apache/lucene/search/TestMultiTermQueryRewrites.java
    lucene/dev/branches/lucene6271/lucene/core/src/test/org/apache/lucene/search/TestPrefixQuery.java
    lucene/dev/branches/lucene6271/lucene/core/src/test/org/apache/lucene/search/TestTermRangeQuery.java
    lucene/dev/branches/lucene6271/lucene/core/src/test/org/apache/lucene/search/TestWildcard.java
    lucene/dev/branches/lucene6271/lucene/core/src/test/org/apache/lucene/util/automaton/TestAutomaton.java
    lucene/dev/branches/lucene6271/lucene/test-framework/   (props changed)
    lucene/dev/branches/lucene6271/lucene/test-framework/src/java/org/apache/lucene/index/AssertingLeafReader.java
    lucene/dev/branches/lucene6271/lucene/test-framework/src/java/org/apache/lucene/index/BasePostingsFormatTestCase.java
    lucene/dev/branches/lucene6271/lucene/test-framework/src/java/org/apache/lucene/util/TestUtil.java
    lucene/dev/branches/lucene6271/solr/   (props changed)
    lucene/dev/branches/lucene6271/solr/core/   (props changed)
    lucene/dev/branches/lucene6271/solr/core/src/java/org/apache/solr/client/solrj/embedded/EmbeddedSolrServer.java
    lucene/dev/branches/lucene6271/solr/core/src/java/org/apache/solr/cloud/Overseer.java
    lucene/dev/branches/lucene6271/solr/core/src/java/org/apache/solr/cloud/OverseerCollectionProcessor.java
    lucene/dev/branches/lucene6271/solr/core/src/java/org/apache/solr/cloud/ZkController.java
    lucene/dev/branches/lucene6271/solr/core/src/java/org/apache/solr/cloud/overseer/ClusterStateMutator.java
    lucene/dev/branches/lucene6271/solr/core/src/java/org/apache/solr/cloud/overseer/CollectionMutator.java
    lucene/dev/branches/lucene6271/solr/core/src/java/org/apache/solr/cloud/overseer/ReplicaMutator.java
    lucene/dev/branches/lucene6271/solr/core/src/java/org/apache/solr/cloud/overseer/SliceMutator.java
    lucene/dev/branches/lucene6271/solr/core/src/java/org/apache/solr/core/ImplicitPlugins.java
    lucene/dev/branches/lucene6271/solr/core/src/java/org/apache/solr/core/InitParams.java
    lucene/dev/branches/lucene6271/solr/core/src/java/org/apache/solr/core/JmxMonitoredMap.java
    lucene/dev/branches/lucene6271/solr/core/src/java/org/apache/solr/core/PluginBag.java
    lucene/dev/branches/lucene6271/solr/core/src/java/org/apache/solr/core/SolrConfig.java
    lucene/dev/branches/lucene6271/solr/core/src/java/org/apache/solr/core/SolrCore.java
    lucene/dev/branches/lucene6271/solr/core/src/java/org/apache/solr/core/SolrXmlConfig.java
    lucene/dev/branches/lucene6271/solr/core/src/java/org/apache/solr/handler/BlobHandler.java
    lucene/dev/branches/lucene6271/solr/core/src/java/org/apache/solr/handler/DocumentAnalysisRequestHandler.java
    lucene/dev/branches/lucene6271/solr/core/src/java/org/apache/solr/handler/DumpRequestHandler.java
    lucene/dev/branches/lucene6271/solr/core/src/java/org/apache/solr/handler/IndexFetcher.java
    lucene/dev/branches/lucene6271/solr/core/src/java/org/apache/solr/handler/NotFoundRequestHandler.java
    lucene/dev/branches/lucene6271/solr/core/src/java/org/apache/solr/handler/ReplicationHandler.java
    lucene/dev/branches/lucene6271/solr/core/src/java/org/apache/solr/handler/SchemaHandler.java
    lucene/dev/branches/lucene6271/solr/core/src/java/org/apache/solr/handler/SolrConfigHandler.java
    lucene/dev/branches/lucene6271/solr/core/src/java/org/apache/solr/handler/UpdateRequestHandler.java
    lucene/dev/branches/lucene6271/solr/core/src/java/org/apache/solr/handler/admin/CollectionsHandler.java
    lucene/dev/branches/lucene6271/solr/core/src/java/org/apache/solr/handler/admin/CoreAdminHandler.java
    lucene/dev/branches/lucene6271/solr/core/src/java/org/apache/solr/handler/admin/InfoHandler.java
    lucene/dev/branches/lucene6271/solr/core/src/java/org/apache/solr/handler/admin/PluginInfoHandler.java
    lucene/dev/branches/lucene6271/solr/core/src/java/org/apache/solr/handler/admin/PropertiesRequestHandler.java
    lucene/dev/branches/lucene6271/solr/core/src/java/org/apache/solr/handler/admin/SegmentsInfoRequestHandler.java
    lucene/dev/branches/lucene6271/solr/core/src/java/org/apache/solr/handler/admin/SystemInfoHandler.java
    lucene/dev/branches/lucene6271/solr/core/src/java/org/apache/solr/handler/admin/ThreadDumpHandler.java
    lucene/dev/branches/lucene6271/solr/core/src/java/org/apache/solr/handler/component/DebugComponent.java
    lucene/dev/branches/lucene6271/solr/core/src/java/org/apache/solr/handler/component/SearchHandler.java
    lucene/dev/branches/lucene6271/solr/core/src/java/org/apache/solr/handler/loader/JsonLoader.java
    lucene/dev/branches/lucene6271/solr/core/src/java/org/apache/solr/handler/loader/XMLLoader.java
    lucene/dev/branches/lucene6271/solr/core/src/java/org/apache/solr/logging/LoggerInfo.java
    lucene/dev/branches/lucene6271/solr/core/src/java/org/apache/solr/request/json/RequestUtil.java
    lucene/dev/branches/lucene6271/solr/core/src/java/org/apache/solr/response/SchemaXmlWriter.java
    lucene/dev/branches/lucene6271/solr/core/src/java/org/apache/solr/response/XMLWriter.java
    lucene/dev/branches/lucene6271/solr/core/src/java/org/apache/solr/rest/BaseSolrResource.java
    lucene/dev/branches/lucene6271/solr/core/src/java/org/apache/solr/schema/FieldTypePluginLoader.java
    lucene/dev/branches/lucene6271/solr/core/src/java/org/apache/solr/schema/PreAnalyzedField.java
    lucene/dev/branches/lucene6271/solr/core/src/java/org/apache/solr/search/CacheConfig.java
    lucene/dev/branches/lucene6271/solr/core/src/java/org/apache/solr/search/LFUCache.java
    lucene/dev/branches/lucene6271/solr/core/src/java/org/apache/solr/search/SolrCacheBase.java
    lucene/dev/branches/lucene6271/solr/core/src/java/org/apache/solr/servlet/SolrRequestParsers.java
    lucene/dev/branches/lucene6271/solr/core/src/java/org/apache/solr/servlet/ZookeeperInfoServlet.java
    lucene/dev/branches/lucene6271/solr/core/src/java/org/apache/solr/spelling/suggest/SolrSuggester.java
    lucene/dev/branches/lucene6271/solr/core/src/java/org/apache/solr/util/DOMUtil.java
    lucene/dev/branches/lucene6271/solr/core/src/java/org/apache/solr/util/SolrCLI.java
    lucene/dev/branches/lucene6271/solr/core/src/java/org/apache/solr/util/plugin/AbstractPluginLoader.java
    lucene/dev/branches/lucene6271/solr/core/src/java/org/apache/solr/util/plugin/MapPluginLoader.java
    lucene/dev/branches/lucene6271/solr/core/src/test/org/apache/solr/cloud/LeaderInitiatedRecoveryOnCommitTest.java
    lucene/dev/branches/lucene6271/solr/server/scripts/cloud-scripts/zkcli.bat
    lucene/dev/branches/lucene6271/solr/solrj/   (props changed)
    lucene/dev/branches/lucene6271/solr/solrj/src/java/org/apache/solr/common/params/CommonParams.java

Modified: lucene/dev/branches/lucene6271/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6271/lucene/CHANGES.txt?rev=1670929&r1=1670928&r2=1670929&view=diff
==============================================================================
--- lucene/dev/branches/lucene6271/lucene/CHANGES.txt (original)
+++ lucene/dev/branches/lucene6271/lucene/CHANGES.txt Thu Apr  2 15:37:39 2015
@@ -19,6 +19,10 @@ New Features
   for counting ranges that align with the underlying terms as defined by the
   NumberRangePrefixTree (e.g. familiar date units like days).  (David Smiley)
 
+* LUCENE-5879: Added experimental auto-prefix terms to BlockTree terms
+  dictionary, exposed as AutoPrefixPostingsFormat (Adrien Grand,
+  Uwe Schindler, Robert Muir, Mike McCandless)
+
 API Changes
 
 * LUCENE-3312: The API of oal.document was restructured to

Modified: lucene/dev/branches/lucene6271/lucene/codecs/src/resources/META-INF/services/org.apache.lucene.codecs.PostingsFormat
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6271/lucene/codecs/src/resources/META-INF/services/org.apache.lucene.codecs.PostingsFormat?rev=1670929&r1=1670928&r2=1670929&view=diff
==============================================================================
--- lucene/dev/branches/lucene6271/lucene/codecs/src/resources/META-INF/services/org.apache.lucene.codecs.PostingsFormat (original)
+++ lucene/dev/branches/lucene6271/lucene/codecs/src/resources/META-INF/services/org.apache.lucene.codecs.PostingsFormat Thu Apr  2 15:37:39 2015
@@ -20,3 +20,4 @@ org.apache.lucene.codecs.memory.FSTOrdPo
 org.apache.lucene.codecs.memory.FSTPostingsFormat
 org.apache.lucene.codecs.memory.MemoryPostingsFormat
 org.apache.lucene.codecs.simpletext.SimpleTextPostingsFormat
+org.apache.lucene.codecs.autoprefix.AutoPrefixPostingsFormat

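The service entry added above makes the new postings format discoverable by name through NamedSPILoader once the lucene-codecs jar is on the classpath. A minimal sketch of selecting it per field follows; it assumes the registered SPI name is "AutoPrefix" (suggested by the class name, but check AutoPrefixPostingsFormat's constructor for the actual name) and uses Lucene50Codec as the base codec, with a made-up "path" field:

import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.codecs.lucene50.Lucene50Codec;

public class AutoPrefixCodec extends Lucene50Codec {
  // Resolved through the SPI entry registered above; "AutoPrefix" is an assumed name.
  private final PostingsFormat autoPrefix = PostingsFormat.forName("AutoPrefix");

  @Override
  public PostingsFormat getPostingsFormatForField(String field) {
    // "path" is an example field; auto-prefix terms only apply to DOCS-only
    // fields (see the BlockTreeTermsWriter changes below).
    if ("path".equals(field)) {
      return autoPrefix;
    }
    return super.getPostingsFormatForField(field);
  }
}
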
Modified: lucene/dev/branches/lucene6271/lucene/core/src/java/org/apache/lucene/codecs/BlockTermState.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6271/lucene/core/src/java/org/apache/lucene/codecs/BlockTermState.java?rev=1670929&r1=1670928&r2=1670929&view=diff
==============================================================================
--- lucene/dev/branches/lucene6271/lucene/core/src/java/org/apache/lucene/codecs/BlockTermState.java (original)
+++ lucene/dev/branches/lucene6271/lucene/core/src/java/org/apache/lucene/codecs/BlockTermState.java Thu Apr  2 15:37:39 2015
@@ -16,6 +16,7 @@ package org.apache.lucene.codecs;
  * limitations under the License.
  */
 
+import org.apache.lucene.codecs.blocktree.BlockTreeTermsReader; // javadocs
 import org.apache.lucene.index.OrdTermState;
 import org.apache.lucene.index.TermState;
 
@@ -23,6 +24,8 @@ import org.apache.lucene.index.TermState
  * Holds all state required for {@link PostingsReaderBase}
  * to produce a {@link org.apache.lucene.index.PostingsEnum} without re-seeking the
  * terms dict.
+ *
+ * @lucene.internal
  */
 public class BlockTermState extends OrdTermState {
   /** how many docs have this term */
@@ -36,6 +39,11 @@ public class BlockTermState extends OrdT
   // TODO: update BTR to nuke this
   public long blockFilePointer;
 
+  /** True if this term is "real" (e.g., not an auto-prefix term or
+   *  some other "secret" term; currently only {@link BlockTreeTermsReader}
+   *  sets this). */
+  public boolean isRealTerm;
+
   /** Sole constructor. (For invocation by subclass 
    *  constructors, typically implicit.) */
   protected BlockTermState() {
@@ -50,10 +58,11 @@ public class BlockTermState extends OrdT
     totalTermFreq = other.totalTermFreq;
     termBlockOrd = other.termBlockOrd;
     blockFilePointer = other.blockFilePointer;
+    isRealTerm = other.isRealTerm;
   }
 
   @Override
   public String toString() {
-    return "docFreq=" + docFreq + " totalTermFreq=" + totalTermFreq + " termBlockOrd=" + termBlockOrd + " blockFP=" + blockFilePointer;
+    return "docFreq=" + docFreq + " totalTermFreq=" + totalTermFreq + " termBlockOrd=" + termBlockOrd + " blockFP=" + blockFilePointer + " isRealTerm=" + isRealTerm;
   }
 }

Modified: lucene/dev/branches/lucene6271/lucene/core/src/java/org/apache/lucene/codecs/PostingsFormat.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6271/lucene/core/src/java/org/apache/lucene/codecs/PostingsFormat.java?rev=1670929&r1=1670928&r2=1670929&view=diff
==============================================================================
--- lucene/dev/branches/lucene6271/lucene/core/src/java/org/apache/lucene/codecs/PostingsFormat.java (original)
+++ lucene/dev/branches/lucene6271/lucene/core/src/java/org/apache/lucene/codecs/PostingsFormat.java Thu Apr  2 15:37:39 2015
@@ -62,6 +62,7 @@ public abstract class PostingsFormat imp
    * @param name must be all ascii alphanumeric, and less than 128 characters in length.
    */
   protected PostingsFormat(String name) {
+    // TODO: can we somehow detect name conflicts here?  Two different classes trying to claim the same name?  Otherwise you see confusing errors...
     NamedSPILoader.checkServiceName(name);
     this.name = name;
   }

Modified: lucene/dev/branches/lucene6271/lucene/core/src/java/org/apache/lucene/codecs/blocktree/BlockTreeTermsReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6271/lucene/core/src/java/org/apache/lucene/codecs/blocktree/BlockTreeTermsReader.java?rev=1670929&r1=1670928&r2=1670929&view=diff
==============================================================================
--- lucene/dev/branches/lucene6271/lucene/core/src/java/org/apache/lucene/codecs/blocktree/BlockTreeTermsReader.java (original)
+++ lucene/dev/branches/lucene6271/lucene/core/src/java/org/apache/lucene/codecs/blocktree/BlockTreeTermsReader.java Thu Apr  2 15:37:39 2015
@@ -34,6 +34,8 @@ import org.apache.lucene.index.IndexFile
 import org.apache.lucene.index.IndexOptions;
 import org.apache.lucene.index.SegmentReadState;
 import org.apache.lucene.index.Terms;
+import org.apache.lucene.search.PrefixQuery;  // javadocs
+import org.apache.lucene.search.TermRangeQuery;  // javadocs
 import org.apache.lucene.store.IndexInput;
 import org.apache.lucene.util.Accountable;
 import org.apache.lucene.util.Accountables;
@@ -57,6 +59,14 @@ import org.apache.lucene.util.fst.Output
  *  min/maxItemsPerBlock during indexing to control how
  *  much memory the terms index uses.</p>
  *
+ *  <p>If auto-prefix terms were indexed (see
+ *  {@link BlockTreeTermsWriter}), then the {@link Terms#intersect}
+ *  implementation here will make use of these terms only if the
+ *  automaton has a binary sink state, i.e. an accept state
+ *  which has a transition to itself accepting all byte values.
+ *  For example, both {@link PrefixQuery} and {@link TermRangeQuery}
+ *  pass such automata to {@link Terms#intersect}.</p>
+ *
  *  <p>The data structure used by this implementation is very
  *  similar to a burst trie
  *  (http://citeseer.ist.psu.edu/viewdoc/summary?doi=10.1.1.18.3499),
@@ -90,8 +100,11 @@ public final class BlockTreeTermsReader
   /** Initial terms format. */
   public static final int VERSION_START = 0;
 
+  /** Auto-prefix terms. */
+  public static final int VERSION_AUTO_PREFIX_TERMS = 1;
+
   /** Current terms format. */
-  public static final int VERSION_CURRENT = VERSION_START;
+  public static final int VERSION_CURRENT = VERSION_AUTO_PREFIX_TERMS;
 
   /** Extension of terms index file */
   static final String TERMS_INDEX_EXTENSION = "tip";
@@ -116,7 +129,7 @@ public final class BlockTreeTermsReader
 
   final String segment;
   
-  private final int version;
+  final int version;
 
   /** Sole constructor. */
   public BlockTreeTermsReader(PostingsReaderBase postingsReader, SegmentReadState state) throws IOException {

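The javadoc added above limits the auto-prefix optimization to automata with a binary sink state: an accept state whose self-transition accepts every byte value, i.e. "anything may follow this point". As that javadoc notes, PrefixQuery and TermRangeQuery compile to exactly such automata and hand them to Terms#intersect, so they are the queries that can benefit. An illustrative sketch (field names and values are made up):

import org.apache.lucene.index.Term;
import org.apache.lucene.search.PrefixQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermRangeQuery;

// path:/var/log/* -- every term starting with the prefix matches, so the
// automaton's accept state loops back to itself on all bytes (a sink state):
Query prefix = new PrefixQuery(new Term("path", "/var/log/"));

// id:[0100 TO 0200] -- the range automaton likewise reaches a sink state once
// a term is strictly inside the bounds:
Query range = TermRangeQuery.newStringRange("id", "0100", "0200", true, true);
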
Modified: lucene/dev/branches/lucene6271/lucene/core/src/java/org/apache/lucene/codecs/blocktree/BlockTreeTermsWriter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6271/lucene/core/src/java/org/apache/lucene/codecs/blocktree/BlockTreeTermsWriter.java?rev=1670929&r1=1670928&r2=1670929&view=diff
==============================================================================
--- lucene/dev/branches/lucene6271/lucene/core/src/java/org/apache/lucene/codecs/blocktree/BlockTreeTermsWriter.java (original)
+++ lucene/dev/branches/lucene6271/lucene/core/src/java/org/apache/lucene/codecs/blocktree/BlockTreeTermsWriter.java Thu Apr  2 15:37:39 2015
@@ -25,11 +25,13 @@ import org.apache.lucene.codecs.BlockTer
 import org.apache.lucene.codecs.CodecUtil;
 import org.apache.lucene.codecs.FieldsConsumer;
 import org.apache.lucene.codecs.PostingsWriterBase;
+import org.apache.lucene.codecs.blocktree.AutoPrefixTermsWriter.PrefixTerm;
 import org.apache.lucene.index.FieldInfo;
 import org.apache.lucene.index.FieldInfos;
 import org.apache.lucene.index.Fields;
 import org.apache.lucene.index.IndexFileNames;
 import org.apache.lucene.index.IndexOptions;
+import org.apache.lucene.index.PostingsEnum;
 import org.apache.lucene.index.SegmentWriteState;
 import org.apache.lucene.index.Terms;
 import org.apache.lucene.index.TermsEnum;
@@ -87,6 +89,16 @@ import org.apache.lucene.util.packed.Pac
  * stride) each term's metadata for each set of terms
  * between two index terms.
  * <p>
+ *
+ * If {@code minItemsInAutoPrefix} is not zero, then for
+ * {@link IndexOptions#DOCS} fields we detect prefixes that match
+ * "enough" terms and insert auto-prefix terms into the index, which are
+ * used by {@link Terms#intersect}  at search time to speed up prefix
+ * and range queries.  Besides {@link Terms#intersect}, these
+ * auto-prefix terms are invisible to all other APIs (don't change terms
+ * stats, don't show up in normal {@link TermsEnum}s, etc.).
+ * <p>
+ *
  * Files:
  * <ul>
  *   <li><tt>.tim</tt>: <a href="#Termdictionary">Term Dictionary</a></li>
@@ -200,7 +212,9 @@ public final class BlockTreeTermsWriter
    *  #BlockTreeTermsWriter(SegmentWriteState,PostingsWriterBase,int,int)}. */
   public final static int DEFAULT_MAX_BLOCK_SIZE = 48;
 
-  // public final static boolean DEBUG = false;
+  //public static boolean DEBUG = false;
+  //public static boolean DEBUG2 = false;
+
   //private final static boolean SAVE_DOT_FILES = false;
 
   private final IndexOutput termsOut;
@@ -208,6 +222,8 @@ public final class BlockTreeTermsWriter
   final int maxDoc;
   final int minItemsInBlock;
   final int maxItemsInBlock;
+  final int minItemsInAutoPrefix;
+  final int maxItemsInAutoPrefix;
 
   final PostingsWriterBase postingsWriter;
   final FieldInfos fieldInfos;
@@ -244,23 +260,67 @@ public final class BlockTreeTermsWriter
   private final List<FieldMetaData> fields = new ArrayList<>();
 
   // private final String segment;
+  final FixedBitSet prefixDocs;
+
+  /** Reused in getAutoPrefixTermsEnum: */
+  final BitSetTermsEnum prefixFixedBitsTermsEnum;
+
+  /** Reused in getAutoPrefixTermsEnum: */
+  private TermsEnum prefixTermsEnum;
+
+  /** Reused in getAutoPrefixTermsEnum: */
+  private PostingsEnum prefixDocsEnum;
+
+  /** Create a new writer, using default values for auto-prefix terms. */
+  public BlockTreeTermsWriter(SegmentWriteState state,
+                              PostingsWriterBase postingsWriter,
+                              int minItemsInBlock,
+                              int maxItemsInBlock) throws IOException {
+    this(state, postingsWriter, minItemsInBlock, maxItemsInBlock, 0, 0);
+  }
 
   /** Create a new writer.  The number of items (terms or
    *  sub-blocks) per block will aim to be between
    *  minItemsPerBlock and maxItemsPerBlock, though in some
-   *  cases the blocks may be smaller than the min. */
+   *  cases the blocks may be smaller than the min.
+   *  For DOCS_ONLY fields, this terms dictionary will
+   *  insert automatically generated prefix terms for common
+   *  prefixes, as long as each prefix matches at least
+   *  {@code minItemsInAutoPrefix} other terms or prefixes,
+   *  and at most {@code maxItemsInAutoPrefix} other terms
+   *  or prefixes.  Set {@code minItemsInAutoPrefix} to 0
+   *  to disable auto-prefix terms. */
   public BlockTreeTermsWriter(SegmentWriteState state,
                               PostingsWriterBase postingsWriter,
                               int minItemsInBlock,
-                              int maxItemsInBlock)
+                              int maxItemsInBlock,
+                              int minItemsInAutoPrefix,
+                              int maxItemsInAutoPrefix)
     throws IOException
   {
-    validateSettings(minItemsInBlock, maxItemsInBlock);
+    validateSettings(minItemsInBlock,
+                     maxItemsInBlock);
 
-    this.maxDoc = state.segmentInfo.maxDoc();
-    this.fieldInfos = state.fieldInfos;
     this.minItemsInBlock = minItemsInBlock;
     this.maxItemsInBlock = maxItemsInBlock;
+
+    validateAutoPrefixSettings(minItemsInAutoPrefix,
+                               maxItemsInAutoPrefix);
+
+    if (minItemsInAutoPrefix != 0) {
+      // TODO: can we used compressed bitset instead?  that auto-upgrades if it's dense enough...
+      prefixDocs = new FixedBitSet(state.segmentInfo.maxDoc());
+      prefixFixedBitsTermsEnum = new BitSetTermsEnum(prefixDocs);
+    } else {
+      prefixDocs = null;
+      prefixFixedBitsTermsEnum = null;
+    }
+
+    this.minItemsInAutoPrefix = minItemsInAutoPrefix;
+    this.maxItemsInAutoPrefix = maxItemsInAutoPrefix;
+
+    this.maxDoc = state.segmentInfo.maxDoc();
+    this.fieldInfos = state.fieldInfos;
     this.postingsWriter = postingsWriter;
 
     final String termsName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, BlockTreeTermsReader.TERMS_EXTENSION);
@@ -269,12 +329,13 @@ public final class BlockTreeTermsWriter
     IndexOutput indexOut = null;
     try {
       CodecUtil.writeIndexHeader(termsOut, BlockTreeTermsReader.TERMS_CODEC_NAME, BlockTreeTermsReader.VERSION_CURRENT,
-                                  state.segmentInfo.getId(), state.segmentSuffix);
+                                 state.segmentInfo.getId(), state.segmentSuffix);
 
       final String indexName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, BlockTreeTermsReader.TERMS_INDEX_EXTENSION);
       indexOut = state.directory.createOutput(indexName, state.context);
       CodecUtil.writeIndexHeader(indexOut, BlockTreeTermsReader.TERMS_INDEX_CODEC_NAME, BlockTreeTermsReader.VERSION_CURRENT,
-                                   state.segmentInfo.getId(), state.segmentSuffix);
+                                 state.segmentInfo.getId(), state.segmentSuffix);
+      //segment = state.segmentInfo.name;
 
       postingsWriter.init(termsOut, state);                          // have consumer write its format/header
       
@@ -311,32 +372,107 @@ public final class BlockTreeTermsWriter
     }
   }
 
+  /** Throws {@code IllegalArgumentException} if any of these settings
+   *  is invalid. */
+  public static void validateAutoPrefixSettings(int minItemsInAutoPrefix,
+                                                int maxItemsInAutoPrefix) {
+    if (minItemsInAutoPrefix != 0) {
+      if (minItemsInAutoPrefix < 2) {
+        throw new IllegalArgumentException("minItemsInAutoPrefix must be at least 2; got minItemsInAutoPrefix=" + minItemsInAutoPrefix);
+      }
+      if (minItemsInAutoPrefix > maxItemsInAutoPrefix) {
+        throw new IllegalArgumentException("maxItemsInAutoPrefix must be >= minItemsInAutoPrefix; got maxItemsInAutoPrefix=" + maxItemsInAutoPrefix + " minItemsInAutoPrefix=" + minItemsInAutoPrefix);
+      }
+      if (2*(minItemsInAutoPrefix-1) > maxItemsInAutoPrefix) {
+        throw new IllegalArgumentException("maxItemsInAutoPrefix must be at least 2*(minItemsInAutoPrefix-1); got maxItemsInAutoPrefix=" + maxItemsInAutoPrefix + " minItemsInAutoPrefix=" + minItemsInAutoPrefix);
+      }
+    } else if (maxItemsInAutoPrefix != 0) {
+      throw new IllegalArgumentException("maxItemsInAutoPrefix must be 0 (disabled) when minItemsInAutoPrefix is 0");
+    }
+  }
+
   @Override
   public void write(Fields fields) throws IOException {
+    //if (DEBUG) System.out.println("\nBTTW.write seg=" + segment);
 
     String lastField = null;
     for(String field : fields) {
       assert lastField == null || lastField.compareTo(field) < 0;
       lastField = field;
 
+      //if (DEBUG) System.out.println("\nBTTW.write seg=" + segment + " field=" + field);
       Terms terms = fields.terms(field);
       if (terms == null) {
         continue;
       }
+      FieldInfo fieldInfo = fieldInfos.fieldInfo(field);
 
-      TermsEnum termsEnum = terms.iterator(null);
+      // First pass to find all prefix terms we should compile into the index:
+      List<PrefixTerm> prefixTerms;
+      if (minItemsInAutoPrefix != 0) {
+        if (fieldInfo.getIndexOptions() != IndexOptions.DOCS) {
+          throw new IllegalStateException("ranges can only be indexed with IndexOptions.DOCS (field: " + fieldInfo.name + ")");
+        }
+        prefixTerms = new AutoPrefixTermsWriter(terms, minItemsInAutoPrefix, maxItemsInAutoPrefix).prefixes;
+        //if (DEBUG) {
+        //  for(PrefixTerm term : prefixTerms) {
+        //    System.out.println("field=" + fieldInfo.name + " PREFIX TERM: " + term);
+        //  }
+        //}
+      } else {
+        prefixTerms = null;
+      }
 
+      TermsEnum termsEnum = terms.iterator(null);
       TermsWriter termsWriter = new TermsWriter(fieldInfos.fieldInfo(field));
+      int prefixTermUpto = 0;
       while (true) {
         BytesRef term = termsEnum.next();
+        //if (DEBUG) System.out.println("BTTW: next term " + term);
+
+        // Insert (merge sort) next prefix term(s):
+        if (prefixTerms != null) {
+          while (prefixTermUpto < prefixTerms.size() && (term == null || prefixTerms.get(prefixTermUpto).compareTo(term) <= 0)) {
+            PrefixTerm prefixTerm = prefixTerms.get(prefixTermUpto);
+            //if (DEBUG) System.out.println("seg=" + segment + " field=" + fieldInfo.name + " NOW INSERT prefix=" + prefixTerm);
+            termsWriter.write(prefixTerm.term, getAutoPrefixTermsEnum(terms, prefixTerm), prefixTerm);
+            prefixTermUpto++;
+          }
+        }
+
         if (term == null) {
           break;
         }
-        termsWriter.write(term, termsEnum);
+
+        //if (DEBUG) System.out.println("write field=" + fieldInfo.name + " term=" + brToString(term));
+        termsWriter.write(term, termsEnum, null);
       }
 
+      assert prefixTerms == null || prefixTermUpto == prefixTerms.size();
+
       termsWriter.finish();
+
+      //if (DEBUG) System.out.println("\nBTTW.write done seg=" + segment + " field=" + field);
+    }
+  }
+
+  private TermsEnum getAutoPrefixTermsEnum(Terms terms, final PrefixTerm prefix) throws IOException {
+    assert prefixDocs != null;
+    prefixDocs.clear(0, prefixDocs.length());
+
+    prefixTermsEnum = prefix.getTermsEnum(terms.iterator(prefixTermsEnum));
+
+    //System.out.println("BTTW.getAutoPrefixTE: prefix=" + prefix);
+    while (prefixTermsEnum.next() != null) {
+      //System.out.println("    got term=" + prefixTermsEnum.term().utf8ToString());
+      //termCount++;
+      prefixDocsEnum = prefixTermsEnum.postings(null, prefixDocsEnum, 0);
+      //System.out.println("      " + prefixDocsEnum + " doc=" + prefixDocsEnum.docID());
+      prefixDocs.or(prefixDocsEnum);
     }
+
+    //System.out.println("  done terms: " + prefixDocs.cardinality() + " doc seen; " + termCount + " terms seen");
+    return prefixFixedBitsTermsEnum;
   }
   
   static long encodeOutput(long fp, boolean hasTerms, boolean isFloor) {
@@ -356,30 +492,38 @@ public final class BlockTreeTermsWriter
     public final byte[] termBytes;
     // stats + metadata
     public final BlockTermState state;
+    // Non-null if this is an auto-prefix-term:
+    public final PrefixTerm prefixTerm;
+    public PendingTerm other;
 
-    public PendingTerm(BytesRef term, BlockTermState state) {
+    public PendingTerm(BytesRef term, BlockTermState state, PrefixTerm prefixTerm) {
       super(true);
       this.termBytes = new byte[term.length];
       System.arraycopy(term.bytes, term.offset, termBytes, 0, term.length);
       this.state = state;
+      this.prefixTerm = prefixTerm;
     }
 
     @Override
     public String toString() {
-      return brToString(termBytes);
+      return "TERM: " + brToString(termBytes);
     }
   }
 
   // for debugging
   @SuppressWarnings("unused")
   static String brToString(BytesRef b) {
-    try {
-      return b.utf8ToString() + " " + b;
-    } catch (Throwable t) {
-      // If BytesRef isn't actually UTF8, or it's eg a
-      // prefix of UTF8 that ends mid-unicode-char, we
-      // fallback to hex:
-      return b.toString();
+    if (b == null) {
+      return "(null)";
+    } else {
+      try {
+        return b.utf8ToString() + " " + b;
+      } catch (Throwable t) {
+        // If BytesRef isn't actually UTF8, or it's eg a
+        // prefix of UTF8 that ends mid-unicode-char, we
+        // fallback to hex:
+        return b.toString();
+      }
     }
   }
 
@@ -410,7 +554,7 @@ public final class BlockTreeTermsWriter
 
     @Override
     public String toString() {
-      return "BLOCK: " + brToString(prefix);
+      return "BLOCK: prefix=" + brToString(prefix);
     }
 
     public void compileIndex(List<PendingBlock> blocks, RAMOutputStream scratchBytes, IntsRefBuilder scratchIntsRef) throws IOException {
@@ -493,6 +637,8 @@ public final class BlockTreeTermsWriter
   private final RAMOutputStream scratchBytes = new RAMOutputStream();
   private final IntsRefBuilder scratchIntsRef = new IntsRefBuilder();
 
+  static final BytesRef EMPTY_BYTES_REF = new BytesRef();
+
   class TermsWriter {
     private final FieldInfo fieldInfo;
     private final int longsSize;
@@ -529,14 +675,11 @@ public final class BlockTreeTermsWriter
 
       assert count > 0;
 
-      /*
-      if (DEBUG) {
-        BytesRef br = new BytesRef(lastTerm.bytes);
-        br.offset = lastTerm.offset;
-        br.length = prefixLength;
-        System.out.println("writeBlocks: " + br.utf8ToString() + " count=" + count);
-      }
-      */
+      //if (DEBUG2) {
+      //  BytesRef br = new BytesRef(lastTerm.bytes());
+      //  br.length = prefixLength;
+      //  System.out.println("writeBlocks: seg=" + segment + " prefix=" + brToString(br) + " count=" + count);
+      //}
 
       // Root block better write all remaining pending entries:
       assert prefixLength > 0 || count == pending.size();
@@ -547,6 +690,7 @@ public final class BlockTreeTermsWriter
       // only points to sub-blocks in the terms index so we can avoid seeking
       // to it when we are looking for a term):
       boolean hasTerms = false;
+      boolean hasPrefixTerms = false;
       boolean hasSubBlocks = false;
 
       int start = pending.size()-count;
@@ -566,7 +710,7 @@ public final class BlockTreeTermsWriter
             // Suffix is 0, i.e. prefix 'foo' and term is
             // 'foo' so the term has empty string suffix
             // in this block
-            assert lastSuffixLeadLabel == -1;
+            assert lastSuffixLeadLabel == -1: "i=" + i + " lastSuffixLeadLabel=" + lastSuffixLeadLabel;
             suffixLeadLabel = -1;
           } else {
             suffixLeadLabel = term.termBytes[prefixLength] & 0xff;
@@ -587,10 +731,11 @@ public final class BlockTreeTermsWriter
             // block as soon as we have at least minItemsInBlock.  This is not always best: it often produces
             // a too-small block as the final block:
             boolean isFloor = itemsInBlock < count;
-            newBlocks.add(writeBlock(prefixLength, isFloor, nextFloorLeadLabel, nextBlockStart, i, hasTerms, hasSubBlocks));
+            newBlocks.add(writeBlock(prefixLength, isFloor, nextFloorLeadLabel, nextBlockStart, i, hasTerms, hasPrefixTerms, hasSubBlocks));
 
             hasTerms = false;
             hasSubBlocks = false;
+            hasPrefixTerms = false;
             nextFloorLeadLabel = suffixLeadLabel;
             nextBlockStart = i;
           }
@@ -600,6 +745,7 @@ public final class BlockTreeTermsWriter
 
         if (ent.isTerm) {
           hasTerms = true;
+          hasPrefixTerms |= ((PendingTerm) ent).prefixTerm != null;
         } else {
           hasSubBlocks = true;
         }
@@ -609,7 +755,7 @@ public final class BlockTreeTermsWriter
       if (nextBlockStart < end) {
         int itemsInBlock = end - nextBlockStart;
         boolean isFloor = itemsInBlock < count;
-        newBlocks.add(writeBlock(prefixLength, isFloor, nextFloorLeadLabel, nextBlockStart, end, hasTerms, hasSubBlocks));
+        newBlocks.add(writeBlock(prefixLength, isFloor, nextFloorLeadLabel, nextBlockStart, end, hasTerms, hasPrefixTerms, hasSubBlocks));
       }
 
       assert newBlocks.isEmpty() == false;
@@ -634,7 +780,8 @@ public final class BlockTreeTermsWriter
      *  were too many (more than maxItemsInBlock) entries sharing the
      *  same prefix, and so we broke it into multiple floor blocks where
      *  we record the starting label of the suffix of each floor block. */
-    private PendingBlock writeBlock(int prefixLength, boolean isFloor, int floorLeadLabel, int start, int end, boolean hasTerms, boolean hasSubBlocks) throws IOException {
+    private PendingBlock writeBlock(int prefixLength, boolean isFloor, int floorLeadLabel, int start, int end,
+                                    boolean hasTerms, boolean hasPrefixTerms, boolean hasSubBlocks) throws IOException {
 
       assert end > start;
 
@@ -646,6 +793,8 @@ public final class BlockTreeTermsWriter
       System.arraycopy(lastTerm.get().bytes, 0, prefix.bytes, 0, prefixLength);
       prefix.length = prefixLength;
 
+      //if (DEBUG2) System.out.println("    writeBlock field=" + fieldInfo.name + " prefix=" + brToString(prefix) + " fp=" + startFP + " isFloor=" + isFloor + " isLastInFloor=" + (end == pending.size()) + " floorLeadLabel=" + floorLeadLabel + " start=" + start + " end=" + end + " hasTerms=" + hasTerms + " hasSubBlocks=" + hasSubBlocks);
+
       // Write block header:
       int numEntries = end - start;
       int code = numEntries << 1;
@@ -666,31 +815,34 @@ public final class BlockTreeTermsWriter
 
       // We optimize the leaf block case (block has only terms), writing a more
       // compact format in this case:
-      boolean isLeafBlock = hasSubBlocks == false;
+      boolean isLeafBlock = hasSubBlocks == false && hasPrefixTerms == false;
+
+      //System.out.println("  isLeaf=" + isLeafBlock);
 
       final List<FST<BytesRef>> subIndices;
 
       boolean absolute = true;
 
       if (isLeafBlock) {
-        // Only terms:
+        // Block contains only ordinary terms:
         subIndices = null;
         for (int i=start;i<end;i++) {
           PendingEntry ent = pending.get(i);
           assert ent.isTerm: "i=" + i;
 
           PendingTerm term = (PendingTerm) ent;
+          assert term.prefixTerm == null;
+
           assert StringHelper.startsWith(term.termBytes, prefix): "term.term=" + term.termBytes + " prefix=" + prefix;
           BlockTermState state = term.state;
           final int suffix = term.termBytes.length - prefixLength;
-          /*
-          if (DEBUG) {
-            BytesRef suffixBytes = new BytesRef(suffix);
-            System.arraycopy(term.termBytes, prefixLength, suffixBytes.bytes, 0, suffix);
-            suffixBytes.length = suffix;
-            System.out.println("    write term suffix=" + brToString(suffixBytes));
-          }
-          */
+          //if (DEBUG2) {
+          //  BytesRef suffixBytes = new BytesRef(suffix);
+          //  System.arraycopy(term.termBytes, prefixLength, suffixBytes.bytes, 0, suffix);
+          //  suffixBytes.length = suffix;
+          //  System.out.println("    write term suffix=" + brToString(suffixBytes));
+          //}
+
           // For leaf block we write suffix straight
           suffixWriter.writeVInt(suffix);
           suffixWriter.writeBytes(term.termBytes, prefixLength, suffix);
@@ -714,27 +866,51 @@ public final class BlockTreeTermsWriter
           absolute = false;
         }
       } else {
-        // Mixed terms and sub-blocks:
+        // Block has at least one prefix term or a sub block:
         subIndices = new ArrayList<>();
+        boolean sawAutoPrefixTerm = false;
         for (int i=start;i<end;i++) {
           PendingEntry ent = pending.get(i);
           if (ent.isTerm) {
             PendingTerm term = (PendingTerm) ent;
+
             assert StringHelper.startsWith(term.termBytes, prefix): "term.term=" + term.termBytes + " prefix=" + prefix;
             BlockTermState state = term.state;
             final int suffix = term.termBytes.length - prefixLength;
-            /*
-            if (DEBUG) {
-              BytesRef suffixBytes = new BytesRef(suffix);
-              System.arraycopy(term.termBytes, prefixLength, suffixBytes.bytes, 0, suffix);
-              suffixBytes.length = suffix;
-              System.out.println("    write term suffix=" + brToString(suffixBytes));
-            }
-            */
+            //if (DEBUG2) {
+            //  BytesRef suffixBytes = new BytesRef(suffix);
+            //  System.arraycopy(term.termBytes, prefixLength, suffixBytes.bytes, 0, suffix);
+            //  suffixBytes.length = suffix;
+            //  System.out.println("      write term suffix=" + brToString(suffixBytes));
+            //  if (term.prefixTerm != null) {
+            //    System.out.println("        ** auto-prefix term: " + term.prefixTerm);
+            //  }
+            //}
+
             // For non-leaf block we borrow 1 bit to record
-            // if entry is term or sub-block
-            suffixWriter.writeVInt(suffix<<1);
+            // if entry is term or sub-block, and 1 bit to record if
+            // it's a prefix term.  Terms cannot be larger than ~32 KB
+            // so we won't run out of bits:
+            code = suffix<<2;
+            int floorLeadEnd = -1;
+            if (term.prefixTerm != null) {
+              sawAutoPrefixTerm = true;
+              PrefixTerm prefixTerm = term.prefixTerm;
+              floorLeadEnd = prefixTerm.floorLeadEnd;
+              assert floorLeadEnd != -1;
+
+              if (prefixTerm.floorLeadStart == -2) {
+                // Starts with empty string
+                code |= 2;
+              } else {
+                code |= 3;
+              }
+            }
+            suffixWriter.writeVInt(code);
             suffixWriter.writeBytes(term.termBytes, prefixLength, suffix);
+            if (floorLeadEnd != -1) {
+              suffixWriter.writeByte((byte) floorLeadEnd);
+            }
             assert floorLeadLabel == -1 || (term.termBytes[prefixLength] & 0xff) >= floorLeadLabel;
 
             // Write term stats, to separate byte[] blob:
@@ -765,33 +941,32 @@ public final class BlockTreeTermsWriter
             PendingBlock block = (PendingBlock) ent;
             assert StringHelper.startsWith(block.prefix, prefix);
             final int suffix = block.prefix.length - prefixLength;
+            assert StringHelper.startsWith(block.prefix, prefix);
 
             assert suffix > 0;
 
             // For non-leaf block we borrow 1 bit to record
-            // if entry is term or sub-block
-            suffixWriter.writeVInt((suffix<<1)|1);
+            // if entry is term or sub-block, and 1 bit (unset here) to
+            // record if it's a prefix term:
+            suffixWriter.writeVInt((suffix<<2)|1);
             suffixWriter.writeBytes(block.prefix.bytes, prefixLength, suffix);
 
-            assert floorLeadLabel == -1 || (block.prefix.bytes[prefixLength] & 0xff) >= floorLeadLabel;
+            //if (DEBUG2) {
+            //  BytesRef suffixBytes = new BytesRef(suffix);
+            //  System.arraycopy(block.prefix.bytes, prefixLength, suffixBytes.bytes, 0, suffix);
+            //  suffixBytes.length = suffix;
+            //  System.out.println("      write sub-block suffix=" + brToString(suffixBytes) + " subFP=" + block.fp + " subCode=" + (startFP-block.fp) + " floor=" + block.isFloor);
+            //}
 
+            assert floorLeadLabel == -1 || (block.prefix.bytes[prefixLength] & 0xff) >= floorLeadLabel: "floorLeadLabel=" + floorLeadLabel + " suffixLead=" + (block.prefix.bytes[prefixLength] & 0xff);
             assert block.fp < startFP;
 
-            /*
-            if (DEBUG) {
-              BytesRef suffixBytes = new BytesRef(suffix);
-              System.arraycopy(block.prefix.bytes, prefixLength, suffixBytes.bytes, 0, suffix);
-              suffixBytes.length = suffix;
-              System.out.println("    write sub-block suffix=" + brToString(suffixBytes) + " subFP=" + block.fp + " subCode=" + (startFP-block.fp) + " floor=" + block.isFloor);
-            }
-            */
-
             suffixWriter.writeVLong(startFP - block.fp);
             subIndices.add(block.index);
           }
         }
 
-        assert subIndices.size() != 0;
+        assert subIndices.size() != 0 || sawAutoPrefixTerm;
       }
 
       // TODO: we could block-write the term suffix pointers;
@@ -835,7 +1010,7 @@ public final class BlockTreeTermsWriter
     }
     
     /** Writes one term's worth of postings. */
-    public void write(BytesRef text, TermsEnum termsEnum) throws IOException {
+    public void write(BytesRef text, TermsEnum termsEnum, PrefixTerm prefixTerm) throws IOException {
       /*
       if (DEBUG) {
         int[] tmp = new int[lastTerm.length];
@@ -846,19 +1021,25 @@ public final class BlockTreeTermsWriter
 
       BlockTermState state = postingsWriter.writeTerm(text, termsEnum, docsSeen);
       if (state != null) {
+
         assert state.docFreq != 0;
         assert fieldInfo.getIndexOptions() == IndexOptions.DOCS || state.totalTermFreq >= state.docFreq: "postingsWriter=" + postingsWriter;
-        sumDocFreq += state.docFreq;
-        sumTotalTermFreq += state.totalTermFreq;
         pushTerm(text);
        
-        PendingTerm term = new PendingTerm(text, state);
+        PendingTerm term = new PendingTerm(text, state, prefixTerm);
         pending.add(term);
-        numTerms++;
-        if (firstPendingTerm == null) {
-          firstPendingTerm = term;
+        //if (DEBUG) System.out.println("    add pending term = " + text + " pending.size()=" + pending.size());
+
+        if (prefixTerm == null) {
+          // Only increment stats for real terms:
+          sumDocFreq += state.docFreq;
+          sumTotalTermFreq += state.totalTermFreq;
+          numTerms++;
+          if (firstPendingTerm == null) {
+            firstPendingTerm = term;
+          }
+          lastPendingTerm = term;
         }
-        lastPendingTerm = term;
       }
     }
 
@@ -910,6 +1091,7 @@ public final class BlockTreeTermsWriter
         // TODO: if pending.size() is already 1 with a non-zero prefix length
         // we can save writing a "degenerate" root block, but we have to
         // fix all the places that assume the root block's prefix is the empty string:
+        pushTerm(new BytesRef());
         writeBlocks(0, pending.size());
 
         // We better have one final "root" block:

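The new six-argument constructor above is the switch for the feature: a non-zero minItemsInAutoPrefix/maxItemsInAutoPrefix enables auto-prefix terms, subject to validateAutoPrefixSettings (minItemsInAutoPrefix >= 2, maxItemsInAutoPrefix >= minItemsInAutoPrefix, and maxItemsInAutoPrefix >= 2*(minItemsInAutoPrefix-1)), and the field must be indexed with IndexOptions.DOCS. A sketch of wiring it up inside a PostingsFormat's fieldsConsumer(); the choice of Lucene50PostingsWriter as the postings encoder and the 25/48 auto-prefix bounds are assumptions for illustration, not necessarily what the new AutoPrefixPostingsFormat in lucene/codecs does:

import java.io.IOException;
import org.apache.lucene.codecs.FieldsConsumer;
import org.apache.lucene.codecs.PostingsWriterBase;
import org.apache.lucene.codecs.blocktree.BlockTreeTermsWriter;
import org.apache.lucene.codecs.lucene50.Lucene50PostingsWriter;
import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.util.IOUtils;

@Override
public FieldsConsumer fieldsConsumer(SegmentWriteState state) throws IOException {
  PostingsWriterBase postingsWriter = new Lucene50PostingsWriter(state);
  boolean success = false;
  try {
    // 25/48 are the existing default block sizes; using 25/48 for the
    // auto-prefix bounds also satisfies max >= 2*(min-1) as required by
    // validateAutoPrefixSettings:
    FieldsConsumer fields = new BlockTreeTermsWriter(state, postingsWriter,
                                                     BlockTreeTermsWriter.DEFAULT_MIN_BLOCK_SIZE,
                                                     BlockTreeTermsWriter.DEFAULT_MAX_BLOCK_SIZE,
                                                     25, 48);
    success = true;
    return fields;
  } finally {
    if (success == false) {
      IOUtils.closeWhileHandlingException(postingsWriter);
    }
  }
}
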
Modified: lucene/dev/branches/lucene6271/lucene/core/src/java/org/apache/lucene/codecs/blocktree/FieldReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6271/lucene/core/src/java/org/apache/lucene/codecs/blocktree/FieldReader.java?rev=1670929&r1=1670928&r2=1670929&view=diff
==============================================================================
--- lucene/dev/branches/lucene6271/lucene/core/src/java/org/apache/lucene/codecs/blocktree/FieldReader.java (original)
+++ lucene/dev/branches/lucene6271/lucene/core/src/java/org/apache/lucene/codecs/blocktree/FieldReader.java Thu Apr  2 15:37:39 2015
@@ -41,6 +41,8 @@ import org.apache.lucene.util.fst.FST;
  */
 public final class FieldReader extends Terms implements Accountable {
 
+  // private final boolean DEBUG = BlockTreeTermsWriter.DEBUG;
+
   private static final long BASE_RAM_BYTES_USED =
       RamUsageEstimator.shallowSizeOfInstance(FieldReader.class)
       + 3 * RamUsageEstimator.shallowSizeOfInstance(BytesRef.class);
@@ -125,6 +127,7 @@ public final class FieldReader extends T
   /** For debugging -- used by CheckIndex too*/
   @Override
   public Stats getStats() throws IOException {
+    // TODO: add auto-prefix terms into stats
     return new SegmentTermsEnum(this).computeBlockStats();
   }
 
@@ -175,10 +178,11 @@ public final class FieldReader extends T
 
   @Override
   public TermsEnum intersect(CompiledAutomaton compiled, BytesRef startTerm) throws IOException {
-    if (compiled.type != CompiledAutomaton.AUTOMATON_TYPE.NORMAL) {
-      throw new IllegalArgumentException("please use CompiledAutomaton.getTermsEnum instead");
-    }
-    return new IntersectTermsEnum(this, compiled, startTerm);
+    // if (DEBUG) System.out.println("  FieldReader.intersect startTerm=" + BlockTreeTermsWriter.brToString(startTerm));
+    //System.out.println("intersect: " + compiled.type + " a=" + compiled.automaton);
+    // TODO: we could push "it's a range" or "it's a prefix" down into IntersectTermsEnum?
+    // can we optimize knowing that...?
+    return new IntersectTermsEnum(this, compiled.automaton, compiled.runAutomaton, compiled.commonSuffixRef, startTerm, compiled.sinkState);
   }
     
   @Override

Modified: lucene/dev/branches/lucene6271/lucene/core/src/java/org/apache/lucene/codecs/blocktree/IntersectTermsEnum.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/lucene6271/lucene/core/src/java/org/apache/lucene/codecs/blocktree/IntersectTermsEnum.java?rev=1670929&r1=1670928&r2=1670929&view=diff
==============================================================================
--- lucene/dev/branches/lucene6271/lucene/core/src/java/org/apache/lucene/codecs/blocktree/IntersectTermsEnum.java (original)
+++ lucene/dev/branches/lucene6271/lucene/core/src/java/org/apache/lucene/codecs/blocktree/IntersectTermsEnum.java Thu Apr  2 15:37:39 2015
@@ -21,6 +21,7 @@ import java.io.IOException;
 
 import org.apache.lucene.index.PostingsEnum;
 import org.apache.lucene.index.TermState;
+import org.apache.lucene.index.Terms;
 import org.apache.lucene.index.TermsEnum;
 import org.apache.lucene.store.IndexInput;
 import org.apache.lucene.util.ArrayUtil;
@@ -28,23 +29,38 @@ import org.apache.lucene.util.Bits;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.RamUsageEstimator;
 import org.apache.lucene.util.StringHelper;
-import org.apache.lucene.util.automaton.CompiledAutomaton;
+import org.apache.lucene.util.automaton.Automaton;
 import org.apache.lucene.util.automaton.RunAutomaton;
+import org.apache.lucene.util.automaton.Transition;
 import org.apache.lucene.util.fst.ByteSequenceOutputs;
 import org.apache.lucene.util.fst.FST;
 import org.apache.lucene.util.fst.Outputs;
 
-// NOTE: cannot seek!
+/** This is used to implement efficient {@link Terms#intersect} for
+ *  block-tree.  Note that it cannot seek, except for the initial term on
+ *  init.  It just "nexts" through the intersection of the automaton and
+ *  the terms.  It does not use the terms index at all: on init, it
+ *  loads the root block, and scans its way to the initial term.
+ *  Likewise, in next it scans until it finds a term that matches the
+ *  current automaton transition.  If the index has auto-prefix terms
+ *  (only for DOCS_ONLY fields currently) it will visit these terms
+ *  when possible and then skip the real terms that auto-prefix term
+ *  matched. */
+
 final class IntersectTermsEnum extends TermsEnum {
+
+  //static boolean DEBUG = BlockTreeTermsWriter.DEBUG;
+
   final IndexInput in;
   final static Outputs<BytesRef> fstOutputs = ByteSequenceOutputs.getSingleton();
 
-  private IntersectTermsEnumFrame[] stack;
+  IntersectTermsEnumFrame[] stack;
       
   @SuppressWarnings({"rawtypes","unchecked"}) private FST.Arc<BytesRef>[] arcs = new FST.Arc[5];
 
   final RunAutomaton runAutomaton;
-  final CompiledAutomaton compiledAutomaton;
+  final Automaton automaton;
+  final BytesRef commonSuffix;
 
   private IntersectTermsEnumFrame currentFrame;
 
@@ -52,19 +68,34 @@ final class IntersectTermsEnum extends T
 
   private final FST.BytesReader fstReader;
 
+  private final boolean allowAutoPrefixTerms;
+
   final FieldReader fr;
 
+  /** Which state in the automaton accepts all possible suffixes. */
+  private final int sinkState;
+
   private BytesRef savedStartTerm;
       
+  /** True if we did return the current auto-prefix term */
+  private boolean useAutoPrefixTerm;
+
   // TODO: in some cases we can filter by length?  eg
   // regexp foo*bar must be at least length 6 bytes
-  public IntersectTermsEnum(FieldReader fr, CompiledAutomaton compiled, BytesRef startTerm) throws IOException {
-    // if (DEBUG) {
-    //   System.out.println("\nintEnum.init seg=" + segment + " commonSuffix=" + brToString(compiled.commonSuffixRef));
-    // }
+  public IntersectTermsEnum(FieldReader fr, Automaton automaton, RunAutomaton runAutomaton, BytesRef commonSuffix, BytesRef startTerm, int sinkState) throws IOException {
+    //if (DEBUG) System.out.println("\nintEnum.init seg=" + fr.parent.segment + " commonSuffix=" + commonSuffix);
     this.fr = fr;
-    runAutomaton = compiled.runAutomaton;
-    compiledAutomaton = compiled;
+    this.sinkState = sinkState;
+
+    assert automaton != null;
+    assert runAutomaton != null;
+
+    //if (DEBUG) System.out.println("sinkState=" + sinkState + " AUTOMATON:\n" + automaton.toDot());
+    this.runAutomaton = runAutomaton;
+    this.allowAutoPrefixTerms = sinkState != -1;
+    this.automaton = automaton;
+    this.commonSuffix = commonSuffix;
+
     in = fr.parent.termsIn.clone();
     stack = new IntersectTermsEnumFrame[5];
     for(int idx=0;idx<stack.length;idx++) {
@@ -152,7 +183,7 @@ final class IntersectTermsEnum extends T
         
     f.fp = f.fpOrig = currentFrame.lastSubFP;
     f.prefix = currentFrame.prefix + currentFrame.suffix;
-    // if (DEBUG) System.out.println("    pushFrame state=" + state + " prefix=" + f.prefix);
+    //if (DEBUG) System.out.println("    pushFrame state=" + state + " prefix=" + f.prefix);
     f.setState(state);
 
     // Walk the arc through the index -- we only
@@ -220,7 +251,7 @@ final class IntersectTermsEnum extends T
   // arbitrary seekExact/Ceil.  Note that this is a
   // seekFloor!
   private void seekToStartTerm(BytesRef target) throws IOException {
-    //if (DEBUG) System.out.println("seek to startTerm=" + target.utf8ToString());
+    //if (DEBUG) System.out.println("seek to startTerm=" + target.utf8ToString() + " length=" + target.length);
     assert currentFrame.ord == 0;
     if (term.length < target.length) {
       term.bytes = ArrayUtil.grow(term.bytes, target.length);
@@ -229,23 +260,29 @@ final class IntersectTermsEnum extends T
     assert arc == currentFrame.arc;
 
     for(int idx=0;idx<=target.length;idx++) {
+      //if (DEBUG) System.out.println("cycle idx=" + idx);
 
       while (true) {
+        final int savNextEnt = currentFrame.nextEnt;
         final int savePos = currentFrame.suffixesReader.getPosition();
         final int saveStartBytePos = currentFrame.startBytePos;
         final int saveSuffix = currentFrame.suffix;
         final long saveLastSubFP = currentFrame.lastSubFP;
         final int saveTermBlockOrd = currentFrame.termState.termBlockOrd;
+        final boolean saveIsAutoPrefixTerm = currentFrame.isAutoPrefixTerm;
+
+        //if (DEBUG) System.out.println("    cycle isAutoPrefix=" + saveIsAutoPrefixTerm + " ent=" + currentFrame.nextEnt + " (of " + currentFrame.entCount + ") prefix=" + currentFrame.prefix + " suffix=" + currentFrame.suffix + " firstLabel=" + (currentFrame.suffix == 0 ? "" : (currentFrame.suffixBytes[currentFrame.startBytePos])&0xff));
 
         final boolean isSubBlock = currentFrame.next();
 
-        //if (DEBUG) System.out.println("    cycle ent=" + currentFrame.nextEnt + " (of " + currentFrame.entCount + ") prefix=" + currentFrame.prefix + " suffix=" + currentFrame.suffix + " isBlock=" + isSubBlock + " firstLabel=" + (currentFrame.suffix == 0 ? "" : (currentFrame.suffixBytes[currentFrame.startBytePos])&0xff));
         term.length = currentFrame.prefix + currentFrame.suffix;
         if (term.bytes.length < term.length) {
           term.bytes = ArrayUtil.grow(term.bytes, term.length);
         }
         System.arraycopy(currentFrame.suffixBytes, currentFrame.startBytePos, term.bytes, currentFrame.prefix, currentFrame.suffix);
 
+        //if (DEBUG) System.out.println("      isSubBlock=" + isSubBlock + " term/prefix=" + brToString(term) + " saveIsAutoPrefixTerm=" + saveIsAutoPrefixTerm + " allowAutoPrefixTerms=" + allowAutoPrefixTerms);
+
         if (isSubBlock && StringHelper.startsWith(target, term)) {
           // Recurse
           //if (DEBUG) System.out.println("      recurse!");
@@ -253,9 +290,11 @@ final class IntersectTermsEnum extends T
           break;
         } else {
           final int cmp = term.compareTo(target);
+          //if (DEBUG) System.out.println("      cmp=" + cmp);
           if (cmp < 0) {
             if (currentFrame.nextEnt == currentFrame.entCount) {
               if (!currentFrame.isLastInFloor) {
+                // Advance to next floor block
                 //if (DEBUG) System.out.println("  load floorBlock");
                 currentFrame.loadNextFloorBlock();
                 continue;
@@ -266,19 +305,24 @@ final class IntersectTermsEnum extends T
             }
             continue;
           } else if (cmp == 0) {
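+            // We are exactly on the target, but it is an auto-prefix term; if the caller disallows auto-prefix terms, skip it and keep scanning: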
+            if (allowAutoPrefixTerms == false && currentFrame.isAutoPrefixTerm) {
+              continue;
+            }
             //if (DEBUG) System.out.println("  return term=" + brToString(term));
             return;
-          } else {
+          } else if (allowAutoPrefixTerms || currentFrame.isAutoPrefixTerm == false) {
             // Fallback to prior entry: the semantics of
             // this method is that the first call to
             // next() will return the term after the
             // requested term
-            currentFrame.nextEnt--;
+            //if (DEBUG) System.out.println("    fallback prior entry");
+            currentFrame.nextEnt = savNextEnt;
             currentFrame.lastSubFP = saveLastSubFP;
             currentFrame.startBytePos = saveStartBytePos;
             currentFrame.suffix = saveSuffix;
             currentFrame.suffixesReader.setPosition(savePos);
             currentFrame.termState.termBlockOrd = saveTermBlockOrd;
+            currentFrame.isAutoPrefixTerm = saveIsAutoPrefixTerm;
             System.arraycopy(currentFrame.suffixBytes, currentFrame.startBytePos, term.bytes, currentFrame.prefix, currentFrame.suffix);
             term.length = currentFrame.prefix + currentFrame.suffix;
             // If the last entry was a block we don't
@@ -297,77 +341,249 @@ final class IntersectTermsEnum extends T
   @Override
   public BytesRef next() throws IOException {
 
-    // if (DEBUG) {
-    //   System.out.println("\nintEnum.next seg=" + segment);
-    //   System.out.println("  frame ord=" + currentFrame.ord + " prefix=" + brToString(new BytesRef(term.bytes, term.offset, currentFrame.prefix)) + " state=" + currentFrame.state + " lastInFloor?=" + currentFrame.isLastInFloor + " fp=" + currentFrame.fp + " trans=" + (currentFrame.transitions.length == 0 ? "n/a" : currentFrame.transitions[currentFrame.transitionIndex]) + " outputPrefix=" + currentFrame.outputPrefix);
-    // }
+    //if (DEBUG) {
+    //  System.out.println("\nintEnum.next seg=" + fr.parent.segment);
+    //  System.out.println("  frame ord=" + currentFrame.ord + " prefix=" + brToString(new BytesRef(term.bytes, term.offset, currentFrame.prefix)) + " state=" + currentFrame.state + " lastInFloor?=" + currentFrame.isLastInFloor + " fp=" + currentFrame.fp + " outputPrefix=" + currentFrame.outputPrefix + " trans: " + currentFrame.transition + " useAutoPrefix=" + useAutoPrefixTerm);
+    //}
 
     nextTerm:
-    while(true) {
-      // Pop finished frames
-      while (currentFrame.nextEnt == currentFrame.entCount) {
-        if (!currentFrame.isLastInFloor) {
-          //if (DEBUG) System.out.println("    next-floor-block");
-          currentFrame.loadNextFloorBlock();
-          //if (DEBUG) System.out.println("\n  frame ord=" + currentFrame.ord + " prefix=" + brToString(new BytesRef(term.bytes, term.offset, currentFrame.prefix)) + " state=" + currentFrame.state + " lastInFloor?=" + currentFrame.isLastInFloor + " fp=" + currentFrame.fp + " trans=" + (currentFrame.transitions.length == 0 ? "n/a" : currentFrame.transitions[currentFrame.transitionIndex]) + " outputPrefix=" + currentFrame.outputPrefix);
+    while (true) {
+
+      boolean isSubBlock;
+
+      if (useAutoPrefixTerm) {
+
+        assert currentFrame.isAutoPrefixTerm;
+        useAutoPrefixTerm = false;
+        currentFrame.termState.isRealTerm = true;
+
+        //if (DEBUG) {
+        //  System.out.println("    now scan beyond auto-prefix term=" + brToString(term) + " floorSuffixLeadEnd=" + Integer.toHexString(currentFrame.floorSuffixLeadEnd));
+        //}
+        // If we last returned an auto-prefix term, we must now skip all
+        // actual terms sharing that prefix.  At most, that skipping
+        // requires popping one frame, but it can also require simply
+        // scanning ahead within the current frame.  This scanning will
+        // skip sub-blocks that contain many terms, which is why the
+        // optimization "works":
+        int floorSuffixLeadEnd = currentFrame.floorSuffixLeadEnd;
+        if (floorSuffixLeadEnd == -1) {
+          // An ordinary prefix, e.g. foo*
+          int prefix = currentFrame.prefix;
+          int suffix = currentFrame.suffix;
+          //if (DEBUG) System.out.println("    prefix=" + prefix + " suffix=" + suffix);
+          if (suffix == 0) {
+            //if (DEBUG) System.out.println("    pop frame & nextTerm");
+
+            // Easy case: the prefix term's suffix is the empty string,
+            // meaning the prefix corresponds to all terms in the
+            // current block, so we just pop this entire block:
+            if (currentFrame.ord == 0) {
+              //if (DEBUG) System.out.println("  return null");
+              return null;
+            }
+            currentFrame = stack[currentFrame.ord-1];
+            continue nextTerm;
+          } else {
+
+            // Just next() until we hit an entry that doesn't share this
+            // prefix.  The first next should be a sub-block sharing the
+            // same prefix, because if there are enough terms matching a
+            // given prefix to warrant an auto-prefix term, then there
+            // must also be enough to make a sub-block (assuming
+            // minItemsInPrefix > minItemsInBlock):
+            scanPrefix:
+            while (true) {
+              //if (DEBUG) System.out.println("    scan next");
+              if (currentFrame.nextEnt == currentFrame.entCount) {
+                if (currentFrame.isLastInFloor == false) {
+                  currentFrame.loadNextFloorBlock();
+                } else if (currentFrame.ord == 0) {
+                  //if (DEBUG) System.out.println("  return null0");
+                  return null;
+                } else {
+                  // Pop frame, which also means we've moved beyond this
+                  // auto-prefix term:
+                  //if (DEBUG) System.out.println("  pop; nextTerm");
+                  currentFrame = stack[currentFrame.ord-1];
+                  continue nextTerm;
+                }
+              }
+              isSubBlock = currentFrame.next();
+              //if (DEBUG) {
+              //  BytesRef suffixBytes = new BytesRef(currentFrame.suffix);
+              //  System.arraycopy(currentFrame.suffixBytes, currentFrame.startBytePos, suffixBytes.bytes, 0, currentFrame.suffix);
+              //  suffixBytes.length = currentFrame.suffix;
+              //  System.out.println("      currentFrame.suffix=" + brToString(suffixBytes));
+              //}
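+              // Stop scanning once this entry's leading suffix bytes no longer match the auto-prefix term's suffix; we are then past all terms sharing that prefix: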
+              for(int i=0;i<suffix;i++) {
+                if (term.bytes[prefix+i] != currentFrame.suffixBytes[currentFrame.startBytePos+i]) {
+                  //if (DEBUG) System.out.println("      done; now stop scan");
+                  break scanPrefix;
+                }
+              }
+            }
+          }
         } else {
-          //if (DEBUG) System.out.println("  pop frame");
-          if (currentFrame.ord == 0) {
-            return null;
-          }
-          final long lastFP = currentFrame.fpOrig;
-          currentFrame = stack[currentFrame.ord-1];
-          assert currentFrame.lastSubFP == lastFP;
-          //if (DEBUG) System.out.println("\n  frame ord=" + currentFrame.ord + " prefix=" + brToString(new BytesRef(term.bytes, term.offset, currentFrame.prefix)) + " state=" + currentFrame.state + " lastInFloor?=" + currentFrame.isLastInFloor + " fp=" + currentFrame.fp + " trans=" + (currentFrame.transitions.length == 0 ? "n/a" : currentFrame.transitions[currentFrame.transitionIndex]) + " outputPrefix=" + currentFrame.outputPrefix);
+          // Floor'd auto-prefix term; in this case we must skip all
+          // terms e.g. matching foo[a-m]*.  We are currently "on" fooa,
+          // which the automaton accepted (fooa* through foom*), and
+          // floorSuffixLeadEnd is m, so we must now scan to foon:
+          int prefix = currentFrame.prefix;
+          int suffix = currentFrame.suffix;
+
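+          // floorSuffixLeadStart == -1 means the floor range also covered the empty suffix, so the stored term is the bare prefix and the floor lead byte sits one position past it; widen the scan by one byte: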
+          if (currentFrame.floorSuffixLeadStart == -1) {
+            suffix++;
+          }
+
+          //if (DEBUG) System.out.println("      prefix=" + prefix + " suffix=" + suffix);
+
+          if (suffix == 0) {
+
+            //if (DEBUG) System.out.println("  pop frame");
+
+            // This means the current frame is fooa*, so we must first
+            // pop the current frame, then scan in the parent frame:
+            if (currentFrame.ord == 0) {
+              //if (DEBUG) System.out.println("  return null");
+              return null;
+            }
+            currentFrame = stack[currentFrame.ord-1];
+
+            // Current (parent) frame is now foo*, so now we just scan
+            // until the lead suffix byte is > floorSuffixLeadEnd
+            //assert currentFrame.prefix == prefix-1;
+            //prefix = currentFrame.prefix;
+
+            // When we pop, the parent block's prefix is not necessarily prefix-1; e.g. from
+            // block 417*, whose first term is the floor prefix term 41[7-9], we pop to block 4*:
+            prefix = currentFrame.prefix;
+
+            suffix = term.length - currentFrame.prefix;
+          } else {
+            // No need to pop; just scan in currentFrame:
+          }
+
+          //if (DEBUG) System.out.println("    start scan: prefix=" + prefix + " suffix=" + suffix);
+
+          // Now we scan until the lead suffix byte is > floorSuffixLeadEnd
+          scanFloor:
+          while (true) {
+            //if (DEBUG) System.out.println("      scan next");
+            if (currentFrame.nextEnt == currentFrame.entCount) {
+              if (currentFrame.isLastInFloor == false) {
+                //if (DEBUG) System.out.println("      next floor block");
+                currentFrame.loadNextFloorBlock();
+              } else if (currentFrame.ord == 0) {
+                //if (DEBUG) System.out.println("  return null");
+                return null;
+              } else {
+                // Pop frame, which also means we've moved beyond this
+                // auto-prefix term:
+                currentFrame = stack[currentFrame.ord-1];
+                //if (DEBUG) System.out.println("      pop, now curFrame.prefix=" + currentFrame.prefix);
+                continue nextTerm;
+              }
+            }
+            isSubBlock = currentFrame.next();
+            //if (DEBUG) {
+            //  BytesRef suffixBytes = new BytesRef(currentFrame.suffix);
+            //  System.arraycopy(currentFrame.suffixBytes, currentFrame.startBytePos, suffixBytes.bytes, 0, currentFrame.suffix);
+            //  suffixBytes.length = currentFrame.suffix;
+            //  System.out.println("      currentFrame.suffix=" + brToString(suffixBytes));
+            //}
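+            // If this entry no longer shares the prefix bytes (all but the floor lead byte), we are past the floor'd range and can stop scanning: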
+            for(int i=0;i<suffix-1;i++) {
+              if (term.bytes[prefix+i] != currentFrame.suffixBytes[currentFrame.startBytePos+i]) {
+                //if (DEBUG) System.out.println("      done; now stop scan");
+                break scanFloor;
+              }
+            }
+            //if (DEBUG) {
+            //  if (currentFrame.suffix >= suffix) {
+            //    System.out.println("      cmp label=" + Integer.toHexString(currentFrame.suffixBytes[currentFrame.startBytePos+suffix-1]) + " vs " + floorSuffixLeadEnd);
+            //  }
+            //}
+            if (currentFrame.suffix >= suffix && (currentFrame.suffixBytes[currentFrame.startBytePos+suffix-1]&0xff) > floorSuffixLeadEnd) {
+              // Done scanning: we are now on the first term after all
+              // terms matched by this auto-prefix term
+              //if (DEBUG) System.out.println("      done; now stop scan");
+              break;
+            }
+          }
         }
+      } else {
+        // Pop finished frames
+        while (currentFrame.nextEnt == currentFrame.entCount) {
+          if (!currentFrame.isLastInFloor) {
+            //if (DEBUG) System.out.println("    next-floor-block: trans: " + currentFrame.transition);
+            // Advance to next floor block
+            currentFrame.loadNextFloorBlock();
+            //if (DEBUG) System.out.println("\n  frame ord=" + currentFrame.ord + " prefix=" + brToString(new BytesRef(term.bytes, term.offset, currentFrame.prefix)) + " state=" + currentFrame.state + " lastInFloor?=" + currentFrame.isLastInFloor + " fp=" + currentFrame.fp + " outputPrefix=" + currentFrame.outputPrefix);
+            break;
+          } else {
+            //if (DEBUG) System.out.println("  pop frame");
+            if (currentFrame.ord == 0) {
+              //if (DEBUG) System.out.println("  return null");
+              return null;
+            }
+            final long lastFP = currentFrame.fpOrig;
+            currentFrame = stack[currentFrame.ord-1];
+            assert currentFrame.lastSubFP == lastFP;
+            //if (DEBUG) System.out.println("\n  frame ord=" + currentFrame.ord + " prefix=" + brToString(new BytesRef(term.bytes, term.offset, currentFrame.prefix)) + " state=" + currentFrame.state + " lastInFloor?=" + currentFrame.isLastInFloor + " fp=" + currentFrame.fp + " outputPrefix=" + currentFrame.outputPrefix);
+          }
+        }
+
+        isSubBlock = currentFrame.next();
       }
 
-      final boolean isSubBlock = currentFrame.next();
-      // if (DEBUG) {
-      //   final BytesRef suffixRef = new BytesRef();
-      //   suffixRef.bytes = currentFrame.suffixBytes;
-      //   suffixRef.offset = currentFrame.startBytePos;
-      //   suffixRef.length = currentFrame.suffix;
-      //   System.out.println("    " + (isSubBlock ? "sub-block" : "term") + " " + currentFrame.nextEnt + " (of " + currentFrame.entCount + ") suffix=" + brToString(suffixRef));
-      // }
+      //if (DEBUG) {
+      //  final BytesRef suffixRef = new BytesRef();
+      //  suffixRef.bytes = currentFrame.suffixBytes;
+      //  suffixRef.offset = currentFrame.startBytePos;
+      //  suffixRef.length = currentFrame.suffix;
+      //  System.out.println("    " + (isSubBlock ? "sub-block" : "term") + " " + currentFrame.nextEnt + " (of " + currentFrame.entCount + ") suffix=" + brToString(suffixRef));
+      //}
 
       if (currentFrame.suffix != 0) {
+        // Advance our position in the automaton to match the entry
+        // the terms dict just next'd to:
         final int label = currentFrame.suffixBytes[currentFrame.startBytePos] & 0xff;
+        //if (DEBUG) {
+        //  System.out.println("    move automaton to label=" + label + " vs curMax=" + currentFrame.curTransitionMax);
+        // }
         while (label > currentFrame.curTransitionMax) {
           if (currentFrame.transitionIndex >= currentFrame.transitionCount-1) {
-            // Stop processing this frame -- no further
-            // matches are possible because we've moved
-            // beyond what the max transition will allow
-            //if (DEBUG) System.out.println("      break: trans=" + (currentFrame.transitions.length == 0 ? "n/a" : currentFrame.transitions[currentFrame.transitionIndex]));
-
-            // sneaky!  forces a pop above
-            currentFrame.isLastInFloor = true;
-            currentFrame.nextEnt = currentFrame.entCount;
+            // Pop this frame: no further matches are possible because
+            // we've moved beyond what the max transition will allow
+            //if (DEBUG) System.out.println("      break: trans");
+            if (currentFrame.ord == 0) {
+              //if (DEBUG) System.out.println("  return null");
+              return null;
+            }
+            currentFrame = stack[currentFrame.ord-1];
             continue nextTerm;
           }
           currentFrame.transitionIndex++;
-          compiledAutomaton.automaton.getNextTransition(currentFrame.transition);
+          automaton.getNextTransition(currentFrame.transition);
           currentFrame.curTransitionMax = currentFrame.transition.max;
-          //if (DEBUG) System.out.println("      next trans=" + currentFrame.transitions[currentFrame.transitionIndex]);
+          //if (DEBUG) System.out.println("      next trans");
         }
       }
 
       // First test the common suffix, if set:
-      if (compiledAutomaton.commonSuffixRef != null && !isSubBlock) {
+      if (commonSuffix != null && !isSubBlock) {
         final int termLen = currentFrame.prefix + currentFrame.suffix;
-        if (termLen < compiledAutomaton.commonSuffixRef.length) {
+        if (termLen < commonSuffix.length) {
           // No match
-          // if (DEBUG) {
-          //   System.out.println("      skip: common suffix length");
-          // }
+          //if (DEBUG) System.out.println("      skip: common suffix length");
           continue nextTerm;
         }
 
         final byte[] suffixBytes = currentFrame.suffixBytes;
-        final byte[] commonSuffixBytes = compiledAutomaton.commonSuffixRef.bytes;
+        final byte[] commonSuffixBytes = commonSuffix.bytes;
 
-        final int lenInPrefix = compiledAutomaton.commonSuffixRef.length - currentFrame.suffix;
-        assert compiledAutomaton.commonSuffixRef.offset == 0;
+        final int lenInPrefix = commonSuffix.length - currentFrame.suffix;
+        assert commonSuffix.offset == 0;
         int suffixBytesPos;
         int commonSuffixBytesPos = 0;
 
@@ -381,24 +597,20 @@ final class IntersectTermsEnum extends T
           final int termBytesPosEnd = currentFrame.prefix;
           while (termBytesPos < termBytesPosEnd) {
             if (termBytes[termBytesPos++] != commonSuffixBytes[commonSuffixBytesPos++]) {
-              // if (DEBUG) {
-              //   System.out.println("      skip: common suffix mismatch (in prefix)");
-              // }
+              //if (DEBUG) System.out.println("      skip: common suffix mismatch (in prefix)");
               continue nextTerm;
             }
           }
           suffixBytesPos = currentFrame.startBytePos;
         } else {
-          suffixBytesPos = currentFrame.startBytePos + currentFrame.suffix - compiledAutomaton.commonSuffixRef.length;
+          suffixBytesPos = currentFrame.startBytePos + currentFrame.suffix - commonSuffix.length;
         }
 
         // Test overlapping suffix part:
-        final int commonSuffixBytesPosEnd = compiledAutomaton.commonSuffixRef.length;
+        final int commonSuffixBytesPosEnd = commonSuffix.length;
         while (commonSuffixBytesPos < commonSuffixBytesPosEnd) {
           if (suffixBytes[suffixBytesPos++] != commonSuffixBytes[commonSuffixBytesPos++]) {
-            // if (DEBUG) {
-            //   System.out.println("      skip: common suffix mismatch");
-            // }
+            //if (DEBUG) System.out.println("      skip: common suffix mismatch");
             continue nextTerm;
           }
         }
@@ -410,10 +622,19 @@ final class IntersectTermsEnum extends T
       // "temporarily" accepted, we just blindly .next()
       // until the limit
 
-      // See if the term prefix matches the automaton:
+      // TODO: for first iter of this loop can't we just use the current trans?  we already advanced it and confirmed it matches lead
+      // byte of the suffix
+
+      // See if the term suffix matches the automaton:
       int state = currentFrame.state;
+      int lastState = currentFrame.lastState;
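+      // lastState is the automaton state just before the final suffix label; the floor'd auto-prefix check below tests its lead-byte range starting from it: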
+      //if (DEBUG) {
+      //  System.out.println("  a state=" + state + " curFrame.suffix.len=" + currentFrame.suffix + " curFrame.prefix=" + currentFrame.prefix);
+      // }
       for (int idx=0;idx<currentFrame.suffix;idx++) {
-        state = runAutomaton.step(state,  currentFrame.suffixBytes[currentFrame.startBytePos+idx] & 0xff);
+        lastState = state;
+        //if (DEBUG) System.out.println("    step label=" + (char) (currentFrame.suffixBytes[currentFrame.startBytePos+idx] & 0xff));
+        state = runAutomaton.step(state, currentFrame.suffixBytes[currentFrame.startBytePos+idx] & 0xff);
         if (state == -1) {
           // No match
           //System.out.println("    no s=" + state);
@@ -423,16 +644,59 @@ final class IntersectTermsEnum extends T
         }
       }
 
+      //if (DEBUG) System.out.println("    after suffix: state=" + state + " lastState=" + lastState);
+
       if (isSubBlock) {
         // Match!  Recurse:
         //if (DEBUG) System.out.println("      sub-block match to state=" + state + "; recurse fp=" + currentFrame.lastSubFP);
         copyTerm();
         currentFrame = pushFrame(state);
-        //if (DEBUG) System.out.println("\n  frame ord=" + currentFrame.ord + " prefix=" + brToString(new BytesRef(term.bytes, term.offset, currentFrame.prefix)) + " state=" + currentFrame.state + " lastInFloor?=" + currentFrame.isLastInFloor + " fp=" + currentFrame.fp + " trans=" + (currentFrame.transitions.length == 0 ? "n/a" : currentFrame.transitions[currentFrame.transitionIndex]) + " outputPrefix=" + currentFrame.outputPrefix);
+        currentFrame.lastState = lastState;
+        //if (DEBUG) System.out.println("\n  frame ord=" + currentFrame.ord + " prefix=" + brToString(new BytesRef(term.bytes, term.offset, currentFrame.prefix)) + " state=" + currentFrame.state + " lastInFloor?=" + currentFrame.isLastInFloor + " fp=" + currentFrame.fp + " trans: " + currentFrame.transition + " outputPrefix=" + currentFrame.outputPrefix);
+      } else if (currentFrame.isAutoPrefixTerm) {
+        // We are on an auto-prefix term, meaning this term was compiled
+        // at indexing time, matching all terms sharing this prefix (or,
+        // a floor'd subset of them if that count was too high).  A
+        // prefix term represents a range of terms, so we now need to
+        // test whether, from the current state in the automaton, it
+        // accepts all terms in that range.  As long as it does, we can
+        // use this term and then later skip ahead past all terms in
+        // this range:
+        if (allowAutoPrefixTerms) {
+
+          if (currentFrame.floorSuffixLeadEnd == -1) {
+            // Simple prefix case
+            useAutoPrefixTerm = state == sinkState;
+          } else {
+            if (currentFrame.floorSuffixLeadStart == -1) {
+              // Must also accept the empty string in this case
+              if (automaton.isAccept(state)) {
+                //if (DEBUG) System.out.println("      state is accept");
+                useAutoPrefixTerm = acceptsSuffixRange(state, 0, currentFrame.floorSuffixLeadEnd);
+              }
+            } else {
+              useAutoPrefixTerm = acceptsSuffixRange(lastState, currentFrame.floorSuffixLeadStart, currentFrame.floorSuffixLeadEnd);
+            }
+          }
+
+          //if (DEBUG) System.out.println("  useAutoPrefixTerm=" + useAutoPrefixTerm);
+
+          if (useAutoPrefixTerm) {
+            copyTerm();
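+            // Mark the term state as an auto-prefix (synthetic) term, not a real indexed term, so callers can treat it accordingly: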
+            currentFrame.termState.isRealTerm = false;
+            //if (DEBUG) System.out.println("  return auto prefix term: " + brToString(term));
+            return term;
+          } else {
+            // We move on to the next term
+          }
+        } else {
+          // We are not allowed to use auto-prefix terms, so we just skip it
+        }
       } else if (runAutomaton.isAccept(state)) {
         copyTerm();
-        //if (DEBUG) System.out.println("      term match to state=" + state + "; return term=" + brToString(term));
+        //if (DEBUG) System.out.println("      term match to state=" + state);
         assert savedStartTerm == null || term.compareTo(savedStartTerm) > 0: "saveStartTerm=" + savedStartTerm.utf8ToString() + " term=" + term.utf8ToString();
+        //if (DEBUG) System.out.println("      return term=" + brToString(term));
         return term;
       } else {
         //System.out.println("    no s=" + state);
@@ -440,6 +704,41 @@ final class IntersectTermsEnum extends T
     }
   }
 
+  private final Transition transition = new Transition();
+
+  /** Returns true if, from this state, the automaton accepts any suffix
+   *  starting with a label between start and end, inclusive.  We just
+   *  look for a transition, matching this range, to the sink state.  */
+  private boolean acceptsSuffixRange(int state, int start, int end) {
+
+    //if (DEBUG) System.out.println("    acceptsSuffixRange state=" + state + " start=" + start + " end=" + end);
+
+    int count = automaton.initTransition(state, transition);
+    //if (DEBUG) System.out.println("      transCount=" + count);
+    //if (DEBUG) System.out.println("      trans=" + transition);
+    for(int i=0;i<count;i++) {
+      automaton.getNextTransition(transition);
+      if (start >= transition.min && end <= transition.max && transition.dest == sinkState) {
+        return true;
+      }
+    }
+
+    return false;
+  }
+
+  // for debugging
+  @SuppressWarnings("unused")
+  static String brToString(BytesRef b) {
+    try {
+      return b.utf8ToString() + " " + b;
+    } catch (Throwable t) {
+      // If the BytesRef isn't actually UTF-8, or it's e.g. a
+      // prefix of UTF-8 that ends mid-unicode-char, we
+      // fall back to hex:
+      return b.toString();
+    }
+  }
+
   private void copyTerm() {
     //System.out.println("      copyTerm cur.prefix=" + currentFrame.prefix + " cur.suffix=" + currentFrame.suffix + " first=" + (char) currentFrame.suffixBytes[currentFrame.startBytePos]);
     final int len = currentFrame.prefix + currentFrame.suffix;