Posted to commits@lucene.apache.org by no...@apache.org on 2017/02/15 01:35:30 UTC

[4/4] lucene-solr:master: Revert "Merge remote-tracking branch 'origin/master'"

Revert "Merge remote-tracking branch 'origin/master'"

This reverts commit 26298f35df118aad186e6eaf1ea5c2e5f50d607f, reversing
changes made to 65c6c576b720b19029a10bf14f81d4de23302863.

undoing the merge commit
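
A message of this shape is what git itself generates when reverting a merge commit, which requires naming the parent whose side is kept; the invocation was presumably along these lines (a sketch, the exact command is not recorded here):

    git revert -m 1 26298f3

Here -m 1 selects the first parent (65c6c57) as the mainline, so the revert undoes everything the merge brought in relative to it.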


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/325cbf00
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/325cbf00
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/325cbf00

Branch: refs/heads/master
Commit: 325cbf00185e3fe2576720c83635cbed8ed12c0e
Parents: 26298f3
Author: Noble Paul <no...@gmail.com>
Authored: Tue Feb 14 17:32:14 2017 -0800
Committer: Noble Paul <no...@gmail.com>
Committed: Tue Feb 14 17:32:14 2017 -0800

----------------------------------------------------------------------
 lucene/CHANGES.txt                              |  10 -
 .../pattern/SimplePatternSplitTokenizer.java    | 258 --------
 .../SimplePatternSplitTokenizerFactory.java     |  76 ---
 .../pattern/SimplePatternTokenizer.java         | 242 -------
 .../pattern/SimplePatternTokenizerFactory.java  |  76 ---
 ...apache.lucene.analysis.util.TokenizerFactory |   2 -
 .../lucene/analysis/core/TestRandomChains.java  |  10 +-
 .../TestSimplePatternSplitTokenizer.java        | 273 --------
 .../pattern/TestSimplePatternTokenizer.java     | 218 -------
 lucene/common-build.xml                         |   2 +-
 .../apache/lucene/analysis/package-info.java    |  12 +-
 .../lucene/util/automaton/ByteRunAutomaton.java |   4 +-
 .../util/automaton/CharacterRunAutomaton.java   |   2 +-
 .../lucene/util/automaton/Operations.java       |  44 +-
 .../lucene/util/automaton/RunAutomaton.java     | 126 ++--
 lucene/demo/ivy.xml                             |   2 +-
 .../lucene/search/TermAutomatonScorer.java      |   2 +-
 lucene/test-framework/ivy.xml                   |   2 +-
 .../lucene/index/BasePointsFormatTestCase.java  |  41 +-
 lucene/tools/ivy.xml                            |   2 +-
 .../dependencies/GetMavenDependenciesTask.java  |  58 +-
 solr/CHANGES.txt                                |  21 +-
 solr/contrib/extraction/ivy.xml                 |   2 +-
 .../java/org/apache/solr/cloud/Overseer.java    |   2 +-
 .../handler/component/RealTimeGetComponent.java |   7 +-
 .../component/SortedNumericStatsValues.java     | 106 ----
 .../solr/handler/component/StatsField.java      |   2 +-
 .../handler/component/StatsValuesFactory.java   |   7 +-
 .../org/apache/solr/request/IntervalFacets.java |  77 +--
 .../org/apache/solr/request/NumericFacets.java  | 173 +----
 .../org/apache/solr/request/SimpleFacets.java   |   8 +-
 .../apache/solr/schema/DoublePointField.java    |  13 +-
 .../java/org/apache/solr/schema/FieldType.java  |   2 +-
 .../org/apache/solr/schema/FloatPointField.java |  16 +-
 .../org/apache/solr/schema/IntPointField.java   |  10 +-
 .../org/apache/solr/schema/LongPointField.java  |  10 +-
 .../apache/solr/schema/NumericFieldType.java    |  48 +-
 .../java/org/apache/solr/schema/PointField.java |  31 +-
 .../apache/solr/search/SolrIndexSearcher.java   | 212 +++----
 .../solr/store/blockcache/BlockCache.java       |  10 +-
 .../solr/uninverting/UninvertingReader.java     |  37 +-
 .../conf/schema-distrib-interval-faceting.xml   |   8 +-
 .../conf/schema-docValuesFaceting.xml           |  11 +-
 .../solr/collection1/conf/schema-point.xml      |   6 -
 .../test-files/solr/collection1/conf/schema.xml |  55 +-
 .../solr/collection1/conf/schema11.xml          |  28 +-
 .../solr/collection1/conf/schema12.xml          |  14 +-
 .../solr/collection1/conf/schema_latest.xml     |  50 +-
 .../conf/solrconfig-update-processor-chains.xml |   2 -
 .../org/apache/solr/TestDistributedSearch.java  |   8 +-
 .../org/apache/solr/TestGroupingSearch.java     |   1 -
 .../org/apache/solr/TestRandomDVFaceting.java   |  26 +-
 .../cloud/SegmentTerminateEarlyTestState.java   |   4 +-
 .../apache/solr/cloud/TestSegmentSorting.java   |   2 -
 .../handler/component/StatsComponentTest.java   |   7 +-
 .../handler/component/TermsComponentTest.java   |   2 -
 .../apache/solr/request/TestFacetMethods.java   |  11 +-
 .../solr/request/TestIntervalFaceting.java      |  12 +-
 .../org/apache/solr/schema/TestPointFields.java | 631 ++++++-------------
 .../apache/solr/search/TestSolrQueryParser.java |   2 +-
 .../update/processor/AtomicUpdatesTest.java     |  64 +-
 solr/test-framework/ivy.xml                     |   2 +-
 .../java/org/apache/solr/SolrTestCaseJ4.java    |   4 -
 .../apache/solr/cloud/MiniSolrCloudCluster.java |   2 +-
 64 files changed, 582 insertions(+), 2626 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/325cbf00/lucene/CHANGES.txt
----------------------------------------------------------------------
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index e4042da..f9c464b 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -107,11 +107,6 @@ New Features
   SortedNumericSelector.Type can give a ValueSource view of a 
  SortedNumericDocValues field. (Tomás Fernández Löbbe)
 
-* LUCENE-7465: Add SimplePatternTokenizer and
-  SimplePatternSplitTokenizer, using Lucene's regexp/automaton
-  implementation for analysis/tokenization (Clinton Gormley, Mike
-  McCandless)
-
 Bug Fixes
 
 * LUCENE-7630: Fix (Edge)NGramTokenFilter to no longer drop payloads
@@ -176,11 +171,6 @@ Build
   Jars are not downloaded; compilation is not updated; and Clover is not enabled.
   (Steve Rowe)
 
-* LUCENE-7694: Update forbiddenapis to version 2.3. (Uwe Schindler)
-
-* LUCENE-7693: Replace "org.apache." logic in GetMavenDependenciesTask.
-  (Daniel Collins, Christine Poerschke)
-
 Other
 
 * LUCENE-7666: Fix typos in lucene-join package info javadoc.

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/325cbf00/lucene/analysis/common/src/java/org/apache/lucene/analysis/pattern/SimplePatternSplitTokenizer.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/pattern/SimplePatternSplitTokenizer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/pattern/SimplePatternSplitTokenizer.java
deleted file mode 100644
index d2b10c1..0000000
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/pattern/SimplePatternSplitTokenizer.java
+++ /dev/null
@@ -1,258 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.lucene.analysis.pattern;
-
-import java.io.IOException;
-
-import org.apache.lucene.analysis.Tokenizer;
-import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
-import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
-import org.apache.lucene.util.ArrayUtil;
-import org.apache.lucene.util.AttributeFactory;
-import org.apache.lucene.util.automaton.Automaton;
-import org.apache.lucene.util.automaton.CharacterRunAutomaton;
-import org.apache.lucene.util.automaton.Operations;
-import org.apache.lucene.util.automaton.RegExp;
-
-/**
- * This tokenizer uses a Lucene {@link RegExp} or (expert usage) a pre-built determinized {@link Automaton}, to locate tokens.
- * The regexp syntax is more limited than {@link PatternTokenizer}, but the tokenization is quite a bit faster.  This is just
- * like {@link SimplePatternTokenizer} except that the pattern should match valid token separator characters, like
- * {@code String.split}.  Empty string tokens are never produced.
- *
- * @lucene.experimental
- */
-
-public final class SimplePatternSplitTokenizer extends Tokenizer {
-
-  private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
-  private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
-
-  private final CharacterRunAutomaton runDFA;
-
-  // TODO: this is copied from SimplePatternTokenizer, but there are subtle differences e.g. we track sepUpto and tokenUpto;
-  // find a clean way to share it:
-
-  // TODO: we could likely use a single rolling buffer instead of two separate char buffers here.  We could also use PushbackReader but I
-  // suspect it's slowish:
-
-  private char[] pendingChars = new char[8];
-  private int tokenUpto;
-  private int pendingLimit;
-  private int pendingUpto;
-  private int offset;
-  private int sepUpto;
-  private final char[] buffer = new char[1024];
-  private int bufferLimit;
-  private int bufferNextRead;
-
-  /** See {@link RegExp} for the accepted syntax. */
-  public SimplePatternSplitTokenizer(String regexp) {
-    this(DEFAULT_TOKEN_ATTRIBUTE_FACTORY, regexp, Operations.DEFAULT_MAX_DETERMINIZED_STATES);
-  }
-
-  /** Runs a pre-built automaton. */
-  public SimplePatternSplitTokenizer(Automaton dfa) {
-    this(DEFAULT_TOKEN_ATTRIBUTE_FACTORY, dfa);
-  }
-
-  /** See {@link RegExp} for the accepted syntax. */
-  public SimplePatternSplitTokenizer(AttributeFactory factory, String regexp, int maxDeterminizedStates) {
-    this(factory, new RegExp(regexp).toAutomaton());
-  }
-
-  /** Runs a pre-built automaton. */
-  public SimplePatternSplitTokenizer(AttributeFactory factory, Automaton dfa) {
-    super(factory);
-
-    // we require user to do this up front because it is a possibly very costly operation, and user may be creating us frequently, not
-    // realizing this ctor is otherwise trappy
-    if (dfa.isDeterministic() == false) {
-      throw new IllegalArgumentException("please determinize the incoming automaton first");
-    }
-
-    runDFA = new CharacterRunAutomaton(dfa, Operations.DEFAULT_MAX_DETERMINIZED_STATES);
-  }
-
-  private void fillToken(int offsetStart) {
-    termAtt.setLength(tokenUpto);
-    offsetAtt.setOffset(correctOffset(offsetStart), correctOffset(offsetStart+tokenUpto));
-  }
-
-  @Override
-  public boolean incrementToken() throws IOException {
-
-    int offsetStart = offset;
-
-    clearAttributes();
-
-    tokenUpto = 0;
-
-    while (true) {
-      sepUpto = 0;
-
-      // The runDFA operates in Unicode space, not UTF16 (java's char):
-      int ch = nextCodePoint();
-      if (ch == -1) {
-        if (tokenUpto > 0) {
-          fillToken(offsetStart);
-          return true;
-        } else {
-          return false;
-        }
-      }
-      int state = runDFA.step(0, ch);
-
-      if (state != -1) {
-        // a token separator just possibly started; keep scanning to see if the token is accepted:
-        int lastAcceptLength = -1;
-        do {
-
-          if (runDFA.isAccept(state)) {
-            // record that the token separator matches here, but keep scanning in case a longer match also works (greedy):
-            lastAcceptLength = sepUpto;
-          }
-
-          ch = nextCodePoint();
-          if (ch == -1) {
-            break;
-          }
-          state = runDFA.step(state, ch);
-        } while (state != -1);
-        
-        if (lastAcceptLength != -1) {
-          // strip the trailing separator we just matched from the token:
-          tokenUpto -= lastAcceptLength;
-          // we found a token separator
-          int extra = sepUpto - lastAcceptLength;
-          if (extra != 0) {
-            pushBack(extra);
-          }
-          if (tokenUpto > 0) {
-            fillToken(offsetStart);
-            return true;
-          } else {
-            // we matched one token separator immediately after another
-            offsetStart = offset;
-          }
-        } else if (ch == -1) {
-          if (tokenUpto > 0) {
-            fillToken(offsetStart);
-            return true;
-          } else {
-            return false;
-          }
-        } else {
-          // false alarm: there was no token separator here; push back all but the first character we scanned
-          pushBack(sepUpto-1);
-        }
-      }
-    }
-  }
-
-  @Override
-  public void end() throws IOException {
-    super.end();
-    final int ofs = correctOffset(offset + pendingLimit - pendingUpto);
-    offsetAtt.setOffset(ofs, ofs);
-  }
-
-  @Override
-  public void reset() throws IOException {
-    super.reset();
-    offset = 0;
-    pendingUpto = 0;
-    pendingLimit = 0;
-    sepUpto = 0;
-    bufferNextRead = 0;
-    bufferLimit = 0;
-  }
-
-  /** Pushes back the last {@code count} characters in current token's buffer. */
-  private void pushBack(int count) {
-    tokenUpto -= count;
-    assert tokenUpto >= 0;
-    if (pendingLimit == 0) {
-      if (bufferNextRead >= count) {
-        // optimize common case when the chars we are pushing back are still in the buffer
-        bufferNextRead -= count;
-      } else {
-        if (count > pendingChars.length) {
-          pendingChars = ArrayUtil.grow(pendingChars, count);
-        }
-        System.arraycopy(termAtt.buffer(), tokenUpto - count, pendingChars, 0, count);
-        pendingLimit = count;
-      }
-    } else {
-      // we are pushing back what is already in our pending buffer
-      pendingUpto -= count;
-      assert pendingUpto >= 0;
-    }
-    offset -= count;
-  }
-
-  private void appendToToken(char ch) {
-    char[] buffer = termAtt.buffer();
-    if (tokenUpto == buffer.length) {
-      buffer = termAtt.resizeBuffer(tokenUpto + 1);
-    }
-    buffer[tokenUpto++] = ch;
-    sepUpto++;
-  }
-
-  private int nextCodeUnit() throws IOException {
-    int result;
-    if (pendingUpto < pendingLimit) {
-      result = pendingChars[pendingUpto++];
-      if (pendingUpto == pendingLimit) {
-        // We used up the pending buffer
-        pendingUpto = 0;
-        pendingLimit = 0;
-      }
-      appendToToken((char) result);
-      offset++;
-    } else if (bufferLimit == -1) {
-      return -1;
-    } else {
-      assert bufferNextRead <= bufferLimit: "bufferNextRead=" + bufferNextRead + " bufferLimit=" + bufferLimit;
-      if (bufferNextRead == bufferLimit) {
-        bufferLimit = input.read(buffer, 0, buffer.length);
-        if (bufferLimit == -1) {
-          return -1;
-        }
-        bufferNextRead = 0;
-      }
-      result = buffer[bufferNextRead++];
-      offset++;
-      appendToToken((char) result);
-    }
-    return result;
-  }
-  
-  private int nextCodePoint() throws IOException {
-
-    int ch = nextCodeUnit();
-    if (ch == -1) {
-      return ch;
-    }
-    if (Character.isHighSurrogate((char) ch)) {
-      return Character.toCodePoint((char) ch, (char) nextCodeUnit());
-    } else {
-      return ch;
-    }
-  }
-}

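For context, the class removed above was driven like any other Tokenizer. A minimal usage sketch, distilled from the tests this commit also deletes below (SplitDemo is a hypothetical driver; the pattern and input come from those tests, and this compiles only against a tree that still contains the class):

    import java.io.StringReader;
    import org.apache.lucene.analysis.Tokenizer;
    import org.apache.lucene.analysis.pattern.SimplePatternSplitTokenizer;
    import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

    public class SplitDemo {
      public static void main(String[] args) throws Exception {
        // The pattern matches the token separators, like String.split:
        Tokenizer t = new SimplePatternSplitTokenizer("[ \t\r\n]+");
        CharTermAttribute termAtt = t.getAttribute(CharTermAttribute.class);
        t.setReader(new StringReader("a \tb   c"));
        t.reset();
        while (t.incrementToken()) {
          System.out.println(termAtt);  // prints a, b, c on separate lines
        }
        t.end();
        t.close();
      }
    }
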
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/325cbf00/lucene/analysis/common/src/java/org/apache/lucene/analysis/pattern/SimplePatternSplitTokenizerFactory.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/pattern/SimplePatternSplitTokenizerFactory.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/pattern/SimplePatternSplitTokenizerFactory.java
deleted file mode 100644
index 4af6286..0000000
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/pattern/SimplePatternSplitTokenizerFactory.java
+++ /dev/null
@@ -1,76 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.lucene.analysis.pattern;
-
-import java.util.Map;
-
-import org.apache.lucene.analysis.util.TokenizerFactory;
-import org.apache.lucene.util.AttributeFactory;
-import org.apache.lucene.util.automaton.Automaton;
-import org.apache.lucene.util.automaton.Operations;
-import org.apache.lucene.util.automaton.RegExp;
-
-/**
- * Factory for {@link SimplePatternSplitTokenizer}, for producing tokens by splitting according to the provided regexp.
- *
- * <p>This tokenizer uses Lucene {@link RegExp} pattern matching to construct distinct tokens
- * for the input stream.  The syntax is more limited than {@link PatternTokenizer}, but the
- * tokenization is quite a bit faster.  It takes two arguments:
- * <br>
- * <ul>
- * <li>"pattern" (required) is the regular expression, according to the syntax described at {@link RegExp}</li>
- * <li>"maxDeterminizedStates" (optional, default 10000) the limit on total state count for the determinized automaton computed from the regexp</li>
- * </ul>
- * <p>
- * The pattern matches the characters that should split tokens, like {@code String.split}, and the
- * matching is greedy such that the longest token separator matching at a given point is matched.  Empty
- * tokens are never created.
- *
- * <p>For example, to match tokens delimited by simple whitespace characters:
- *
- * <pre class="prettyprint">
- * &lt;fieldType name="text_ptn" class="solr.TextField" positionIncrementGap="100"&gt;
- *   &lt;analyzer&gt;
- *     &lt;tokenizer class="solr.SimplePatternSplitTokenizerFactory" pattern="[ \t\r\n]+"/&gt;
- *   &lt;/analyzer&gt;
- * &lt;/fieldType&gt;</pre> 
- * 
- * @lucene.experimental
- *
- * @see SimplePatternSplitTokenizer
- */
-public class SimplePatternSplitTokenizerFactory extends TokenizerFactory {
-  public static final String PATTERN = "pattern";
-  private final Automaton dfa;
-  private final int maxDeterminizedStates;
- 
-  /** Creates a new SimplePatternSplitTokenizerFactory */
-  public SimplePatternSplitTokenizerFactory(Map<String,String> args) {
-    super(args);
-    maxDeterminizedStates = getInt(args, "maxDeterminizedStates", Operations.DEFAULT_MAX_DETERMINIZED_STATES);
-    dfa = Operations.determinize(new RegExp(require(args, PATTERN)).toAutomaton(), maxDeterminizedStates);
-    if (args.isEmpty() == false) {
-      throw new IllegalArgumentException("Unknown parameters: " + args);
-    }
-  }
-  
-  @Override
-  public SimplePatternSplitTokenizer create(final AttributeFactory factory) {
-    return new SimplePatternSplitTokenizer(factory, dfa);
-  }
-}

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/325cbf00/lucene/analysis/common/src/java/org/apache/lucene/analysis/pattern/SimplePatternTokenizer.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/pattern/SimplePatternTokenizer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/pattern/SimplePatternTokenizer.java
deleted file mode 100644
index 867b10a..0000000
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/pattern/SimplePatternTokenizer.java
+++ /dev/null
@@ -1,242 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.lucene.analysis.pattern;
-
-import java.io.IOException;
-
-import org.apache.lucene.analysis.Tokenizer;
-import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
-import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
-import org.apache.lucene.util.ArrayUtil;
-import org.apache.lucene.util.AttributeFactory;
-import org.apache.lucene.util.automaton.Automaton;
-import org.apache.lucene.util.automaton.CharacterRunAutomaton;
-import org.apache.lucene.util.automaton.Operations;
-import org.apache.lucene.util.automaton.RegExp;
-
-/**
- * This tokenizer uses a Lucene {@link RegExp} or (expert usage) a pre-built determinized {@link Automaton}, to locate tokens.
- * The regexp syntax is more limited than {@link PatternTokenizer}, but the tokenization is quite a bit faster.  The provided
- * regex should match valid token characters (not token separator characters, like {@code String.split}).  The matching is greedy:
- * the longest match at a given start point will be the next token.  Empty string tokens are never produced.
- *
- * @lucene.experimental
- */
-
-// TODO: the matcher here is naive and does have N^2 adversarial cases that are unlikely to arise in practice, e.g. if the pattern is
-// aaaaaaaaaab and the input is aaaaaaaaaaa, the work we do here is N^2 where N is the number of a's.  This is because on failing to match
-// a token, we skip one character forward and try again.  A better approach would be to compile something like this regexp
-// instead: .* | <pattern>, because that automaton would not "forget" all the as it had already seen, and would be a single pass
-// through the input.  I think this is the same thing as Aho/Corasick's algorithm (http://en.wikipedia.org/wiki/Aho%E2%80%93Corasick_string_matching_algorithm).
-// But we cannot implement this (I think?) until/unless Lucene regexps support sub-group capture, so we could know
-// which specific characters the pattern matched.  SynonymFilter has this same limitation.
-
-public final class SimplePatternTokenizer extends Tokenizer {
-
-  private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
-  private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
-
-  private final CharacterRunAutomaton runDFA;
-
-  // TODO: we could likely use a single rolling buffer instead of two separate char buffers here.  We could also use PushbackReader but I
-  // suspect it's slowish:
-
-  private char[] pendingChars = new char[8];
-  private int pendingLimit;
-  private int pendingUpto;
-  private int offset;
-  private int tokenUpto;
-  private final char[] buffer = new char[1024];
-  private int bufferLimit;
-  private int bufferNextRead;
-
-  /** See {@link RegExp} for the accepted syntax. */
-  public SimplePatternTokenizer(String regexp) {
-    this(DEFAULT_TOKEN_ATTRIBUTE_FACTORY, regexp, Operations.DEFAULT_MAX_DETERMINIZED_STATES);
-  }
-
-  /** Runs a pre-built automaton. */
-  public SimplePatternTokenizer(Automaton dfa) {
-    this(DEFAULT_TOKEN_ATTRIBUTE_FACTORY, dfa);
-  }
-
-  /** See {@link RegExp} for the accepted syntax. */
-  public SimplePatternTokenizer(AttributeFactory factory, String regexp, int maxDeterminizedStates) {
-    this(factory, new RegExp(regexp).toAutomaton());
-  }
-
-  /** Runs a pre-built automaton. */
-  public SimplePatternTokenizer(AttributeFactory factory, Automaton dfa) {
-    super(factory);
-
-    // we require user to do this up front because it is a possibly very costly operation, and user may be creating us frequently, not
-    // realizing this ctor is otherwise trappy
-    if (dfa.isDeterministic() == false) {
-      throw new IllegalArgumentException("please determinize the incoming automaton first");
-    }
-
-    runDFA = new CharacterRunAutomaton(dfa, Operations.DEFAULT_MAX_DETERMINIZED_STATES);
-  }
-
-  @Override
-  public boolean incrementToken() throws IOException {
-
-    clearAttributes();
-    tokenUpto = 0;
-
-    while (true) {
-
-      int offsetStart = offset;
-
-      // The runDFA operates in Unicode space, not UTF16 (java's char):
-
-      int ch = nextCodePoint();
-      if (ch == -1) {
-        return false;
-      }
-
-      int state = runDFA.step(0, ch);
-
-      if (state != -1) {
-        // a token just possibly started; keep scanning to see if the token is accepted:
-        int lastAcceptLength = -1;
-        do {
-
-          if (runDFA.isAccept(state)) {
-            // record that the token matches here, but keep scanning in case a longer match also works (greedy):
-            lastAcceptLength = tokenUpto;
-          }
-
-          ch = nextCodePoint();
-          if (ch == -1) {
-            break;
-          }
-          state = runDFA.step(state, ch);
-        } while (state != -1);
-        
-        if (lastAcceptLength != -1) {
-          // we found a token
-          int extra = tokenUpto - lastAcceptLength;
-          if (extra != 0) {
-            pushBack(extra);
-          }
-          termAtt.setLength(lastAcceptLength);
-          offsetAtt.setOffset(correctOffset(offsetStart), correctOffset(offsetStart+lastAcceptLength));
-          return true;
-        } else if (ch == -1) {
-          return false;
-        } else {
-          // false alarm: there was no token here; push back all but the first character we scanned
-          pushBack(tokenUpto-1);
-          tokenUpto = 0;
-        }
-      } else {
-        tokenUpto = 0;
-      }
-    }
-  }
-
-  @Override
-  public void end() throws IOException {
-    super.end();
-    final int ofs = correctOffset(offset + pendingLimit - pendingUpto);
-    offsetAtt.setOffset(ofs, ofs);
-  }
-
-  @Override
-  public void reset() throws IOException {
-    super.reset();
-    offset = 0;
-    pendingUpto = 0;
-    pendingLimit = 0;
-    tokenUpto = 0;
-    bufferNextRead = 0;
-    bufferLimit = 0;
-  }
-
-  /** Pushes back the last {@code count} characters in current token's buffer. */
-  private void pushBack(int count) {
-    
-    if (pendingLimit == 0) {
-      if (bufferNextRead >= count) {
-        // optimize common case when the chars we are pushing back are still in the buffer
-        bufferNextRead -= count;
-      } else {
-        if (count > pendingChars.length) {
-          pendingChars = ArrayUtil.grow(pendingChars, count);
-        }
-        System.arraycopy(termAtt.buffer(), tokenUpto - count, pendingChars, 0, count);
-        pendingLimit = count;
-      }
-    } else {
-      // we are pushing back what is already in our pending buffer
-      pendingUpto -= count;
-      assert pendingUpto >= 0;
-    }
-    offset -= count;
-  }
-
-  private void appendToToken(char ch) {
-    char[] buffer = termAtt.buffer();
-    if (tokenUpto == buffer.length) {
-      buffer = termAtt.resizeBuffer(tokenUpto + 1);
-    }
-    buffer[tokenUpto++] = ch;
-  }
-
-  private int nextCodeUnit() throws IOException {
-    int result;
-    if (pendingUpto < pendingLimit) {
-      result = pendingChars[pendingUpto++];
-      if (pendingUpto == pendingLimit) {
-        // We used up the pending buffer
-        pendingUpto = 0;
-        pendingLimit = 0;
-      }
-      appendToToken((char) result);
-      offset++;
-    } else if (bufferLimit == -1) {
-      return -1;
-    } else {
-      assert bufferNextRead <= bufferLimit: "bufferNextRead=" + bufferNextRead + " bufferLimit=" + bufferLimit;
-      if (bufferNextRead == bufferLimit) {
-        bufferLimit = input.read(buffer, 0, buffer.length);
-        if (bufferLimit == -1) {
-          return -1;
-        }
-        bufferNextRead = 0;
-      }
-      result = buffer[bufferNextRead++];
-      offset++;
-      appendToToken((char) result);
-    }
-    return result;
-  }
-  
-  private int nextCodePoint() throws IOException {
-
-    int ch = nextCodeUnit();
-    if (ch == -1) {
-      return ch;
-    }
-    if (Character.isHighSurrogate((char) ch)) {
-      return Character.toCodePoint((char) ch, (char) nextCodeUnit());
-    } else {
-      return ch;
-    }
-  }
-}

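The matching variant removed above inverts that contract: the regexp describes the token characters themselves, and the longest match wins. A corresponding sketch under the same assumptions as the earlier one (same imports and lifecycle; only the pattern and tokenizer class change):

    // Tokens are maximal runs of non-whitespace, matched greedily:
    Tokenizer t = new SimplePatternTokenizer("[^ \t\r\n]+");
    CharTermAttribute termAtt = t.getAttribute(CharTermAttribute.class);
    t.setReader(new StringReader("aaa 'bbb' ccc"));
    t.reset();
    while (t.incrementToken()) {
      System.out.println(termAtt);  // prints aaa, 'bbb', ccc
    }
    t.end();
    t.close();
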
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/325cbf00/lucene/analysis/common/src/java/org/apache/lucene/analysis/pattern/SimplePatternTokenizerFactory.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/pattern/SimplePatternTokenizerFactory.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/pattern/SimplePatternTokenizerFactory.java
deleted file mode 100644
index 3e74d02..0000000
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/pattern/SimplePatternTokenizerFactory.java
+++ /dev/null
@@ -1,76 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.lucene.analysis.pattern;
-
-import java.util.Map;
-
-import org.apache.lucene.analysis.util.TokenizerFactory;
-import org.apache.lucene.util.AttributeFactory;
-import org.apache.lucene.util.automaton.Automaton;
-import org.apache.lucene.util.automaton.Operations;
-import org.apache.lucene.util.automaton.RegExp;
-
-/**
- * Factory for {@link SimplePatternTokenizer}, for matching tokens based on the provided regexp.
- *
- * <p>This tokenizer uses Lucene {@link RegExp} pattern matching to construct distinct tokens
- * for the input stream.  The syntax is more limited than {@link PatternTokenizer}, but the
- * tokenization is quite a bit faster.  It takes two arguments:
- * <br>
- * <ul>
- * <li>"pattern" (required) is the regular expression, according to the syntax described at {@link RegExp}</li>
- * <li>"maxDeterminizedStates" (optional, default 10000) the limit on total state count for the determinized automaton computed from the regexp</li>
- * </ul>
- * <p>
- * The pattern matches the characters to include in a token (not the split characters), and the
- * matching is greedy such that the longest token matching at a given point is created.  Empty
- * tokens are never created.
- *
- * <p>For example, to match tokens delimited by simple whitespace characters:
- *
- * <pre class="prettyprint">
- * &lt;fieldType name="text_ptn" class="solr.TextField" positionIncrementGap="100"&gt;
- *   &lt;analyzer&gt;
- *     &lt;tokenizer class="solr.SimplePatternTokenizerFactory" pattern="[^ \t\r\n]+"/&gt;
- *   &lt;/analyzer&gt;
- * &lt;/fieldType&gt;</pre> 
- *
- * @lucene.experimental
- * 
- * @see SimplePatternTokenizer
- */
-public class SimplePatternTokenizerFactory extends TokenizerFactory {
-  public static final String PATTERN = "pattern";
-  private final Automaton dfa;
-  private final int maxDeterminizedStates;
- 
-  /** Creates a new SimplePatternTokenizerFactory */
-  public SimplePatternTokenizerFactory(Map<String,String> args) {
-    super(args);
-    maxDeterminizedStates = getInt(args, "maxDeterminizedStates", Operations.DEFAULT_MAX_DETERMINIZED_STATES);
-    dfa = Operations.determinize(new RegExp(require(args, PATTERN)).toAutomaton(), maxDeterminizedStates);
-    if (args.isEmpty() == false) {
-      throw new IllegalArgumentException("Unknown parameters: " + args);
-    }
-  }
-  
-  @Override
-  public SimplePatternTokenizer create(final AttributeFactory factory) {
-    return new SimplePatternTokenizer(factory, dfa);
-  }
-}

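Note that both factories consume their argument map destructively: require() and getInt() remove the entries they read, and anything left over trips the "Unknown parameters" IllegalArgumentException. A hypothetical direct instantiation (a fragment assuming the usual java.util and lucene-core imports; Solr normally does this from the schema XML instead):

    Map<String,String> args = new HashMap<>();  // must be mutable: the ctor drains it
    args.put("pattern", "[^ \t\r\n]+");         // "maxDeterminizedStates" defaults to 10000
    SimplePatternTokenizerFactory factory = new SimplePatternTokenizerFactory(args);
    Tokenizer t = factory.create(AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY);
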
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/325cbf00/lucene/analysis/common/src/resources/META-INF/services/org.apache.lucene.analysis.util.TokenizerFactory
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/resources/META-INF/services/org.apache.lucene.analysis.util.TokenizerFactory b/lucene/analysis/common/src/resources/META-INF/services/org.apache.lucene.analysis.util.TokenizerFactory
index 4b37eb8..be0b7d4 100644
--- a/lucene/analysis/common/src/resources/META-INF/services/org.apache.lucene.analysis.util.TokenizerFactory
+++ b/lucene/analysis/common/src/resources/META-INF/services/org.apache.lucene.analysis.util.TokenizerFactory
@@ -21,8 +21,6 @@ org.apache.lucene.analysis.ngram.EdgeNGramTokenizerFactory
 org.apache.lucene.analysis.ngram.NGramTokenizerFactory
 org.apache.lucene.analysis.path.PathHierarchyTokenizerFactory
 org.apache.lucene.analysis.pattern.PatternTokenizerFactory
-org.apache.lucene.analysis.pattern.SimplePatternSplitTokenizerFactory
-org.apache.lucene.analysis.pattern.SimplePatternTokenizerFactory
 org.apache.lucene.analysis.standard.ClassicTokenizerFactory
 org.apache.lucene.analysis.standard.StandardTokenizerFactory
 org.apache.lucene.analysis.standard.UAX29URLEmailTokenizerFactory

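Removing the two entries above is what un-registers the factories from SPI discovery; while they were present, a lookup by short name along these lines would have resolved them (a sketch; the short names follow the usual convention of stripping the TokenizerFactory suffix):

    Map<String,String> args = new HashMap<>();
    args.put("pattern", "[ \t\r\n]+");
    TokenizerFactory factory = TokenizerFactory.forName("simplePatternSplit", args);
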
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/325cbf00/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestRandomChains.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestRandomChains.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestRandomChains.java
index 3a58bdd..8953f9f 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestRandomChains.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestRandomChains.java
@@ -96,11 +96,7 @@ import org.apache.lucene.util.CharsRef;
 import org.apache.lucene.util.Rethrow;
 import org.apache.lucene.util.TestUtil;
 import org.apache.lucene.util.Version;
-import org.apache.lucene.util.automaton.Automaton;
-import org.apache.lucene.util.automaton.AutomatonTestUtil;
 import org.apache.lucene.util.automaton.CharacterRunAutomaton;
-import org.apache.lucene.util.automaton.Operations;
-import org.apache.lucene.util.automaton.RegExp;
 import org.junit.AfterClass;
 import org.junit.BeforeClass;
 import org.tartarus.snowball.SnowballProgram;
@@ -498,9 +494,6 @@ public class TestRandomChains extends BaseTokenStreamTestCase {
         if (random.nextBoolean()) return null;
         return DateFormat.getDateInstance(DateFormat.DEFAULT, randomLocale(random));
     });
-    put(Automaton.class, random -> {
-        return Operations.determinize(new RegExp(AutomatonTestUtil.randomRegexp(random()), RegExp.NONE).toAutomaton(), Operations.DEFAULT_MAX_DETERMINIZED_STATES);
-    });
   }};
   
   static final Set<Class<?>> allowedTokenizerArgs, allowedTokenFilterArgs, allowedCharFilterArgs;
@@ -510,8 +503,7 @@ public class TestRandomChains extends BaseTokenStreamTestCase {
     allowedTokenizerArgs.add(Reader.class);
     allowedTokenizerArgs.add(AttributeFactory.class);
     allowedTokenizerArgs.add(AttributeSource.class);
-    allowedTokenizerArgs.add(Automaton.class);
-
+    
     allowedTokenFilterArgs = Collections.newSetFromMap(new IdentityHashMap<Class<?>,Boolean>());
     allowedTokenFilterArgs.addAll(argProducers.keySet());
     allowedTokenFilterArgs.add(TokenStream.class);

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/325cbf00/lucene/analysis/common/src/test/org/apache/lucene/analysis/pattern/TestSimplePatternSplitTokenizer.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/pattern/TestSimplePatternSplitTokenizer.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/pattern/TestSimplePatternSplitTokenizer.java
deleted file mode 100644
index 5642c2b..0000000
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/pattern/TestSimplePatternSplitTokenizer.java
+++ /dev/null
@@ -1,273 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.lucene.analysis.pattern;
-
-import java.io.IOException;
-import java.io.StringReader;
-import java.util.ArrayList;
-import java.util.List;
-
-import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.BaseTokenStreamTestCase;
-import org.apache.lucene.analysis.CharFilter;
-import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.Tokenizer;
-import org.apache.lucene.analysis.charfilter.MappingCharFilter;
-import org.apache.lucene.analysis.charfilter.NormalizeCharMap;
-import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
-import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
-import org.apache.lucene.util.TestUtil;
-import org.apache.lucene.util.automaton.Automaton;
-
-public class TestSimplePatternSplitTokenizer extends BaseTokenStreamTestCase {
-
-  public void testGreedy() throws Exception {
-    Tokenizer t = new SimplePatternSplitTokenizer("(foo)+");
-    t.setReader(new StringReader("bar foofoo baz"));
-    assertTokenStreamContents(t,
-                              new String[] {"bar ", " baz"},
-                              new int[] {0, 10},
-                              new int[] {4, 14});
-  }
-
-  public void testBackToBack() throws Exception {
-    Tokenizer t = new SimplePatternSplitTokenizer("foo");
-    t.setReader(new StringReader("bar foofoo baz"));
-    assertTokenStreamContents(t,
-                              new String[] {"bar ", " baz"},
-                              new int[] {0, 10},
-                              new int[] {4, 14});
-  }
-
-  public void testBigLookahead() throws Exception {
-    StringBuilder b = new StringBuilder();
-    for(int i=0;i<100;i++) {
-      b.append('a');
-    }
-    b.append('b');
-    Tokenizer t = new SimplePatternSplitTokenizer(b.toString());
-    CharTermAttribute termAtt = t.getAttribute(CharTermAttribute.class);
-
-    b = new StringBuilder();
-    for(int i=0;i<200;i++) {
-      b.append('a');
-    }
-    t.setReader(new StringReader(b.toString()));
-    t.reset();
-    assertTrue(t.incrementToken());
-    assertEquals(b.toString(), termAtt.toString());
-    assertFalse(t.incrementToken());
-  }
-
-  public void testNoTokens() throws Exception {
-    Tokenizer t = new SimplePatternSplitTokenizer(".*");
-    CharTermAttribute termAtt = t.getAttribute(CharTermAttribute.class);
-    String s;
-    while (true) {
-      s = TestUtil.randomUnicodeString(random());
-      if (s.length() > 0) {
-        break;
-      }
-    }
-    t.setReader(new StringReader(s));
-    t.reset();
-    assertFalse(t.incrementToken());
-  }
-
-  public void testEmptyStringPatternNoMatch() throws Exception {
-    Tokenizer t = new SimplePatternSplitTokenizer("a*");
-    CharTermAttribute termAtt = t.getAttribute(CharTermAttribute.class);
-    t.setReader(new StringReader("bbb"));
-    t.reset();
-    assertTrue(t.incrementToken());
-    assertEquals("bbb", termAtt.toString());
-    assertFalse(t.incrementToken());
-  }
-
-  public void testSplitSingleCharWhitespace() throws Exception {
-    Tokenizer t = new SimplePatternSplitTokenizer("[ \t\r\n]");
-    CharTermAttribute termAtt = t.getAttribute(CharTermAttribute.class);
-    t.setReader(new StringReader("a \tb   c"));
-    assertTokenStreamContents(t,
-                              new String[] {"a", "b", "c"},
-                              new int[] {0, 3, 7},
-                              new int[] {1, 4, 8});
-  }
-
-  public void testSplitMultiCharWhitespace() throws Exception {
-    Tokenizer t = new SimplePatternSplitTokenizer("[ \t\r\n]*");
-    CharTermAttribute termAtt = t.getAttribute(CharTermAttribute.class);
-    t.setReader(new StringReader("a \tb   c"));
-    assertTokenStreamContents(t,
-                              new String[] {"a", "b", "c"},
-                              new int[] {0, 3, 7},
-                              new int[] {1, 4, 8});
-  }
-
-  public void testLeadingNonToken() throws Exception {
-    Tokenizer t = new SimplePatternSplitTokenizer("[ \t\r\n]*");
-    CharTermAttribute termAtt = t.getAttribute(CharTermAttribute.class);
-    t.setReader(new StringReader("    a c"));
-    assertTokenStreamContents(t,
-                              new String[] {"a", "c"},
-                              new int[] {4, 6},
-                              new int[] {5, 7});
-  }
-
-  public void testTrailingNonToken() throws Exception {
-    Tokenizer t = new SimplePatternSplitTokenizer("[ \t\r\n]*");
-    CharTermAttribute termAtt = t.getAttribute(CharTermAttribute.class);
-    t.setReader(new StringReader("a c   "));
-    assertTokenStreamContents(t,
-                              new String[] {"a", "c"},
-                              new int[] {0, 2},
-                              new int[] {1, 3});
-  }
-
-  public void testEmptyStringPatternOneMatch() throws Exception {
-    Tokenizer t = new SimplePatternSplitTokenizer("a*");
-    CharTermAttribute termAtt = t.getAttribute(CharTermAttribute.class);
-    t.setReader(new StringReader("bbab"));
-    assertTokenStreamContents(t,
-                              new String[] {"bb", "b"},
-                              new int[] {0, 3},
-                              new int[] {2, 4});
-  }
-
-  public void testEndOffset() throws Exception {
-    Tokenizer t = new SimplePatternSplitTokenizer("a+");
-    CharTermAttribute termAtt = t.getAttribute(CharTermAttribute.class);
-    OffsetAttribute offsetAtt = t.getAttribute(OffsetAttribute.class);
-    t.setReader(new StringReader("aaabbb"));
-    t.reset();
-    assertTrue(t.incrementToken());
-    assertEquals("bbb", termAtt.toString());
-    assertFalse(t.incrementToken());
-    t.end();
-    assertEquals(6, offsetAtt.endOffset());
-  }
-
-  public void testFixedToken() throws Exception {
-    Tokenizer t = new SimplePatternSplitTokenizer("aaaa");
-
-    t.setReader(new StringReader("aaaaaaaaaaaaaaa"));
-    assertTokenStreamContents(t,
-                              new String[] {"aaa"},
-                              new int[] {12},
-                              new int[] {15});
-  }
-
-  public void testBasic() throws Exception 
-  {
-    String[][] tests = {
-      // pattern        input                    output
-      { "--",          "aaa--bbb--ccc",         "aaa bbb ccc" },
-      { ":",           "aaa:bbb:ccc",           "aaa bbb ccc" },
-      { ":",           "boo:and:foo",           "boo and foo" },
-      { "o",           "boo:and:foo",           "b :and:f" },
-    };
-    
-    for(String[] test : tests) {     
-      TokenStream stream = new SimplePatternSplitTokenizer(test[0]);
-      ((Tokenizer)stream).setReader(new StringReader(test[1]));
-      String out = tsToString(stream);
-      assertEquals("pattern: "+test[0]+" with input: "+test[1], test[2], out);
-    } 
-  }
-
-  public void testNotDeterminized() throws Exception {
-    Automaton a = new Automaton();
-    int start = a.createState();
-    int mid1 = a.createState();
-    int mid2 = a.createState();
-    int end = a.createState();
-    a.setAccept(end, true);
-    a.addTransition(start, mid1, 'a', 'z');
-    a.addTransition(start, mid2, 'a', 'z');
-    a.addTransition(mid1, end, 'b');
-    a.addTransition(mid2, end, 'b');
-    expectThrows(IllegalArgumentException.class, () -> {new SimplePatternSplitTokenizer(a);});
-  }
-
-  public void testOffsetCorrection() throws Exception {
-    final String INPUT = "G&uuml;nther G&uuml;nther is here";
-
-    // create MappingCharFilter
-    List<String> mappingRules = new ArrayList<>();
-    mappingRules.add( "\"&uuml;\" => \"ü\"" );
-    NormalizeCharMap.Builder builder = new NormalizeCharMap.Builder();
-    builder.add("&uuml;", "ü");
-    NormalizeCharMap normMap = builder.build();
-    CharFilter charStream = new MappingCharFilter( normMap, new StringReader(INPUT));
-
-    // create SimplePatternSplitTokenizer
-    Tokenizer stream = new SimplePatternSplitTokenizer("Günther");
-    stream.setReader(charStream);
-    assertTokenStreamContents(stream,
-        new String[] { " ", " is here" },
-        new int[] { 12, 25 },
-        new int[] { 13, 33 },
-        INPUT.length());
-  }
-  
-  /** 
-   * TODO: rewrite tests not to use string comparison.
-   */
-  private static String tsToString(TokenStream in) throws IOException {
-    StringBuilder out = new StringBuilder();
-    CharTermAttribute termAtt = in.addAttribute(CharTermAttribute.class);
-    // extra safety to enforce that the state is not preserved and also
-    // assign bogus values
-    in.clearAttributes();
-    termAtt.setEmpty().append("bogusTerm");
-    in.reset();
-    while (in.incrementToken()) {
-      if (out.length() > 0) {
-        out.append(' ');
-      }
-      out.append(termAtt.toString());
-      in.clearAttributes();
-      termAtt.setEmpty().append("bogusTerm");
-    }
-
-    in.close();
-    return out.toString();
-  }
-  
-  /** blast some random strings through the analyzer */
-  public void testRandomStrings() throws Exception {
-    Analyzer a = new Analyzer() {
-      @Override
-      protected TokenStreamComponents createComponents(String fieldName) {
-        Tokenizer tokenizer = new SimplePatternSplitTokenizer("a");
-        return new TokenStreamComponents(tokenizer);
-      }    
-    };
-    checkRandomData(random(), a, 1000*RANDOM_MULTIPLIER);
-    a.close();
-    
-    Analyzer b = new Analyzer() {
-      @Override
-      protected TokenStreamComponents createComponents(String fieldName) {
-        Tokenizer tokenizer = new SimplePatternSplitTokenizer("a");
-        return new TokenStreamComponents(tokenizer);
-      }    
-    };
-    checkRandomData(random(), b, 1000*RANDOM_MULTIPLIER);
-    b.close();
-  }
-}

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/325cbf00/lucene/analysis/common/src/test/org/apache/lucene/analysis/pattern/TestSimplePatternTokenizer.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/pattern/TestSimplePatternTokenizer.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/pattern/TestSimplePatternTokenizer.java
deleted file mode 100644
index b566713..0000000
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/pattern/TestSimplePatternTokenizer.java
+++ /dev/null
@@ -1,218 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.lucene.analysis.pattern;
-
-import java.io.IOException;
-import java.io.StringReader;
-import java.util.ArrayList;
-import java.util.List;
-
-import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.BaseTokenStreamTestCase;
-import org.apache.lucene.analysis.CharFilter;
-import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.Tokenizer;
-import org.apache.lucene.analysis.charfilter.MappingCharFilter;
-import org.apache.lucene.analysis.charfilter.NormalizeCharMap;
-import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
-import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
-import org.apache.lucene.util.TestUtil;
-import org.apache.lucene.util.automaton.Automaton;
-
-public class TestSimplePatternTokenizer extends BaseTokenStreamTestCase {
-
-  public void testGreedy() throws Exception {
-    Tokenizer t = new SimplePatternTokenizer("(foo)+");
-    t.setReader(new StringReader("bar foofoo baz"));
-    assertTokenStreamContents(t,
-                              new String[] {"foofoo"},
-                              new int[] {4},
-                              new int[] {10});
-  }
-
-  public void testBigLookahead() throws Exception {
-    StringBuilder b = new StringBuilder();
-    for(int i=0;i<100;i++) {
-      b.append('a');
-    }
-    b.append('b');
-    Tokenizer t = new SimplePatternTokenizer(b.toString());
-
-    b = new StringBuilder();
-    for(int i=0;i<200;i++) {
-      b.append('a');
-    }
-    t.setReader(new StringReader(b.toString()));
-    t.reset();
-    assertFalse(t.incrementToken());
-  }
-
-  public void testOneToken() throws Exception {
-    Tokenizer t = new SimplePatternTokenizer(".*");
-    CharTermAttribute termAtt = t.getAttribute(CharTermAttribute.class);
-    String s;
-    while (true) {
-      s = TestUtil.randomUnicodeString(random());
-      if (s.length() > 0) {
-        break;
-      }
-    }
-    t.setReader(new StringReader(s));
-    t.reset();
-    assertTrue(t.incrementToken());
-    assertEquals(s, termAtt.toString());
-  }
-
-  public void testEmptyStringPatternNoMatch() throws Exception {
-    Tokenizer t = new SimplePatternTokenizer("a*");
-    t.setReader(new StringReader("bbb"));
-    t.reset();
-    assertFalse(t.incrementToken());
-  }
-
-  public void testEmptyStringPatternOneMatch() throws Exception {
-    Tokenizer t = new SimplePatternTokenizer("a*");
-    CharTermAttribute termAtt = t.getAttribute(CharTermAttribute.class);
-    t.setReader(new StringReader("bbab"));
-    t.reset();
-    assertTrue(t.incrementToken());
-    assertEquals("a", termAtt.toString());
-    assertFalse(t.incrementToken());
-  }
-
-  public void testEndOffset() throws Exception {
-    Tokenizer t = new SimplePatternTokenizer("a+");
-    CharTermAttribute termAtt = t.getAttribute(CharTermAttribute.class);
-    OffsetAttribute offsetAtt = t.getAttribute(OffsetAttribute.class);
-    t.setReader(new StringReader("aaabbb"));
-    t.reset();
-    assertTrue(t.incrementToken());
-    assertEquals("aaa", termAtt.toString());
-    assertFalse(t.incrementToken());
-    t.end();
-    assertEquals(6, offsetAtt.endOffset());
-  }
-
-  public void testFixedToken() throws Exception {
-    Tokenizer t = new SimplePatternTokenizer("aaaa");
-
-    t.setReader(new StringReader("aaaaaaaaaaaaaaa"));
-    assertTokenStreamContents(t,
-                              new String[] {"aaaa", "aaaa", "aaaa"},
-                              new int[] {0, 4, 8},
-                              new int[] {4, 8, 12});
-  }
-
-  public void testBasic() throws Exception  {
-    String qpattern = "\\'([^\\']+)\\'"; // get stuff between "'"
-    String[][] tests = {
-      // pattern        input                    output
-      { ":",           "boo:and:foo",           ": :" },
-      { qpattern,      "aaa 'bbb' 'ccc'",       "'bbb' 'ccc'" },
-    };
-    
-    for(String[] test : tests) {     
-      TokenStream stream = new SimplePatternTokenizer(test[0]);
-      ((Tokenizer)stream).setReader(new StringReader(test[1]));
-      String out = tsToString(stream);
-
-      assertEquals("pattern: "+test[0]+" with input: "+test[1], test[2], out);
-    } 
-  }
-
-  public void testNotDeterminized() throws Exception {
-    Automaton a = new Automaton();
-    int start = a.createState();
-    int mid1 = a.createState();
-    int mid2 = a.createState();
-    int end = a.createState();
-    a.setAccept(end, true);
-    a.addTransition(start, mid1, 'a', 'z');
-    a.addTransition(start, mid2, 'a', 'z');
-    a.addTransition(mid1, end, 'b');
-    a.addTransition(mid2, end, 'b');
-    expectThrows(IllegalArgumentException.class, () -> {new SimplePatternTokenizer(a);});
-  }
-
-  public void testOffsetCorrection() throws Exception {
-    final String INPUT = "G&uuml;nther G&uuml;nther is here";
-
-    // create MappingCharFilter
-    List<String> mappingRules = new ArrayList<>();
-    mappingRules.add( "\"&uuml;\" => \"ü\"" );
-    NormalizeCharMap.Builder builder = new NormalizeCharMap.Builder();
-    builder.add("&uuml;", "ü");
-    NormalizeCharMap normMap = builder.build();
-    CharFilter charStream = new MappingCharFilter( normMap, new StringReader(INPUT));
-
-    // create SimplePatternTokenizer
-    Tokenizer stream = new SimplePatternTokenizer("Günther");
-    stream.setReader(charStream);
-    assertTokenStreamContents(stream,
-        new String[] { "Günther", "Günther" },
-        new int[] { 0, 13 },
-        new int[] { 12, 25 },
-        INPUT.length());
-  }
-  
-  /** 
-   * TODO: rewrite tests not to use string comparison.
-   */
-  private static String tsToString(TokenStream in) throws IOException {
-    StringBuilder out = new StringBuilder();
-    CharTermAttribute termAtt = in.addAttribute(CharTermAttribute.class);
-    // extra safety to enforce that the state is not preserved and also
-    // assign bogus values
-    in.clearAttributes();
-    termAtt.setEmpty().append("bogusTerm");
-    in.reset();
-    while (in.incrementToken()) {
-      if (out.length() > 0) {
-        out.append(' ');
-      }
-      out.append(termAtt.toString());
-      in.clearAttributes();
-      termAtt.setEmpty().append("bogusTerm");
-    }
-
-    in.close();
-    return out.toString();
-  }
-  
-  /** blast some random strings through the analyzer */
-  public void testRandomStrings() throws Exception {
-    Analyzer a = new Analyzer() {
-      @Override
-      protected TokenStreamComponents createComponents(String fieldName) {
-        Tokenizer tokenizer = new SimplePatternTokenizer("a");
-        return new TokenStreamComponents(tokenizer);
-      }    
-    };
-    checkRandomData(random(), a, 1000*RANDOM_MULTIPLIER);
-    a.close();
-    
-    Analyzer b = new Analyzer() {
-      @Override
-      protected TokenStreamComponents createComponents(String fieldName) {
-        Tokenizer tokenizer = new SimplePatternTokenizer("a");
-        return new TokenStreamComponents(tokenizer);
-      }    
-    };
-    checkRandomData(random(), b, 1000*RANDOM_MULTIPLIER);
-    b.close();
-  }
-}

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/325cbf00/lucene/common-build.xml
----------------------------------------------------------------------
diff --git a/lucene/common-build.xml b/lucene/common-build.xml
index 7d64bc2..2a988eb 100644
--- a/lucene/common-build.xml
+++ b/lucene/common-build.xml
@@ -2348,7 +2348,7 @@ ${ant.project.name}.test.dependencies=${test.classpath.list}
   <property name="forbidden-sysout-excludes" value=""/>
   
   <target name="-install-forbidden-apis" unless="forbidden-apis.loaded" depends="ivy-availability-check,ivy-configure">
-    <ivy:cachepath organisation="de.thetaphi" module="forbiddenapis" revision="2.3"
+    <ivy:cachepath organisation="de.thetaphi" module="forbiddenapis" revision="2.2"
       inline="true" conf="default" transitive="true" pathid="forbidden-apis.classpath"/>
     <taskdef name="forbidden-apis" classname="de.thetaphi.forbiddenapis.ant.AntTask" classpathref="forbidden-apis.classpath"/>
     <property name="forbidden-apis.loaded" value="true"/>

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/325cbf00/lucene/core/src/java/org/apache/lucene/analysis/package-info.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/analysis/package-info.java b/lucene/core/src/java/org/apache/lucene/analysis/package-info.java
index a536f73..81858df 100644
--- a/lucene/core/src/java/org/apache/lucene/analysis/package-info.java
+++ b/lucene/core/src/java/org/apache/lucene/analysis/package-info.java
@@ -362,13 +362,11 @@
  * </p>
  * <ol>
  *   <li>Inhibiting phrase and proximity matches in sentence boundaries &ndash; for this, a tokenizer that 
- *       identifies a new sentence can add 1 to the position increment of the first token of the new sentence.</li>
- *   <li>Injecting synonyms &ndash; synonyms of a token should be created at the same position as the
- *       original token, and the output order of the original token and the injected synonym is undefined
- *       as long as they both leave from the same position.  As a result, all synonyms of a token are
- *       considered to appear in exactly the same position as that token, and that is how phrase and
- *       proximity searches see them.  For multi-token synonyms to work correctly, you should use
- *       {@code SynonymGraphFilter} at search time only.</li>
+ *     identifies a new sentence can add 1 to the position increment of the first token of the new sentence.</li>
+ *   <li>Injecting synonyms &ndash; here, synonyms of a token should be added after that token,
+ *     and their position increment should be set to 0.
+ *     As a result, all synonyms of a token are considered to appear in exactly the
+ *     same position as that token, and that is how phrase and proximity searches see them.</li>
  * </ol>
  * 
  * <h3>Token Position Length</h3>

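The reverted paragraph above describes the classic convention for injected synonyms: emit the synonym right after the original token with a position increment of 0. A minimal sketch of a filter following that convention; the "quick"/"fast" pair is a hypothetical example, and real synonym support should come from Lucene's synonym filters rather than hand-rolled code like this:

    import java.io.IOException;
    import org.apache.lucene.analysis.TokenFilter;
    import org.apache.lucene.analysis.TokenStream;
    import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
    import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;

    /** Hypothetical filter: after every "quick", injects "fast" at the same position. */
    final class QuickFastSynonymFilter extends TokenFilter {
      private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
      private final PositionIncrementAttribute posIncAtt = addAttribute(PositionIncrementAttribute.class);
      private State pending;

      QuickFastSynonymFilter(TokenStream input) {
        super(input);
      }

      @Override
      public boolean incrementToken() throws IOException {
        if (pending != null) {
          restoreState(pending);                // reuse the original token's offsets
          pending = null;
          termAtt.setEmpty().append("fast");
          posIncAtt.setPositionIncrement(0);    // 0 stacks it on the previous token
          return true;
        }
        if (input.incrementToken() == false) {
          return false;
        }
        if (termAtt.toString().equals("quick")) {
          pending = captureState();             // emit the synonym on the next call
        }
        return true;
      }

      @Override
      public void reset() throws IOException {
        super.reset();
        pending = null;
      }
    }
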
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/325cbf00/lucene/core/src/java/org/apache/lucene/util/automaton/ByteRunAutomaton.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/util/automaton/ByteRunAutomaton.java b/lucene/core/src/java/org/apache/lucene/util/automaton/ByteRunAutomaton.java
index abd5109..ca14bc6 100644
--- a/lucene/core/src/java/org/apache/lucene/util/automaton/ByteRunAutomaton.java
+++ b/lucene/core/src/java/org/apache/lucene/util/automaton/ByteRunAutomaton.java
@@ -27,9 +27,9 @@ public class ByteRunAutomaton extends RunAutomaton {
     this(a, false, Operations.DEFAULT_MAX_DETERMINIZED_STATES);
   }
   
-  /** expert: if isBinary is true, the input is already byte-based */
+  /** expert: if isBinary is true, the input is already byte-based */
   public ByteRunAutomaton(Automaton a, boolean isBinary, int maxDeterminizedStates) {
-    super(isBinary ? a : new UTF32ToUTF8().convert(a), 256, maxDeterminizedStates);
+    super(isBinary ? a : new UTF32ToUTF8().convert(a), 256, true, maxDeterminizedStates);
   }
 
   /**

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/325cbf00/lucene/core/src/java/org/apache/lucene/util/automaton/CharacterRunAutomaton.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/util/automaton/CharacterRunAutomaton.java b/lucene/core/src/java/org/apache/lucene/util/automaton/CharacterRunAutomaton.java
index 1a9c1c9..70ff9aa 100644
--- a/lucene/core/src/java/org/apache/lucene/util/automaton/CharacterRunAutomaton.java
+++ b/lucene/core/src/java/org/apache/lucene/util/automaton/CharacterRunAutomaton.java
@@ -36,7 +36,7 @@ public class CharacterRunAutomaton extends RunAutomaton {
    *   it then a TooComplexToDeterminizeException is thrown.
    */ 
   public CharacterRunAutomaton(Automaton a, int maxDeterminizedStates) {
-    super(a, Character.MAX_CODE_POINT+1, maxDeterminizedStates);
+    super(a, Character.MAX_CODE_POINT, false, maxDeterminizedStates);
   }
 
   /**

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/325cbf00/lucene/core/src/java/org/apache/lucene/util/automaton/Operations.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/util/automaton/Operations.java b/lucene/core/src/java/org/apache/lucene/util/automaton/Operations.java
index b673a82..718a908 100644
--- a/lucene/core/src/java/org/apache/lucene/util/automaton/Operations.java
+++ b/lucene/core/src/java/org/apache/lucene/util/automaton/Operations.java
@@ -29,24 +29,24 @@
 
 package org.apache.lucene.util.automaton;
 
-import java.util.ArrayDeque;
+import org.apache.lucene.util.ArrayUtil;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.BytesRefBuilder;
+import org.apache.lucene.util.IntsRef;
+import org.apache.lucene.util.IntsRefBuilder;
+import org.apache.lucene.util.RamUsageEstimator;
+
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.BitSet;
 import java.util.Collection;
 import java.util.HashMap;
 import java.util.HashSet;
+import java.util.LinkedList;
 import java.util.List;
 import java.util.Map;
 import java.util.Set;
 
-import org.apache.lucene.util.ArrayUtil;
-import org.apache.lucene.util.BytesRef;
-import org.apache.lucene.util.BytesRefBuilder;
-import org.apache.lucene.util.IntsRef;
-import org.apache.lucene.util.IntsRefBuilder;
-import org.apache.lucene.util.RamUsageEstimator;
-
 /**
  * Automata operations.
  * 
@@ -335,7 +335,7 @@ final public class Operations {
     Transition[][] transitions2 = a2.getSortedTransitions();
     Automaton c = new Automaton();
     c.createState();
-    ArrayDeque<StatePair> worklist = new ArrayDeque<>();
+    LinkedList<StatePair> worklist = new LinkedList<>();
     HashMap<StatePair,StatePair> newstates = new HashMap<>();
     StatePair p = new StatePair(0, 0, 0);
     worklist.add(p);
@@ -435,7 +435,7 @@ final public class Operations {
     // TODO: cutover to iterators instead
     Transition[][] transitions1 = a1.getSortedTransitions();
     Transition[][] transitions2 = a2.getSortedTransitions();
-    ArrayDeque<StatePair> worklist = new ArrayDeque<>();
+    LinkedList<StatePair> worklist = new LinkedList<>();
     HashSet<StatePair> visited = new HashSet<>();
     StatePair p = new StatePair(0, 0);
     worklist.add(p);
@@ -682,7 +682,7 @@ final public class Operations {
     // Create state 0:
     b.createState();
 
-    ArrayDeque<SortedIntSet.FrozenIntSet> worklist = new ArrayDeque<>();
+    LinkedList<SortedIntSet.FrozenIntSet> worklist = new LinkedList<>();
     Map<SortedIntSet.FrozenIntSet,Integer> newstate = new HashMap<>();
 
     worklist.add(initialset);
@@ -804,7 +804,7 @@ final public class Operations {
       return false;
     }
     
-    ArrayDeque<Integer> workList = new ArrayDeque<>();
+    LinkedList<Integer> workList = new LinkedList<>();
     BitSet seen = new BitSet(a.getNumStates());
     workList.add(0);
     seen.set(0);
@@ -907,7 +907,7 @@ final public class Operations {
     if (numStates == 0) {
       return live;
     }
-    ArrayDeque<Integer> workList = new ArrayDeque<>();
+    LinkedList<Integer> workList = new LinkedList<>();
     live.set(0);
     workList.add(0);
 
@@ -946,7 +946,7 @@ final public class Operations {
     }
     Automaton a2 = builder.finish();
 
-    ArrayDeque<Integer> workList = new ArrayDeque<>();
+    LinkedList<Integer> workList = new LinkedList<>();
     BitSet live = new BitSet(numStates);
     BitSet acceptBits = a.getAcceptStates();
     int s = 0;
@@ -1011,6 +1011,22 @@ final public class Operations {
   }
 
   /**
+   * Returns the index of the largest entry whose value is less than or equal
+   * to c, or 0 if there is no such entry.
+   */
+  static int findIndex(int c, int[] points) {
+    int a = 0;
+    int b = points.length;
+    while (b - a > 1) {
+      int d = (a + b) >>> 1;
+      if (points[d] > c) b = d;
+      else if (points[d] < c) a = d;
+      else return d;
+    }
+    return a;
+  }
+  
+  /**
    * Returns true if the language of this automaton is finite.  The
    * automaton must not have any dead states.
    */

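findIndex above is a plain binary search over the sorted start points of an automaton's character classes; it returns the index of the interval containing c. A standalone check of that contract, with the findIndex body copied verbatim and an assumed points array for illustration:

    public class FindIndexSketch {
      // Copied from Operations.findIndex above.
      static int findIndex(int c, int[] points) {
        int a = 0;
        int b = points.length;
        while (b - a > 1) {
          int d = (a + b) >>> 1;
          if (points[d] > c) b = d;
          else if (points[d] < c) a = d;
          else return d;
        }
        return a;
      }

      public static void main(String[] args) {
        // Assumed start points; classes are [0,'a'-1], ['a','z'], ['z'+1, ...).
        int[] points = {0, 'a', 'z' + 1};
        System.out.println(findIndex('m', points));      // 1: inside ['a','z']
        System.out.println(findIndex('a' - 1, points));  // 0: below 'a'
        System.out.println(findIndex('z' + 5, points));  // 2: above 'z'
      }
    }
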
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/325cbf00/lucene/core/src/java/org/apache/lucene/util/automaton/RunAutomaton.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/util/automaton/RunAutomaton.java b/lucene/core/src/java/org/apache/lucene/util/automaton/RunAutomaton.java
index 4f53926..1d64095 100644
--- a/lucene/core/src/java/org/apache/lucene/util/automaton/RunAutomaton.java
+++ b/lucene/core/src/java/org/apache/lucene/util/automaton/RunAutomaton.java
@@ -38,62 +38,13 @@ import java.util.Arrays;
  */
 public abstract class RunAutomaton {
   final Automaton automaton;
-  final int alphabetSize;
+  final int maxInterval;
   final int size;
   final boolean[] accept;
   final int[] transitions; // delta(state,c) = transitions[state*points.length +
                      // getCharClass(c)]
   final int[] points; // char interval start points
-  final int[] classmap; // map from char number to class
-  
-  /**
-   * Constructs a new <code>RunAutomaton</code> from a deterministic
-   * <code>Automaton</code>.
-   * 
-   * @param a an automaton
-   */
-  protected RunAutomaton(Automaton a, int alphabetSize) {
-    this(a, alphabetSize, Operations.DEFAULT_MAX_DETERMINIZED_STATES);
-  }
-
-  /**
-   * Constructs a new <code>RunAutomaton</code> from a deterministic
-   * <code>Automaton</code>.
-   * 
-   * @param a an automaton
-   * @param maxDeterminizedStates maximum number of states that can be created
-   *   while determinizing a
-   */
-  protected RunAutomaton(Automaton a, int alphabetSize, int maxDeterminizedStates) {
-    this.alphabetSize = alphabetSize;
-    a = Operations.determinize(a, maxDeterminizedStates);
-    this.automaton = a;
-    points = a.getStartPoints();
-    size = Math.max(1,a.getNumStates());
-    accept = new boolean[size];
-    transitions = new int[size * points.length];
-    Arrays.fill(transitions, -1);
-    for (int n=0;n<size;n++) {
-      accept[n] = a.isAccept(n);
-      for (int c = 0; c < points.length; c++) {
-        int dest = a.step(n, points[c]);
-        assert dest == -1 || dest < size;
-        transitions[n * points.length + c] = dest;
-      }
-    }
-
-    /*
-     * Set alphabet table for optimal run performance.
-     */
-    classmap = new int[Math.min(256, alphabetSize)];
-    int i = 0;
-    for (int j = 0; j < classmap.length; j++) {
-      if (i + 1 < points.length && j == points[i + 1]) {
-        i++;
-      }
-      classmap[j] = i;
-    }
-  }
+  final int[] classmap; // map from char number to class
   
   /**
    * Returns a string representation of this automaton.
@@ -112,7 +63,7 @@ public abstract class RunAutomaton {
           int min = points[j];
           int max;
           if (j + 1 < points.length) max = (points[j + 1] - 1);
-          else max = alphabetSize;
+          else max = maxInterval;
           b.append(" ");
           Automaton.appendCharString(min, b);
           if (min != max) {
@@ -152,20 +103,64 @@ public abstract class RunAutomaton {
    * Gets character class of given codepoint
    */
   final int getCharClass(int c) {
+    return Operations.findIndex(c, points);
+  }
 
-    // binary search
-    int a = 0;
-    int b = points.length;
-    while (b - a > 1) {
-      int d = (a + b) >>> 1;
-      if (points[d] > c) b = d;
-      else if (points[d] < c) a = d;
-      else return d;
-    }
-    return a;
+  /**
+   * Constructs a new <code>RunAutomaton</code> from a deterministic
+   * <code>Automaton</code>.
+   * 
+   * @param a an automaton
+   */
+  public RunAutomaton(Automaton a, int maxInterval, boolean tableize) {
+    this(a, maxInterval, tableize, Operations.DEFAULT_MAX_DETERMINIZED_STATES);
   }
 
   /**
+   * Constructs a new <code>RunAutomaton</code> from a deterministic
+   * <code>Automaton</code>.
+   * 
+   * @param a an automaton
+   * @param maxDeterminizedStates maximum number of states that can be created
+   *   while determinizing a
+   */
+  public RunAutomaton(Automaton a, int maxInterval, boolean tableize,
+      int maxDeterminizedStates) {
+    this.maxInterval = maxInterval;
+    a = Operations.determinize(a, maxDeterminizedStates);
+    this.automaton = a;
+    points = a.getStartPoints();
+    size = Math.max(1,a.getNumStates());
+    accept = new boolean[size];
+    transitions = new int[size * points.length];
+    Arrays.fill(transitions, -1);
+    for (int n=0;n<size;n++) {
+      accept[n] = a.isAccept(n);
+      for (int c = 0; c < points.length; c++) {
+        int dest = a.step(n, points[c]);
+        assert dest == -1 || dest < size;
+        transitions[n * points.length + c] = dest;
+      }
+    }
+
+    /*
+     * Set alphabet table for optimal run performance.
+     */
+    if (tableize) {
+      classmap = new int[maxInterval + 1];
+      int i = 0;
+      for (int j = 0; j <= maxInterval; j++) {
+        if (i + 1 < points.length && j == points[i + 1]) {
+          i++;
+        }
+        classmap[j] = i;
+      }
+    } else {
+      classmap = null;
+    }
+  }
+  
+  /**
    * Returns the state obtained by reading the given char from the given state.
    * Returns -1 if not obtaining any such state. (If the original
    * <code>Automaton</code> had no dead states, -1 is returned here if and only
@@ -173,8 +168,7 @@ public abstract class RunAutomaton {
    * transition function.)
    */
   public final int step(int state, int c) {
-    assert c < alphabetSize;
-    if (c >= classmap.length) {
+    if (classmap == null) {
       return transitions[state * points.length + getCharClass(c)];
     } else {
       return transitions[state * points.length + classmap[c]];
@@ -185,7 +179,7 @@ public abstract class RunAutomaton {
   public int hashCode() {
     final int prime = 31;
     int result = 1;
-    result = prime * result + alphabetSize;
+    result = prime * result + maxInterval;
     result = prime * result + points.length;
     result = prime * result + size;
     return result;
@@ -197,7 +191,7 @@ public abstract class RunAutomaton {
     if (obj == null) return false;
     if (getClass() != obj.getClass()) return false;
     RunAutomaton other = (RunAutomaton) obj;
-    if (alphabetSize != other.alphabetSize) return false;
+    if (maxInterval != other.maxInterval) return false;
     if (size != other.size) return false;
     if (!Arrays.equals(points, other.points)) return false;
     if (!Arrays.equals(accept, other.accept)) return false;

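The tableize flag restored above trades memory for lookup speed: when true (as ByteRunAutomaton passes), step() reads the precomputed classmap; when false (as CharacterRunAutomaton passes), it falls back to the binary search in getCharClass. None of this is visible to callers; a minimal usage sketch using RegExp, which lives in the same package:

    import org.apache.lucene.util.automaton.Automaton;
    import org.apache.lucene.util.automaton.CharacterRunAutomaton;
    import org.apache.lucene.util.automaton.RegExp;

    public class RunAutomatonSketch {
      public static void main(String[] args) {
        Automaton a = new RegExp("ab*c").toAutomaton();
        // Determinizes internally; matching is then a state-table walk over the input.
        CharacterRunAutomaton run = new CharacterRunAutomaton(a);
        System.out.println(run.run("abbbc")); // true
        System.out.println(run.run("abd"));   // false
      }
    }
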
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/325cbf00/lucene/demo/ivy.xml
----------------------------------------------------------------------
diff --git a/lucene/demo/ivy.xml b/lucene/demo/ivy.xml
index 5dd7e74..050f0a5 100644
--- a/lucene/demo/ivy.xml
+++ b/lucene/demo/ivy.xml
@@ -17,7 +17,7 @@
    under the License.    
 -->
 <ivy-module version="2.0">
-  <info organisation="org.apache.lucene" module="demo"/>
+  <info organisation="org.apache.lucene" module="core-demo"/>
   <configurations defaultconfmapping="compile->master">
     <conf name="compile" transitive="false"/>
   </configurations>

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/325cbf00/lucene/sandbox/src/java/org/apache/lucene/search/TermAutomatonScorer.java
----------------------------------------------------------------------
diff --git a/lucene/sandbox/src/java/org/apache/lucene/search/TermAutomatonScorer.java b/lucene/sandbox/src/java/org/apache/lucene/search/TermAutomatonScorer.java
index 7769712..0a1755c 100644
--- a/lucene/sandbox/src/java/org/apache/lucene/search/TermAutomatonScorer.java
+++ b/lucene/sandbox/src/java/org/apache/lucene/search/TermAutomatonScorer.java
@@ -367,7 +367,7 @@ class TermAutomatonScorer extends Scorer {
 
   static class TermRunAutomaton extends RunAutomaton {
     public TermRunAutomaton(Automaton a, int termCount) {
-      super(a, termCount);
+      super(a, termCount, true);
     }
   }
 

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/325cbf00/lucene/test-framework/ivy.xml
----------------------------------------------------------------------
diff --git a/lucene/test-framework/ivy.xml b/lucene/test-framework/ivy.xml
index a51716c..a71c25a 100644
--- a/lucene/test-framework/ivy.xml
+++ b/lucene/test-framework/ivy.xml
@@ -17,7 +17,7 @@
    under the License.    
 -->
 <ivy-module version="2.0">
-  <info organisation="org.apache.lucene" module="test-framework"/>
+  <info organisation="org.apache.lucene" module="core-test-framework"/>
 
   <configurations defaultconfmapping="compile->master">
     <conf name="compile" transitive="false"/>

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/325cbf00/lucene/test-framework/src/java/org/apache/lucene/index/BasePointsFormatTestCase.java
----------------------------------------------------------------------
diff --git a/lucene/test-framework/src/java/org/apache/lucene/index/BasePointsFormatTestCase.java b/lucene/test-framework/src/java/org/apache/lucene/index/BasePointsFormatTestCase.java
index ca68d2e..4cd6534 100644
--- a/lucene/test-framework/src/java/org/apache/lucene/index/BasePointsFormatTestCase.java
+++ b/lucene/test-framework/src/java/org/apache/lucene/index/BasePointsFormatTestCase.java
@@ -40,7 +40,6 @@ import org.apache.lucene.util.Bits;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.IOUtils;
 import org.apache.lucene.util.NumericUtils;
-import org.apache.lucene.util.Rethrow;
 import org.apache.lucene.util.StringHelper;
 import org.apache.lucene.util.TestUtil;
 
@@ -233,7 +232,16 @@ public abstract class BasePointsFormatTestCase extends BaseIndexFileFormatTestCa
           dir.setRandomIOExceptionRateOnOpen(0.05);
           verify(dir, docValues, null, numDims, numBytesPerDim, true);
         } catch (IllegalStateException ise) {
-          done = handlePossiblyFakeException(ise);
+          if (ise.getMessage().contains("this writer hit an unrecoverable error")) {
+            Throwable cause = ise.getCause();
+            if (cause != null && cause.getMessage().contains("a random IOException")) {
+              done = true;
+            } else {
+              throw ise;
+            }
+          } else {
+            throw ise;
+          }
         } catch (AssertionError ae) {
           if (ae.getMessage() != null && ae.getMessage().contains("does not exist; files=")) {
             // OK: likely we threw the random IOExc when IW was asserting the commit files exist
@@ -245,28 +253,23 @@ public abstract class BasePointsFormatTestCase extends BaseIndexFileFormatTestCa
           // This just means we got a too-small maxMB for the maxPointsInLeafNode; just retry w/ more heap
           assertTrue(iae.getMessage().contains("either increase maxMBSortInHeap or decrease maxPointsInLeafNode"));
         } catch (IOException ioe) {
-          done = handlePossiblyFakeException(ioe);
+          Throwable ex = ioe;
+          while (ex != null) {
+            String message = ex.getMessage();
+            if (message != null && (message.contains("a random IOException") || message.contains("background merge hit exception"))) {
+              done = true;
+              break;
+            }
+            ex = ex.getCause();            
+          }
+          if (done == false) {
+            throw ioe;
+          }
         }
       }
     }
   }
 
-  // TODO: merge w/ BaseIndexFileFormatTestCase.handleFakeIOException
-  private boolean handlePossiblyFakeException(Exception e) {
-    Throwable ex = e;
-    while (ex != null) {
-      String message = ex.getMessage();
-      if (message != null && (message.contains("a random IOException") || message.contains("background merge hit exception"))) {
-        return true;
-      }
-      ex = ex.getCause();            
-    }
-    Rethrow.rethrow(e);
-
-    // dead code yet javac disagrees:
-    return false;
-  }
-
   public void testMultiValued() throws Exception {
     int numBytesPerDim = TestUtil.nextInt(random(), 2, PointValues.MAX_NUM_BYTES);
     int numDims = TestUtil.nextInt(random(), 1, PointValues.MAX_DIMENSIONS);

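The revert above re-inlines the cause-chain walk that handlePossiblyFakeException had factored out; the pattern itself is ordinary Java. A standalone sketch, with the two message fragments taken from the test code above and the wrapped exceptions invented for illustration:

    public class CauseChainSketch {
      /** True if any throwable in the cause chain carries one of the expected messages. */
      static boolean isExpected(Throwable t) {
        for (Throwable ex = t; ex != null; ex = ex.getCause()) {
          String message = ex.getMessage();
          if (message != null && (message.contains("a random IOException")
              || message.contains("background merge hit exception"))) {
            return true;
          }
        }
        return false;
      }

      public static void main(String[] args) {
        Exception inner = new java.io.IOException("a random IOException");
        Exception outer = new RuntimeException("wrapper", inner);
        System.out.println(isExpected(outer));                           // true: found one level down
        System.out.println(isExpected(new RuntimeException("real bug"))); // false: rethrow instead
      }
    }
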
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/325cbf00/lucene/tools/ivy.xml
----------------------------------------------------------------------
diff --git a/lucene/tools/ivy.xml b/lucene/tools/ivy.xml
index 1fa2974..614aa8e 100644
--- a/lucene/tools/ivy.xml
+++ b/lucene/tools/ivy.xml
@@ -17,7 +17,7 @@
    under the License.    
 -->
 <ivy-module version="2.0">
-  <info organisation="org.apache.lucene" module="tools"/>
+  <info organisation="org.apache.lucene" module="core-tools"/>
   <configurations defaultconfmapping="compile->master">
     <conf name="compile" transitive="false"/>
   </configurations>

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/325cbf00/lucene/tools/src/java/org/apache/lucene/dependencies/GetMavenDependenciesTask.java
----------------------------------------------------------------------
diff --git a/lucene/tools/src/java/org/apache/lucene/dependencies/GetMavenDependenciesTask.java b/lucene/tools/src/java/org/apache/lucene/dependencies/GetMavenDependenciesTask.java
index 5b2f0b8..45a9d11 100644
--- a/lucene/tools/src/java/org/apache/lucene/dependencies/GetMavenDependenciesTask.java
+++ b/lucene/tools/src/java/org/apache/lucene/dependencies/GetMavenDependenciesTask.java
@@ -54,7 +54,6 @@ import java.util.SortedMap;
 import java.util.SortedSet;
 import java.util.TreeMap;
 import java.util.TreeSet;
-import java.util.function.Consumer;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 
@@ -119,7 +118,6 @@ public class GetMavenDependenciesTask extends Task {
   private final DocumentBuilder documentBuilder;
   private File ivyCacheDir;
   private Pattern internalJarPattern;
-  private Map<String,String> ivyModuleInfo;
 
 
   /**
@@ -191,8 +189,6 @@ public class GetMavenDependenciesTask extends Task {
     internalJarPattern = Pattern.compile(".*(lucene|solr)([^/]*?)-"
         + Pattern.quote(getProject().getProperty("version")) + "\\.jar");
 
-    ivyModuleInfo = getIvyModuleInfo(ivyXmlResources, documentBuilder, xpath);
-
     setInternalDependencyProperties();            // side-effect: all modules' internal deps are recorded
     setExternalDependencyProperties();            // side-effect: all modules' external deps are recorded
     setGrandparentDependencyManagementProperty(); // uses deps recorded in above two methods
@@ -224,56 +220,10 @@ public class GetMavenDependenciesTask extends Task {
   }
 
   /**
-   * Visits all ivy.xml files and collects module and organisation attributes into a map.
-   */
-  private static Map<String,String> getIvyModuleInfo(Resources ivyXmlResources,
-      DocumentBuilder documentBuilder, XPath xpath) {
-    Map<String,String> ivyInfoModuleToOrganisation = new HashMap<String,String>();
-    traverseIvyXmlResources(ivyXmlResources, new Consumer<File>() {
-      @Override
-      public void accept(File f) {
-        try {
-          Document document = documentBuilder.parse(f);
-          {
-            String infoPath = "/ivy-module/info";
-            NodeList infos = (NodeList)xpath.evaluate(infoPath, document, XPathConstants.NODESET);
-            for (int infoNum = 0 ; infoNum < infos.getLength() ; ++infoNum) {
-              Element infoElement = (Element)infos.item(infoNum);
-              String infoOrg = infoElement.getAttribute("organisation");
-              String infoOrgSuffix = infoOrg.substring(infoOrg.lastIndexOf('.')+1);
-              String infoModule = infoElement.getAttribute("module");
-              String module = infoOrgSuffix+"-"+infoModule;
-              ivyInfoModuleToOrganisation.put(module, infoOrg);
-            }
-          }
-        } catch (XPathExpressionException | IOException | SAXException e) {
-          throw new RuntimeException(e);
-        }
-      }
-    });
-    return ivyInfoModuleToOrganisation;
-  }
-
-  /**
    * Collects external dependencies from each ivy.xml file and sets
    * external dependency properties to be inserted into modules' POMs. 
    */
   private void setExternalDependencyProperties() {
-    traverseIvyXmlResources(ivyXmlResources, new Consumer<File>() {
-      @Override
-      public void accept(File f) {
-        try {
-        collectExternalDependenciesFromIvyXmlFile(f);
-        } catch (XPathExpressionException | IOException | SAXException e) {
-          throw new RuntimeException(e);
-        }
-      }
-    });
-    addSharedExternalDependencies();
-    setExternalDependencyXmlProperties();
-  }
-
-  private static void traverseIvyXmlResources(Resources ivyXmlResources, Consumer<File> ivyXmlFileConsumer) {
     @SuppressWarnings("unchecked")
     Iterator<Resource> iter = (Iterator<Resource>)ivyXmlResources.iterator();
     while (iter.hasNext()) {
@@ -288,13 +238,15 @@ public class GetMavenDependenciesTask extends Task {
 
       File ivyXmlFile = ((FileResource)resource).getFile();
       try {
-        ivyXmlFileConsumer.accept(ivyXmlFile);
+        collectExternalDependenciesFromIvyXmlFile(ivyXmlFile);
       } catch (BuildException e) {
         throw e;
       } catch (Exception e) {
         throw new BuildException("Exception reading file " + ivyXmlFile.getPath() + ": " + e, e);
       }
     }
+    addSharedExternalDependencies();
+    setExternalDependencyXmlProperties();
   }
 
   /**
@@ -444,7 +396,7 @@ public class GetMavenDependenciesTask extends Task {
           }
         }
       }
-      String groupId = ivyModuleInfo.get(artifactId);
+      String groupId = "org.apache." + artifactId.substring(0, artifactId.indexOf('-'));
       appendDependencyXml(builder, groupId, artifactId, "      ", "${project.version}", false, false, null, exclusions);
     }
   }
@@ -629,7 +581,7 @@ public class GetMavenDependenciesTask extends Task {
             continue;  // skip external (/(test-)lib/), and non-jar and unwanted (self) internal deps
           }
           String artifactId = dependencyToArtifactId(newPropertyKey, dependency);
-          String groupId = ivyModuleInfo.get(artifactId);
+          String groupId = "org.apache." + artifactId.substring(0, artifactId.indexOf('-'));
           String coordinate = groupId + ':' + artifactId;
           sortedDeps.add(coordinate);
         }

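The last two hunks replace the ivy.xml-derived groupId lookup with the older scheme of deriving the groupId from the artifactId prefix. A minimal check of that derivation, with assumed artifactIds for illustration:

    public class GroupIdSketch {
      // Same derivation as the reverted lines: organisation from the artifactId prefix.
      static String groupId(String artifactId) {
        return "org.apache." + artifactId.substring(0, artifactId.indexOf('-'));
      }

      public static void main(String[] args) {
        System.out.println(groupId("lucene-core-test-framework")); // org.apache.lucene
        System.out.println(groupId("solr-extraction"));            // org.apache.solr
      }
    }
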
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/325cbf00/solr/CHANGES.txt
----------------------------------------------------------------------
diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index 6cd5291..4a8766e 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -74,12 +74,20 @@ Optimizations
 * SOLR-9584: Support Solr being proxied with another endpoint than default /solr, by using relative links
   in AdminUI javascripts (Yun Jie Zhou via janhoy)
 
+* SOLR-9996: Unstored IntPointField returns Long type (Ishan Chattopadhyaya)
+
 * SOLR-5944: In-place updates of Numeric DocValues. To leverage this, the _version_ field and the updated
   field must both be stored=false, indexed=false, docValues=true. (Ishan Chattopadhyaya, hossman, noble,
   shalin, yonik)
 
 Other Changes
 ----------------------
+* SOLR-8396: Add support for PointFields in Solr (Ishan Chattopadhyaya, Tomás Fernández Löbbe)
+
+* SOLR-10011: Refactor PointField & TrieField to now have a common base class, NumericFieldType. The
+  TrieField.TrieTypes and PointField.PointTypes are now consolidated to NumericFieldType.NumberType. This
+  refactoring also fixes a bug whereby PointFields were not using DocValues for range queries for
+  indexed=false, docValues=true fields. (Ishan Chattopadhyaya, Tomás Fernández Löbbe)
 
 ==================  6.5.0 ==================
 
@@ -131,10 +139,6 @@ New Features
 * SOLR-9903: Stop interrupting the update executor on shutdown, it can cause graceful shutdowns to put replicas into Leader 
   Initiated Recovery among other undesirable things. (Mark Miller)
 
-* SOLR-8396: Add support for PointFields in Solr (Ishan Chattopadhyaya, Tomás Fernández Löbbe)
-
-* SOLR-9987: Add support for MultiValued DocValues in PointFields using SortedNumericDocValues (Tomás Fernández Löbbe)
-
 Bug Fixes
 ----------------------
 
@@ -157,8 +161,6 @@ Bug Fixes
 
 * SOLR-10063: CoreContainer shutdown has race condition that can cause a hang on shutdown. (Mark Miller)
 
-* SOLR-10104: BlockDirectoryCache release hooks do not work with multiple directories. (Mike Drob, Mark Miller)
-
 Optimizations
 ----------------------
 
@@ -195,13 +197,6 @@ Other Changes
 
 * SOLR-10072: The test TestSelectiveWeightCreation appears to be unreliable. (Michael Nilsson via Mark Miller)
 
-* SOLR-9996: Unstored IntPointField returns Long type (Ishan Chattopadhyaya)
-
-* SOLR-10011: Refactor PointField & TrieField to now have a common base class, NumericFieldType. The
-  TrieField.TrieTypes and PointField.PointTypes are now consolidated to NumericFieldType.NumberType. This
-  refactoring also fixes a bug whereby PointFields were not using DocValues for range queries for
-  indexed=false, docValues=true fields. (Ishan Chattopadhyaya, Tomás Fernández Löbbe)
-
 ==================  6.4.1 ==================
 
 Consult the LUCENE_CHANGES.txt file for additional, low level, changes in this release.

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/325cbf00/solr/contrib/extraction/ivy.xml
----------------------------------------------------------------------
diff --git a/solr/contrib/extraction/ivy.xml b/solr/contrib/extraction/ivy.xml
index 42cee8a..5cf19a1 100644
--- a/solr/contrib/extraction/ivy.xml
+++ b/solr/contrib/extraction/ivy.xml
@@ -17,7 +17,7 @@
    under the License.    
 -->
 <ivy-module version="2.0">
-  <info organisation="org.apache.solr" module="cell"/>
+  <info organisation="org.apache.solr" module="extraction"/>
   <configurations defaultconfmapping="compile->master;test->master">
     <conf name="compile" transitive="false"/>
     <conf name="test" transitive="false"/>

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/325cbf00/solr/core/src/java/org/apache/solr/cloud/Overseer.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/cloud/Overseer.java b/solr/core/src/java/org/apache/solr/cloud/Overseer.java
index 3a8aa3e..0b74ccb 100644
--- a/solr/core/src/java/org/apache/solr/cloud/Overseer.java
+++ b/solr/core/src/java/org/apache/solr/cloud/Overseer.java
@@ -490,6 +490,7 @@ public class Overseer implements Closeable {
     this.zkController = zkController;
     this.stats = new Stats();
     this.config = config;
+    assert ObjectReleaseTracker.track(this);
   }
   
   public synchronized void start(String id) {
@@ -520,7 +521,6 @@ public class Overseer implements Closeable {
     updaterThread.start();
     ccThread.start();
     arfoThread.start();
-    assert ObjectReleaseTracker.track(this);
   }
 
   public Stats getStats() {

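The revert moves the ObjectReleaseTracker.track assert from start() back into the constructor, so every constructed Overseer is expected to be released. The idiom is assert-only bookkeeping: both track and release return true, so they cost nothing at runtime unless assertions are enabled. A hedged sketch of the idiom; the Resource class here is hypothetical, while ObjectReleaseTracker is Solr's org.apache.solr.common.util.ObjectReleaseTracker:

    import org.apache.solr.common.util.ObjectReleaseTracker;

    /** Hypothetical resource showing the track/release idiom used by Overseer. */
    class Resource implements java.io.Closeable {
      Resource() {
        assert ObjectReleaseTracker.track(this);   // no-op unless -ea is set
      }

      @Override
      public void close() {
        assert ObjectReleaseTracker.release(this); // flags leaked objects in tests
      }
    }
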
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/325cbf00/solr/core/src/java/org/apache/solr/handler/component/RealTimeGetComponent.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/handler/component/RealTimeGetComponent.java b/solr/core/src/java/org/apache/solr/handler/component/RealTimeGetComponent.java
index 123abea..4be643e 100644
--- a/solr/core/src/java/org/apache/solr/handler/component/RealTimeGetComponent.java
+++ b/solr/core/src/java/org/apache/solr/handler/component/RealTimeGetComponent.java
@@ -688,12 +688,7 @@ public class RealTimeGetComponent extends SearchComponent
 
         if (sf != null && sf.multiValued()) {
           List<Object> vals = new ArrayList<>();
-          if (f.fieldType().docValuesType() == DocValuesType.SORTED_NUMERIC) {
-            // SORTED_NUMERICS store sortable bits version of the value, need to retrieve the original
-            vals.add(sf.getType().toObject(f));
-          } else {
-            vals.add( f );
-          }
+          vals.add( f );
           out.setField( f.name(), vals );
         }
         else{