Posted to commits@lucene.apache.org by da...@apache.org on 2018/09/20 23:59:31 UTC

[25/29] lucene-solr:jira/http2: LUCENE-8498: Remove LowerCaseTokenizer

LUCENE-8498: Remove LowerCaseTokenizer


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/c0d29759
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/c0d29759
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/c0d29759

Branch: refs/heads/jira/http2
Commit: c0d2975970d3de8f5056a20504dec1431d455ab1
Parents: 52bdcf6
Author: Alan Woodward <ro...@apache.org>
Authored: Sat Sep 15 16:56:27 2018 +0100
Committer: Alan Woodward <ro...@apache.org>
Committed: Thu Sep 20 11:57:05 2018 +0100

----------------------------------------------------------------------
 lucene/CHANGES.txt                              |   3 +
 lucene/MIGRATE.txt                              |  11 ++
 .../analysis/core/LowerCaseTokenizer.java       |  72 -----------
 .../core/LowerCaseTokenizerFactory.java         |  75 ------------
 .../lucene/analysis/core/SimpleAnalyzer.java    |   4 +-
 .../lucene/analysis/core/StopAnalyzer.java      |   6 +-
 .../lucene/analysis/util/CharTokenizer.java     |  86 +------------
 ...apache.lucene.analysis.util.TokenizerFactory |   1 -
 .../analysis/br/TestBrazilianAnalyzer.java      |   7 +-
 .../lucene/analysis/core/TestAnalyzers.java     |   8 --
 .../analysis/custom/TestCustomAnalyzer.java     |  13 +-
 .../lucene/analysis/de/TestGermanAnalyzer.java  |   8 +-
 .../standard/TestStandardFactories.java         |  16 ---
 .../analysis/util/TestCharTokenizers.java       | 122 +++----------------
 .../extraction/solr/collection1/conf/schema.xml |  15 ---
 .../collection1/conf/schema-copyfield-test.xml  |  14 ---
 .../solr/collection1/conf/schema-folding.xml    |   3 +-
 .../solr/collection1/conf/schema-hash.xml       |  13 --
 .../collection1/conf/schema-required-fields.xml |  14 ---
 .../solr/collection1/conf/schema-rest.xml       |   9 +-
 .../solr/collection1/conf/schema-sql.xml        |  13 --
 .../collection1/conf/schema-tokenizer-test.xml  |  11 +-
 .../test-files/solr/collection1/conf/schema.xml |  15 +--
 .../solr/collection1/conf/schema12.xml          |   9 +-
 .../solr/collection1/conf/schema15.xml          |  14 ---
 .../solr/collection1/conf/schemasurround.xml    |  14 ---
 .../schema/TestFieldCollectionResource.java     |  10 +-
 .../solr/rest/schema/TestFieldTypeResource.java |   3 +-
 .../solr/util/TestMaxTokenLenTokenizer.java     |  20 +--
 .../solrj/solr/collection1/conf/schema-sql.xml  |  13 --
 .../solrj/solr/collection1/conf/schema.xml      |  13 --
 .../solr/configsets/streaming/conf/schema.xml   |  11 --
 32 files changed, 78 insertions(+), 568 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/c0d29759/lucene/CHANGES.txt
----------------------------------------------------------------------
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index bd8c616..70badd8 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -81,6 +81,9 @@ API Changes
 * LUCENE-8352: TokenStreamComponents is now final, and can take a Consumer<Reader>
   in its constructor (Mark Harwood, Alan Woodward, Adrien Grand)
 
+* LUCENE-8498: LowerCaseTokenizer has been removed, and CharTokenizer no longer
+  takes a normalizer function. (Alan Woodward)
+
 Changes in Runtime Behavior
 
 * LUCENE-8333: Switch MoreLikeThis.setMaxDocFreqPct to use maxDoc instead of

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/c0d29759/lucene/MIGRATE.txt
----------------------------------------------------------------------
diff --git a/lucene/MIGRATE.txt b/lucene/MIGRATE.txt
index 6008956..1b56b64 100644
--- a/lucene/MIGRATE.txt
+++ b/lucene/MIGRATE.txt
@@ -129,3 +129,14 @@ Most code should just require recompilation, though possibly requiring some adde
 Instead of overriding TokenStreamComponents#setReader() to customise analyzer
 initialisation, you should now pass a Consumer&lt;Reader> instance to the
 TokenStreamComponents constructor.
+
+## LowerCaseTokenizer and LowerCaseTokenizerFactory have been removed ##
+
+LowerCaseTokenizer and LowerCaseTokenizerFactory combined tokenization and filtering in a
+way that broke token normalization, so they have been removed. Instead, use a
+LetterTokenizer followed by a LowerCaseFilter.
+
+## CharTokenizer no longer takes a normalizer function ##
+
+CharTokenizer now only performs tokenization. To perform any type of filtering,
+use a TokenFilter chain as you would with any other Tokenizer.
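
As a quick reference for the two notes above, here is a minimal sketch of the replacement
wiring in an Analyzer, mirroring the SimpleAnalyzer change in this commit (the class name
and the normalize() override are illustrative and not part of the patch):

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.LowerCaseFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.LetterTokenizer;

public final class MigratedLowerCaseAnalyzer extends Analyzer {
  @Override
  protected TokenStreamComponents createComponents(String fieldName) {
    // Before (removed): return new TokenStreamComponents(new LowerCaseTokenizer());
    Tokenizer tokenizer = new LetterTokenizer();
    TokenStream filtered = new LowerCaseFilter(tokenizer);
    return new TokenStreamComponents(tokenizer, filtered);
  }

  @Override
  protected TokenStream normalize(String fieldName, TokenStream in) {
    // Lowercasing now lives in a TokenFilter, so the same step can also be applied
    // during query-time normalization, which tokenizer-side lowercasing could not.
    return new LowerCaseFilter(in);
  }
}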

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/c0d29759/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/LowerCaseTokenizer.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/LowerCaseTokenizer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/LowerCaseTokenizer.java
deleted file mode 100644
index 26b8747..0000000
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/LowerCaseTokenizer.java
+++ /dev/null
@@ -1,72 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.lucene.analysis.core;
-
-
-import org.apache.lucene.analysis.Tokenizer;
-import org.apache.lucene.util.AttributeFactory;
-
-/**
- * LowerCaseTokenizer performs the function of LetterTokenizer
- * and LowerCaseFilter together.  It divides text at non-letters and converts
- * them to lower case.  While it is functionally equivalent to the combination
- * of LetterTokenizer and LowerCaseFilter, there is a performance advantage
- * to doing the two tasks at once, hence this (redundant) implementation.
- * <P>
- * Note: this does a decent job for most European languages, but does a terrible
- * job for some Asian languages, where words are not separated by spaces.
- * </p>
- */
-public final class LowerCaseTokenizer extends LetterTokenizer {
-  
-  /**
-   * Construct a new LowerCaseTokenizer.
-   */
-  public LowerCaseTokenizer() {
-  }
-
-  /**
-   * Construct a new LowerCaseTokenizer using a given
-   * {@link org.apache.lucene.util.AttributeFactory}.
-   *
-   * @param factory
-   *          the attribute factory to use for this {@link Tokenizer}
-   */
-  public LowerCaseTokenizer(AttributeFactory factory) {
-    super(factory);
-  }
-  
-  /**
-   * Construct a new LowerCaseTokenizer using a given
-   * {@link org.apache.lucene.util.AttributeFactory}.
-   *
-   * @param factory the attribute factory to use for this {@link Tokenizer}
-   * @param maxTokenLen maximum token length the tokenizer will emit. 
-   *        Must be greater than 0 and less than MAX_TOKEN_LENGTH_LIMIT (1024*1024)
-   * @throws IllegalArgumentException if maxTokenLen is invalid.
-   */
-  public LowerCaseTokenizer(AttributeFactory factory, int maxTokenLen) {
-    super(factory, maxTokenLen);
-  }
-  
-  /** Converts char to lower case
-   * {@link Character#toLowerCase(int)}.*/
-  @Override
-  protected int normalize(int c) {
-    return Character.toLowerCase(c);
-  }
-}

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/c0d29759/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/LowerCaseTokenizerFactory.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/LowerCaseTokenizerFactory.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/LowerCaseTokenizerFactory.java
deleted file mode 100644
index 44e2742..0000000
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/LowerCaseTokenizerFactory.java
+++ /dev/null
@@ -1,75 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.lucene.analysis.core;
-
-
-import org.apache.lucene.analysis.util.AbstractAnalysisFactory;
-import org.apache.lucene.analysis.util.CharTokenizer;
-import org.apache.lucene.analysis.util.MultiTermAwareComponent;
-import org.apache.lucene.analysis.util.TokenizerFactory;
-import org.apache.lucene.util.AttributeFactory;
-
-import java.util.HashMap;
-import java.util.Map;
-
-import static org.apache.lucene.analysis.standard.StandardTokenizer.MAX_TOKEN_LENGTH_LIMIT;
-
-/**
- * Factory for {@link LowerCaseTokenizer}.
- * <pre class="prettyprint">
- * &lt;fieldType name="text_lwrcase" class="solr.TextField" positionIncrementGap="100"&gt;
- * &lt;analyzer&gt;
- * &lt;tokenizer class="solr.LowerCaseTokenizerFactory" maxTokenLen="256"/&gt;
- * &lt;/analyzer&gt;
- * &lt;/fieldType&gt;</pre>
- * <p>
- * Options:
- * <ul>
- * <li>maxTokenLen: max token length, should be greater than 0 and less than MAX_TOKEN_LENGTH_LIMIT (1024*1024).
- *     It is rare to need to change this
- * else {@link CharTokenizer}::DEFAULT_MAX_WORD_LEN</li>
- * </ul>
- */
-public class LowerCaseTokenizerFactory extends TokenizerFactory implements MultiTermAwareComponent {
-  private final int maxTokenLen;
-
-  /**
-   * Creates a new LowerCaseTokenizerFactory
-   */
-  public LowerCaseTokenizerFactory(Map<String, String> args) {
-    super(args);
-    maxTokenLen = getInt(args, "maxTokenLen", CharTokenizer.DEFAULT_MAX_WORD_LEN);
-    if (maxTokenLen > MAX_TOKEN_LENGTH_LIMIT || maxTokenLen <= 0) {
-      throw new IllegalArgumentException("maxTokenLen must be greater than 0 and less than " + MAX_TOKEN_LENGTH_LIMIT + " passed: " + maxTokenLen);
-    }
-    if (!args.isEmpty()) {
-      throw new IllegalArgumentException("Unknown parameters: " + args);
-    }
-  }
-
-  @Override
-  public LowerCaseTokenizer create(AttributeFactory factory) {
-    return new LowerCaseTokenizer(factory, maxTokenLen);
-  }
-
-  @Override
-  public AbstractAnalysisFactory getMultiTermComponent() {
-    Map<String,String> map = new HashMap<>(getOriginalArgs());
-    map.remove("maxTokenLen"); //removing "maxTokenLen" argument for LowerCaseFilterFactory init
-    return new LowerCaseFilterFactory(map);
-  }
-}

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/c0d29759/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/SimpleAnalyzer.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/SimpleAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/SimpleAnalyzer.java
index 6e0f2f0..3fcb92c 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/SimpleAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/SimpleAnalyzer.java
@@ -20,6 +20,7 @@ package org.apache.lucene.analysis.core;
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.LowerCaseFilter;
 import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.Tokenizer;
 
 /** An {@link Analyzer} that filters {@link LetterTokenizer} 
  *  with {@link LowerCaseFilter} 
@@ -34,7 +35,8 @@ public final class SimpleAnalyzer extends Analyzer {
   
   @Override
   protected TokenStreamComponents createComponents(final String fieldName) {
-    return new TokenStreamComponents(new LowerCaseTokenizer());
+    Tokenizer tokenizer = new LetterTokenizer();
+    return new TokenStreamComponents(tokenizer, new LowerCaseFilter(tokenizer));
   }
 
   @Override

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/c0d29759/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/StopAnalyzer.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/StopAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/StopAnalyzer.java
index cf7ecdd..dde74c0 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/StopAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/StopAnalyzer.java
@@ -60,13 +60,13 @@ public final class StopAnalyzer extends StopwordAnalyzerBase {
    * used to tokenize all the text in the provided {@link Reader}.
    * 
    * @return {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
-   *         built from a {@link LowerCaseTokenizer} filtered with
+   *         built from a {@link LetterTokenizer} filtered with
    *         {@link StopFilter}
    */
   @Override
   protected TokenStreamComponents createComponents(String fieldName) {
-    final Tokenizer source = new LowerCaseTokenizer();
-    return new TokenStreamComponents(source, new StopFilter(source, stopwords));
+    final Tokenizer source = new LetterTokenizer();
+    return new TokenStreamComponents(source, new StopFilter(new LowerCaseFilter(source), stopwords));
   }
 
   @Override

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/c0d29759/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharTokenizer.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharTokenizer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharTokenizer.java
index ff9d6ff..092d25d 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharTokenizer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharTokenizer.java
@@ -20,14 +20,11 @@ package org.apache.lucene.analysis.util;
 import java.io.IOException;
 import java.util.Objects;
 import java.util.function.IntPredicate;
-import java.util.function.IntUnaryOperator;
 
-import org.apache.lucene.analysis.CharacterUtils.CharacterBuffer;
 import org.apache.lucene.analysis.CharacterUtils;
-import org.apache.lucene.analysis.LowerCaseFilter;
+import org.apache.lucene.analysis.CharacterUtils.CharacterBuffer;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.core.LetterTokenizer;
-import org.apache.lucene.analysis.core.LowerCaseTokenizer;
 import org.apache.lucene.analysis.core.WhitespaceTokenizer;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
@@ -107,48 +104,12 @@ public abstract class CharTokenizer extends Tokenizer {
    * </pre>
    */
   public static CharTokenizer fromTokenCharPredicate(AttributeFactory factory, final IntPredicate tokenCharPredicate) {
-    return fromTokenCharPredicate(factory, tokenCharPredicate, IntUnaryOperator.identity());
-  }
-  
-  /**
-   * Creates a new instance of CharTokenizer using a custom predicate, supplied as method reference or lambda expression.
-   * The predicate should return {@code true} for all valid token characters.
-   * This factory also takes a function to normalize chars, e.g., lowercasing them, supplied as method reference or lambda expression.
-   * <p>
-   * This factory is intended to be used with lambdas or method references. E.g., an elegant way
-   * to create an instance which behaves exactly as {@link LowerCaseTokenizer} is:
-   * <pre class="prettyprint lang-java">
-   * Tokenizer tok = CharTokenizer.fromTokenCharPredicate(Character::isLetter, Character::toLowerCase);
-   * </pre>
-   */
-  public static CharTokenizer fromTokenCharPredicate(final IntPredicate tokenCharPredicate, final IntUnaryOperator normalizer) {
-    return fromTokenCharPredicate(DEFAULT_TOKEN_ATTRIBUTE_FACTORY, tokenCharPredicate, normalizer);
-  }
-  
-  /**
-   * Creates a new instance of CharTokenizer with the supplied attribute factory using a custom predicate, supplied as method reference or lambda expression.
-   * The predicate should return {@code true} for all valid token characters.
-   * This factory also takes a function to normalize chars, e.g., lowercasing them, supplied as method reference or lambda expression.
-   * <p>
-   * This factory is intended to be used with lambdas or method references. E.g., an elegant way
-   * to create an instance which behaves exactly as {@link LowerCaseTokenizer} is:
-   * <pre class="prettyprint lang-java">
-   * Tokenizer tok = CharTokenizer.fromTokenCharPredicate(factory, Character::isLetter, Character::toLowerCase);
-   * </pre>
-   */
-  public static CharTokenizer fromTokenCharPredicate(AttributeFactory factory, final IntPredicate tokenCharPredicate, final IntUnaryOperator normalizer) {
     Objects.requireNonNull(tokenCharPredicate, "predicate must not be null.");
-    Objects.requireNonNull(normalizer, "normalizer must not be null");
     return new CharTokenizer(factory) {
       @Override
       protected boolean isTokenChar(int c) {
         return tokenCharPredicate.test(c);
       }
-
-      @Override
-      protected int normalize(int c) {
-        return normalizer.applyAsInt(c);
-      }
     };
   }
   
@@ -167,7 +128,7 @@ public abstract class CharTokenizer extends Tokenizer {
   public static CharTokenizer fromSeparatorCharPredicate(final IntPredicate separatorCharPredicate) {
     return fromSeparatorCharPredicate(DEFAULT_TOKEN_ATTRIBUTE_FACTORY, separatorCharPredicate);
   }
-  
+
   /**
    * Creates a new instance of CharTokenizer with the supplied attribute factory using a custom predicate, supplied as method reference or lambda expression.
    * The predicate should return {@code true} for all valid token separator characters.
@@ -179,37 +140,7 @@ public abstract class CharTokenizer extends Tokenizer {
    * </pre>
    */
   public static CharTokenizer fromSeparatorCharPredicate(AttributeFactory factory, final IntPredicate separatorCharPredicate) {
-    return fromSeparatorCharPredicate(factory, separatorCharPredicate, IntUnaryOperator.identity());
-  }
-  
-  /**
-   * Creates a new instance of CharTokenizer using a custom predicate, supplied as method reference or lambda expression.
-   * The predicate should return {@code true} for all valid token separator characters.
-   * This factory also takes a function to normalize chars, e.g., lowercasing them, supplied as method reference or lambda expression.
-   * <p>
-   * This factory is intended to be used with lambdas or method references. E.g., an elegant way
-   * to create an instance which behaves exactly as the combination {@link WhitespaceTokenizer} and {@link LowerCaseFilter} is:
-   * <pre class="prettyprint lang-java">
-   * Tokenizer tok = CharTokenizer.fromSeparatorCharPredicate(Character::isWhitespace, Character::toLowerCase);
-   * </pre>
-   */
-  public static CharTokenizer fromSeparatorCharPredicate(final IntPredicate separatorCharPredicate, final IntUnaryOperator normalizer) {
-    return fromSeparatorCharPredicate(DEFAULT_TOKEN_ATTRIBUTE_FACTORY, separatorCharPredicate, normalizer);
-  }
-  
-  /**
-   * Creates a new instance of CharTokenizer with the supplied attribute factory using a custom predicate.
-   * The predicate should return {@code true} for all valid token separator characters.
-   * This factory also takes a function to normalize chars, e.g., lowercasing them, supplied as method reference or lambda expression.
-   * <p>
-   * This factory is intended to be used with lambdas or method references. E.g., an elegant way
-   * to create an instance which behaves exactly as {@link WhitespaceTokenizer} and {@link LowerCaseFilter} is:
-   * <pre class="prettyprint lang-java">
-   * Tokenizer tok = CharTokenizer.fromSeparatorCharPredicate(factory, Character::isWhitespace, Character::toLowerCase);
-   * </pre>
-   */
-  public static CharTokenizer fromSeparatorCharPredicate(AttributeFactory factory, final IntPredicate separatorCharPredicate, final IntUnaryOperator normalizer) {
-    return fromTokenCharPredicate(factory, separatorCharPredicate.negate(), normalizer);
+    return fromTokenCharPredicate(factory, separatorCharPredicate.negate());
   }
   
   private int offset = 0, bufferIndex = 0, dataLen = 0, finalOffset = 0;
@@ -230,15 +161,6 @@ public abstract class CharTokenizer extends Tokenizer {
    */
   protected abstract boolean isTokenChar(int c);
 
-  /**
-   * Called on each token character to normalize it before it is added to the
-   * token. The default implementation does nothing. Subclasses may use this to,
-   * e.g., lowercase tokens.
-   */
-  protected int normalize(int c) {
-    return c;
-  }
-
   @Override
   public final boolean incrementToken() throws IOException {
     clearAttributes();
@@ -276,7 +198,7 @@ public abstract class CharTokenizer extends Tokenizer {
           buffer = termAtt.resizeBuffer(2+length); // make sure a supplementary fits in the buffer
         }
         end += charCount;
-        length += Character.toChars(normalize(c), buffer, length); // buffer it, normalized
+        length += Character.toChars(c, buffer, length); // buffer it
         if (length >= maxTokenLen) { // buffer overflow! make sure to check for >= surrogate pair could break == test
           break;
         }
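
For the factory methods that remain, a hedged sketch of the predicate-only usage plus a
filter chain (the demo class name and sample input are illustrative; the predicate call
itself matches the updated tests later in this patch):

import java.io.StringReader;

import org.apache.lucene.analysis.LowerCaseFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.util.CharTokenizer;

public class CharTokenizerPredicateDemo {
  public static void main(String[] args) throws Exception {
    // Previously: CharTokenizer.fromTokenCharPredicate(Character::isLetter, Character::toLowerCase)
    // Now: tokenize with the predicate only, and lowercase with a TokenFilter.
    Tokenizer tok = CharTokenizer.fromTokenCharPredicate(Character::isLetter);
    tok.setReader(new StringReader("Tokenizer(Test)"));
    try (TokenStream stream = new LowerCaseFilter(tok)) {
      CharTermAttribute term = stream.addAttribute(CharTermAttribute.class);
      stream.reset();
      while (stream.incrementToken()) {
        System.out.println(term.toString()); // prints "tokenizer" then "test"
      }
      stream.end();
    }
  }
}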

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/c0d29759/lucene/analysis/common/src/resources/META-INF/services/org.apache.lucene.analysis.util.TokenizerFactory
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/resources/META-INF/services/org.apache.lucene.analysis.util.TokenizerFactory b/lucene/analysis/common/src/resources/META-INF/services/org.apache.lucene.analysis.util.TokenizerFactory
index 4b37eb8..e8bceff 100644
--- a/lucene/analysis/common/src/resources/META-INF/services/org.apache.lucene.analysis.util.TokenizerFactory
+++ b/lucene/analysis/common/src/resources/META-INF/services/org.apache.lucene.analysis.util.TokenizerFactory
@@ -15,7 +15,6 @@
 
 org.apache.lucene.analysis.core.KeywordTokenizerFactory
 org.apache.lucene.analysis.core.LetterTokenizerFactory
-org.apache.lucene.analysis.core.LowerCaseTokenizerFactory
 org.apache.lucene.analysis.core.WhitespaceTokenizerFactory
 org.apache.lucene.analysis.ngram.EdgeNGramTokenizerFactory
 org.apache.lucene.analysis.ngram.NGramTokenizerFactory

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/c0d29759/lucene/analysis/common/src/test/org/apache/lucene/analysis/br/TestBrazilianAnalyzer.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/br/TestBrazilianAnalyzer.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/br/TestBrazilianAnalyzer.java
index 550a62a..5096ee8 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/br/TestBrazilianAnalyzer.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/br/TestBrazilianAnalyzer.java
@@ -25,7 +25,8 @@ import org.apache.lucene.analysis.BaseTokenStreamTestCase;
 import org.apache.lucene.analysis.CharArraySet;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.core.KeywordTokenizer;
-import org.apache.lucene.analysis.core.LowerCaseTokenizer;
+import org.apache.lucene.analysis.core.LetterTokenizer;
+import org.apache.lucene.analysis.core.LowerCaseFilter;
 import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
 
 /**
@@ -147,9 +148,9 @@ public class TestBrazilianAnalyzer extends BaseTokenStreamTestCase {
   public void testWithKeywordAttribute() throws IOException {
     CharArraySet set = new CharArraySet(1, true);
     set.add("Brasília");
-    Tokenizer tokenizer = new LowerCaseTokenizer();
+    Tokenizer tokenizer = new LetterTokenizer();
     tokenizer.setReader(new StringReader("Brasília Brasilia"));
-    BrazilianStemFilter filter = new BrazilianStemFilter(new SetKeywordMarkerFilter(tokenizer, set));
+    BrazilianStemFilter filter = new BrazilianStemFilter(new SetKeywordMarkerFilter(new LowerCaseFilter(tokenizer), set));
 
     assertTokenStreamContents(filter, new String[] { "brasília", "brasil" });
   }

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/c0d29759/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestAnalyzers.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestAnalyzers.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestAnalyzers.java
index b7fc18b..8133b7a 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestAnalyzers.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestAnalyzers.java
@@ -216,14 +216,6 @@ public class TestAnalyzers extends BaseTokenStreamTestCase {
     int length = highSurEndingLower.length();
     assertEquals('\ud801', termBuffer[length - 1]);
   }
-  
-  public void testLowerCaseTokenizer() throws IOException {
-    StringReader reader = new StringReader("Tokenizer \ud801\udc1ctest");
-    LowerCaseTokenizer tokenizer = new LowerCaseTokenizer();
-    tokenizer.setReader(reader);
-    assertTokenStreamContents(tokenizer, new String[] { "tokenizer",
-        "\ud801\udc44test" });
-  }
 
   public void testWhitespaceTokenizer() throws IOException {
     StringReader reader = new StringReader("Tokenizer \ud801\udc1ctest");

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/c0d29759/lucene/analysis/common/src/test/org/apache/lucene/analysis/custom/TestCustomAnalyzer.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/custom/TestCustomAnalyzer.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/custom/TestCustomAnalyzer.java
index 1fa59d1..a4e1ac5 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/custom/TestCustomAnalyzer.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/custom/TestCustomAnalyzer.java
@@ -31,9 +31,8 @@ import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.charfilter.HTMLStripCharFilterFactory;
 import org.apache.lucene.analysis.charfilter.MappingCharFilterFactory;
+import org.apache.lucene.analysis.core.LetterTokenizer;
 import org.apache.lucene.analysis.core.LowerCaseFilterFactory;
-import org.apache.lucene.analysis.core.LowerCaseTokenizer;
-import org.apache.lucene.analysis.core.LowerCaseTokenizerFactory;
 import org.apache.lucene.analysis.core.StopFilterFactory;
 import org.apache.lucene.analysis.core.WhitespaceTokenizerFactory;
 import org.apache.lucene.analysis.miscellaneous.ASCIIFoldingFilterFactory;
@@ -419,7 +418,7 @@ public class TestCustomAnalyzer extends BaseTokenStreamTestCase {
 
     @Override
     public Tokenizer create(AttributeFactory factory) {
-      return new LowerCaseTokenizer(factory);
+      return new LetterTokenizer(factory);
     }
 
   }
@@ -500,14 +499,6 @@ public class TestCustomAnalyzer extends BaseTokenStreamTestCase {
         .build();
     assertEquals(new BytesRef("e f c"), analyzer.normalize("dummy", "a b c"));
   }
-  
-  /** test normalize where the TokenizerFactory returns a filter to normalize the text */
-  public void testNormalizationWithLowerCaseTokenizer() throws IOException {
-    CustomAnalyzer analyzer1 = CustomAnalyzer.builder()
-        .withTokenizer(LowerCaseTokenizerFactory.class, Collections.emptyMap())
-        .build();
-    assertEquals(new BytesRef("abc"), analyzer1.normalize("dummy", "ABC"));
-  }
 
   public void testConditions() throws IOException {
     CustomAnalyzer analyzer = CustomAnalyzer.builder()

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/c0d29759/lucene/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanAnalyzer.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanAnalyzer.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanAnalyzer.java
index 4c52c0e..3d8be31 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanAnalyzer.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanAnalyzer.java
@@ -23,7 +23,9 @@ import java.io.StringReader;
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
 import org.apache.lucene.analysis.CharArraySet;
-import org.apache.lucene.analysis.core.LowerCaseTokenizer;
+import org.apache.lucene.analysis.LowerCaseFilter;
+import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.core.LetterTokenizer;
 import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
 
 public class TestGermanAnalyzer extends BaseTokenStreamTestCase {
@@ -38,10 +40,10 @@ public class TestGermanAnalyzer extends BaseTokenStreamTestCase {
   public void testWithKeywordAttribute() throws IOException {
     CharArraySet set = new CharArraySet( 1, true);
     set.add("fischen");
-    final LowerCaseTokenizer in = new LowerCaseTokenizer();
+    final Tokenizer in = new LetterTokenizer();
     in.setReader(new StringReader("Fischen Trinken"));
     GermanStemFilter filter = new GermanStemFilter(
-        new SetKeywordMarkerFilter(in, set));
+        new SetKeywordMarkerFilter(new LowerCaseFilter(in), set));
     assertTokenStreamContents(filter, new String[] { "fischen", "trink" });
   }
 

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/c0d29759/lucene/analysis/common/src/test/org/apache/lucene/analysis/standard/TestStandardFactories.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/standard/TestStandardFactories.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/standard/TestStandardFactories.java
index 00bc7c6..3f3d5c2 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/standard/TestStandardFactories.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/standard/TestStandardFactories.java
@@ -126,17 +126,6 @@ public class TestStandardFactories extends BaseTokenStreamFactoryTestCase {
   }
   
   /**
-   * Test LowerCaseTokenizerFactory
-   */
-  public void testLowerCaseTokenizer() throws Exception {
-    Reader reader = new StringReader("What's this thing do?");
-    Tokenizer stream = tokenizerFactory("LowerCase").create(newAttributeFactory());
-    stream.setReader(reader);
-    assertTokenStreamContents(stream, 
-        new String[] { "what", "s", "this", "thing", "do" });
-  }
-  
-  /**
    * Ensure the ASCIIFoldingFilterFactory works
    */
   public void testASCIIFolding() throws Exception {
@@ -169,11 +158,6 @@ public class TestStandardFactories extends BaseTokenStreamFactoryTestCase {
     assertTrue(expected.getMessage().contains("Unknown parameters"));
     
     expected = expectThrows(IllegalArgumentException.class, () -> {
-      tokenizerFactory("LowerCase", "bogusArg", "bogusValue");
-    });
-    assertTrue(expected.getMessage().contains("Unknown parameters"));
-    
-    expected = expectThrows(IllegalArgumentException.class, () -> {
       tokenFilterFactory("ASCIIFolding", "bogusArg", "bogusValue");
     });
     assertTrue(expected.getMessage().contains("Unknown parameters"));

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/c0d29759/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestCharTokenizers.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestCharTokenizers.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestCharTokenizers.java
index 4596608..2fcda4f 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestCharTokenizers.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestCharTokenizers.java
@@ -21,16 +21,12 @@ import java.io.IOException;
 import java.io.StringReader;
 import java.util.Locale;
 
-import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
-import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.LowerCaseFilter;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.core.KeywordTokenizer;
 import org.apache.lucene.analysis.core.LetterTokenizer;
-import org.apache.lucene.analysis.core.LowerCaseTokenizer;
 import org.apache.lucene.analysis.core.WhitespaceTokenizer;
-import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
-import org.apache.lucene.util.TestUtil;
 
 
 /**
@@ -54,9 +50,9 @@ public class TestCharTokenizers extends BaseTokenStreamTestCase {
     }
     // internal buffer size is 1024 make sure we have a surrogate pair right at the border
     builder.insert(1023, "\ud801\udc1c");
-    Tokenizer tokenizer = new LowerCaseTokenizer(newAttributeFactory());
+    Tokenizer tokenizer = new LetterTokenizer(newAttributeFactory());
     tokenizer.setReader(new StringReader(builder.toString()));
-    assertTokenStreamContents(tokenizer, builder.toString().toLowerCase(Locale.ROOT).split(" "));
+    assertTokenStreamContents(new LowerCaseFilter(tokenizer), builder.toString().toLowerCase(Locale.ROOT).split(" "));
   }
   
   /*
@@ -72,9 +68,9 @@ public class TestCharTokenizers extends BaseTokenStreamTestCase {
         builder.append("a");
       }
       builder.append("\ud801\udc1cabc");
-      Tokenizer tokenizer = new LowerCaseTokenizer(newAttributeFactory());
+      Tokenizer tokenizer = new LetterTokenizer(newAttributeFactory());
       tokenizer.setReader(new StringReader(builder.toString()));
-      assertTokenStreamContents(tokenizer, new String[] {builder.toString().toLowerCase(Locale.ROOT)});
+      assertTokenStreamContents(new LowerCaseFilter(tokenizer), new String[] {builder.toString().toLowerCase(Locale.ROOT)});
     }
   }
   
@@ -87,9 +83,9 @@ public class TestCharTokenizers extends BaseTokenStreamTestCase {
     for (int i = 0; i < 255; i++) {
       builder.append("A");
     }
-    Tokenizer tokenizer = new LowerCaseTokenizer(newAttributeFactory());
+    Tokenizer tokenizer = new LetterTokenizer(newAttributeFactory());
     tokenizer.setReader(new StringReader(builder.toString() + builder.toString()));
-    assertTokenStreamContents(tokenizer, new String[] {builder.toString().toLowerCase(Locale.ROOT), builder.toString().toLowerCase(Locale.ROOT)});
+    assertTokenStreamContents(new LowerCaseFilter(tokenizer), new String[] {builder.toString().toLowerCase(Locale.ROOT), builder.toString().toLowerCase(Locale.ROOT)});
   }
 
   /*
@@ -101,14 +97,14 @@ public class TestCharTokenizers extends BaseTokenStreamTestCase {
     for (int i = 0; i < 100; i++) {
       builder.append("A");
     }
-    Tokenizer tokenizer = new LowerCaseTokenizer(newAttributeFactory(), 100);
+    Tokenizer tokenizer = new LetterTokenizer(newAttributeFactory(), 100);
     // Tricky, passing two copies of the string to the reader....
     tokenizer.setReader(new StringReader(builder.toString() + builder.toString()));
-    assertTokenStreamContents(tokenizer, new String[]{builder.toString().toLowerCase(Locale.ROOT), 
+    assertTokenStreamContents(new LowerCaseFilter(tokenizer), new String[]{builder.toString().toLowerCase(Locale.ROOT),
         builder.toString().toLowerCase(Locale.ROOT) });
 
     Exception e = expectThrows(IllegalArgumentException.class, () ->
-        new LowerCaseTokenizer(newAttributeFactory(), -1));
+        new LetterTokenizer(newAttributeFactory(), -1));
     assertEquals("maxTokenLen must be greater than 0 and less than 1048576 passed: -1", e.getMessage());
 
     tokenizer = new LetterTokenizer(newAttributeFactory(), 100);
@@ -134,16 +130,16 @@ public class TestCharTokenizers extends BaseTokenStreamTestCase {
     }
 
     e = expectThrows(IllegalArgumentException.class, () ->
-        new LowerCaseTokenizer(newAttributeFactory(), 0));
+        new LetterTokenizer(newAttributeFactory(), 0));
     assertEquals("maxTokenLen must be greater than 0 and less than 1048576 passed: 0", e.getMessage());
 
     e = expectThrows(IllegalArgumentException.class, () ->
-        new LowerCaseTokenizer(newAttributeFactory(), 10_000_000));
+        new LetterTokenizer(newAttributeFactory(), 10_000_000));
     assertEquals("maxTokenLen must be greater than 0 and less than 1048576 passed: 10000000", e.getMessage());
 
-    tokenizer = new LowerCaseTokenizer(newAttributeFactory(), 4800);
+    tokenizer = new LetterTokenizer(newAttributeFactory(), 4800);
     tokenizer.setReader(new StringReader(builder.toString()));
-    assertTokenStreamContents(tokenizer, new String[]{builder.toString().toLowerCase(Locale.ROOT)});
+    assertTokenStreamContents(new LowerCaseFilter(tokenizer), new String[]{builder.toString().toLowerCase(Locale.ROOT)});
 
 
     e = expectThrows(IllegalArgumentException.class, () ->
@@ -195,87 +191,9 @@ public class TestCharTokenizers extends BaseTokenStreamTestCase {
       builder.append("A");
     }
     builder.append("\ud801\udc1c");
-    Tokenizer tokenizer = new LowerCaseTokenizer(newAttributeFactory());
+    Tokenizer tokenizer = new LetterTokenizer(newAttributeFactory());
     tokenizer.setReader(new StringReader(builder.toString() + builder.toString()));
-    assertTokenStreamContents(tokenizer, new String[] {builder.toString().toLowerCase(Locale.ROOT), builder.toString().toLowerCase(Locale.ROOT)});
-  }
-  
-  // LUCENE-3642: normalize SMP->BMP and check that offsets are correct
-  public void testCrossPlaneNormalization() throws IOException {
-    Analyzer analyzer = new Analyzer() {
-      @Override
-      protected TokenStreamComponents createComponents(String fieldName) {
-        Tokenizer tokenizer = new LetterTokenizer(newAttributeFactory()) {
-          @Override
-          protected int normalize(int c) {
-            if (c > 0xffff) {
-              return 'δ';
-            } else {
-              return c;
-            }
-          }
-        };
-        return new TokenStreamComponents(tokenizer, tokenizer);
-      }
-    };
-    int num = 1000 * RANDOM_MULTIPLIER;
-    for (int i = 0; i < num; i++) {
-      String s = TestUtil.randomUnicodeString(random());
-      try (TokenStream ts = analyzer.tokenStream("foo", s)) {
-        ts.reset();
-        OffsetAttribute offsetAtt = ts.addAttribute(OffsetAttribute.class);
-        while (ts.incrementToken()) {
-          String highlightedText = s.substring(offsetAtt.startOffset(), offsetAtt.endOffset());
-          for (int j = 0, cp = 0; j < highlightedText.length(); j += Character.charCount(cp)) {
-            cp = highlightedText.codePointAt(j);
-            assertTrue("non-letter:" + Integer.toHexString(cp), Character.isLetter(cp));
-          }
-        }
-        ts.end();
-      }
-    }
-    // just for fun
-    checkRandomData(random(), analyzer, num);
-    analyzer.close();
-  }
-  
-  // LUCENE-3642: normalize BMP->SMP and check that offsets are correct
-  public void testCrossPlaneNormalization2() throws IOException {
-    Analyzer analyzer = new Analyzer() {
-      @Override
-      protected TokenStreamComponents createComponents(String fieldName) {
-        Tokenizer tokenizer = new LetterTokenizer(newAttributeFactory()) {
-          @Override
-          protected int normalize(int c) {
-            if (c <= 0xffff) {
-              return 0x1043C;
-            } else {
-              return c;
-            }
-          }
-        };
-        return new TokenStreamComponents(tokenizer, tokenizer);
-      }
-    };
-    int num = 1000 * RANDOM_MULTIPLIER;
-    for (int i = 0; i < num; i++) {
-      String s = TestUtil.randomUnicodeString(random());
-      try (TokenStream ts = analyzer.tokenStream("foo", s)) {
-        ts.reset();
-        OffsetAttribute offsetAtt = ts.addAttribute(OffsetAttribute.class);
-        while (ts.incrementToken()) {
-          String highlightedText = s.substring(offsetAtt.startOffset(), offsetAtt.endOffset());
-          for (int j = 0, cp = 0; j < highlightedText.length(); j += Character.charCount(cp)) {
-            cp = highlightedText.codePointAt(j);
-            assertTrue("non-letter:" + Integer.toHexString(cp), Character.isLetter(cp));
-          }
-        }
-        ts.end();
-      }
-    }
-    // just for fun
-    checkRandomData(random(), analyzer, num);
-    analyzer.close();
+    assertTokenStreamContents(new LowerCaseFilter(tokenizer), new String[] {builder.toString().toLowerCase(Locale.ROOT), builder.toString().toLowerCase(Locale.ROOT)});
   }
   
   public void testDefinitionUsingMethodReference1() throws Exception {
@@ -287,16 +205,16 @@ public class TestCharTokenizers extends BaseTokenStreamTestCase {
   
   public void testDefinitionUsingMethodReference2() throws Exception {
     final StringReader reader = new StringReader("Tokenizer(Test)");
-    final Tokenizer tokenizer = CharTokenizer.fromTokenCharPredicate(Character::isLetter, Character::toUpperCase);
+    final Tokenizer tokenizer = CharTokenizer.fromTokenCharPredicate(Character::isLetter);
     tokenizer.setReader(reader);
-    assertTokenStreamContents(tokenizer, new String[] { "TOKENIZER", "TEST" });
+    assertTokenStreamContents(tokenizer, new String[] { "Tokenizer", "Test" });
   }
   
   public void testDefinitionUsingLambda() throws Exception {
     final StringReader reader = new StringReader("Tokenizer\u00A0Test Foo");
-    final Tokenizer tokenizer = CharTokenizer.fromSeparatorCharPredicate(c -> c == '\u00A0' || Character.isWhitespace(c), Character::toLowerCase);
+    final Tokenizer tokenizer = CharTokenizer.fromSeparatorCharPredicate(c -> c == '\u00A0' || Character.isWhitespace(c));
     tokenizer.setReader(reader);
-    assertTokenStreamContents(tokenizer, new String[] { "tokenizer", "test", "foo" });
+    assertTokenStreamContents(tokenizer, new String[] { "Tokenizer", "Test", "Foo" });
   }
   
 }

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/c0d29759/solr/contrib/extraction/src/test-files/extraction/solr/collection1/conf/schema.xml
----------------------------------------------------------------------
diff --git a/solr/contrib/extraction/src/test-files/extraction/solr/collection1/conf/schema.xml b/solr/contrib/extraction/src/test-files/extraction/solr/collection1/conf/schema.xml
index 3dbd6aa..475c333 100644
--- a/solr/contrib/extraction/src/test-files/extraction/solr/collection1/conf/schema.xml
+++ b/solr/contrib/extraction/src/test-files/extraction/solr/collection1/conf/schema.xml
@@ -126,20 +126,7 @@
     <analyzer class="org.apache.lucene.analysis.core.WhitespaceAnalyzer"/>
   </fieldType>
 
-  <fieldType name="teststop" class="solr.TextField">
-    <analyzer>
-      <tokenizer class="solr.LowerCaseTokenizerFactory"/>
-      <filter class="solr.ClassicFilterFactory"/>
-      <filter class="solr.StopFilterFactory" words="stopwords.txt"/>
-    </analyzer>
-  </fieldType>
-
   <!-- fieldTypes in this section isolate tokenizers and tokenfilters for testing -->
-  <fieldType name="lowertok" class="solr.TextField">
-    <analyzer>
-      <tokenizer class="solr.LowerCaseTokenizerFactory"/>
-    </analyzer>
-  </fieldType>
   <fieldType name="keywordtok" class="solr.TextField">
     <analyzer>
       <tokenizer class="solr.MockTokenizerFactory" pattern="keyword"/>
@@ -386,8 +373,6 @@
   <field name="test_hlt_off" type="highlittext" indexed="true"/>
 
   <!-- fields to test individual tokenizers and tokenfilters -->
-  <field name="teststop" type="teststop" indexed="true" stored="true"/>
-  <field name="lowertok" type="lowertok" indexed="true" stored="true"/>
   <field name="keywordtok" type="keywordtok" indexed="true" stored="true"/>
   <field name="standardtok" type="standardtok" indexed="true" stored="true"/>
   <field name="HTMLstandardtok" type="HTMLstandardtok" indexed="true" stored="true"/>

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/c0d29759/solr/core/src/test-files/solr/collection1/conf/schema-copyfield-test.xml
----------------------------------------------------------------------
diff --git a/solr/core/src/test-files/solr/collection1/conf/schema-copyfield-test.xml b/solr/core/src/test-files/solr/collection1/conf/schema-copyfield-test.xml
index f36751e..20dc97a 100644
--- a/solr/core/src/test-files/solr/collection1/conf/schema-copyfield-test.xml
+++ b/solr/core/src/test-files/solr/collection1/conf/schema-copyfield-test.xml
@@ -90,19 +90,7 @@
     <analyzer class="org.apache.lucene.analysis.core.WhitespaceAnalyzer"/>
   </fieldType>
 
-  <fieldType name="teststop" class="solr.TextField">
-    <analyzer>
-      <tokenizer class="solr.LowerCaseTokenizerFactory"/>
-      <filter class="solr.StopFilterFactory" words="stopwords.txt"/>
-    </analyzer>
-  </fieldType>
-
   <!-- fieldTypes in this section isolate tokenizers and tokenfilters for testing -->
-  <fieldType name="lowertok" class="solr.TextField">
-    <analyzer>
-      <tokenizer class="solr.LowerCaseTokenizerFactory"/>
-    </analyzer>
-  </fieldType>
   <fieldType name="keywordtok" class="solr.TextField">
     <analyzer>
       <tokenizer class="solr.MockTokenizerFactory" pattern="keyword"/>
@@ -347,8 +335,6 @@
   <field name="test_hlt_off" type="highlittext" indexed="true"/>
 
   <!-- fields to test individual tokenizers and tokenfilters -->
-  <field name="teststop" type="teststop" indexed="true" stored="true"/>
-  <field name="lowertok" type="lowertok" indexed="true" stored="true"/>
   <field name="keywordtok" type="keywordtok" indexed="true" stored="true"/>
   <field name="standardtok" type="standardtok" indexed="true" stored="true"/>
   <field name="HTMLstandardtok" type="HTMLstandardtok" indexed="true" stored="true"/>

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/c0d29759/solr/core/src/test-files/solr/collection1/conf/schema-folding.xml
----------------------------------------------------------------------
diff --git a/solr/core/src/test-files/solr/collection1/conf/schema-folding.xml b/solr/core/src/test-files/solr/collection1/conf/schema-folding.xml
index 1d20b80..0b13a57 100644
--- a/solr/core/src/test-files/solr/collection1/conf/schema-folding.xml
+++ b/solr/core/src/test-files/solr/collection1/conf/schema-folding.xml
@@ -81,7 +81,8 @@
 
   <fieldType name="text_lower_token" class="solr.TextField">
     <analyzer>
-      <tokenizer class="solr.LowerCaseTokenizerFactory"/>
+      <tokenizer class="solr.LetterTokenizerFactory"/>
+      <filter class="solr.LowerCaseFilterFactory"/>
       <filter class="solr.ASCIIFoldingFilterFactory"/>
     </analyzer>
   </fieldType>

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/c0d29759/solr/core/src/test-files/solr/collection1/conf/schema-hash.xml
----------------------------------------------------------------------
diff --git a/solr/core/src/test-files/solr/collection1/conf/schema-hash.xml b/solr/core/src/test-files/solr/collection1/conf/schema-hash.xml
index 3e8aa15..c2d6b39 100644
--- a/solr/core/src/test-files/solr/collection1/conf/schema-hash.xml
+++ b/solr/core/src/test-files/solr/collection1/conf/schema-hash.xml
@@ -139,18 +139,7 @@
     <analyzer class="org.apache.lucene.analysis.core.WhitespaceAnalyzer"/>
   </fieldtype>
 
-  <fieldtype name="teststop" class="solr.TextField">
-    <analyzer>
-      <tokenizer class="solr.LowerCaseTokenizerFactory"/>
-    </analyzer>
-  </fieldtype>
-
   <!-- fieldtypes in this section isolate tokenizers and tokenfilters for testing -->
-  <fieldtype name="lowertok" class="solr.TextField">
-    <analyzer>
-      <tokenizer class="solr.LowerCaseTokenizerFactory"/>
-    </analyzer>
-  </fieldtype>
   <fieldtype name="keywordtok" class="solr.TextField">
     <analyzer>
       <tokenizer class="solr.MockTokenizerFactory" pattern="keyword"/>
@@ -484,8 +473,6 @@
          termPositions="true" termOffsets="true"/>
 
   <!-- fields to test individual tokenizers and tokenfilters -->
-  <field name="teststop" type="teststop" indexed="true" stored="true"/>
-  <field name="lowertok" type="lowertok" indexed="true" stored="true"/>
   <field name="keywordtok" type="keywordtok" indexed="true" stored="true"/>
   <field name="standardtok" type="standardtok" indexed="true" stored="true"/>
   <field name="HTMLstandardtok" type="HTMLstandardtok" indexed="true" stored="true"/>

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/c0d29759/solr/core/src/test-files/solr/collection1/conf/schema-required-fields.xml
----------------------------------------------------------------------
diff --git a/solr/core/src/test-files/solr/collection1/conf/schema-required-fields.xml b/solr/core/src/test-files/solr/collection1/conf/schema-required-fields.xml
index 4210d5b..0ac0c04 100644
--- a/solr/core/src/test-files/solr/collection1/conf/schema-required-fields.xml
+++ b/solr/core/src/test-files/solr/collection1/conf/schema-required-fields.xml
@@ -73,19 +73,7 @@
     <analyzer class="org.apache.lucene.analysis.core.WhitespaceAnalyzer"/>
   </fieldType>
 
-  <fieldType name="teststop" class="solr.TextField">
-    <analyzer>
-      <tokenizer class="solr.LowerCaseTokenizerFactory"/>
-      <filter class="solr.StopFilterFactory" words="stopwords.txt"/>
-    </analyzer>
-  </fieldType>
-
   <!-- fieldTypes in this section isolate tokenizers and tokenfilters for testing -->
-  <fieldType name="lowertok" class="solr.TextField">
-    <analyzer>
-      <tokenizer class="solr.LowerCaseTokenizerFactory"/>
-    </analyzer>
-  </fieldType>
   <fieldType name="keywordtok" class="solr.TextField">
     <analyzer>
       <tokenizer class="solr.MockTokenizerFactory" pattern="keyword"/>
@@ -331,8 +319,6 @@
   <field name="test_hlt_off" type="highlittext" indexed="true"/>
 
   <!-- fields to test individual tokenizers and tokenfilters -->
-  <field name="teststop" type="teststop" indexed="true" stored="true"/>
-  <field name="lowertok" type="lowertok" indexed="true" stored="true"/>
   <field name="keywordtok" type="keywordtok" indexed="true" stored="true"/>
   <field name="standardtok" type="standardtok" indexed="true" stored="true"/>
   <field name="HTMLstandardtok" type="HTMLstandardtok" indexed="true" stored="true"/>

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/c0d29759/solr/core/src/test-files/solr/collection1/conf/schema-rest.xml
----------------------------------------------------------------------
diff --git a/solr/core/src/test-files/solr/collection1/conf/schema-rest.xml b/solr/core/src/test-files/solr/collection1/conf/schema-rest.xml
index 2a04356..46b735c 100644
--- a/solr/core/src/test-files/solr/collection1/conf/schema-rest.xml
+++ b/solr/core/src/test-files/solr/collection1/conf/schema-rest.xml
@@ -199,17 +199,13 @@
 
   <fieldType name="teststop" class="solr.TextField">
     <analyzer>
-      <tokenizer class="solr.LowerCaseTokenizerFactory"/>
+      <tokenizer class="solr.LetterTokenizerFactory"/>
+      <filter class="solr.LowerCaseFilterFactory"/>
       <filter class="solr.StopFilterFactory" words="stopwords.txt"/>
     </analyzer>
   </fieldType>
 
   <!-- fieldTypes in this section isolate tokenizers and tokenfilters for testing -->
-  <fieldType name="lowertok" class="solr.TextField">
-    <analyzer>
-      <tokenizer class="solr.LowerCaseTokenizerFactory"/>
-    </analyzer>
-  </fieldType>
   <fieldType name="keywordtok" class="solr.TextField">
     <analyzer>
       <tokenizer class="solr.MockTokenizerFactory" pattern="keyword"/>
@@ -604,7 +600,6 @@
 
   <!-- fields to test individual tokenizers and tokenfilters -->
   <field name="teststop" type="teststop" indexed="true" stored="true"/>
-  <field name="lowertok" type="lowertok" indexed="true" stored="true"/>
   <field name="keywordtok" type="keywordtok" indexed="true" stored="true"/>
   <field name="standardtok" type="standardtok" indexed="true" stored="true"/>
   <field name="HTMLstandardtok" type="HTMLstandardtok" indexed="true" stored="true"/>

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/c0d29759/solr/core/src/test-files/solr/collection1/conf/schema-sql.xml
----------------------------------------------------------------------
diff --git a/solr/core/src/test-files/solr/collection1/conf/schema-sql.xml b/solr/core/src/test-files/solr/collection1/conf/schema-sql.xml
index 40bbe5a..03d9d7e 100644
--- a/solr/core/src/test-files/solr/collection1/conf/schema-sql.xml
+++ b/solr/core/src/test-files/solr/collection1/conf/schema-sql.xml
@@ -149,18 +149,7 @@
     <analyzer class="org.apache.lucene.analysis.core.WhitespaceAnalyzer"/>
   </fieldtype>
 
-  <fieldtype name="teststop" class="solr.TextField">
-    <analyzer>
-      <tokenizer class="solr.LowerCaseTokenizerFactory"/>
-    </analyzer>
-  </fieldtype>
-
   <!-- fieldtypes in this section isolate tokenizers and tokenfilters for testing -->
-  <fieldtype name="lowertok" class="solr.TextField">
-    <analyzer>
-      <tokenizer class="solr.LowerCaseTokenizerFactory"/>
-    </analyzer>
-  </fieldtype>
   <fieldtype name="keywordtok" class="solr.TextField">
     <analyzer>
       <tokenizer class="solr.MockTokenizerFactory" pattern="keyword"/>
@@ -501,8 +490,6 @@
          termPositions="true" termOffsets="true"/>
 
   <!-- fields to test individual tokenizers and tokenfilters -->
-  <field name="teststop" type="teststop" indexed="true" stored="true"/>
-  <field name="lowertok" type="lowertok" indexed="true" stored="true"/>
   <field name="keywordtok" type="keywordtok" indexed="true" stored="true"/>
   <field name="standardtok" type="standardtok" indexed="true" stored="true"/>
   <field name="HTMLstandardtok" type="HTMLstandardtok" indexed="true" stored="true"/>

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/c0d29759/solr/core/src/test-files/solr/collection1/conf/schema-tokenizer-test.xml
----------------------------------------------------------------------
diff --git a/solr/core/src/test-files/solr/collection1/conf/schema-tokenizer-test.xml b/solr/core/src/test-files/solr/collection1/conf/schema-tokenizer-test.xml
index 6c33504..5613c66 100644
--- a/solr/core/src/test-files/solr/collection1/conf/schema-tokenizer-test.xml
+++ b/solr/core/src/test-files/solr/collection1/conf/schema-tokenizer-test.xml
@@ -43,7 +43,8 @@ more concise example.
 
   <fieldType name="lowerCasefieldType" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="false">
     <analyzer type="index">
-      <tokenizer class="solr.LowerCaseTokenizerFactory" maxTokenLen="3" />
+      <tokenizer class="solr.LetterTokenizerFactory" maxTokenLen="3" />
+      <filter class="solr.LowerCaseFilterFactory"/>
     </analyzer>
     <analyzer type="query">
       <tokenizer class="solr.StandardTokenizerFactory"/>
@@ -85,12 +86,6 @@ more concise example.
     </analyzer>
   </fieldType>
 
-  <fieldType name="lowerCase0fieldType" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="false">
-    <analyzer>
-      <tokenizer class="solr.LowerCaseTokenizerFactory" maxTokenLen="3" />
-    </analyzer>
-  </fieldType>
-
   <fieldType name="whiteSp0fieldType" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="false">
     <analyzer>
       <tokenizer class="solr.WhitespaceTokenizerFactory" maxTokenLen="3" />
@@ -112,13 +107,11 @@ more concise example.
   <field name="id" type="string" indexed="true" stored="true" multiValued="false" required="true"/>
 
   <field name="letter" type="letterfieldType" indexed="true" stored="true"/>
-  <field name="lowerCase" type="lowerCasefieldType" indexed="true" stored="true"/>
   <field name="whiteSpace" type="whiteSpfieldType" indexed="true" stored="true"/>
   <field name="unicodeWhiteSpace" type="uniWhiteSpfieldType" indexed="true" stored="true"/>
   <field name="keyword" type="keywordfieldType" indexed="true" stored="true"/>
 
   <field name="letter0" type="letter0fieldType" indexed="true" stored="true"/>
-  <field name="lowerCase0" type="lowerCase0fieldType" indexed="true" stored="true"/>
   <field name="whiteSpace0" type="whiteSp0fieldType" indexed="true" stored="true"/>
   <field name="unicodeWhiteSpace0" type="uniWhiteSp0fieldType" indexed="true" stored="true"/>
   <field name="keyword0" type="keyword0fieldType" indexed="true" stored="true"/>

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/c0d29759/solr/core/src/test-files/solr/collection1/conf/schema.xml
----------------------------------------------------------------------
diff --git a/solr/core/src/test-files/solr/collection1/conf/schema.xml b/solr/core/src/test-files/solr/collection1/conf/schema.xml
index b1a261b..b61bbb1 100644
--- a/solr/core/src/test-files/solr/collection1/conf/schema.xml
+++ b/solr/core/src/test-files/solr/collection1/conf/schema.xml
@@ -142,20 +142,17 @@
 
   <fieldType name="teststop" class="solr.TextField">
     <analyzer type="index">
-      <tokenizer class="solr.LowerCaseTokenizerFactory"/>
+      <tokenizer class="solr.LetterTokenizerFactory"/>
+      <filter class="solr.LowerCaseFilterFactory"/>
       <filter class="solr.StopFilterFactory" words="stopwords.txt"/>
     </analyzer>
     <analyzer type="query">
-      <tokenizer class="solr.LowerCaseTokenizerFactory"/>
+      <tokenizer class="solr.LetterTokenizerFactory"/>
+      <filter class="solr.LowerCaseFilterFactory"/>
     </analyzer>
   </fieldType>
 
   <!-- fieldTypes in this section isolate tokenizers and tokenfilters for testing -->
-  <fieldType name="lowertok" class="solr.TextField">
-    <analyzer>
-      <tokenizer class="solr.LowerCaseTokenizerFactory"/>
-    </analyzer>
-  </fieldType>
   <fieldType name="keywordtok" class="solr.TextField">
     <analyzer>
       <tokenizer class="solr.MockTokenizerFactory" pattern="keyword"/>
@@ -574,7 +571,7 @@
 
   <!-- fields to test individual tokenizers and tokenfilters -->
   <field name="teststop" type="teststop" indexed="true" stored="true"/>
-  <field name="lowertok" type="lowertok" indexed="true" stored="true"/>
+  <field name="lowertok" type="lowerfilt" indexed="true" stored="true"/>
   <field name="keywordtok" type="keywordtok" indexed="true" stored="true"/>
   <field name="standardtok" type="standardtok" indexed="true" stored="true"/>
   <field name="HTMLstandardtok" type="HTMLstandardtok" indexed="true" stored="true"/>
@@ -636,7 +633,7 @@
 
   <field name="store" type="location" indexed="true" stored="true" omitNorms="false"/>
 
-  <field name="lower" type="lowertok" indexed="false" stored="true" multiValued="true"/>
+  <field name="lower" type="lowerfilt" indexed="false" stored="true" multiValued="true"/>
   <field name="_route_" type="string" indexed="true" stored="true" multiValued="false"/>
 
   <field name="payloadDelimited" type="payloadDelimited"/>

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/c0d29759/solr/core/src/test-files/solr/collection1/conf/schema12.xml
----------------------------------------------------------------------
diff --git a/solr/core/src/test-files/solr/collection1/conf/schema12.xml b/solr/core/src/test-files/solr/collection1/conf/schema12.xml
index 8947676..e4c3ad2 100644
--- a/solr/core/src/test-files/solr/collection1/conf/schema12.xml
+++ b/solr/core/src/test-files/solr/collection1/conf/schema12.xml
@@ -234,17 +234,13 @@
 
   <fieldType name="teststop" class="solr.TextField">
     <analyzer>
-      <tokenizer class="solr.LowerCaseTokenizerFactory"/>
+      <tokenizer class="solr.LetterTokenizerFactory"/>
+      <filter class="solr.LowerCaseFilterFactory"/>
       <filter class="solr.StopFilterFactory" words="stopwords.txt"/>
     </analyzer>
   </fieldType>
 
   <!-- fieldTypes in this section isolate tokenizers and tokenfilters for testing -->
-  <fieldType name="lowertok" class="solr.TextField">
-    <analyzer>
-      <tokenizer class="solr.LowerCaseTokenizerFactory"/>
-    </analyzer>
-  </fieldType>
   <fieldType name="keywordtok" class="solr.TextField">
     <analyzer>
       <tokenizer class="solr.MockTokenizerFactory" pattern="keyword"/>
@@ -591,7 +587,6 @@
 
   <!-- fields to test individual tokenizers and tokenfilters -->
   <field name="teststop" type="teststop" indexed="true" stored="true"/>
-  <field name="lowertok" type="lowertok" indexed="true" stored="true"/>
   <field name="keywordtok" type="keywordtok" indexed="true" stored="true"/>
   <field name="standardtok" type="standardtok" indexed="true" stored="true"/>
   <field name="HTMLstandardtok" type="HTMLstandardtok" indexed="true" stored="true"/>

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/c0d29759/solr/core/src/test-files/solr/collection1/conf/schema15.xml
----------------------------------------------------------------------
diff --git a/solr/core/src/test-files/solr/collection1/conf/schema15.xml b/solr/core/src/test-files/solr/collection1/conf/schema15.xml
index 80d19e9..361344f 100644
--- a/solr/core/src/test-files/solr/collection1/conf/schema15.xml
+++ b/solr/core/src/test-files/solr/collection1/conf/schema15.xml
@@ -163,19 +163,7 @@
     <analyzer class="org.apache.lucene.analysis.core.WhitespaceAnalyzer"/>
   </fieldType>
 
-  <fieldType name="teststop" class="solr.TextField">
-    <analyzer>
-      <tokenizer class="solr.LowerCaseTokenizerFactory"/>
-      <filter class="solr.StopFilterFactory" words="stopwords.txt"/>
-    </analyzer>
-  </fieldType>
-
   <!-- fieldTypes in this section isolate tokenizers and tokenfilters for testing -->
-  <fieldType name="lowertok" class="solr.TextField">
-    <analyzer>
-      <tokenizer class="solr.LowerCaseTokenizerFactory"/>
-    </analyzer>
-  </fieldType>
   <fieldType name="keywordtok" class="solr.TextField">
     <analyzer>
       <tokenizer class="solr.MockTokenizerFactory" pattern="keyword"/>
@@ -505,8 +493,6 @@
   <field name="test_hlt_off" type="highlittext" indexed="true"/>
 
   <!-- fields to test individual tokenizers and tokenfilters -->
-  <field name="teststop" type="teststop" indexed="true" stored="true"/>
-  <field name="lowertok" type="lowertok" indexed="true" stored="true"/>
   <field name="keywordtok" type="keywordtok" indexed="true" stored="true"/>
   <field name="standardtok" type="standardtok" indexed="true" stored="true"/>
   <field name="HTMLstandardtok" type="HTMLstandardtok" indexed="true" stored="true"/>

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/c0d29759/solr/core/src/test-files/solr/collection1/conf/schemasurround.xml
----------------------------------------------------------------------
diff --git a/solr/core/src/test-files/solr/collection1/conf/schemasurround.xml b/solr/core/src/test-files/solr/collection1/conf/schemasurround.xml
index 213acc7..93b11ed 100644
--- a/solr/core/src/test-files/solr/collection1/conf/schemasurround.xml
+++ b/solr/core/src/test-files/solr/collection1/conf/schemasurround.xml
@@ -164,19 +164,7 @@
     <analyzer class="org.apache.lucene.analysis.core.WhitespaceAnalyzer"/>
   </fieldType>
 
-  <fieldType name="teststop" class="solr.TextField">
-    <analyzer>
-      <tokenizer class="solr.LowerCaseTokenizerFactory"/>
-      <filter class="solr.StopFilterFactory" words="stopwords.txt"/>
-    </analyzer>
-  </fieldType>
-
   <!-- fieldTypes in this section isolate tokenizers and tokenfilters for testing -->
-  <fieldType name="lowertok" class="solr.TextField">
-    <analyzer>
-      <tokenizer class="solr.LowerCaseTokenizerFactory"/>
-    </analyzer>
-  </fieldType>
   <fieldType name="keywordtok" class="solr.TextField">
     <analyzer>
       <tokenizer class="solr.MockTokenizerFactory" pattern="keyword"/>
@@ -517,8 +505,6 @@
   <field name="test_hlt_off" type="highlittext" indexed="true"/>
 
   <!-- fields to test individual tokenizers and tokenfilters -->
-  <field name="teststop" type="teststop" indexed="true" stored="true"/>
-  <field name="lowertok" type="lowertok" indexed="true" stored="true"/>
   <field name="keywordtok" type="keywordtok" indexed="true" stored="true"/>
   <field name="standardtok" type="standardtok" indexed="true" stored="true"/>
   <field name="HTMLstandardtok" type="HTMLstandardtok" indexed="true" stored="true"/>

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/c0d29759/solr/core/src/test/org/apache/solr/rest/schema/TestFieldCollectionResource.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/rest/schema/TestFieldCollectionResource.java b/solr/core/src/test/org/apache/solr/rest/schema/TestFieldCollectionResource.java
index 31fa9f5..bdd3cd2 100644
--- a/solr/core/src/test/org/apache/solr/rest/schema/TestFieldCollectionResource.java
+++ b/solr/core/src/test/org/apache/solr/rest/schema/TestFieldCollectionResource.java
@@ -77,11 +77,11 @@ public class TestFieldCollectionResource extends SolrRestletTestBase {
              "/fields/[0]/name=='HTMLstandardtok'",
              "/fields/[1]/name=='HTMLwhitetok'",
              "/fields/[2]/name=='_version_'",
-             "/fields/[108]/name=='*_d'",
-             "/fields/[107]/name=='*_f'",
-             "/fields/[106]/name=='*_b'",
-             "/fields/[105]/name=='*_t'",
-             "/fields/[104]/name=='*_l'"
+             "/fields/[107]/name=='*_d'",
+             "/fields/[106]/name=='*_f'",
+             "/fields/[105]/name=='*_b'",
+             "/fields/[104]/name=='*_t'",
+             "/fields/[103]/name=='*_l'"
 
     );
   }

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/c0d29759/solr/core/src/test/org/apache/solr/rest/schema/TestFieldTypeResource.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/rest/schema/TestFieldTypeResource.java b/solr/core/src/test/org/apache/solr/rest/schema/TestFieldTypeResource.java
index ea19af0..08a3f1b 100644
--- a/solr/core/src/test/org/apache/solr/rest/schema/TestFieldTypeResource.java
+++ b/solr/core/src/test/org/apache/solr/rest/schema/TestFieldTypeResource.java
@@ -81,7 +81,8 @@ public class TestFieldTypeResource extends SolrRestletTestBase {
             "count(/response/lst[@name='fieldType']/*) = 3",
             "/response/lst[@name='fieldType']/str[@name='name'] = 'teststop'",
             "/response/lst[@name='fieldType']/str[@name='class'] = 'solr.TextField'",
-            "/response/lst[@name='fieldType']/lst[@name='analyzer']/lst[@name='tokenizer']/str[@name='class'] = 'solr.LowerCaseTokenizerFactory'",
+            "/response/lst[@name='fieldType']/lst[@name='analyzer']/lst[@name='tokenizer']/str[@name='class'] = 'solr.LetterTokenizerFactory'",
+            "/response/lst[@name='fieldType']/lst[@name='analyzer']/arr[@name='filters']/lst/str[@name='class'][.='solr.LowerCaseFilterFactory']",
             "/response/lst[@name='fieldType']/lst[@name='analyzer']/arr[@name='filters']/lst/str[@name='class'][.='solr.StopFilterFactory']",
             "/response/lst[@name='fieldType']/lst[@name='analyzer']/arr[@name='filters']/lst/str[@name='words'][.='stopwords.txt']"
             );

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/c0d29759/solr/core/src/test/org/apache/solr/util/TestMaxTokenLenTokenizer.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/util/TestMaxTokenLenTokenizer.java b/solr/core/src/test/org/apache/solr/util/TestMaxTokenLenTokenizer.java
index c7e0dc3..f66c03e 100644
--- a/solr/core/src/test/org/apache/solr/util/TestMaxTokenLenTokenizer.java
+++ b/solr/core/src/test/org/apache/solr/util/TestMaxTokenLenTokenizer.java
@@ -22,7 +22,6 @@ import org.junit.BeforeClass;
 
 /**
  * Tests for:
- * {@link org.apache.lucene.analysis.core.LowerCaseTokenizerFactory}
  * {@link org.apache.lucene.analysis.core.LetterTokenizerFactory}
  * {@link org.apache.lucene.analysis.core.KeywordTokenizerFactory}
  * {@link org.apache.lucene.analysis.core.WhitespaceTokenizerFactory}
@@ -44,25 +43,18 @@ public class TestMaxTokenLenTokenizer extends SolrTestCaseJ4 {
     // using fields with definitions, different tokenizer factories respectively at index time and standard tokenizer at query time.
 
     updateJ("{\"add\":{\"doc\": {\"id\":1,\"letter\":\"letter\"}},\"commit\":{}}",null);
-    updateJ("{\"add\":{\"doc\": {\"id\":2,\"lowerCase\":\"lowerCase\"}},\"commit\":{}}",null);
     updateJ("{\"add\":{\"doc\": {\"id\":3,\"whiteSpace\":\"whiteSpace in\"}},\"commit\":{}}",null);
     updateJ("{\"add\":{\"doc\": {\"id\":4,\"unicodeWhiteSpace\":\"unicode in\"}},\"commit\":{}}",null);
     updateJ("{\"add\":{\"doc\": {\"id\":5,\"keyword\":\"keyword\"}},\"commit\":{}}",null);
 
     assertU(commit());
 
-    assertQ("Check the total number of docs", req("q","*:*"), "//result[@numFound=5]");
+    assertQ("Check the total number of docs", req("q","*:*"), "//result[@numFound=4]");
 
     //Tokens generated for "letter": "let" "ter" "letter" , maxTokenLen=3
     assertQ("Check the total number of docs", req("q","letter:let"), "//result[@numFound=1]");
     assertQ("Check the total number of docs", req("q","letter:lett"), "//result[@numFound=0]");
 
-    //Tokens generated for "lowerCase": "low" "erC" "ase" "lowerCase" , maxTokenLen=3
-    assertQ("Check the total number of docs", req("q","lowerCase:low"), "//result[@numFound=1]");
-    assertQ("Check the total number of docs", req("q","lowerCase:l"), "//result[@numFound=0]");
-    assertQ("Check the total number of docs", req("q","lowerCase:lo"), "//result[@numFound=0]");
-    assertQ("Check the total number of docs", req("q","lowerCase:lower"), "//result[@numFound=0]");
-
     //Tokens generated for "whiteSpace in": "whi" "teS" "pac" "e" "in" "whiteSpace" , maxTokenLen=3
     assertQ("Check the total number of docs", req("q","whiteSpace:whi"), "//result[@numFound=1]");
     assertQ("Check the total number of docs", req("q","whiteSpace:teS"), "//result[@numFound=1]");
@@ -88,14 +80,13 @@ public class TestMaxTokenLenTokenizer extends SolrTestCaseJ4 {
     // using fields with definitions, same tokenizers both at index and query time.
 
     updateJ("{\"add\":{\"doc\": {\"id\":1,\"letter0\":\"letter\"}},\"commit\":{}}",null);
-    updateJ("{\"add\":{\"doc\": {\"id\":2,\"lowerCase0\":\"lowerCase\"}},\"commit\":{}}",null);
     updateJ("{\"add\":{\"doc\": {\"id\":3,\"whiteSpace0\":\"whiteSpace in\"}},\"commit\":{}}",null);
     updateJ("{\"add\":{\"doc\": {\"id\":4,\"unicodeWhiteSpace0\":\"unicode in\"}},\"commit\":{}}",null);
     updateJ("{\"add\":{\"doc\": {\"id\":5,\"keyword0\":\"keyword\"}},\"commit\":{}}",null);
 
     assertU(commit());
 
-    assertQ("Check the total number of docs", req("q","*:*"), "//result[@numFound=5]");
+    assertQ("Check the total number of docs", req("q","*:*"), "//result[@numFound=4]");
 
     //Tokens generated for "letter": "let" "ter" "letter" , maxTokenLen=3
     // Anything that matches the first three letters should be found when maxLen=3
@@ -104,13 +95,6 @@ public class TestMaxTokenLenTokenizer extends SolrTestCaseJ4 {
     assertQ("Check the total number of docs", req("q","letter0:lett"), "//result[@numFound=1]");
     assertQ("Check the total number of docs", req("q","letter0:letXYZ"), "//result[@numFound=1]");
 
-    //Tokens generated for "lowerCase": "low" "erC" "ase" "lowerCase" , maxTokenLen=3
-    // Anything that matches the first three letters should be found when maxLen=3
-    assertQ("Check the total number of docs", req("q","lowerCase0:low"), "//result[@numFound=1]");
-    assertQ("Check the total number of docs", req("q","lowerCase0:l"), "//result[@numFound=0]");
-    assertQ("Check the total number of docs", req("q","lowerCase0:lo"), "//result[@numFound=0]");
-    assertQ("Check the total number of docs", req("q","lowerCase0:lowerXYZ"), "//result[@numFound=1]");
-
     //Tokens generated for "whiteSpace in": "whi" "teS" "pac" "e" "in" "whiteSpace" , maxTokenLen=3
     // Anything that matches the first three letters should be found when maxLen=3
     assertQ("Check the total number of docs", req("q","whiteSpace0:h"), "//result[@numFound=0]");

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/c0d29759/solr/solrj/src/test-files/solrj/solr/collection1/conf/schema-sql.xml
----------------------------------------------------------------------
diff --git a/solr/solrj/src/test-files/solrj/solr/collection1/conf/schema-sql.xml b/solr/solrj/src/test-files/solrj/solr/collection1/conf/schema-sql.xml
index 3a1f328..974893c 100644
--- a/solr/solrj/src/test-files/solrj/solr/collection1/conf/schema-sql.xml
+++ b/solr/solrj/src/test-files/solrj/solr/collection1/conf/schema-sql.xml
@@ -141,18 +141,7 @@
     <analyzer class="org.apache.lucene.analysis.core.WhitespaceAnalyzer"/>
   </fieldtype>
 
-  <fieldtype name="teststop" class="solr.TextField">
-    <analyzer>
-      <tokenizer class="solr.LowerCaseTokenizerFactory"/>
-    </analyzer>
-  </fieldtype>
-
   <!-- fieldtypes in this section isolate tokenizers and tokenfilters for testing -->
-  <fieldtype name="lowertok" class="solr.TextField">
-    <analyzer>
-      <tokenizer class="solr.LowerCaseTokenizerFactory"/>
-    </analyzer>
-  </fieldtype>
   <fieldtype name="keywordtok" class="solr.TextField">
     <analyzer>
       <tokenizer class="solr.MockTokenizerFactory" pattern="keyword"/>
@@ -493,8 +482,6 @@
          termPositions="true" termOffsets="true"/>
 
   <!-- fields to test individual tokenizers and tokenfilters -->
-  <field name="teststop" type="teststop" indexed="true" stored="true"/>
-  <field name="lowertok" type="lowertok" indexed="true" stored="true"/>
   <field name="keywordtok" type="keywordtok" indexed="true" stored="true"/>
   <field name="standardtok" type="standardtok" indexed="true" stored="true"/>
   <field name="HTMLstandardtok" type="HTMLstandardtok" indexed="true" stored="true"/>

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/c0d29759/solr/solrj/src/test-files/solrj/solr/collection1/conf/schema.xml
----------------------------------------------------------------------
diff --git a/solr/solrj/src/test-files/solrj/solr/collection1/conf/schema.xml b/solr/solrj/src/test-files/solrj/solr/collection1/conf/schema.xml
index 02b5053..079a35f 100644
--- a/solr/solrj/src/test-files/solrj/solr/collection1/conf/schema.xml
+++ b/solr/solrj/src/test-files/solrj/solr/collection1/conf/schema.xml
@@ -116,18 +116,7 @@
     <analyzer class="org.apache.lucene.analysis.core.WhitespaceAnalyzer"/>
   </fieldType>
 
-  <fieldType name="teststop" class="solr.TextField">
-    <analyzer>
-      <tokenizer class="solr.LowerCaseTokenizerFactory"/>
-    </analyzer>
-  </fieldType>
-
   <!-- fieldTypes in this section isolate tokenizers and tokenfilters for testing -->
-  <fieldType name="lowertok" class="solr.TextField">
-    <analyzer>
-      <tokenizer class="solr.LowerCaseTokenizerFactory"/>
-    </analyzer>
-  </fieldType>
   <fieldType name="keywordtok" class="solr.TextField">
     <analyzer>
       <tokenizer class="solr.MockTokenizerFactory" pattern="keyword"/>
@@ -461,8 +450,6 @@
          termPositions="true" termOffsets="true"/>
 
   <!-- fields to test individual tokenizers and tokenfilters -->
-  <field name="teststop" type="teststop" indexed="true" stored="true"/>
-  <field name="lowertok" type="lowertok" indexed="true" stored="true"/>
   <field name="keywordtok" type="keywordtok" indexed="true" stored="true"/>
   <field name="standardtok" type="standardtok" indexed="true" stored="true"/>
   <field name="HTMLstandardtok" type="HTMLstandardtok" indexed="true" stored="true"/>

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/c0d29759/solr/solrj/src/test-files/solrj/solr/configsets/streaming/conf/schema.xml
----------------------------------------------------------------------
diff --git a/solr/solrj/src/test-files/solrj/solr/configsets/streaming/conf/schema.xml b/solr/solrj/src/test-files/solrj/solr/configsets/streaming/conf/schema.xml
index aa96296..6cd4f91 100644
--- a/solr/solrj/src/test-files/solrj/solr/configsets/streaming/conf/schema.xml
+++ b/solr/solrj/src/test-files/solrj/solr/configsets/streaming/conf/schema.xml
@@ -137,16 +137,7 @@
       <analyzer class="org.apache.lucene.analysis.core.WhitespaceAnalyzer"/>
     </fieldtype>
 
-    <fieldtype name="teststop" class="solr.TextField">
-      <analyzer>
-        <tokenizer class="solr.LowerCaseTokenizerFactory"/>
-      </analyzer>
-    </fieldtype>
-
     <!-- fieldtypes in this section isolate tokenizers and tokenfilters for testing -->
-    <fieldtype name="lowertok" class="solr.TextField">
-      <analyzer><tokenizer class="solr.LowerCaseTokenizerFactory"/></analyzer>
-    </fieldtype>
     <fieldtype name="keywordtok" class="solr.TextField">
       <analyzer><tokenizer class="solr.MockTokenizerFactory" pattern="keyword"/></analyzer>
     </fieldtype>
@@ -479,8 +470,6 @@
            termPositions="true" termOffsets="true"/>
 
     <!-- fields to test individual tokenizers and tokenfilters -->
-    <field name="teststop" type="teststop" indexed="true" stored="true"/>
-    <field name="lowertok" type="lowertok" indexed="true" stored="true"/>
     <field name="keywordtok" type="keywordtok" indexed="true" stored="true"/>
     <field name="standardtok" type="standardtok" indexed="true" stored="true"/>
     <field name="HTMLstandardtok" type="HTMLstandardtok" indexed="true" stored="true"/>