You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by jp...@apache.org on 2016/07/12 14:36:47 UTC

[3/3] lucene-solr:master: LUCENE-7355: Add Analyzer#normalize() and use it in query parsers.

LUCENE-7355: Add Analyzer#normalize() and use it in query parsers.


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/e92a38af
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/e92a38af
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/e92a38af

Branch: refs/heads/master
Commit: e92a38af90d12e51390b4307ccbe0c24ac7b6b4e
Parents: ced9140
Author: Adrien Grand <jp...@gmail.com>
Authored: Tue Jun 28 18:23:11 2016 +0200
Committer: Adrien Grand <jp...@gmail.com>
Committed: Tue Jul 12 16:33:07 2016 +0200

----------------------------------------------------------------------
 lucene/CHANGES.txt                              |  10 +
 lucene/MIGRATE.txt                              |  11 +
 .../lucene/analysis/ar/ArabicAnalyzer.java      |   8 +
 .../lucene/analysis/bg/BulgarianAnalyzer.java   |   7 +
 .../lucene/analysis/br/BrazilianAnalyzer.java   |   7 +
 .../lucene/analysis/ca/CatalanAnalyzer.java     |   8 +
 .../apache/lucene/analysis/cjk/CJKAnalyzer.java |   7 +
 .../lucene/analysis/ckb/SoraniAnalyzer.java     |   9 +
 .../lucene/analysis/core/SimpleAnalyzer.java    |   6 +
 .../lucene/analysis/core/StopAnalyzer.java      |   6 +
 .../lucene/analysis/custom/CustomAnalyzer.java  |  28 +-
 .../lucene/analysis/cz/CzechAnalyzer.java       |   7 +
 .../lucene/analysis/da/DanishAnalyzer.java      |   7 +
 .../lucene/analysis/de/GermanAnalyzer.java      |   8 +
 .../lucene/analysis/el/GreekAnalyzer.java       |   7 +
 .../lucene/analysis/en/EnglishAnalyzer.java     |   7 +
 .../lucene/analysis/es/SpanishAnalyzer.java     |   7 +
 .../lucene/analysis/eu/BasqueAnalyzer.java      |   7 +
 .../lucene/analysis/fa/PersianAnalyzer.java     |  14 +-
 .../lucene/analysis/fi/FinnishAnalyzer.java     |   7 +
 .../lucene/analysis/fr/FrenchAnalyzer.java      |   8 +
 .../lucene/analysis/ga/IrishAnalyzer.java       |   8 +
 .../lucene/analysis/gl/GalicianAnalyzer.java    |   7 +
 .../lucene/analysis/hi/HindiAnalyzer.java       |  11 +
 .../lucene/analysis/hu/HungarianAnalyzer.java   |   7 +
 .../lucene/analysis/hy/ArmenianAnalyzer.java    |   7 +
 .../lucene/analysis/id/IndonesianAnalyzer.java  |   7 +
 .../lucene/analysis/it/ItalianAnalyzer.java     |   8 +
 .../lucene/analysis/lt/LithuanianAnalyzer.java  |   7 +
 .../lucene/analysis/lv/LatvianAnalyzer.java     |   7 +
 .../lucene/analysis/nl/DutchAnalyzer.java       |   7 +
 .../lucene/analysis/no/NorwegianAnalyzer.java   |   7 +
 .../lucene/analysis/pt/PortugueseAnalyzer.java  |   7 +
 .../lucene/analysis/ro/RomanianAnalyzer.java    |   7 +
 .../lucene/analysis/ru/RussianAnalyzer.java     |   7 +
 .../analysis/standard/ClassicAnalyzer.java      |   5 +
 .../standard/UAX29URLEmailAnalyzer.java         |   5 +
 .../lucene/analysis/sv/SwedishAnalyzer.java     |   7 +
 .../apache/lucene/analysis/th/ThaiAnalyzer.java |   7 +
 .../lucene/analysis/tr/TurkishAnalyzer.java     |   7 +
 .../lucene/collation/CollationKeyAnalyzer.java  |   7 +
 .../core/TestAllAnalyzersHaveFactories.java     |   2 +
 .../lucene/analysis/core/TestAnalyzers.java     |   4 +
 .../lucene/analysis/core/TestRandomChains.java  |  10 +-
 .../analysis/custom/TestCustomAnalyzer.java     | 143 ++++++++++
 .../lucene/analysis/ja/JapaneseAnalyzer.java    |   7 +
 .../analysis/morfologik/MorfologikAnalyzer.java |   6 +
 .../analysis/cn/smart/SmartChineseAnalyzer.java |   6 +
 .../lucene/analysis/pl/PolishAnalyzer.java      |   7 +
 .../org/apache/lucene/analysis/Analyzer.java    | 135 +++++++++-
 .../analysis/standard/StandardAnalyzer.java     |   7 +
 .../analysis/standard/TestStandardAnalyzer.java |   6 +
 .../analyzing/AnalyzingQueryParser.java         | 202 --------------
 .../queryparser/analyzing/package-info.java     |  22 --
 .../queryparser/classic/QueryParserBase.java    | 140 +++-------
 .../complexPhrase/ComplexPhraseQueryParser.java |  17 +-
 .../CommonQueryParserConfiguration.java         |  12 -
 .../flexible/standard/StandardQueryParser.java  |  30 ---
 .../config/StandardQueryConfigHandler.java      |   9 -
 .../processors/FuzzyQueryNodeProcessor.java     |  11 +-
 ...owercaseExpandedTermsQueryNodeProcessor.java | 100 -------
 .../processors/RegexpQueryNodeProcessor.java    |  56 ++++
 .../StandardQueryNodeProcessorPipeline.java     |   4 +-
 .../processors/TermRangeQueryNodeProcessor.java |  11 +-
 .../processors/WildcardQueryNodeProcessor.java  |  58 +++-
 .../queryparser/simple/SimpleQueryParser.java   |   9 +-
 .../analyzing/TestAnalyzingQueryParser.java     | 268 -------------------
 .../queryparser/classic/TestQueryParser.java    | 241 ++++++++++++++---
 .../precedence/TestPrecedenceQueryParser.java   |  44 +--
 .../flexible/standard/TestQPHelper.java         |  57 ++--
 .../flexible/standard/TestStandardQP.java       |  15 --
 .../queryparser/util/QueryParserTestBase.java   |  63 ++---
 .../analysis/BaseTokenStreamTestCase.java       |   5 +-
 .../apache/lucene/analysis/MockAnalyzer.java    |  11 +-
 .../lucene/analysis/MockBytesAnalyzer.java      |   7 +
 .../lucene/analysis/MockLowerCaseFilter.java    |  40 +++
 .../apache/solr/analysis/TokenizerChain.java    |  28 +-
 77 files changed, 1179 insertions(+), 942 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e92a38af/lucene/CHANGES.txt
----------------------------------------------------------------------
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index eba11c9..c520e1b 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -15,6 +15,9 @@ API Changes
 
 * LUCENE-7368: Removed query normalization. (Adrien Grand)
 
+* LUCENE-7355: AnalyzingQueryParser has been removed as its functionality has
+  been folded into the classic QueryParser. (Adrien Grand)
+
 Bug Fixes
 
 Improvements
@@ -48,6 +51,9 @@ New Features
   methods Directory.rename and Directory.syncMetaData instead (Robert Muir,
   Uwe Schindler, Mike McCandless)
 
+* LUCENE-7355: Added Analyzer#normalize(), which only applies normalization to
+  an input string. (Adrien Grand)
+
 Bug Fixes
 
 * LUCENE-6662: Fixed potential resource leaks. (Rishabh Patel via Adrien Grand)
@@ -99,6 +105,10 @@ Improvements
 * LUCENE-7276: MatchNoDocsQuery now includes an optional reason for
   why it was used (Jim Ferenczi via Mike McCandless)
 
+* LUCENE-7355: AnalyzingQueryParser now only applies the subset of the analysis
+  chain that is about normalization for range/fuzzy/wildcard queries.
+  (Adrien Grand)
+
 Optimizations
 
 * LUCENE-7330, LUCENE-7339: Speed up conjunction queries. (Adrien Grand)

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e92a38af/lucene/MIGRATE.txt
----------------------------------------------------------------------
diff --git a/lucene/MIGRATE.txt b/lucene/MIGRATE.txt
index f914529..06e6a81 100644
--- a/lucene/MIGRATE.txt
+++ b/lucene/MIGRATE.txt
@@ -36,3 +36,14 @@ Query normalization's goal was to make scores comparable across queries, which
 was only implemented by the ClassicSimilarity. Since ClassicSimilarity is not
 the default similarity anymore, this functionality has been removed. Boosts are
 now propagated through Query#createWeight.
+
+## AnalyzingQueryParser removed (LUCENE-7355)
+
+The functionality of AnalyzingQueryParser has been folded into the classic
+QueryParser, which now passes terms through Analyzer#normalize when generating
+queries.
+
+## CommonQueryParserConfiguration.setLowerCaseExpandedTerms removed (LUCENE-7355)
+
+This option has been removed as expanded terms are now normalized through
+Analyzer#normalize.

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e92a38af/lucene/analysis/common/src/java/org/apache/lucene/analysis/ar/ArabicAnalyzer.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/ar/ArabicAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/ar/ArabicAnalyzer.java
index 889a886..61100dd 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/ar/ArabicAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/ar/ArabicAnalyzer.java
@@ -143,5 +143,13 @@ public final class ArabicAnalyzer extends StopwordAnalyzerBase {
     }
     return new TokenStreamComponents(source, new ArabicStemFilter(result));
   }
+
+  @Override
+  protected TokenStream normalize(String fieldName, TokenStream in) {
+    TokenStream result = new LowerCaseFilter(in);
+    result = new DecimalDigitFilter(result);
+    result = new ArabicNormalizationFilter(result);
+    return result;
+  }
 }
 

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e92a38af/lucene/analysis/common/src/java/org/apache/lucene/analysis/bg/BulgarianAnalyzer.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/bg/BulgarianAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/bg/BulgarianAnalyzer.java
index 9cb0657..06c7eea 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/bg/BulgarianAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/bg/BulgarianAnalyzer.java
@@ -126,4 +126,11 @@ public final class BulgarianAnalyzer extends StopwordAnalyzerBase {
     result = new BulgarianStemFilter(result);
     return new TokenStreamComponents(source, result);
   }
+
+  @Override
+  protected TokenStream normalize(String fieldName, TokenStream in) {
+    TokenStream result = new StandardFilter(in);
+    result = new LowerCaseFilter(result);
+    return result;
+  }
 }

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e92a38af/lucene/analysis/common/src/java/org/apache/lucene/analysis/br/BrazilianAnalyzer.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/br/BrazilianAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/br/BrazilianAnalyzer.java
index 5dd0cbc..ad1af92 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/br/BrazilianAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/br/BrazilianAnalyzer.java
@@ -127,5 +127,12 @@ public final class BrazilianAnalyzer extends StopwordAnalyzerBase {
       result = new SetKeywordMarkerFilter(result, excltable);
     return new TokenStreamComponents(source, new BrazilianStemFilter(result));
   }
+
+  @Override
+  protected TokenStream normalize(String fieldName, TokenStream in) {
+    TokenStream result = new StandardFilter(in);
+    result = new LowerCaseFilter(result);
+    return result;
+  }
 }
 

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e92a38af/lucene/analysis/common/src/java/org/apache/lucene/analysis/ca/CatalanAnalyzer.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/ca/CatalanAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/ca/CatalanAnalyzer.java
index 739b61a..56f36e1 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/ca/CatalanAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/ca/CatalanAnalyzer.java
@@ -130,4 +130,12 @@ public final class CatalanAnalyzer extends StopwordAnalyzerBase {
     result = new SnowballFilter(result, new CatalanStemmer());
     return new TokenStreamComponents(source, result);
   }
+
+  @Override
+  protected TokenStream normalize(String fieldName, TokenStream in) {
+    TokenStream result = new StandardFilter(in);
+    result = new ElisionFilter(result, DEFAULT_ARTICLES);
+    result = new LowerCaseFilter(result);
+    return result;
+  }
 }

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e92a38af/lucene/analysis/common/src/java/org/apache/lucene/analysis/cjk/CJKAnalyzer.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/cjk/CJKAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/cjk/CJKAnalyzer.java
index d500ff9..d4214a1 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/cjk/CJKAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/cjk/CJKAnalyzer.java
@@ -92,4 +92,11 @@ public final class CJKAnalyzer extends StopwordAnalyzerBase {
     result = new CJKBigramFilter(result);
     return new TokenStreamComponents(source, new StopFilter(result, stopwords));
   }
+
+  @Override
+  protected TokenStream normalize(String fieldName, TokenStream in) {
+    TokenStream result = new CJKWidthFilter(in);
+    result = new LowerCaseFilter(result);
+    return result;
+  }
 }

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e92a38af/lucene/analysis/common/src/java/org/apache/lucene/analysis/ckb/SoraniAnalyzer.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/ckb/SoraniAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/ckb/SoraniAnalyzer.java
index 0f283b8..e7ce3f3 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/ckb/SoraniAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/ckb/SoraniAnalyzer.java
@@ -126,4 +126,13 @@ public final class SoraniAnalyzer extends StopwordAnalyzerBase {
     result = new SoraniStemFilter(result);
     return new TokenStreamComponents(source, result);
   }
+
+  @Override
+  protected TokenStream normalize(String fieldName, TokenStream in) {
+    TokenStream result = new StandardFilter(in);
+    result = new SoraniNormalizationFilter(result);
+    result = new LowerCaseFilter(result);
+    result = new DecimalDigitFilter(result);
+    return result;
+  }
 }

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e92a38af/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/SimpleAnalyzer.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/SimpleAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/SimpleAnalyzer.java
index d0fdcf6..6e0f2f0 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/SimpleAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/SimpleAnalyzer.java
@@ -19,6 +19,7 @@ package org.apache.lucene.analysis.core;
 
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.LowerCaseFilter;
+import org.apache.lucene.analysis.TokenStream;
 
 /** An {@link Analyzer} that filters {@link LetterTokenizer} 
  *  with {@link LowerCaseFilter} 
@@ -35,4 +36,9 @@ public final class SimpleAnalyzer extends Analyzer {
   protected TokenStreamComponents createComponents(final String fieldName) {
     return new TokenStreamComponents(new LowerCaseTokenizer());
   }
+
+  @Override
+  protected TokenStream normalize(String fieldName, TokenStream in) {
+    return new LowerCaseFilter(in);
+  }
 }

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e92a38af/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/StopAnalyzer.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/StopAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/StopAnalyzer.java
index 3fa4982..7d7f532 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/StopAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/StopAnalyzer.java
@@ -25,6 +25,7 @@ import org.apache.lucene.analysis.CharArraySet;
 import org.apache.lucene.analysis.LowerCaseFilter;
 import org.apache.lucene.analysis.StopFilter;
 import org.apache.lucene.analysis.StopwordAnalyzerBase;
+import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.WordlistLoader;
 import org.apache.lucene.analysis.standard.StandardAnalyzer;
@@ -79,5 +80,10 @@ public final class StopAnalyzer extends StopwordAnalyzerBase {
     final Tokenizer source = new LowerCaseTokenizer();
     return new TokenStreamComponents(source, new StopFilter(source, stopwords));
   }
+
+  @Override
+  protected TokenStream normalize(String fieldName, TokenStream in) {
+    return new LowerCaseFilter(in);
+  }
 }
 

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e92a38af/lucene/analysis/common/src/java/org/apache/lucene/analysis/custom/CustomAnalyzer.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/custom/CustomAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/custom/CustomAnalyzer.java
index f2ed01f..b2de5e8 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/custom/CustomAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/custom/CustomAnalyzer.java
@@ -37,6 +37,7 @@ import org.apache.lucene.analysis.util.AbstractAnalysisFactory;
 import org.apache.lucene.analysis.util.CharFilterFactory;
 import org.apache.lucene.analysis.util.ClasspathResourceLoader;
 import org.apache.lucene.analysis.util.FilesystemResourceLoader;
+import org.apache.lucene.analysis.util.MultiTermAwareComponent;
 import org.apache.lucene.analysis.util.ResourceLoader;
 import org.apache.lucene.analysis.util.ResourceLoaderAware;
 import org.apache.lucene.analysis.util.TokenFilterFactory;
@@ -118,15 +119,38 @@ public final class CustomAnalyzer extends Analyzer {
   }
 
   @Override
+  protected Reader initReaderForNormalization(String fieldName, Reader reader) {
+    for (CharFilterFactory charFilter : charFilters) {
+      if (charFilter instanceof MultiTermAwareComponent) {
+        charFilter = (CharFilterFactory) ((MultiTermAwareComponent) charFilter).getMultiTermComponent();
+        reader = charFilter.create(reader);
+      }
+    }
+    return reader;
+  }
+
+  @Override
   protected TokenStreamComponents createComponents(String fieldName) {
-    final Tokenizer tk = tokenizer.create();
+    final Tokenizer tk = tokenizer.create(attributeFactory());
     TokenStream ts = tk;
     for (final TokenFilterFactory filter : tokenFilters) {
       ts = filter.create(ts);
     }
     return new TokenStreamComponents(tk, ts);
   }
-  
+
+  @Override
+  protected TokenStream normalize(String fieldName, TokenStream in) {
+    TokenStream result = in;
+    for (TokenFilterFactory filter : tokenFilters) {
+      if (filter instanceof MultiTermAwareComponent) {
+        filter = (TokenFilterFactory) ((MultiTermAwareComponent) filter).getMultiTermComponent();
+        result = filter.create(in);
+      }
+    }
+    return result;
+  }
+
   @Override
   public int getPositionIncrementGap(String fieldName) {
     // use default from Analyzer base class if null

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e92a38af/lucene/analysis/common/src/java/org/apache/lucene/analysis/cz/CzechAnalyzer.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/cz/CzechAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/cz/CzechAnalyzer.java
index 9777179..fbb9efa 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/cz/CzechAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/cz/CzechAnalyzer.java
@@ -125,5 +125,12 @@ public final class CzechAnalyzer extends StopwordAnalyzerBase {
     result = new CzechStemFilter(result);
     return new TokenStreamComponents(source, result);
   }
+
+  @Override
+  protected TokenStream normalize(String fieldName, TokenStream in) {
+    TokenStream result = new StandardFilter(in);
+    result = new LowerCaseFilter(result);
+    return result;
+  }
 }
 

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e92a38af/lucene/analysis/common/src/java/org/apache/lucene/analysis/da/DanishAnalyzer.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/da/DanishAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/da/DanishAnalyzer.java
index f9c316d..ccbd9d1 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/da/DanishAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/da/DanishAnalyzer.java
@@ -124,4 +124,11 @@ public final class DanishAnalyzer extends StopwordAnalyzerBase {
     result = new SnowballFilter(result, new DanishStemmer());
     return new TokenStreamComponents(source, result);
   }
+
+  @Override
+  protected TokenStream normalize(String fieldName, TokenStream in) {
+    TokenStream result = new StandardFilter(in);
+    result = new LowerCaseFilter(result);
+    return result;
+  }
 }

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e92a38af/lucene/analysis/common/src/java/org/apache/lucene/analysis/de/GermanAnalyzer.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/de/GermanAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/de/GermanAnalyzer.java
index 790fc48..8a39945 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/de/GermanAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/de/GermanAnalyzer.java
@@ -139,4 +139,12 @@ public final class GermanAnalyzer extends StopwordAnalyzerBase {
     result = new GermanLightStemFilter(result);
     return new TokenStreamComponents(source, result);
   }
+
+  @Override
+  protected TokenStream normalize(String fieldName, TokenStream in) {
+    TokenStream result = new StandardFilter(in);
+    result = new LowerCaseFilter(result);
+    result = new GermanNormalizationFilter(result);
+    return result;
+  }
 }

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e92a38af/lucene/analysis/common/src/java/org/apache/lucene/analysis/el/GreekAnalyzer.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/el/GreekAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/el/GreekAnalyzer.java
index c85b6ec..bd09d25 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/el/GreekAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/el/GreekAnalyzer.java
@@ -104,4 +104,11 @@ public final class GreekAnalyzer extends StopwordAnalyzerBase {
     result = new GreekStemFilter(result);
     return new TokenStreamComponents(source, result);
   }
+
+  @Override
+  protected TokenStream normalize(String fieldName, TokenStream in) {
+    TokenStream result = new StandardFilter(in);
+    result = new GreekLowerCaseFilter(result);
+    return result;
+  }
 }

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e92a38af/lucene/analysis/common/src/java/org/apache/lucene/analysis/en/EnglishAnalyzer.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/en/EnglishAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/en/EnglishAnalyzer.java
index 16dc0c5..94ba43a 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/en/EnglishAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/en/EnglishAnalyzer.java
@@ -107,4 +107,11 @@ public final class EnglishAnalyzer extends StopwordAnalyzerBase {
     result = new PorterStemFilter(result);
     return new TokenStreamComponents(source, result);
   }
+
+  @Override
+  protected TokenStream normalize(String fieldName, TokenStream in) {
+    TokenStream result = new StandardFilter(in);
+    result = new LowerCaseFilter(result);
+    return result;
+  }
 }

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e92a38af/lucene/analysis/common/src/java/org/apache/lucene/analysis/es/SpanishAnalyzer.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/es/SpanishAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/es/SpanishAnalyzer.java
index ab5b6c3..3b21cdd 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/es/SpanishAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/es/SpanishAnalyzer.java
@@ -123,4 +123,11 @@ public final class SpanishAnalyzer extends StopwordAnalyzerBase {
     result = new SpanishLightStemFilter(result);
     return new TokenStreamComponents(source, result);
   }
+
+  @Override
+  protected TokenStream normalize(String fieldName, TokenStream in) {
+    TokenStream result = new StandardFilter(in);
+    result = new LowerCaseFilter(result);
+    return result;
+  }
 }

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e92a38af/lucene/analysis/common/src/java/org/apache/lucene/analysis/eu/BasqueAnalyzer.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/eu/BasqueAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/eu/BasqueAnalyzer.java
index cff2da0..4bc1ba7 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/eu/BasqueAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/eu/BasqueAnalyzer.java
@@ -121,4 +121,11 @@ public final class BasqueAnalyzer extends StopwordAnalyzerBase {
     result = new SnowballFilter(result, new BasqueStemmer());
     return new TokenStreamComponents(source, result);
   }
+
+  @Override
+  protected TokenStream normalize(String fieldName, TokenStream in) {
+    TokenStream result = new StandardFilter(in);
+    result = new LowerCaseFilter(result);
+    return result;
+  }
 }

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e92a38af/lucene/analysis/common/src/java/org/apache/lucene/analysis/fa/PersianAnalyzer.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/fa/PersianAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/fa/PersianAnalyzer.java
index f29dfd3..9aebc2d 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/fa/PersianAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/fa/PersianAnalyzer.java
@@ -29,6 +29,7 @@ import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.ar.ArabicNormalizationFilter;
 import org.apache.lucene.analysis.core.DecimalDigitFilter;
+import org.apache.lucene.analysis.standard.StandardFilter;
 import org.apache.lucene.analysis.standard.StandardTokenizer;
 
 /**
@@ -125,7 +126,18 @@ public final class PersianAnalyzer extends StopwordAnalyzerBase {
      */
     return new TokenStreamComponents(source, new StopFilter(result, stopwords));
   }
-  
+
+  @Override
+  protected TokenStream normalize(String fieldName, TokenStream in) {
+    TokenStream result = new StandardFilter(in);
+    result = new LowerCaseFilter(result);
+    result = new DecimalDigitFilter(result);
+    result = new ArabicNormalizationFilter(result);
+    /* additional persian-specific normalization */
+    result = new PersianNormalizationFilter(result);
+    return result;
+  }
+
   /** 
    * Wraps the Reader with {@link PersianCharFilter}
    */

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e92a38af/lucene/analysis/common/src/java/org/apache/lucene/analysis/fi/FinnishAnalyzer.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/fi/FinnishAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/fi/FinnishAnalyzer.java
index 6b00101..69cc537 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/fi/FinnishAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/fi/FinnishAnalyzer.java
@@ -124,4 +124,11 @@ public final class FinnishAnalyzer extends StopwordAnalyzerBase {
     result = new SnowballFilter(result, new FinnishStemmer());
     return new TokenStreamComponents(source, result);
   }
+
+  @Override
+  protected TokenStream normalize(String fieldName, TokenStream in) {
+    TokenStream result = new StandardFilter(in);
+    result = new LowerCaseFilter(result);
+    return result;
+  }
 }

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e92a38af/lucene/analysis/common/src/java/org/apache/lucene/analysis/fr/FrenchAnalyzer.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/fr/FrenchAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/fr/FrenchAnalyzer.java
index 5f90246..2e072be 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/fr/FrenchAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/fr/FrenchAnalyzer.java
@@ -144,5 +144,13 @@ public final class FrenchAnalyzer extends StopwordAnalyzerBase {
     result = new FrenchLightStemFilter(result);
     return new TokenStreamComponents(source, result);
   }
+
+  @Override
+  protected TokenStream normalize(String fieldName, TokenStream in) {
+    TokenStream result = new StandardFilter(in);
+    result = new ElisionFilter(result, DEFAULT_ARTICLES);
+    result = new LowerCaseFilter(result);
+    return result;
+  }
 }
 

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e92a38af/lucene/analysis/common/src/java/org/apache/lucene/analysis/ga/IrishAnalyzer.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/ga/IrishAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/ga/IrishAnalyzer.java
index 1ca3455..3ae366d 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/ga/IrishAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/ga/IrishAnalyzer.java
@@ -141,4 +141,12 @@ public final class IrishAnalyzer extends StopwordAnalyzerBase {
     result = new SnowballFilter(result, new IrishStemmer());
     return new TokenStreamComponents(source, result);
   }
+
+  @Override
+  protected TokenStream normalize(String fieldName, TokenStream in) {
+    TokenStream result = new StandardFilter(in);
+    result = new ElisionFilter(result, DEFAULT_ARTICLES);
+    result = new IrishLowerCaseFilter(result);
+    return result;
+  }
 }

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e92a38af/lucene/analysis/common/src/java/org/apache/lucene/analysis/gl/GalicianAnalyzer.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/gl/GalicianAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/gl/GalicianAnalyzer.java
index 372a6ec..4f70596 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/gl/GalicianAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/gl/GalicianAnalyzer.java
@@ -122,4 +122,11 @@ public final class GalicianAnalyzer extends StopwordAnalyzerBase {
     result = new GalicianStemFilter(result);
     return new TokenStreamComponents(source, result);
   }
+
+  @Override
+  protected TokenStream normalize(String fieldName, TokenStream in) {
+    TokenStream result = new StandardFilter(in);
+    result = new LowerCaseFilter(result);
+    return result;
+  }
 }

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e92a38af/lucene/analysis/common/src/java/org/apache/lucene/analysis/hi/HindiAnalyzer.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/hi/HindiAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/hi/HindiAnalyzer.java
index 8e4868b..f339295 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/hi/HindiAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/hi/HindiAnalyzer.java
@@ -29,6 +29,7 @@ import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.core.DecimalDigitFilter;
 import org.apache.lucene.analysis.in.IndicNormalizationFilter;
 import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
+import org.apache.lucene.analysis.standard.StandardFilter;
 import org.apache.lucene.analysis.standard.StandardTokenizer;
 
 /**
@@ -125,4 +126,14 @@ public final class HindiAnalyzer extends StopwordAnalyzerBase {
     result = new HindiStemFilter(result);
     return new TokenStreamComponents(source, result);
   }
+
+  @Override
+  protected TokenStream normalize(String fieldName, TokenStream in) {
+    TokenStream result = new StandardFilter(in);
+    result = new LowerCaseFilter(result);
+    result = new DecimalDigitFilter(result);
+    result = new IndicNormalizationFilter(result);
+    result = new HindiNormalizationFilter(result);
+    return result;
+  }
 }

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e92a38af/lucene/analysis/common/src/java/org/apache/lucene/analysis/hu/HungarianAnalyzer.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/hu/HungarianAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/hu/HungarianAnalyzer.java
index 0615bdc..e980f5a 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/hu/HungarianAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/hu/HungarianAnalyzer.java
@@ -124,4 +124,11 @@ public final class HungarianAnalyzer extends StopwordAnalyzerBase {
     result = new SnowballFilter(result, new HungarianStemmer());
     return new TokenStreamComponents(source, result);
   }
+
+  @Override
+  protected TokenStream normalize(String fieldName, TokenStream in) {
+    TokenStream result = new StandardFilter(in);
+    result = new LowerCaseFilter(result);
+    return result;
+  }
 }

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e92a38af/lucene/analysis/common/src/java/org/apache/lucene/analysis/hy/ArmenianAnalyzer.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/hy/ArmenianAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/hy/ArmenianAnalyzer.java
index 8c04639..95506e1 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/hy/ArmenianAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/hy/ArmenianAnalyzer.java
@@ -121,4 +121,11 @@ public final class ArmenianAnalyzer extends StopwordAnalyzerBase {
     result = new SnowballFilter(result, new ArmenianStemmer());
     return new TokenStreamComponents(source, result);
   }
+
+  @Override
+  protected TokenStream normalize(String fieldName, TokenStream in) {
+    TokenStream result = new StandardFilter(in);
+    result = new LowerCaseFilter(result);
+    return result;
+  }
 }

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e92a38af/lucene/analysis/common/src/java/org/apache/lucene/analysis/id/IndonesianAnalyzer.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/id/IndonesianAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/id/IndonesianAnalyzer.java
index fc9b4d2..9804bea 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/id/IndonesianAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/id/IndonesianAnalyzer.java
@@ -119,4 +119,11 @@ public final class IndonesianAnalyzer extends StopwordAnalyzerBase {
     }
     return new TokenStreamComponents(source, new IndonesianStemFilter(result));
   }
+
+  @Override
+  protected TokenStream normalize(String fieldName, TokenStream in) {
+    TokenStream result = new StandardFilter(in);
+    result = new LowerCaseFilter(result);
+    return result;
+  }
 }

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e92a38af/lucene/analysis/common/src/java/org/apache/lucene/analysis/it/ItalianAnalyzer.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/it/ItalianAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/it/ItalianAnalyzer.java
index a18aa5d..32f4e30 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/it/ItalianAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/it/ItalianAnalyzer.java
@@ -133,4 +133,12 @@ public final class ItalianAnalyzer extends StopwordAnalyzerBase {
     result = new ItalianLightStemFilter(result);
     return new TokenStreamComponents(source, result);
   }
+
+  @Override
+  protected TokenStream normalize(String fieldName, TokenStream in) {
+    TokenStream result = new StandardFilter(in);
+    result = new ElisionFilter(result, DEFAULT_ARTICLES);
+    result = new LowerCaseFilter(result);
+    return result;
+  }
 }

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e92a38af/lucene/analysis/common/src/java/org/apache/lucene/analysis/lt/LithuanianAnalyzer.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/lt/LithuanianAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/lt/LithuanianAnalyzer.java
index 5e24cf9..4eccc51 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/lt/LithuanianAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/lt/LithuanianAnalyzer.java
@@ -121,4 +121,11 @@ public final class LithuanianAnalyzer extends StopwordAnalyzerBase {
     result = new SnowballFilter(result, new LithuanianStemmer());
     return new TokenStreamComponents(source, result);
   }
+
+  @Override
+  protected TokenStream normalize(String fieldName, TokenStream in) {
+    TokenStream result = new StandardFilter(in);
+    result = new LowerCaseFilter(result);
+    return result;
+  }
 }

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e92a38af/lucene/analysis/common/src/java/org/apache/lucene/analysis/lv/LatvianAnalyzer.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/lv/LatvianAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/lv/LatvianAnalyzer.java
index 0a016af..1b08b3b 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/lv/LatvianAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/lv/LatvianAnalyzer.java
@@ -122,4 +122,11 @@ public final class LatvianAnalyzer extends StopwordAnalyzerBase {
     result = new LatvianStemFilter(result);
     return new TokenStreamComponents(source, result);
   }
+
+  @Override
+  protected TokenStream normalize(String fieldName, TokenStream in) {
+    TokenStream result = new StandardFilter(in);
+    result = new LowerCaseFilter(result);
+    return result;
+  }
 }

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e92a38af/lucene/analysis/common/src/java/org/apache/lucene/analysis/nl/DutchAnalyzer.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/nl/DutchAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/nl/DutchAnalyzer.java
index 0391425..900d9c6 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/nl/DutchAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/nl/DutchAnalyzer.java
@@ -159,4 +159,11 @@ public final class DutchAnalyzer extends Analyzer {
     result = new SnowballFilter(result, new org.tartarus.snowball.ext.DutchStemmer());
     return new TokenStreamComponents(source, result);
   }
+
+  @Override
+  protected TokenStream normalize(String fieldName, TokenStream in) {
+    TokenStream result = new StandardFilter(in);
+    result = new LowerCaseFilter(result);
+    return result;
+  }
 }

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e92a38af/lucene/analysis/common/src/java/org/apache/lucene/analysis/no/NorwegianAnalyzer.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/no/NorwegianAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/no/NorwegianAnalyzer.java
index c413793..3570ad4 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/no/NorwegianAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/no/NorwegianAnalyzer.java
@@ -124,5 +124,12 @@ public final class NorwegianAnalyzer extends StopwordAnalyzerBase {
     result = new SnowballFilter(result, new NorwegianStemmer());
     return new TokenStreamComponents(source, result);
   }
+
+  @Override
+  protected TokenStream normalize(String fieldName, TokenStream in) {
+    TokenStream result = new StandardFilter(in);
+    result = new LowerCaseFilter(result);
+    return result;
+  }
 }
 

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e92a38af/lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseAnalyzer.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseAnalyzer.java
index 769e142..8f54803 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseAnalyzer.java
@@ -123,4 +123,11 @@ public final class PortugueseAnalyzer extends StopwordAnalyzerBase {
     result = new PortugueseLightStemFilter(result);
     return new TokenStreamComponents(source, result);
   }
+
+  @Override
+  protected TokenStream normalize(String fieldName, TokenStream in) {
+    TokenStream result = new StandardFilter(in);
+    result = new LowerCaseFilter(result);
+    return result;
+  }
 }

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e92a38af/lucene/analysis/common/src/java/org/apache/lucene/analysis/ro/RomanianAnalyzer.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/ro/RomanianAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/ro/RomanianAnalyzer.java
index 06ff999..1b74184 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/ro/RomanianAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/ro/RomanianAnalyzer.java
@@ -126,4 +126,11 @@ public final class RomanianAnalyzer extends StopwordAnalyzerBase {
     result = new SnowballFilter(result, new RomanianStemmer());
     return new TokenStreamComponents(source, result);
   }
+
+  @Override
+  protected TokenStream normalize(String fieldName, TokenStream in) {
+    TokenStream result = new StandardFilter(in);
+    result = new LowerCaseFilter(result);
+    return result;
+  }
 }

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e92a38af/lucene/analysis/common/src/java/org/apache/lucene/analysis/ru/RussianAnalyzer.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/ru/RussianAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/ru/RussianAnalyzer.java
index dfe8ef3..76bf495 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/ru/RussianAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/ru/RussianAnalyzer.java
@@ -121,4 +121,11 @@ public final class RussianAnalyzer extends StopwordAnalyzerBase {
       result = new SnowballFilter(result, new org.tartarus.snowball.ext.RussianStemmer());
       return new TokenStreamComponents(source, result);
     }
+
+    @Override
+    protected TokenStream normalize(String fieldName, TokenStream in) {
+      TokenStream result = new StandardFilter(in);
+      result = new LowerCaseFilter(result);
+      return result;
+    }
 }

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e92a38af/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicAnalyzer.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicAnalyzer.java
index dc6c118..ef2ef7e 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicAnalyzer.java
@@ -100,4 +100,9 @@ public final class ClassicAnalyzer extends StopwordAnalyzerBase {
       }
     };
   }
+
+  @Override
+  protected TokenStream normalize(String fieldName, TokenStream in) {
+    return new LowerCaseFilter(in);
+  }
 }

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e92a38af/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailAnalyzer.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailAnalyzer.java
index 9994884..fe71b7e 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailAnalyzer.java
@@ -97,4 +97,9 @@ public final class UAX29URLEmailAnalyzer extends StopwordAnalyzerBase {
       }
     };
   }
+
+  @Override
+  protected TokenStream normalize(String fieldName, TokenStream in) {
+    return new LowerCaseFilter(in);
+  }
 }

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e92a38af/lucene/analysis/common/src/java/org/apache/lucene/analysis/sv/SwedishAnalyzer.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/sv/SwedishAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/sv/SwedishAnalyzer.java
index fd2aa2e..3896d3e 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/sv/SwedishAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/sv/SwedishAnalyzer.java
@@ -124,4 +124,11 @@ public final class SwedishAnalyzer extends StopwordAnalyzerBase {
     result = new SnowballFilter(result, new SwedishStemmer());
     return new TokenStreamComponents(source, result);
   }
+
+  @Override
+  protected TokenStream normalize(String fieldName, TokenStream in) {
+    TokenStream result = new StandardFilter(in);
+    result = new LowerCaseFilter(result);
+    return result;
+  }
 }

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e92a38af/lucene/analysis/common/src/java/org/apache/lucene/analysis/th/ThaiAnalyzer.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/th/ThaiAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/th/ThaiAnalyzer.java
index 11f3f77..c1426b8 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/th/ThaiAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/th/ThaiAnalyzer.java
@@ -102,4 +102,11 @@ public final class ThaiAnalyzer extends StopwordAnalyzerBase {
     result = new StopFilter(result, stopwords);
     return new TokenStreamComponents(source, result);
   }
+
+  @Override
+  protected TokenStream normalize(String fieldName, TokenStream in) {
+    TokenStream result = new LowerCaseFilter(in);
+    result = new DecimalDigitFilter(result);
+    return result;
+  }
 }

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e92a38af/lucene/analysis/common/src/java/org/apache/lucene/analysis/tr/TurkishAnalyzer.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/tr/TurkishAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/tr/TurkishAnalyzer.java
index a21495f..719e434 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/tr/TurkishAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/tr/TurkishAnalyzer.java
@@ -127,4 +127,11 @@ public final class TurkishAnalyzer extends StopwordAnalyzerBase {
     result = new SnowballFilter(result, new TurkishStemmer());
     return new TokenStreamComponents(source, result);
   }
+
+  @Override
+  protected TokenStream normalize(String fieldName, TokenStream in) {
+    TokenStream result = new StandardFilter(in);
+    result = new TurkishLowerCaseFilter(result);
+    return result;
+  }
 }

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e92a38af/lucene/analysis/common/src/java/org/apache/lucene/collation/CollationKeyAnalyzer.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/collation/CollationKeyAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/collation/CollationKeyAnalyzer.java
index f7b15f6..ea98731 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/collation/CollationKeyAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/collation/CollationKeyAnalyzer.java
@@ -20,6 +20,8 @@ package org.apache.lucene.collation;
 
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.core.KeywordTokenizer;
+import org.apache.lucene.util.AttributeFactory;
+
 import java.text.Collator;
 
 /**
@@ -83,6 +85,11 @@ public final class CollationKeyAnalyzer extends Analyzer {
   }
 
   @Override
+  protected AttributeFactory attributeFactory() {
+    return factory;
+  }
+
+  @Override
   protected TokenStreamComponents createComponents(String fieldName) {
     KeywordTokenizer tokenizer = new KeywordTokenizer(factory, KeywordTokenizer.DEFAULT_BUFFER_SIZE);
     return new TokenStreamComponents(tokenizer, tokenizer);

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e92a38af/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestAllAnalyzersHaveFactories.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestAllAnalyzersHaveFactories.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestAllAnalyzersHaveFactories.java
index d826a60..7099566 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestAllAnalyzersHaveFactories.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestAllAnalyzersHaveFactories.java
@@ -35,6 +35,7 @@ import org.apache.lucene.analysis.MockCharFilter;
 import org.apache.lucene.analysis.MockFixedLengthPayloadFilter;
 import org.apache.lucene.analysis.MockGraphTokenFilter;
 import org.apache.lucene.analysis.MockHoleInjectingTokenFilter;
+import org.apache.lucene.analysis.MockLowerCaseFilter;
 import org.apache.lucene.analysis.MockRandomLookaheadTokenFilter;
 import org.apache.lucene.analysis.MockSynonymFilter;
 import org.apache.lucene.analysis.MockTokenFilter;
@@ -75,6 +76,7 @@ public class TestAllAnalyzersHaveFactories extends LuceneTestCase {
       MockFixedLengthPayloadFilter.class,
       MockGraphTokenFilter.class,
       MockHoleInjectingTokenFilter.class,
+      MockLowerCaseFilter.class,
       MockRandomLookaheadTokenFilter.class,
       MockSynonymFilter.class,
       MockTokenFilter.class,

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e92a38af/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestAnalyzers.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestAnalyzers.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestAnalyzers.java
index 8f7f2cd..6d514d1 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestAnalyzers.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestAnalyzers.java
@@ -52,6 +52,7 @@ public class TestAnalyzers extends BaseTokenStreamTestCase {
                      new String[] { "b" });
     assertAnalyzesTo(a, "\"QUOTED\" word", 
                      new String[] { "quoted", "word" });
+    assertEquals(new BytesRef("\"\\�3[]()! cz@"), a.normalize("dummy", "\"\\�3[]()! Cz@"));
     a.close();
   }
 
@@ -73,6 +74,7 @@ public class TestAnalyzers extends BaseTokenStreamTestCase {
                      new String[] { "2B" });
     assertAnalyzesTo(a, "\"QUOTED\" word", 
                      new String[] { "\"QUOTED\"", "word" });
+    assertEquals(new BytesRef("\"\\�3[]()! Cz@"), a.normalize("dummy", "\"\\�3[]()! Cz@"));
     a.close();
   }
 
@@ -82,6 +84,8 @@ public class TestAnalyzers extends BaseTokenStreamTestCase {
                      new String[] { "foo", "bar", "foo", "bar" });
     assertAnalyzesTo(a, "foo a bar such FOO THESE BAR", 
                      new String[] { "foo", "bar", "foo", "bar" });
+    assertEquals(new BytesRef("\"\\�3[]()! cz@"), a.normalize("dummy", "\"\\�3[]()! Cz@"));
+    assertEquals(new BytesRef("the"), a.normalize("dummy", "the"));
     a.close();
   }
 

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e92a38af/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestRandomChains.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestRandomChains.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestRandomChains.java
index 4effc79..25ca7a3 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestRandomChains.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestRandomChains.java
@@ -928,6 +928,7 @@ public class TestRandomChains extends BaseTokenStreamTestCase {
           System.out.println("Creating random analyzer:" + a);
         }
         try {
+          checkNormalize(a);
           checkRandomData(random, a, 500*RANDOM_MULTIPLIER, 20, false,
               false /* We already validate our own offsets... */);
         } catch (Throwable e) {
@@ -937,7 +938,14 @@ public class TestRandomChains extends BaseTokenStreamTestCase {
       }
     }
   }
-  
+
+  public void checkNormalize(Analyzer a) {
+    // normalization should not modify characters that may be used for wildcards
+    // or regular expressions
+    String s = "([0-9]+)?*";
+    assertEquals(s, a.normalize("dummy", s).utf8ToString());
+  }
+
   // we might regret this decision...
   public void testRandomChainsWithLargeStrings() throws Throwable {
     int numIterations = TEST_NIGHTLY ? atLeast(20) : 3;

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e92a38af/lucene/analysis/common/src/test/org/apache/lucene/analysis/custom/TestCustomAnalyzer.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/custom/TestCustomAnalyzer.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/custom/TestCustomAnalyzer.java
index 5160dab..aa69b70 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/custom/TestCustomAnalyzer.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/custom/TestCustomAnalyzer.java
@@ -17,6 +17,8 @@
 package org.apache.lucene.analysis.custom;
 
 
+import java.io.IOException;
+import java.io.Reader;
 import java.nio.file.Paths;
 import java.util.Collections;
 import java.util.HashMap;
@@ -24,16 +26,25 @@ import java.util.List;
 import java.util.Map;
 
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.CharFilter;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.charfilter.HTMLStripCharFilterFactory;
+import org.apache.lucene.analysis.core.KeywordTokenizerFactory;
 import org.apache.lucene.analysis.core.LowerCaseFilterFactory;
+import org.apache.lucene.analysis.core.LowerCaseTokenizer;
 import org.apache.lucene.analysis.core.StopFilterFactory;
 import org.apache.lucene.analysis.core.WhitespaceTokenizerFactory;
 import org.apache.lucene.analysis.miscellaneous.ASCIIFoldingFilterFactory;
 import org.apache.lucene.analysis.standard.ClassicTokenizerFactory;
 import org.apache.lucene.analysis.standard.StandardTokenizerFactory;
+import org.apache.lucene.analysis.util.AbstractAnalysisFactory;
 import org.apache.lucene.analysis.util.CharFilterFactory;
+import org.apache.lucene.analysis.util.MultiTermAwareComponent;
 import org.apache.lucene.analysis.util.TokenFilterFactory;
 import org.apache.lucene.analysis.util.TokenizerFactory;
+import org.apache.lucene.util.AttributeFactory;
+import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.SetOnce.AlreadySetException;
 import org.apache.lucene.util.Version;
 
@@ -336,4 +347,136 @@ public class TestCustomAnalyzer extends BaseTokenStreamTestCase {
     });
   }
 
+  private static class DummyCharFilter extends CharFilter {
+
+    private final char match, repl;
+
+    public DummyCharFilter(Reader input, char match, char repl) {
+      super(input);
+      this.match = match;
+      this.repl = repl;
+    }
+
+    @Override
+    protected int correct(int currentOff) {
+      return currentOff;
+    }
+
+    @Override
+    public int read(char[] cbuf, int off, int len) throws IOException {
+      final int read = input.read(cbuf, off, len);
+      for (int i = 0; i < read; ++i) {
+        if (cbuf[off+i] == match) {
+          cbuf[off+i] = repl;
+        }
+      }
+      return read;
+    }
+    
+  }
+
+  public static class DummyCharFilterFactory extends CharFilterFactory {
+
+    private final char match, repl;
+
+    public DummyCharFilterFactory(Map<String,String> args) {
+      this(args, '0', '1');
+    }
+
+    DummyCharFilterFactory(Map<String,String> args, char match, char repl) {
+      super(args);
+      this.match = match;
+      this.repl = repl;
+    }
+
+    @Override
+    public Reader create(Reader input) {
+      return new DummyCharFilter(input, match, repl);
+    }
+    
+  }
+
+  public static class DummyMultiTermAwareCharFilterFactory extends DummyCharFilterFactory implements MultiTermAwareComponent {
+
+    public DummyMultiTermAwareCharFilterFactory(Map<String,String> args) {
+      super(args);
+    }
+
+    @Override
+    public AbstractAnalysisFactory getMultiTermComponent() {
+      return new DummyCharFilterFactory(Collections.emptyMap(), '0', '2');
+    }
+
+  }
+
+  public static class DummyTokenizerFactory extends TokenizerFactory {
+
+    public DummyTokenizerFactory(Map<String,String> args) {
+      super(args);
+    }
+
+    @Override
+    public Tokenizer create(AttributeFactory factory) {
+      return new LowerCaseTokenizer(factory);
+    }
+
+  }
+
+  public static class DummyMultiTermAwareTokenizerFactory extends DummyTokenizerFactory implements MultiTermAwareComponent {
+
+    public DummyMultiTermAwareTokenizerFactory(Map<String,String> args) {
+      super(args);
+    }
+
+    @Override
+    public AbstractAnalysisFactory getMultiTermComponent() {
+      return new KeywordTokenizerFactory(getOriginalArgs());
+    }
+    
+  }
+
+  public static class DummyTokenFilterFactory extends TokenFilterFactory {
+
+    public DummyTokenFilterFactory(Map<String,String> args) {
+      super(args);
+    }
+
+    @Override
+    public TokenStream create(TokenStream input) {
+      return input;
+    }
+    
+  }
+
+  public static class DummyMultiTermAwareTokenFilterFactory extends DummyTokenFilterFactory implements MultiTermAwareComponent {
+
+    public DummyMultiTermAwareTokenFilterFactory(Map<String,String> args) {
+      super(args);
+    }
+
+    @Override
+    public AbstractAnalysisFactory getMultiTermComponent() {
+      return new ASCIIFoldingFilterFactory(Collections.emptyMap());
+    }
+    
+  }
+
+  public void testNormalization() throws IOException {
+    CustomAnalyzer analyzer1 = CustomAnalyzer.builder()
+        // none of these components are multi-term aware so they should not be applied
+        .withTokenizer(DummyTokenizerFactory.class, Collections.emptyMap())
+        .addCharFilter(DummyCharFilterFactory.class, Collections.emptyMap())
+        .addTokenFilter(DummyTokenFilterFactory.class, Collections.emptyMap())
+        .build();
+    assertEquals(new BytesRef("0�"), analyzer1.normalize("dummy", "0�"));
+
+    CustomAnalyzer analyzer2 = CustomAnalyzer.builder()
+        // these components are multi-term aware so they should be applied
+        .withTokenizer(DummyMultiTermAwareTokenizerFactory.class, Collections.emptyMap())
+        .addCharFilter(DummyMultiTermAwareCharFilterFactory.class, Collections.emptyMap())
+        .addTokenFilter(DummyMultiTermAwareTokenFilterFactory.class, Collections.emptyMap())
+        .build();
+    assertEquals(new BytesRef("2A"), analyzer2.normalize("dummy", "0�"));
+  }
+
 }

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e92a38af/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseAnalyzer.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseAnalyzer.java b/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseAnalyzer.java
index 46d40b1..06e119e 100644
--- a/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseAnalyzer.java
+++ b/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseAnalyzer.java
@@ -94,4 +94,11 @@ public class JapaneseAnalyzer extends StopwordAnalyzerBase {
     stream = new LowerCaseFilter(stream);
     return new TokenStreamComponents(tokenizer, stream);
   }
+
+  @Override
+  protected TokenStream normalize(String fieldName, TokenStream in) {
+    TokenStream result = new CJKWidthFilter(in);
+    result = new LowerCaseFilter(result);
+    return result;
+  }
 }

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e92a38af/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/morfologik/MorfologikAnalyzer.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/morfologik/MorfologikAnalyzer.java b/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/morfologik/MorfologikAnalyzer.java
index 091acfd..0caca35 100644
--- a/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/morfologik/MorfologikAnalyzer.java
+++ b/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/morfologik/MorfologikAnalyzer.java
@@ -23,6 +23,7 @@ import morfologik.stemming.Dictionary;
 import morfologik.stemming.polish.PolishStemmer;
 
 import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.standard.StandardFilter;
 import org.apache.lucene.analysis.standard.StandardTokenizer;
@@ -69,4 +70,9 @@ public class MorfologikAnalyzer extends Analyzer {
         src, 
         new MorfologikFilter(new StandardFilter(src), dictionary));
   }
+
+  @Override
+  protected TokenStream normalize(String fieldName, TokenStream in) {
+    return new StandardFilter(in);
+  }
 }

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e92a38af/lucene/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/SmartChineseAnalyzer.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/SmartChineseAnalyzer.java b/lucene/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/SmartChineseAnalyzer.java
index 5f0347b..f604d4b 100644
--- a/lucene/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/SmartChineseAnalyzer.java
+++ b/lucene/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/SmartChineseAnalyzer.java
@@ -22,6 +22,7 @@ import java.util.Set;
 
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.CharArraySet;
+import org.apache.lucene.analysis.LowerCaseFilter;
 import org.apache.lucene.analysis.StopFilter;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
@@ -139,4 +140,9 @@ public final class SmartChineseAnalyzer extends Analyzer {
     }
     return new TokenStreamComponents(tokenizer, result);
   }
+
+  @Override
+  protected TokenStream normalize(String fieldName, TokenStream in) {
+    return new LowerCaseFilter(in);
+  }
 }

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e92a38af/lucene/analysis/stempel/src/java/org/apache/lucene/analysis/pl/PolishAnalyzer.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/stempel/src/java/org/apache/lucene/analysis/pl/PolishAnalyzer.java b/lucene/analysis/stempel/src/java/org/apache/lucene/analysis/pl/PolishAnalyzer.java
index 6ed4fda..2d3ef4c 100644
--- a/lucene/analysis/stempel/src/java/org/apache/lucene/analysis/pl/PolishAnalyzer.java
+++ b/lucene/analysis/stempel/src/java/org/apache/lucene/analysis/pl/PolishAnalyzer.java
@@ -146,4 +146,11 @@ public final class PolishAnalyzer extends StopwordAnalyzerBase {
     result = new StempelFilter(result, new StempelStemmer(stemTable));
     return new TokenStreamComponents(source, result);
   }
+
+  @Override
+  protected TokenStream normalize(String fieldName, TokenStream in) {
+    TokenStream result = new StandardFilter(in);
+    result = new LowerCaseFilter(result);
+    return result;
+  }
 }

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e92a38af/lucene/core/src/java/org/apache/lucene/analysis/Analyzer.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/analysis/Analyzer.java b/lucene/core/src/java/org/apache/lucene/analysis/Analyzer.java
index cce740d..0d60d24 100644
--- a/lucene/core/src/java/org/apache/lucene/analysis/Analyzer.java
+++ b/lucene/core/src/java/org/apache/lucene/analysis/Analyzer.java
@@ -18,11 +18,18 @@ package org.apache.lucene.analysis;
 
 
 import java.io.Closeable;
+import java.io.IOException;
 import java.io.Reader;
+import java.io.StringReader;
 import java.util.HashMap;
 import java.util.Map;
 
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
+import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute;
 import org.apache.lucene.store.AlreadyClosedException;
+import org.apache.lucene.util.AttributeFactory;
+import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.CloseableThreadLocal;
 import org.apache.lucene.util.Version;
 
@@ -44,6 +51,12 @@ import org.apache.lucene.util.Version;
  *     filter = new BarFilter(filter);
  *     return new TokenStreamComponents(source, filter);
  *   }
+ *   {@literal @Override}
+ *   protected TokenStream normalize(TokenStream in) {
+ *     // Assuming FooFilter is about normalization and BarFilter is about
+ *     // stemming, only FooFilter should be applied
+ *     return new FooFilter(in);
+ *   }
  * };
  * </pre>
  * For more examples, see the {@link org.apache.lucene.analysis Analysis package documentation}.
@@ -108,6 +121,15 @@ public abstract class Analyzer implements Closeable {
   protected abstract TokenStreamComponents createComponents(String fieldName);
 
   /**
+   * Wrap the given {@link TokenStream} in order to apply normalization filters.
+   * The default implementation returns the {@link TokenStream} as-is. This is
+   * used by {@link #normalize(String, String)}.
+   */
+  protected TokenStream normalize(String fieldName, TokenStream in) {
+    return in;
+  }
+
+  /**
    * Returns a TokenStream suitable for <code>fieldName</code>, tokenizing
    * the contents of <code>reader</code>.
    * <p>
@@ -181,7 +203,65 @@ public abstract class Analyzer implements Closeable {
     components.reusableStringReader = strReader;
     return components.getTokenStream();
   }
-    
+
+  /**
+   * Normalize a string down to the representation that it would have in the
+   * index.
+   * <p>
+   * This is typically used by query parsers in order to generate a query on
+   * a given term, without tokenizing or stemming, which are undesirable if
+   * the string to analyze is a partial word (eg. in case of a wildcard or
+   * fuzzy query).
+   * <p>
+   * This method uses {@link #initReaderForNormalization(String, Reader)} in
+   * order to apply necessary character-level normalization and then
+   * {@link #normalize(String, TokenStream)} in order to apply the normalizing
+   * token filters.
+   */
+  public final BytesRef normalize(final String fieldName, final String text) {
+    try {
+      // apply char filters
+      final String filteredText;
+      try (Reader reader = new StringReader(text)) {
+        Reader filterReader = initReaderForNormalization(fieldName, reader);
+        char[] buffer = new char[64];
+        StringBuilder builder = new StringBuilder();
+        for (;;) {
+          final int read = filterReader.read(buffer, 0, buffer.length);
+          if (read == -1) {
+            break;
+          }
+          builder.append(buffer, 0, read);
+        }
+        filteredText = builder.toString();
+      } catch (IOException e) {
+        throw new IllegalStateException("Normalization threw an unexpected exeption", e);
+      }
+
+      final AttributeFactory attributeFactory = attributeFactory();
+      try (TokenStream ts = normalize(fieldName,
+          new StringTokenStream(attributeFactory, filteredText, text.length()))) {
+        final TermToBytesRefAttribute termAtt = ts.addAttribute(TermToBytesRefAttribute.class);
+        ts.reset();
+        if (ts.incrementToken() == false) {
+          throw new IllegalStateException("The normalization token stream is "
+              + "expected to produce exactly 1 token, but got 0 for analyzer "
+              + this + " and input \"" + text + "\"");
+        }
+        final BytesRef term = BytesRef.deepCopyOf(termAtt.getBytesRef());
+        if (ts.incrementToken()) {
+          throw new IllegalStateException("The normalization token stream is "
+              + "expected to produce exactly 1 token, but got 2+ for analyzer "
+              + this + " and input \"" + text + "\"");
+        }
+        ts.end();
+        return term;
+      }
+    } catch (IOException e) {
+      throw new IllegalStateException("Normalization threw an unexpected exeption", e);
+    }
+  }
+
   /**
    * Override this if you want to add a CharFilter chain.
    * <p>
@@ -196,6 +276,22 @@ public abstract class Analyzer implements Closeable {
     return reader;
   }
 
+  /** Wrap the given {@link Reader} with {@link CharFilter}s that make sense
+   *  for normalization. This is typically a subset of the {@link CharFilter}s
+   *  that are applied in {@link #initReader(String, Reader)}. This is used by
+   *  {@link #normalize(String, String)}. */
+  protected Reader initReaderForNormalization(String fieldName, Reader reader) {
+    return reader;
+  }
+
+  /** Return the {@link AttributeFactory} to be used for
+   *  {@link #tokenStream analysis} and
+   *  {@link #normalize(String, String) normalization}. The default
+   *  implementation returns {@link AttributeFactory#DEFAULT_ATTRIBUTE_FACTORY}. */
+  protected AttributeFactory attributeFactory() {
+    return AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY;
+  }
+
   /**
    * Invoked before indexing a IndexableField instance if
    * terms have already been added to that field.  This allows custom
@@ -435,4 +531,41 @@ public abstract class Analyzer implements Closeable {
     }
   };
 
+  private static final class StringTokenStream extends TokenStream {
+
+    private final String value;
+    private final int length;
+    private boolean used = true;
+    private final CharTermAttribute termAttribute = addAttribute(CharTermAttribute.class);
+    private final OffsetAttribute offsetAttribute = addAttribute(OffsetAttribute.class);
+
+    StringTokenStream(AttributeFactory attributeFactory, String value, int length) {
+      super(attributeFactory);
+      this.value = value;
+      this.length = length;
+    }
+
+    @Override
+    public void reset() {
+      used = false;
+    }
+
+    @Override
+    public boolean incrementToken() {
+      if (used) {
+        return false;
+      }
+      clearAttributes();
+      termAttribute.append(value);
+      offsetAttribute.setOffset(0, length);
+      used = true;
+      return true;
+    }
+
+    @Override
+    public void end() throws IOException {
+      super.end();
+      offsetAttribute.setOffset(length, length);
+    }
+  }
 }

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e92a38af/lucene/core/src/java/org/apache/lucene/analysis/standard/StandardAnalyzer.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/analysis/standard/StandardAnalyzer.java b/lucene/core/src/java/org/apache/lucene/analysis/standard/StandardAnalyzer.java
index 251017d..fb57573 100644
--- a/lucene/core/src/java/org/apache/lucene/analysis/standard/StandardAnalyzer.java
+++ b/lucene/core/src/java/org/apache/lucene/analysis/standard/StandardAnalyzer.java
@@ -112,4 +112,11 @@ public final class StandardAnalyzer extends StopwordAnalyzerBase {
       }
     };
   }
+
+  @Override
+  protected TokenStream normalize(String fieldName, TokenStream in) {
+    TokenStream result = new StandardFilter(in);
+    result = new LowerCaseFilter(result);
+    return result;
+  }
 }

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e92a38af/lucene/core/src/test/org/apache/lucene/analysis/standard/TestStandardAnalyzer.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/test/org/apache/lucene/analysis/standard/TestStandardAnalyzer.java b/lucene/core/src/test/org/apache/lucene/analysis/standard/TestStandardAnalyzer.java
index 6c6ddc8..2cc9274 100644
--- a/lucene/core/src/test/org/apache/lucene/analysis/standard/TestStandardAnalyzer.java
+++ b/lucene/core/src/test/org/apache/lucene/analysis/standard/TestStandardAnalyzer.java
@@ -27,6 +27,7 @@ import org.apache.lucene.analysis.BaseTokenStreamTestCase;
 import org.apache.lucene.analysis.MockGraphTokenFilter;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.TestUtil;
 
 public class TestStandardAnalyzer extends BaseTokenStreamTestCase {
@@ -387,4 +388,9 @@ public class TestStandardAnalyzer extends BaseTokenStreamTestCase {
     checkRandomData(random, analyzer, 100*RANDOM_MULTIPLIER, 8192);
     analyzer.close();
   }
+
+  public void testNormalize() {
+    Analyzer a = new StandardAnalyzer();
+    assertEquals(new BytesRef("\"\\�3[]()! cz@"), a.normalize("dummy", "\"\\�3[]()! Cz@"));
+  }
 }

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e92a38af/lucene/queryparser/src/java/org/apache/lucene/queryparser/analyzing/AnalyzingQueryParser.java
----------------------------------------------------------------------
diff --git a/lucene/queryparser/src/java/org/apache/lucene/queryparser/analyzing/AnalyzingQueryParser.java b/lucene/queryparser/src/java/org/apache/lucene/queryparser/analyzing/AnalyzingQueryParser.java
deleted file mode 100644
index 49690fe..0000000
--- a/lucene/queryparser/src/java/org/apache/lucene/queryparser/analyzing/AnalyzingQueryParser.java
+++ /dev/null
@@ -1,202 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.lucene.queryparser.analyzing;
-
-import java.io.IOException;
-import java.util.regex.Matcher;
-import java.util.regex.Pattern;
-
-import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
-import org.apache.lucene.queryparser.classic.ParseException;
-import org.apache.lucene.search.Query;
-
-/**
- * Overrides Lucene's default QueryParser so that Fuzzy-, Prefix-, Range-, and WildcardQuerys
- * are also passed through the given analyzer, but wildcard characters <code>*</code> and
- * <code>?</code> don't get removed from the search terms.
- * 
- * <p><b>Warning:</b> This class should only be used with analyzers that do not use stopwords
- * or that add tokens. Also, several stemming analyzers are inappropriate: for example, GermanAnalyzer 
- * will turn <code>H&auml;user</code> into <code>hau</code>, but <code>H?user</code> will 
- * become <code>h?user</code> when using this parser and thus no match would be found (i.e.
- * using this parser will be no improvement over QueryParser in such cases). 
- */
-public class AnalyzingQueryParser extends org.apache.lucene.queryparser.classic.QueryParser {
-  // gobble escaped chars or find a wildcard character 
-  private final Pattern wildcardPattern = Pattern.compile("(\\.)|([?*]+)");
-  public AnalyzingQueryParser(String field, Analyzer analyzer) {
-    super(field, analyzer);
-    setAnalyzeRangeTerms(true);
-  }
-
-  /**
-   * Called when parser parses an input term that contains one or more wildcard
-   * characters (like <code>*</code>), but is not a prefix term (one that has
-   * just a single <code>*</code> character at the end).
-   * <p>
-   * Example: will be called for <code>H?user</code> or for <code>H*user</code>.
-   * <p>
-   * Depending on analyzer and settings, a wildcard term may (most probably will)
-   * be lower-cased automatically. It <b>will</b> go through the default Analyzer.
-   * <p>
-   * Overrides super class, by passing terms through analyzer.
-   *
-   * @param  field   Name of the field query will use.
-   * @param  termStr Term that contains one or more wildcard
-   *                 characters (? or *), but is not simple prefix term
-   *
-   * @return Resulting {@link Query} built for the term
-   */
-  @Override
-  protected Query getWildcardQuery(String field, String termStr) throws ParseException {
-
-    if (termStr == null){
-      //can't imagine this would ever happen
-      throw new ParseException("Passed null value as term to getWildcardQuery");
-    }
-    if ( ! getAllowLeadingWildcard() && (termStr.startsWith("*") || termStr.startsWith("?"))) {
-      throw new ParseException("'*' or '?' not allowed as first character in WildcardQuery"
-                              + " unless getAllowLeadingWildcard() returns true");
-    }
-    
-    Matcher wildcardMatcher = wildcardPattern.matcher(termStr);
-    StringBuilder sb = new StringBuilder();
-    int last = 0;
-  
-    while (wildcardMatcher.find()){
-      // continue if escaped char
-      if (wildcardMatcher.group(1) != null){
-        continue;
-      }
-     
-      if (wildcardMatcher.start() > 0){
-        String chunk = termStr.substring(last, wildcardMatcher.start());
-        String analyzed = analyzeSingleChunk(field, termStr, chunk);
-        sb.append(analyzed);
-      }
-      //append the wildcard character
-      sb.append(wildcardMatcher.group(2));
-     
-      last = wildcardMatcher.end();
-    }
-    if (last < termStr.length()){
-      sb.append(analyzeSingleChunk(field, termStr, termStr.substring(last)));
-    }
-    return super.getWildcardQuery(field, sb.toString());
-  }
-  
-  /**
-   * Called when parser parses an input term
-   * that uses prefix notation; that is, contains a single '*' wildcard
-   * character as its last character. Since this is a special case
-   * of generic wildcard term, and such a query can be optimized easily,
-   * this usually results in a different query object.
-   * <p>
-   * Depending on analyzer and settings, a prefix term may (most probably will)
-   * be lower-cased automatically. It <b>will</b> go through the default Analyzer.
-   * <p>
-   * Overrides super class, by passing terms through analyzer.
-   *
-   * @param  field   Name of the field query will use.
-   * @param  termStr Term to use for building term for the query
-   *                 (<b>without</b> trailing '*' character!)
-   *
-   * @return Resulting {@link Query} built for the term
-   */
-  @Override
-  protected Query getPrefixQuery(String field, String termStr) throws ParseException {
-    String analyzed = analyzeSingleChunk(field, termStr, termStr);
-    return super.getPrefixQuery(field, analyzed);
-  }
-
-  /**
-   * Called when parser parses an input term that has the fuzzy suffix (~) appended.
-   * <p>
-   * Depending on analyzer and settings, a fuzzy term may (most probably will)
-   * be lower-cased automatically. It <b>will</b> go through the default Analyzer.
-   * <p>
-   * Overrides super class, by passing terms through analyzer.
-   *
-   * @param field Name of the field query will use.
-   * @param termStr Term to use for building term for the query
-   *
-   * @return Resulting {@link Query} built for the term
-   */
-  @Override
-  protected Query getFuzzyQuery(String field, String termStr, float minSimilarity)
-      throws ParseException {
-   
-    String analyzed = analyzeSingleChunk(field, termStr, termStr);
-    return super.getFuzzyQuery(field, analyzed, minSimilarity);
-  }
-
-  /**
-   * Returns the analyzed form for the given chunk
-   * 
-   * If the analyzer produces more than one output token from the given chunk,
-   * a ParseException is thrown.
-   *
-   * @param field The target field
-   * @param termStr The full term from which the given chunk is excerpted
-   * @param chunk The portion of the given termStr to be analyzed
-   * @return The result of analyzing the given chunk
-   * @throws ParseException when analysis returns other than one output token
-   */
-  protected String analyzeSingleChunk(String field, String termStr, String chunk) throws ParseException{
-    String analyzed = null;
-    try (TokenStream stream = getAnalyzer().tokenStream(field, chunk)) {
-      stream.reset();
-      CharTermAttribute termAtt = stream.getAttribute(CharTermAttribute.class);
-      // get first and hopefully only output token
-      if (stream.incrementToken()) {
-        analyzed = termAtt.toString();
-        
-        // try to increment again, there should only be one output token
-        StringBuilder multipleOutputs = null;
-        while (stream.incrementToken()) {
-          if (null == multipleOutputs) {
-            multipleOutputs = new StringBuilder();
-            multipleOutputs.append('"');
-            multipleOutputs.append(analyzed);
-            multipleOutputs.append('"');
-          }
-          multipleOutputs.append(',');
-          multipleOutputs.append('"');
-          multipleOutputs.append(termAtt.toString());
-          multipleOutputs.append('"');
-        }
-        stream.end();
-        if (null != multipleOutputs) {
-          throw new ParseException(
-              String.format(getLocale(),
-                  "Analyzer created multiple terms for \"%s\": %s", chunk, multipleOutputs.toString()));
-        }
-      } else {
-        // nothing returned by analyzer.  Was it a stop word and the user accidentally
-        // used an analyzer with stop words?
-        stream.end();
-        throw new ParseException(String.format(getLocale(), "Analyzer returned nothing for \"%s\"", chunk));
-      }
-    } catch (IOException e){
-      throw new ParseException(
-          String.format(getLocale(), "IO error while trying to analyze single term: \"%s\"", termStr));
-    }
-    return analyzed;
-  }
-}

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e92a38af/lucene/queryparser/src/java/org/apache/lucene/queryparser/analyzing/package-info.java
----------------------------------------------------------------------
diff --git a/lucene/queryparser/src/java/org/apache/lucene/queryparser/analyzing/package-info.java b/lucene/queryparser/src/java/org/apache/lucene/queryparser/analyzing/package-info.java
deleted file mode 100644
index 77397b4..0000000
--- a/lucene/queryparser/src/java/org/apache/lucene/queryparser/analyzing/package-info.java
+++ /dev/null
@@ -1,22 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
- 
-/** 
- * QueryParser that passes Fuzzy-, Prefix-, Range-, and WildcardQuerys through the given analyzer.
- */
-package org.apache.lucene.queryparser.analyzing;
-