You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by mi...@apache.org on 2016/06/14 20:38:30 UTC
[11/12] lucene-solr:master: LUCENE-7318: graduate StandardAnalyzer
and make it the default for IndexWriterConfig
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/87016b5f/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/CapitalizationFilter.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/CapitalizationFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/CapitalizationFilter.java
index d14ad44..f6ba905 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/CapitalizationFilter.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/CapitalizationFilter.java
@@ -20,10 +20,10 @@ package org.apache.lucene.analysis.miscellaneous;
import java.io.IOException;
import java.util.Collection;
+import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
-import org.apache.lucene.analysis.util.CharArraySet;
/**
* A filter to apply normal capitalization rules to Tokens. It will make the first letter
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/87016b5f/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/CapitalizationFilterFactory.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/CapitalizationFilterFactory.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/CapitalizationFilterFactory.java
index 0301fa5..0397de7 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/CapitalizationFilterFactory.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/CapitalizationFilterFactory.java
@@ -17,16 +17,16 @@
package org.apache.lucene.analysis.miscellaneous;
-import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.util.CharArraySet;
-import org.apache.lucene.analysis.util.TokenFilterFactory;
-
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Map;
import java.util.Set;
+import org.apache.lucene.analysis.CharArraySet;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.util.TokenFilterFactory;
+
/**
* Factory for {@link CapitalizationFilter}.
* <p>
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/87016b5f/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/CodepointCountFilter.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/CodepointCountFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/CodepointCountFilter.java
index 40cd210..b086c62 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/CodepointCountFilter.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/CodepointCountFilter.java
@@ -17,8 +17,8 @@
package org.apache.lucene.analysis.miscellaneous;
+import org.apache.lucene.analysis.FilteringTokenFilter;
import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.util.FilteringTokenFilter;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
/**
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/87016b5f/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/DateRecognizerFilter.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/DateRecognizerFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/DateRecognizerFilter.java
index df82ff1..bde0e59 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/DateRecognizerFilter.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/DateRecognizerFilter.java
@@ -21,9 +21,9 @@ import java.text.DateFormat;
import java.text.ParseException;
import java.util.Locale;
+import org.apache.lucene.analysis.FilteringTokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
-import org.apache.lucene.analysis.util.FilteringTokenFilter;
/** Filters all tokens that cannot be parsed to a date, using the provided {@link DateFormat}. */
public class DateRecognizerFilter extends FilteringTokenFilter {
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/87016b5f/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/FingerprintFilter.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/FingerprintFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/FingerprintFilter.java
index 4c8a5c7..7cbd6f8 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/FingerprintFilter.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/FingerprintFilter.java
@@ -20,6 +20,7 @@ import java.io.IOException;
import java.util.Arrays;
import java.util.Comparator;
+import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
@@ -27,7 +28,6 @@ import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
-import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.util.AttributeSource;
/**
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/87016b5f/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/KeepWordFilter.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/KeepWordFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/KeepWordFilter.java
index cb3e331..2255283 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/KeepWordFilter.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/KeepWordFilter.java
@@ -16,10 +16,10 @@
*/
package org.apache.lucene.analysis.miscellaneous;
+import org.apache.lucene.analysis.CharArraySet;
+import org.apache.lucene.analysis.FilteringTokenFilter;
import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.util.FilteringTokenFilter;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
-import org.apache.lucene.analysis.util.CharArraySet;
/**
* A TokenFilter that only keeps tokens with text contained in the
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/87016b5f/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/KeepWordFilterFactory.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/KeepWordFilterFactory.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/KeepWordFilterFactory.java
index 7ff7834..8967c5b 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/KeepWordFilterFactory.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/KeepWordFilterFactory.java
@@ -17,15 +17,15 @@
package org.apache.lucene.analysis.miscellaneous;
+import java.io.IOException;
+import java.util.Map;
+
+import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.analysis.util.ResourceLoader;
import org.apache.lucene.analysis.util.ResourceLoaderAware;
import org.apache.lucene.analysis.util.TokenFilterFactory;
-import java.util.Map;
-import java.io.IOException;
-
/**
* Factory for {@link KeepWordFilter}.
* <pre class="prettyprint">
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/87016b5f/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/KeywordMarkerFilterFactory.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/KeywordMarkerFilterFactory.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/KeywordMarkerFilterFactory.java
index 69c1aad..5b9f48d 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/KeywordMarkerFilterFactory.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/KeywordMarkerFilterFactory.java
@@ -21,8 +21,8 @@ import java.io.IOException;
import java.util.Map;
import java.util.regex.Pattern;
+import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.analysis.util.ResourceLoader;
import org.apache.lucene.analysis.util.ResourceLoaderAware;
import org.apache.lucene.analysis.util.TokenFilterFactory;
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/87016b5f/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/LengthFilter.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/LengthFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/LengthFilter.java
index 0594c63..a18711c 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/LengthFilter.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/LengthFilter.java
@@ -18,7 +18,7 @@ package org.apache.lucene.analysis.miscellaneous;
import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.util.FilteringTokenFilter;
+import org.apache.lucene.analysis.FilteringTokenFilter;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
/**
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/87016b5f/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/RemoveDuplicatesTokenFilter.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/RemoveDuplicatesTokenFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/RemoveDuplicatesTokenFilter.java
index a7ef58e..457087c 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/RemoveDuplicatesTokenFilter.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/RemoveDuplicatesTokenFilter.java
@@ -20,7 +20,7 @@ import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
-import org.apache.lucene.analysis.util.CharArraySet;
+import org.apache.lucene.analysis.CharArraySet;
import java.io.IOException;
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/87016b5f/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/SetKeywordMarkerFilter.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/SetKeywordMarkerFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/SetKeywordMarkerFilter.java
index c4dbf78..b0d079b 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/SetKeywordMarkerFilter.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/SetKeywordMarkerFilter.java
@@ -18,7 +18,7 @@ package org.apache.lucene.analysis.miscellaneous;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
-import org.apache.lucene.analysis.util.CharArraySet;
+import org.apache.lucene.analysis.CharArraySet;
/**
* Marks terms as keywords via the {@link KeywordAttribute}. Each token
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/87016b5f/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/WordDelimiterFilter.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/WordDelimiterFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/WordDelimiterFilter.java
index 20e013d..f80ed8a 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/WordDelimiterFilter.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/WordDelimiterFilter.java
@@ -16,22 +16,22 @@
*/
package org.apache.lucene.analysis.miscellaneous;
+import java.io.IOException;
+import java.util.Arrays;
+
+import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.analysis.standard.StandardTokenizer;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
-import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
-import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.InPlaceMergeSorter;
-import java.io.IOException;
-import java.util.Arrays;
-
/**
* Splits words into subwords and performs optional transformations on subword
* groups. Words are split into subwords with the following rules:
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/87016b5f/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/WordDelimiterFilterFactory.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/WordDelimiterFilterFactory.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/WordDelimiterFilterFactory.java
index 2f51a2b..6a15b55 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/WordDelimiterFilterFactory.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/WordDelimiterFilterFactory.java
@@ -16,13 +16,7 @@
*/
package org.apache.lucene.analysis.miscellaneous;
-
-import org.apache.lucene.analysis.TokenFilter;
-import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.util.CharArraySet;
-import org.apache.lucene.analysis.util.ResourceLoader;
-import org.apache.lucene.analysis.util.ResourceLoaderAware;
-import org.apache.lucene.analysis.util.TokenFilterFactory;
+import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
@@ -30,7 +24,13 @@ import java.util.SortedMap;
import java.util.TreeMap;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
-import java.io.IOException;
+
+import org.apache.lucene.analysis.CharArraySet;
+import org.apache.lucene.analysis.TokenFilter;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.util.ResourceLoader;
+import org.apache.lucene.analysis.util.ResourceLoaderAware;
+import org.apache.lucene.analysis.util.TokenFilterFactory;
import static org.apache.lucene.analysis.miscellaneous.WordDelimiterFilter.*;
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/87016b5f/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenizer.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenizer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenizer.java
index da104c9..87465b7 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenizer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenizer.java
@@ -24,7 +24,7 @@ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute;
-import org.apache.lucene.analysis.util.CharacterUtils;
+import org.apache.lucene.analysis.CharacterUtils;
import org.apache.lucene.util.AttributeFactory;
/**
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/87016b5f/lucene/analysis/common/src/java/org/apache/lucene/analysis/nl/DutchAnalyzer.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/nl/DutchAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/nl/DutchAnalyzer.java
index e8b152d..0391425 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/nl/DutchAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/nl/DutchAnalyzer.java
@@ -17,27 +17,27 @@
package org.apache.lucene.analysis.nl;
+import java.io.IOException;
+import java.io.Reader;
+import java.nio.charset.StandardCharsets;
+
import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.core.LowerCaseFilter;
-import org.apache.lucene.analysis.core.StopFilter;
-import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
-import org.apache.lucene.analysis.miscellaneous.StemmerOverrideFilter.StemmerOverrideMap;
+import org.apache.lucene.analysis.CharArrayMap;
+import org.apache.lucene.analysis.CharArraySet;
+import org.apache.lucene.analysis.LowerCaseFilter;
+import org.apache.lucene.analysis.StopFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.WordlistLoader;
+import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
+import org.apache.lucene.analysis.miscellaneous.StemmerOverrideFilter.StemmerOverrideMap;
import org.apache.lucene.analysis.miscellaneous.StemmerOverrideFilter;
import org.apache.lucene.analysis.snowball.SnowballFilter;
import org.apache.lucene.analysis.standard.StandardFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
-import org.apache.lucene.analysis.util.CharArrayMap;
-import org.apache.lucene.analysis.util.CharArraySet;
-import org.apache.lucene.analysis.util.WordlistLoader;
import org.apache.lucene.util.CharsRefBuilder;
import org.apache.lucene.util.IOUtils;
-import java.io.IOException;
-import java.io.Reader;
-import java.nio.charset.StandardCharsets;
-
/**
* {@link Analyzer} for Dutch language.
* <p>
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/87016b5f/lucene/analysis/common/src/java/org/apache/lucene/analysis/no/NorwegianAnalyzer.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/no/NorwegianAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/no/NorwegianAnalyzer.java
index 4110da3..c413793 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/no/NorwegianAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/no/NorwegianAnalyzer.java
@@ -22,17 +22,17 @@ import java.io.Reader;
import java.nio.charset.StandardCharsets;
import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.core.LowerCaseFilter;
-import org.apache.lucene.analysis.core.StopFilter;
-import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
+import org.apache.lucene.analysis.CharArraySet;
+import org.apache.lucene.analysis.LowerCaseFilter;
+import org.apache.lucene.analysis.StopFilter;
+import org.apache.lucene.analysis.StopwordAnalyzerBase;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.WordlistLoader;
+import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
import org.apache.lucene.analysis.snowball.SnowballFilter;
import org.apache.lucene.analysis.standard.StandardFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
-import org.apache.lucene.analysis.util.CharArraySet;
-import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
-import org.apache.lucene.analysis.util.WordlistLoader;
import org.apache.lucene.util.IOUtils;
import org.tartarus.snowball.ext.NorwegianStemmer;
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/87016b5f/lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseAnalyzer.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseAnalyzer.java
index ecdb944..769e142 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseAnalyzer.java
@@ -22,17 +22,17 @@ import java.io.Reader;
import java.nio.charset.StandardCharsets;
import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.core.LowerCaseFilter;
-import org.apache.lucene.analysis.core.StopFilter;
-import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
+import org.apache.lucene.analysis.CharArraySet;
+import org.apache.lucene.analysis.LowerCaseFilter;
+import org.apache.lucene.analysis.StopFilter;
+import org.apache.lucene.analysis.StopwordAnalyzerBase;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.WordlistLoader;
+import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
import org.apache.lucene.analysis.snowball.SnowballFilter;
import org.apache.lucene.analysis.standard.StandardFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
-import org.apache.lucene.analysis.util.CharArraySet;
-import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
-import org.apache.lucene.analysis.util.WordlistLoader;
import org.apache.lucene.util.IOUtils;
/**
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/87016b5f/lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/RSLPStemmerBase.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/RSLPStemmerBase.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/RSLPStemmerBase.java
index f24cf2a..9fdb73e 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/RSLPStemmerBase.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/RSLPStemmerBase.java
@@ -30,7 +30,7 @@ import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
-import org.apache.lucene.analysis.util.CharArraySet;
+import org.apache.lucene.analysis.CharArraySet;
import static org.apache.lucene.analysis.util.StemmerUtil.*;
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/87016b5f/lucene/analysis/common/src/java/org/apache/lucene/analysis/query/QueryAutoStopWordAnalyzer.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/query/QueryAutoStopWordAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/query/QueryAutoStopWordAnalyzer.java
index 61475d2..37f044a 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/query/QueryAutoStopWordAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/query/QueryAutoStopWordAnalyzer.java
@@ -21,8 +21,8 @@ import java.util.*;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.AnalyzerWrapper;
-import org.apache.lucene.analysis.core.StopFilter;
-import org.apache.lucene.analysis.util.CharArraySet;
+import org.apache.lucene.analysis.CharArraySet;
+import org.apache.lucene.analysis.StopFilter;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.index.Term;
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/87016b5f/lucene/analysis/common/src/java/org/apache/lucene/analysis/ro/RomanianAnalyzer.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/ro/RomanianAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/ro/RomanianAnalyzer.java
index 7436243..06ff999 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/ro/RomanianAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/ro/RomanianAnalyzer.java
@@ -21,16 +21,16 @@ import java.io.IOException;
import java.io.Reader;
import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.core.LowerCaseFilter;
-import org.apache.lucene.analysis.core.StopFilter;
-import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
+import org.apache.lucene.analysis.CharArraySet;
+import org.apache.lucene.analysis.LowerCaseFilter;
+import org.apache.lucene.analysis.StopFilter;
+import org.apache.lucene.analysis.StopwordAnalyzerBase;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
import org.apache.lucene.analysis.snowball.SnowballFilter;
import org.apache.lucene.analysis.standard.StandardFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
-import org.apache.lucene.analysis.util.CharArraySet;
-import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
import org.tartarus.snowball.ext.RomanianStemmer;
/**
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/87016b5f/lucene/analysis/common/src/java/org/apache/lucene/analysis/ru/RussianAnalyzer.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/ru/RussianAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/ru/RussianAnalyzer.java
index db2df8a..dfe8ef3 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/ru/RussianAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/ru/RussianAnalyzer.java
@@ -22,17 +22,17 @@ import java.io.Reader;
import java.nio.charset.StandardCharsets;
import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.CharArraySet;
+import org.apache.lucene.analysis.LowerCaseFilter;
+import org.apache.lucene.analysis.StopFilter;
+import org.apache.lucene.analysis.StopwordAnalyzerBase;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.WordlistLoader;
+import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
import org.apache.lucene.analysis.snowball.SnowballFilter;
import org.apache.lucene.analysis.standard.StandardFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
-import org.apache.lucene.analysis.util.CharArraySet;
-import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
-import org.apache.lucene.analysis.util.WordlistLoader;
-import org.apache.lucene.analysis.core.LowerCaseFilter;
-import org.apache.lucene.analysis.core.StopFilter;
-import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
-import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.util.IOUtils;
/**
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/87016b5f/lucene/analysis/common/src/java/org/apache/lucene/analysis/snowball/SnowballFilter.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/snowball/SnowballFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/snowball/SnowballFilter.java
index 1c11e48..06aed49 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/snowball/SnowballFilter.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/snowball/SnowballFilter.java
@@ -19,11 +19,11 @@ package org.apache.lucene.analysis.snowball;
import java.io.IOException;
+import org.apache.lucene.analysis.LowerCaseFilter;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.core.LowerCaseFilter;
-import org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+import org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
import org.apache.lucene.analysis.tr.TurkishLowerCaseFilter; // javadoc @link
import org.tartarus.snowball.SnowballProgram;
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/87016b5f/lucene/analysis/common/src/java/org/apache/lucene/analysis/snowball/SnowballPorterFilterFactory.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/snowball/SnowballPorterFilterFactory.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/snowball/SnowballPorterFilterFactory.java
index 93cf7a4..d598a09 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/snowball/SnowballPorterFilterFactory.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/snowball/SnowballPorterFilterFactory.java
@@ -17,13 +17,13 @@
package org.apache.lucene.analysis.snowball;
-import java.util.Map;
import java.io.IOException;
+import java.util.Map;
-import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
+import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.util.CharArraySet;
+import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
import org.apache.lucene.analysis.util.ResourceLoader;
import org.apache.lucene.analysis.util.ResourceLoaderAware;
import org.apache.lucene.analysis.util.TokenFilterFactory;
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/87016b5f/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicAnalyzer.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicAnalyzer.java
index 43c7dad..dc6c118 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicAnalyzer.java
@@ -20,13 +20,13 @@ package org.apache.lucene.analysis.standard;
import java.io.IOException;
import java.io.Reader;
+import org.apache.lucene.analysis.CharArraySet;
+import org.apache.lucene.analysis.LowerCaseFilter;
+import org.apache.lucene.analysis.StopFilter;
+import org.apache.lucene.analysis.StopwordAnalyzerBase;
import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.core.LowerCaseFilter;
+import org.apache.lucene.analysis.WordlistLoader;
import org.apache.lucene.analysis.core.StopAnalyzer;
-import org.apache.lucene.analysis.core.StopFilter;
-import org.apache.lucene.analysis.util.CharArraySet;
-import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
-import org.apache.lucene.analysis.util.WordlistLoader;
/**
* Filters {@link ClassicTokenizer} with {@link ClassicFilter}, {@link
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/87016b5f/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardAnalyzer.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardAnalyzer.java
deleted file mode 100644
index ae23dc6..0000000
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardAnalyzer.java
+++ /dev/null
@@ -1,98 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.lucene.analysis.standard;
-
-
-import java.io.IOException;
-import java.io.Reader;
-
-import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.core.LowerCaseFilter;
-import org.apache.lucene.analysis.core.StopAnalyzer;
-import org.apache.lucene.analysis.core.StopFilter;
-import org.apache.lucene.analysis.util.CharArraySet;
-import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
-import org.apache.lucene.analysis.util.WordlistLoader;
-
-/**
- * Filters {@link StandardTokenizer} with {@link StandardFilter}, {@link
- * LowerCaseFilter} and {@link StopFilter}, using a list of
- * English stop words.
- */
-public final class StandardAnalyzer extends StopwordAnalyzerBase {
-
- /** Default maximum allowed token length */
- public static final int DEFAULT_MAX_TOKEN_LENGTH = 255;
-
- private int maxTokenLength = DEFAULT_MAX_TOKEN_LENGTH;
-
- /** An unmodifiable set containing some common English words that are usually not
- useful for searching. */
- public static final CharArraySet STOP_WORDS_SET = StopAnalyzer.ENGLISH_STOP_WORDS_SET;
-
- /** Builds an analyzer with the given stop words.
- * @param stopWords stop words */
- public StandardAnalyzer(CharArraySet stopWords) {
- super(stopWords);
- }
-
- /** Builds an analyzer with the default stop words ({@link #STOP_WORDS_SET}).
- */
- public StandardAnalyzer() {
- this(STOP_WORDS_SET);
- }
-
- /** Builds an analyzer with the stop words from the given reader.
- * @see WordlistLoader#getWordSet(Reader)
- * @param stopwords Reader to read stop words from */
- public StandardAnalyzer(Reader stopwords) throws IOException {
- this(loadStopwordSet(stopwords));
- }
-
- /**
- * Set maximum allowed token length. If a token is seen
- * that exceeds this length then it is discarded. This
- * setting only takes effect the next time
- * tokenStream is called.
- */
- public void setMaxTokenLength(int length) {
- maxTokenLength = length;
- }
-
- /**
- * @see #setMaxTokenLength
- */
- public int getMaxTokenLength() {
- return maxTokenLength;
- }
-
- @Override
- protected TokenStreamComponents createComponents(final String fieldName) {
- final StandardTokenizer src = new StandardTokenizer();
- src.setMaxTokenLength(maxTokenLength);
- TokenStream tok = new StandardFilter(src);
- tok = new LowerCaseFilter(tok);
- tok = new StopFilter(tok, stopwords);
- return new TokenStreamComponents(src, tok) {
- @Override
- protected void setReader(final Reader reader) {
- src.setMaxTokenLength(StandardAnalyzer.this.maxTokenLength);
- super.setReader(reader);
- }
- };
- }
-}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/87016b5f/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardFilter.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardFilter.java
deleted file mode 100644
index a470a83..0000000
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardFilter.java
+++ /dev/null
@@ -1,38 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.lucene.analysis.standard;
-
-
-import java.io.IOException;
-
-import org.apache.lucene.analysis.TokenFilter;
-import org.apache.lucene.analysis.TokenStream;
-
-/**
- * Normalizes tokens extracted with {@link StandardTokenizer}.
- */
-public class StandardFilter extends TokenFilter {
-
- public StandardFilter(TokenStream in) {
- super(in);
- }
-
- @Override
- public final boolean incrementToken() throws IOException {
- return input.incrementToken(); // TODO: add some niceties for the new grammar
- }
-}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/87016b5f/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizer.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizer.java
deleted file mode 100644
index 1e143a3..0000000
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizer.java
+++ /dev/null
@@ -1,201 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.lucene.analysis.standard;
-
-import java.io.IOException;
-
-import org.apache.lucene.analysis.Tokenizer;
-import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
-import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
-import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
-import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
-import org.apache.lucene.util.AttributeFactory;
-
-/** A grammar-based tokenizer constructed with JFlex.
- * <p>
- * This class implements the Word Break rules from the
- * Unicode Text Segmentation algorithm, as specified in
- * <a href="http://unicode.org/reports/tr29/">Unicode Standard Annex #29</a>.
- * <p>Many applications have specific tokenizer needs. If this tokenizer does
- * not suit your application, please consider copying this source code
- * directory to your project and maintaining your own grammar-based tokenizer.
- */
-
-public final class StandardTokenizer extends Tokenizer {
- /** A private instance of the JFlex-constructed scanner */
- private StandardTokenizerImpl scanner;
-
- // TODO: how can we remove these old types?!
- public static final int ALPHANUM = 0;
- /** @deprecated (3.1) */
- @Deprecated
- public static final int APOSTROPHE = 1;
- /** @deprecated (3.1) */
- @Deprecated
- public static final int ACRONYM = 2;
- /** @deprecated (3.1) */
- @Deprecated
- public static final int COMPANY = 3;
- public static final int EMAIL = 4;
- /** @deprecated (3.1) */
- @Deprecated
- public static final int HOST = 5;
- public static final int NUM = 6;
- /** @deprecated (3.1) */
- @Deprecated
- public static final int CJ = 7;
-
- /** @deprecated (3.1) */
- @Deprecated
- public static final int ACRONYM_DEP = 8;
-
- public static final int SOUTHEAST_ASIAN = 9;
- public static final int IDEOGRAPHIC = 10;
- public static final int HIRAGANA = 11;
- public static final int KATAKANA = 12;
- public static final int HANGUL = 13;
-
- /** String token types that correspond to token type int constants */
- public static final String [] TOKEN_TYPES = new String [] {
- "<ALPHANUM>",
- "<APOSTROPHE>",
- "<ACRONYM>",
- "<COMPANY>",
- "<EMAIL>",
- "<HOST>",
- "<NUM>",
- "<CJ>",
- "<ACRONYM_DEP>",
- "<SOUTHEAST_ASIAN>",
- "<IDEOGRAPHIC>",
- "<HIRAGANA>",
- "<KATAKANA>",
- "<HANGUL>"
- };
-
- public static final int MAX_TOKEN_LENGTH_LIMIT = 1024 * 1024;
-
- private int skippedPositions;
-
- private int maxTokenLength = StandardAnalyzer.DEFAULT_MAX_TOKEN_LENGTH;
-
- /**
- * Set the max allowed token length. No tokens longer than this are emitted.
- *
- * @throws IllegalArgumentException if the given length is outside of the
- * range [1, {@value #MAX_TOKEN_LENGTH_LIMIT}].
- */
- public void setMaxTokenLength(int length) {
- if (length < 1) {
- throw new IllegalArgumentException("maxTokenLength must be greater than zero");
- } else if (length > MAX_TOKEN_LENGTH_LIMIT) {
- throw new IllegalArgumentException("maxTokenLength may not exceed " + MAX_TOKEN_LENGTH_LIMIT);
- }
- if (length != maxTokenLength) {
- maxTokenLength = length;
- scanner.setBufferSize(length);
- }
- }
-
- /** @see #setMaxTokenLength */
- public int getMaxTokenLength() {
- return maxTokenLength;
- }
-
- /**
- * Creates a new instance of the {@link org.apache.lucene.analysis.standard.StandardTokenizer}. Attaches
- * the <code>input</code> to the newly created JFlex scanner.
-
- * See http://issues.apache.org/jira/browse/LUCENE-1068
- */
- public StandardTokenizer() {
- init();
- }
-
- /**
- * Creates a new StandardTokenizer with a given {@link org.apache.lucene.util.AttributeFactory}
- */
- public StandardTokenizer(AttributeFactory factory) {
- super(factory);
- init();
- }
-
- private void init() {
- this.scanner = new StandardTokenizerImpl(input);
- }
-
- // this tokenizer generates four attributes:
- // term, offset, positionIncrement and type
- private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
- private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
- private final PositionIncrementAttribute posIncrAtt = addAttribute(PositionIncrementAttribute.class);
- private final TypeAttribute typeAtt = addAttribute(TypeAttribute.class);
-
- /*
- * (non-Javadoc)
- *
- * @see org.apache.lucene.analysis.TokenStream#incrementToken()
- */
- @Override
- public final boolean incrementToken() throws IOException {
- clearAttributes();
- skippedPositions = 0;
-
- while(true) {
- int tokenType = scanner.getNextToken();
-
- if (tokenType == StandardTokenizerImpl.YYEOF) {
- return false;
- }
-
- if (scanner.yylength() <= maxTokenLength) {
- posIncrAtt.setPositionIncrement(skippedPositions+1);
- scanner.getText(termAtt);
- final int start = scanner.yychar();
- offsetAtt.setOffset(correctOffset(start), correctOffset(start+termAtt.length()));
- typeAtt.setType(StandardTokenizer.TOKEN_TYPES[tokenType]);
- return true;
- } else
- // When we skip a too-long term, we still increment the
- // position increment
- skippedPositions++;
- }
- }
-
- @Override
- public final void end() throws IOException {
- super.end();
- // set final offset
- int finalOffset = correctOffset(scanner.yychar() + scanner.yylength());
- offsetAtt.setOffset(finalOffset, finalOffset);
- // adjust any skipped tokens
- posIncrAtt.setPositionIncrement(posIncrAtt.getPositionIncrement()+skippedPositions);
- }
-
- @Override
- public void close() throws IOException {
- super.close();
- scanner.yyreset(input);
- }
-
- @Override
- public void reset() throws IOException {
- super.reset();
- scanner.yyreset(input);
- skippedPositions = 0;
- }
-}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/87016b5f/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.java
deleted file mode 100644
index c8bf9e9..0000000
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.java
+++ /dev/null
@@ -1,818 +0,0 @@
-/* The following code was generated by JFlex 1.6.0 */
-
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.lucene.analysis.standard;
-
-import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
-
-/**
- * This class implements Word Break rules from the Unicode Text Segmentation
- * algorithm, as specified in
- * <a href="http://unicode.org/reports/tr29/">Unicode Standard Annex #29</a>.
- * <p>
- * Tokens produced are of the following types:
- * <ul>
- * <li>&lt;ALPHANUM&gt;: A sequence of alphabetic and numeric characters</li>
- * <li>&lt;NUM&gt;: A number</li>
- * <li>&lt;SOUTHEAST_ASIAN&gt;: A sequence of characters from South and Southeast
- * Asian languages, including Thai, Lao, Myanmar, and Khmer</li>
- * <li>&lt;IDEOGRAPHIC&gt;: A single CJKV ideographic character</li>
- * <li>&lt;HIRAGANA&gt;: A single hiragana character</li>
- * <li>&lt;KATAKANA&gt;: A sequence of katakana characters</li>
- * <li>&lt;HANGUL&gt;: A sequence of Hangul characters</li>
- * </ul>
- */
-@SuppressWarnings("fallthrough")
-
-public final class StandardTokenizerImpl {
-
- /** This character denotes the end of file */
- public static final int YYEOF = -1;
-
- /** initial size of the lookahead buffer */
- private int ZZ_BUFFERSIZE = 255;
-
- /** lexical states */
- public static final int YYINITIAL = 0;
-
- /**
- * ZZ_LEXSTATE[l] is the state in the DFA for the lexical state l
- * ZZ_LEXSTATE[l+1] is the state in the DFA for the lexical state l
- * at the beginning of a line
- * l is of the form l = 2*k, k a non negative integer
- */
- private static final int ZZ_LEXSTATE[] = {
- 0, 0
- };
-
- /**
- * Translates characters to character classes
- */
- private static final String ZZ_CMAP_PACKED =
- "\42\0\1\15\4\0\1\14\4\0\1\7\1\0\1\10\1\0\12\4"+
- "\1\6\1\7\5\0\32\1\4\0\1\11\1\0\32\1\57\0\1\1"+
- "\2\0\1\3\7\0\1\1\1\0\1\6\2\0\1\1\5\0\27\1"+
- "\1\0\37\1\1\0\u01ca\1\4\0\14\1\5\0\1\6\10\0\5\1"+
- "\7\0\1\1\1\0\1\1\21\0\160\3\5\1\1\0\2\1\2\0"+
- "\4\1\1\7\7\0\1\1\1\6\3\1\1\0\1\1\1\0\24\1"+
- "\1\0\123\1\1\0\213\1\1\0\7\3\236\1\11\0\46\1\2\0"+
- "\1\1\7\0\47\1\1\0\1\7\7\0\55\3\1\0\1\3\1\0"+
- "\2\3\1\0\2\3\1\0\1\3\10\0\33\16\5\0\3\16\1\1"+
- "\1\6\13\0\5\3\7\0\2\7\2\0\13\3\1\0\1\3\3\0"+
- "\53\1\25\3\12\4\1\0\1\4\1\7\1\0\2\1\1\3\143\1"+
- "\1\0\1\1\10\3\1\0\6\3\2\1\2\3\1\0\4\3\2\1"+
- "\12\4\3\1\2\0\1\1\17\0\1\3\1\1\1\3\36\1\33\3"+
- "\2\0\131\1\13\3\1\1\16\0\12\4\41\1\11\3\2\1\2\0"+
- "\1\7\1\0\1\1\5\0\26\1\4\3\1\1\11\3\1\1\3\3"+
- "\1\1\5\3\22\0\31\1\3\3\104\0\1\1\1\0\13\1\67\0"+
- "\33\3\1\0\4\3\66\1\3\3\1\1\22\3\1\1\7\3\12\1"+
- "\2\3\2\0\12\4\1\0\7\1\1\0\7\1\1\0\3\3\1\0"+
- "\10\1\2\0\2\1\2\0\26\1\1\0\7\1\1\0\1\1\3\0"+
- "\4\1\2\0\1\3\1\1\7\3\2\0\2\3\2\0\3\3\1\1"+
- "\10\0\1\3\4\0\2\1\1\0\3\1\2\3\2\0\12\4\2\1"+
- "\17\0\3\3\1\0\6\1\4\0\2\1\2\0\26\1\1\0\7\1"+
- "\1\0\2\1\1\0\2\1\1\0\2\1\2\0\1\3\1\0\5\3"+
- "\4\0\2\3\2\0\3\3\3\0\1\3\7\0\4\1\1\0\1\1"+
- "\7\0\12\4\2\3\3\1\1\3\13\0\3\3\1\0\11\1\1\0"+
- "\3\1\1\0\26\1\1\0\7\1\1\0\2\1\1\0\5\1\2\0"+
- "\1\3\1\1\10\3\1\0\3\3\1\0\3\3\2\0\1\1\17\0"+
- "\2\1\2\3\2\0\12\4\21\0\3\3\1\0\10\1\2\0\2\1"+
- "\2\0\26\1\1\0\7\1\1\0\2\1\1\0\5\1\2\0\1\3"+
- "\1\1\7\3\2\0\2\3\2\0\3\3\10\0\2\3\4\0\2\1"+
- "\1\0\3\1\2\3\2\0\12\4\1\0\1\1\20\0\1\3\1\1"+
- "\1\0\6\1\3\0\3\1\1\0\4\1\3\0\2\1\1\0\1\1"+
- "\1\0\2\1\3\0\2\1\3\0\3\1\3\0\14\1\4\0\5\3"+
- "\3\0\3\3\1\0\4\3\2\0\1\1\6\0\1\3\16\0\12\4"+
- "\21\0\3\3\1\0\10\1\1\0\3\1\1\0\27\1\1\0\12\1"+
- "\1\0\5\1\3\0\1\1\7\3\1\0\3\3\1\0\4\3\7\0"+
- "\2\3\1\0\2\1\6\0\2\1\2\3\2\0\12\4\22\0\2\3"+
- "\1\0\10\1\1\0\3\1\1\0\27\1\1\0\12\1\1\0\5\1"+
- "\2\0\1\3\1\1\7\3\1\0\3\3\1\0\4\3\7\0\2\3"+
- "\7\0\1\1\1\0\2\1\2\3\2\0\12\4\1\0\2\1\17\0"+
- "\2\3\1\0\10\1\1\0\3\1\1\0\51\1\2\0\1\1\7\3"+
- "\1\0\3\3\1\0\4\3\1\1\10\0\1\3\10\0\2\1\2\3"+
- "\2\0\12\4\12\0\6\1\2\0\2\3\1\0\22\1\3\0\30\1"+
- "\1\0\11\1\1\0\1\1\2\0\7\1\3\0\1\3\4\0\6\3"+
- "\1\0\1\3\1\0\10\3\22\0\2\3\15\0\60\20\1\21\2\20"+
- "\7\21\5\0\7\20\10\21\1\0\12\4\47\0\2\20\1\0\1\20"+
- "\2\0\2\20\1\0\1\20\2\0\1\20\6\0\4\20\1\0\7\20"+
- "\1\0\3\20\1\0\1\20\1\0\1\20\2\0\2\20\1\0\4\20"+
- "\1\21\2\20\6\21\1\0\2\21\1\20\2\0\5\20\1\0\1\20"+
- "\1\0\6\21\2\0\12\4\2\0\4\20\40\0\1\1\27\0\2\3"+
- "\6\0\12\4\13\0\1\3\1\0\1\3\1\0\1\3\4\0\2\3"+
- "\10\1\1\0\44\1\4\0\24\3\1\0\2\3\5\1\13\3\1\0"+
- "\44\3\11\0\1\3\71\0\53\20\24\21\1\20\12\4\6\0\6\20"+
- "\4\21\4\20\3\21\1\20\3\21\2\20\7\21\3\20\4\21\15\20"+
- "\14\21\1\20\1\21\12\4\4\21\2\20\46\1\1\0\1\1\5\0"+
- "\1\1\2\0\53\1\1\0\4\1\u0100\2\111\1\1\0\4\1\2\0"+
- "\7\1\1\0\1\1\1\0\4\1\2\0\51\1\1\0\4\1\2\0"+
- "\41\1\1\0\4\1\2\0\7\1\1\0\1\1\1\0\4\1\2\0"+
- "\17\1\1\0\71\1\1\0\4\1\2\0\103\1\2\0\3\3\40\0"+
- "\20\1\20\0\125\1\14\0\u026c\1\2\0\21\1\1\0\32\1\5\0"+
- "\113\1\3\0\3\1\17\0\15\1\1\0\4\1\3\3\13\0\22\1"+
- "\3\3\13\0\22\1\2\3\14\0\15\1\1\0\3\1\1\0\2\3"+
- "\14\0\64\20\40\21\3\0\1\20\4\0\1\20\1\21\2\0\12\4"+
- "\41\0\4\3\1\0\12\4\6\0\130\1\10\0\51\1\1\3\1\1"+
- "\5\0\106\1\12\0\35\1\3\0\14\3\4\0\14\3\12\0\12\4"+
- "\36\20\2\0\5\20\13\0\54\20\4\0\21\21\7\20\2\21\6\0"+
- "\12\4\1\20\3\0\2\20\40\0\27\1\5\3\4\0\65\20\12\21"+
- "\1\0\35\21\2\0\1\3\12\4\6\0\12\4\6\0\16\20\122\0"+
- "\5\3\57\1\21\3\7\1\4\0\12\4\21\0\11\3\14\0\3\3"+
- "\36\1\15\3\2\1\12\4\54\1\16\3\14\0\44\1\24\3\10\0"+
- "\12\4\3\0\3\1\12\4\44\1\122\0\3\3\1\0\25\3\4\1"+
- "\1\3\4\1\3\3\2\1\11\0\300\1\47\3\25\0\4\3\u0116\1"+
- "\2\0\6\1\2\0\46\1\2\0\6\1\2\0\10\1\1\0\1\1"+
- "\1\0\1\1\1\0\1\1\1\0\37\1\2\0\65\1\1\0\7\1"+
- "\1\0\1\1\3\0\3\1\1\0\7\1\3\0\4\1\2\0\6\1"+
- "\4\0\15\1\5\0\3\1\1\0\7\1\17\0\4\3\10\0\2\10"+
- "\12\0\1\10\2\0\1\6\2\0\5\3\20\0\2\11\3\0\1\7"+
- "\17\0\1\11\13\0\5\3\1\0\12\3\1\0\1\1\15\0\1\1"+
- "\20\0\15\1\63\0\41\3\21\0\1\1\4\0\1\1\2\0\12\1"+
- "\1\0\1\1\3\0\5\1\6\0\1\1\1\0\1\1\1\0\1\1"+
- "\1\0\4\1\1\0\13\1\2\0\4\1\5\0\5\1\4\0\1\1"+
- "\21\0\51\1\u032d\0\64\1\u0716\0\57\1\1\0\57\1\1\0\205\1"+
- "\6\0\4\1\3\3\2\1\14\0\46\1\1\0\1\1\5\0\1\1"+
- "\2\0\70\1\7\0\1\1\17\0\1\3\27\1\11\0\7\1\1\0"+
- "\7\1\1\0\7\1\1\0\7\1\1\0\7\1\1\0\7\1\1\0"+
- "\7\1\1\0\7\1\1\0\40\3\57\0\1\1\120\0\32\12\1\0"+
- "\131\12\14\0\326\12\57\0\1\1\1\0\1\12\31\0\11\12\6\3"+
- "\1\0\5\5\2\0\3\12\1\1\1\1\4\0\126\13\2\0\2\3"+
- "\2\5\3\13\133\5\1\0\4\5\5\0\51\1\3\0\136\2\21\0"+
- "\33\1\65\0\20\5\320\0\57\5\1\0\130\5\250\0\u19b6\12\112\0"+
- "\u51cd\12\63\0\u048d\1\103\0\56\1\2\0\u010d\1\3\0\20\1\12\4"+
- "\2\1\24\0\57\1\4\3\1\0\12\3\1\0\31\1\7\0\1\3"+
- "\120\1\2\3\45\0\11\1\2\0\147\1\2\0\4\1\1\0\4\1"+
- "\14\0\13\1\115\0\12\1\1\3\3\1\1\3\4\1\1\3\27\1"+
- "\5\3\30\0\64\1\14\0\2\3\62\1\21\3\13\0\12\4\6\0"+
- "\22\3\6\1\3\0\1\1\4\0\12\4\34\1\10\3\2\0\27\1"+
- "\15\3\14\0\35\2\3\0\4\3\57\1\16\3\16\0\1\1\12\4"+
- "\46\0\51\1\16\3\11\0\3\1\1\3\10\1\2\3\2\0\12\4"+
- "\6\0\33\20\1\21\4\0\60\20\1\21\1\20\3\21\2\20\2\21"+
- "\5\20\2\21\1\20\1\21\1\20\30\0\5\20\13\1\5\3\2\0"+
- "\3\1\2\3\12\0\6\1\2\0\6\1\2\0\6\1\11\0\7\1"+
- "\1\0\7\1\221\0\43\1\10\3\1\0\2\3\2\0\12\4\6\0"+
- "\u2ba4\2\14\0\27\2\4\0\61\2\u2104\0\u016e\12\2\0\152\12\46\0"+
- "\7\1\14\0\5\1\5\0\1\16\1\3\12\16\1\0\15\16\1\0"+
- "\5\16\1\0\1\16\1\0\2\16\1\0\2\16\1\0\12\16\142\1"+
- "\41\0\u016b\1\22\0\100\1\2\0\66\1\50\0\14\1\4\0\20\3"+
- "\1\7\2\0\1\6\1\7\13\0\7\3\14\0\2\11\30\0\3\11"+
- "\1\7\1\0\1\10\1\0\1\7\1\6\32\0\5\1\1\0\207\1"+
- "\2\0\1\3\7\0\1\10\4\0\1\7\1\0\1\10\1\0\12\4"+
- "\1\6\1\7\5\0\32\1\4\0\1\11\1\0\32\1\13\0\70\5"+
- "\2\3\37\2\3\0\6\2\2\0\6\2\2\0\6\2\2\0\3\2"+
- "\34\0\3\3\4\0\14\1\1\0\32\1\1\0\23\1\1\0\2\1"+
- "\1\0\17\1\2\0\16\1\42\0\173\1\105\0\65\1\210\0\1\3"+
- "\202\0\35\1\3\0\61\1\57\0\37\1\21\0\33\1\65\0\36\1"+
- "\2\0\44\1\4\0\10\1\1\0\5\1\52\0\236\1\2\0\12\4"+
- "\u0356\0\6\1\2\0\1\1\1\0\54\1\1\0\2\1\3\0\1\1"+
- "\2\0\27\1\252\0\26\1\12\0\32\1\106\0\70\1\6\0\2\1"+
- "\100\0\1\1\3\3\1\0\2\3\5\0\4\3\4\1\1\0\3\1"+
- "\1\0\33\1\4\0\3\3\4\0\1\3\40\0\35\1\203\0\66\1"+
- "\12\0\26\1\12\0\23\1\215\0\111\1\u03b7\0\3\3\65\1\17\3"+
- "\37\0\12\4\20\0\3\3\55\1\13\3\2\0\1\3\22\0\31\1"+
- "\7\0\12\4\6\0\3\3\44\1\16\3\1\0\12\4\100\0\3\3"+
- "\60\1\16\3\4\1\13\0\12\4\u04a6\0\53\1\15\3\10\0\12\4"+
- "\u0936\0\u036f\1\221\0\143\1\u0b9d\0\u042f\1\u33d1\0\u0239\1\u04c7\0\105\1"+
- "\13\0\1\1\56\3\20\0\4\3\15\1\u4060\0\1\5\1\13\u2163\0"+
- "\5\3\3\0\26\3\2\0\7\3\36\0\4\3\224\0\3\3\u01bb\0"+
- "\125\1\1\0\107\1\1\0\2\1\2\0\1\1\2\0\2\1\2\0"+
- "\4\1\1\0\14\1\1\0\1\1\1\0\7\1\1\0\101\1\1\0"+
- "\4\1\2\0\10\1\1\0\7\1\1\0\34\1\1\0\4\1\1\0"+
- "\5\1\1\0\1\1\3\0\7\1\1\0\u0154\1\2\0\31\1\1\0"+
- "\31\1\1\0\37\1\1\0\31\1\1\0\37\1\1\0\31\1\1\0"+
- "\37\1\1\0\31\1\1\0\37\1\1\0\31\1\1\0\10\1\2\0"+
- "\62\4\u1600\0\4\1\1\0\33\1\1\0\2\1\1\0\1\1\2\0"+
- "\1\1\1\0\12\1\1\0\4\1\1\0\1\1\1\0\1\1\6\0"+
- "\1\1\4\0\1\1\1\0\1\1\1\0\1\1\1\0\3\1\1\0"+
- "\2\1\1\0\1\1\2\0\1\1\1\0\1\1\1\0\1\1\1\0"+
- "\1\1\1\0\1\1\1\0\2\1\1\0\1\1\2\0\4\1\1\0"+
- "\7\1\1\0\4\1\1\0\4\1\1\0\1\1\1\0\12\1\1\0"+
- "\21\1\5\0\3\1\1\0\5\1\1\0\21\1\u032a\0\32\17\1\13"+
- "\u0dff\0\ua6d7\12\51\0\u1035\12\13\0\336\12\u3fe2\0\u021e\12\uffff\0\uffff\0\uffff\0\uffff\0\uffff\0\uffff\0\uffff\0\uffff\0\uffff\0\uffff\0\uffff\0\u05ee\0"+
- "\1\3\36\0\140\3\200\0\360\3\uffff\0\uffff\0\ufe12\0";
-
- /**
- * Translates characters to character classes
- */
- private static final char [] ZZ_CMAP = zzUnpackCMap(ZZ_CMAP_PACKED);
-
- /**
- * Translates DFA states to action switch labels.
- */
- private static final int [] ZZ_ACTION = zzUnpackAction();
-
- private static final String ZZ_ACTION_PACKED_0 =
- "\1\0\1\1\1\2\1\3\1\4\1\5\1\1\1\6"+
- "\1\7\1\2\1\1\1\10\1\2\1\0\1\2\1\0"+
- "\1\4\1\0\2\2\2\0\1\1\1\0";
-
- private static int [] zzUnpackAction() {
- int [] result = new int[24];
- int offset = 0;
- offset = zzUnpackAction(ZZ_ACTION_PACKED_0, offset, result);
- return result;
- }
-
- private static int zzUnpackAction(String packed, int offset, int [] result) {
- int i = 0; /* index in packed string */
- int j = offset; /* index in unpacked array */
- int l = packed.length();
- while (i < l) {
- int count = packed.charAt(i++);
- int value = packed.charAt(i++);
- do result[j++] = value; while (--count > 0);
- }
- return j;
- }
-
-
- /**
- * Translates a state to a row index in the transition table
- */
- private static final int [] ZZ_ROWMAP = zzUnpackRowMap();
-
- private static final String ZZ_ROWMAP_PACKED_0 =
- "\0\0\0\22\0\44\0\66\0\110\0\132\0\154\0\176"+
- "\0\220\0\242\0\264\0\306\0\330\0\352\0\374\0\u010e"+
- "\0\u0120\0\154\0\u0132\0\u0144\0\u0156\0\264\0\u0168\0\u017a";
-
- private static int [] zzUnpackRowMap() {
- int [] result = new int[24];
- int offset = 0;
- offset = zzUnpackRowMap(ZZ_ROWMAP_PACKED_0, offset, result);
- return result;
- }
-
- private static int zzUnpackRowMap(String packed, int offset, int [] result) {
- int i = 0; /* index in packed string */
- int j = offset; /* index in unpacked array */
- int l = packed.length();
- while (i < l) {
- int high = packed.charAt(i++) << 16;
- result[j++] = high | packed.charAt(i++);
- }
- return j;
- }
-
- /**
- * The transition table of the DFA
- */
- private static final int [] ZZ_TRANS = zzUnpackTrans();
-
- private static final String ZZ_TRANS_PACKED_0 =
- "\1\2\1\3\1\4\1\2\1\5\1\6\3\2\1\7"+
- "\1\10\1\11\2\2\1\12\1\13\2\14\23\0\3\3"+
- "\1\15\1\0\1\16\1\0\1\16\1\17\2\0\1\16"+
- "\1\0\1\12\2\0\1\3\1\0\1\3\2\4\1\15"+
- "\1\0\1\16\1\0\1\16\1\17\2\0\1\16\1\0"+
- "\1\12\2\0\1\4\1\0\2\3\2\5\2\0\2\20"+
- "\1\21\2\0\1\20\1\0\1\12\2\0\1\5\3\0"+
- "\1\6\1\0\1\6\3\0\1\17\7\0\1\6\1\0"+
- "\2\3\1\22\1\5\1\23\3\0\1\22\4\0\1\12"+
- "\2\0\1\22\3\0\1\10\15\0\1\10\3\0\1\11"+
- "\15\0\1\11\1\0\2\3\1\12\1\15\1\0\1\16"+
- "\1\0\1\16\1\17\2\0\1\24\1\25\1\12\2\0"+
- "\1\12\3\0\1\26\13\0\1\27\1\0\1\26\3\0"+
- "\1\14\14\0\2\14\1\0\2\3\2\15\2\0\2\30"+
- "\1\17\2\0\1\30\1\0\1\12\2\0\1\15\1\0"+
- "\2\3\1\16\12\0\1\3\2\0\1\16\1\0\2\3"+
- "\1\17\1\15\1\23\3\0\1\17\4\0\1\12\2\0"+
- "\1\17\3\0\1\20\1\5\14\0\1\20\1\0\2\3"+
- "\1\21\1\5\1\23\3\0\1\21\4\0\1\12\2\0"+
- "\1\21\3\0\1\23\1\0\1\23\3\0\1\17\7\0"+
- "\1\23\1\0\2\3\1\24\1\15\4\0\1\17\4\0"+
- "\1\12\2\0\1\24\3\0\1\25\12\0\1\24\2\0"+
- "\1\25\3\0\1\27\13\0\1\27\1\0\1\27\3\0"+
- "\1\30\1\15\14\0\1\30";
-
- private static int [] zzUnpackTrans() {
- int [] result = new int[396];
- int offset = 0;
- offset = zzUnpackTrans(ZZ_TRANS_PACKED_0, offset, result);
- return result;
- }
-
- private static int zzUnpackTrans(String packed, int offset, int [] result) {
- int i = 0; /* index in packed string */
- int j = offset; /* index in unpacked array */
- int l = packed.length();
- while (i < l) {
- int count = packed.charAt(i++);
- int value = packed.charAt(i++);
- value--;
- do result[j++] = value; while (--count > 0);
- }
- return j;
- }
-
-
- /* error codes */
- private static final int ZZ_UNKNOWN_ERROR = 0;
- private static final int ZZ_NO_MATCH = 1;
- private static final int ZZ_PUSHBACK_2BIG = 2;
-
- /* error messages for the codes above */
- private static final String ZZ_ERROR_MSG[] = {
- "Unkown internal scanner error",
- "Error: could not match input",
- "Error: pushback value was too large"
- };
-
- /**
- * ZZ_ATTRIBUTE[aState] contains the attributes of state <code>aState</code>
- */
- private static final int [] ZZ_ATTRIBUTE = zzUnpackAttribute();
-
- private static final String ZZ_ATTRIBUTE_PACKED_0 =
- "\1\0\1\11\13\1\1\0\1\1\1\0\1\1\1\0"+
- "\2\1\2\0\1\1\1\0";
-
- private static int [] zzUnpackAttribute() {
- int [] result = new int[24];
- int offset = 0;
- offset = zzUnpackAttribute(ZZ_ATTRIBUTE_PACKED_0, offset, result);
- return result;
- }
-
- private static int zzUnpackAttribute(String packed, int offset, int [] result) {
- int i = 0; /* index in packed string */
- int j = offset; /* index in unpacked array */
- int l = packed.length();
- while (i < l) {
- int count = packed.charAt(i++);
- int value = packed.charAt(i++);
- do result[j++] = value; while (--count > 0);
- }
- return j;
- }
-
- /** the input device */
- private java.io.Reader zzReader;
-
- /** the current state of the DFA */
- private int zzState;
-
- /** the current lexical state */
- private int zzLexicalState = YYINITIAL;
-
- /** this buffer contains the current text to be matched and is
- the source of the yytext() string */
- private char zzBuffer[] = new char[ZZ_BUFFERSIZE];
-
- /** the textposition at the last accepting state */
- private int zzMarkedPos;
-
- /** the current text position in the buffer */
- private int zzCurrentPos;
-
- /** startRead marks the beginning of the yytext() string in the buffer */
- private int zzStartRead;
-
- /** endRead marks the last character in the buffer, that has been read
- from input */
- private int zzEndRead;
-
- /** number of newlines encountered up to the start of the matched text */
- private int yyline;
-
- /** the number of characters up to the start of the matched text */
- private int yychar;
-
- /**
- * the number of characters from the last newline up to the start of the
- * matched text
- */
- private int yycolumn;
-
- /**
- * zzAtBOL == true <=> the scanner is currently at the beginning of a line
- */
- private boolean zzAtBOL = true;
-
- /** zzAtEOF == true <=> the scanner is at the EOF */
- private boolean zzAtEOF;
-
- /** denotes if the user-EOF-code has already been executed */
- private boolean zzEOFDone;
-
- /**
- * The number of occupied positions in zzBuffer beyond zzEndRead.
- * When a lead/high surrogate has been read from the input stream
- * into the final zzBuffer position, this will have a value of 1;
- * otherwise, it will have a value of 0.
- */
- private int zzFinalHighSurrogate = 0;
-
- /* user code: */
- /** Alphanumeric sequences */
- public static final int WORD_TYPE = StandardTokenizer.ALPHANUM;
-
- /** Numbers */
- public static final int NUMERIC_TYPE = StandardTokenizer.NUM;
-
- /**
- * Chars in class \p{Line_Break = Complex_Context} are from South East Asian
- * scripts (Thai, Lao, Myanmar, Khmer, etc.). Sequences of these are kept
- * together as a single token rather than broken up, because the logic
- * required to break them at word boundaries is too complex for UAX#29.
- * <p>
- * See Unicode Line Breaking Algorithm: http://www.unicode.org/reports/tr14/#SA
- */
- public static final int SOUTH_EAST_ASIAN_TYPE = StandardTokenizer.SOUTHEAST_ASIAN;
-
- public static final int IDEOGRAPHIC_TYPE = StandardTokenizer.IDEOGRAPHIC;
-
- public static final int HIRAGANA_TYPE = StandardTokenizer.HIRAGANA;
-
- public static final int KATAKANA_TYPE = StandardTokenizer.KATAKANA;
-
- public static final int HANGUL_TYPE = StandardTokenizer.HANGUL;
-
- public final int yychar()
- {
- return yychar;
- }
-
- /**
- * Fills CharTermAttribute with the current token text.
- */
- public final void getText(CharTermAttribute t) {
- t.copyBuffer(zzBuffer, zzStartRead, zzMarkedPos-zzStartRead);
- }
-
- /**
- * Sets the scanner buffer size in chars
- */
- public final void setBufferSize(int numChars) {
- ZZ_BUFFERSIZE = numChars;
- char[] newZzBuffer = new char[ZZ_BUFFERSIZE];
- System.arraycopy(zzBuffer, 0, newZzBuffer, 0, Math.min(zzBuffer.length, ZZ_BUFFERSIZE));
- zzBuffer = newZzBuffer;
- }
-
-
- /**
- * Creates a new scanner
- *
- * @param in the java.io.Reader to read input from.
- */
- public StandardTokenizerImpl(java.io.Reader in) {
- this.zzReader = in;
- }
-
-
- /**
- * Unpacks the compressed character translation table.
- *
- * @param packed the packed character translation table
- * @return the unpacked character translation table
- */
- private static char [] zzUnpackCMap(String packed) {
- char [] map = new char[0x110000];
- int i = 0; /* index in packed string */
- int j = 0; /* index in unpacked array */
- while (i < 2836) {
- int count = packed.charAt(i++);
- char value = packed.charAt(i++);
- do map[j++] = value; while (--count > 0);
- }
- return map;
- }
-
-
- /**
- * Refills the input buffer.
- *
- * @return <code>false</code>, iff there was new input.
- *
- * @exception java.io.IOException if any I/O-Error occurs
- */
- private boolean zzRefill() throws java.io.IOException {
-
- /* first: make room (if you can) */
- if (zzStartRead > 0) {
- zzEndRead += zzFinalHighSurrogate;
- zzFinalHighSurrogate = 0;
- System.arraycopy(zzBuffer, zzStartRead,
- zzBuffer, 0,
- zzEndRead-zzStartRead);
-
- /* translate stored positions */
- zzEndRead-= zzStartRead;
- zzCurrentPos-= zzStartRead;
- zzMarkedPos-= zzStartRead;
- zzStartRead = 0;
- }
-
-
- /* fill the buffer with new input */
- int requested = zzBuffer.length - zzEndRead - zzFinalHighSurrogate;
- int totalRead = 0;
- while (totalRead < requested) {
- int numRead = zzReader.read(zzBuffer, zzEndRead + totalRead, requested - totalRead);
- if (numRead == -1) {
- break;
- }
- totalRead += numRead;
- }
-
- if (totalRead > 0) {
- zzEndRead += totalRead;
- if (totalRead == requested) { /* possibly more input available */
- if (Character.isHighSurrogate(zzBuffer[zzEndRead - 1])) {
- --zzEndRead;
- zzFinalHighSurrogate = 1;
- if (totalRead == 1) { return true; }
- }
- }
- return false;
- }
-
- // totalRead = 0: End of stream
- return true;
- }
-
-
- /**
- * Closes the input stream.
- */
- public final void yyclose() throws java.io.IOException {
- zzAtEOF = true; /* indicate end of file */
- zzEndRead = zzStartRead; /* invalidate buffer */
-
- if (zzReader != null)
- zzReader.close();
- }
-
-
- /**
- * Resets the scanner to read from a new input stream.
- * Does not close the old reader.
- *
- * All internal variables are reset, the old input stream
- * <b>cannot</b> be reused (internal buffer is discarded and lost).
- * Lexical state is set to <tt>ZZ_INITIAL</tt>.
- *
- * Internal scan buffer is resized down to its initial length, if it has grown.
- *
- * @param reader the new input stream
- */
- public final void yyreset(java.io.Reader reader) {
- zzReader = reader;
- zzAtBOL = true;
- zzAtEOF = false;
- zzEOFDone = false;
- zzEndRead = zzStartRead = 0;
- zzCurrentPos = zzMarkedPos = 0;
- zzFinalHighSurrogate = 0;
- yyline = yychar = yycolumn = 0;
- zzLexicalState = YYINITIAL;
- if (zzBuffer.length > ZZ_BUFFERSIZE)
- zzBuffer = new char[ZZ_BUFFERSIZE];
- }
-
-
- /**
- * Returns the current lexical state.
- */
- public final int yystate() {
- return zzLexicalState;
- }
-
-
- /**
- * Enters a new lexical state
- *
- * @param newState the new lexical state
- */
- public final void yybegin(int newState) {
- zzLexicalState = newState;
- }
-
-
- /**
- * Returns the text matched by the current regular expression.
- */
- public final String yytext() {
- return new String( zzBuffer, zzStartRead, zzMarkedPos-zzStartRead );
- }
-
-
- /**
- * Returns the character at position <tt>pos</tt> from the
- * matched text.
- *
- * It is equivalent to yytext().charAt(pos), but faster
- *
- * @param pos the position of the character to fetch.
- * A value from 0 to yylength()-1.
- *
- * @return the character at position pos
- */
- public final char yycharat(int pos) {
- return zzBuffer[zzStartRead+pos];
- }
-
-
- /**
- * Returns the length of the matched text region.
- */
- public final int yylength() {
- return zzMarkedPos-zzStartRead;
- }
-
-
- /**
- * Reports an error that occured while scanning.
- *
- * In a wellformed scanner (no or only correct usage of
- * yypushback(int) and a match-all fallback rule) this method
- * will only be called with things that "Can't Possibly Happen".
- * If this method is called, something is seriously wrong
- * (e.g. a JFlex bug producing a faulty scanner etc.).
- *
- * Usual syntax/scanner level error handling should be done
- * in error fallback rules.
- *
- * @param errorCode the code of the errormessage to display
- */
- private void zzScanError(int errorCode) {
- String message;
- try {
- message = ZZ_ERROR_MSG[errorCode];
- }
- catch (ArrayIndexOutOfBoundsException e) {
- message = ZZ_ERROR_MSG[ZZ_UNKNOWN_ERROR];
- }
-
- throw new Error(message);
- }
-
-
- /**
- * Pushes the specified amount of characters back into the input stream.
- *
- * They will be read again by then next call of the scanning method
- *
- * @param number the number of characters to be read again.
- * This number must not be greater than yylength()!
- */
- public void yypushback(int number) {
- if ( number > yylength() )
- zzScanError(ZZ_PUSHBACK_2BIG);
-
- zzMarkedPos -= number;
- }
-
-
- /**
- * Resumes scanning until the next regular expression is matched,
- * the end of input is encountered or an I/O-Error occurs.
- *
- * @return the next token
- * @exception java.io.IOException if any I/O-Error occurs
- */
- public int getNextToken() throws java.io.IOException {
- int zzInput;
- int zzAction;
-
- // cached fields:
- int zzCurrentPosL;
- int zzMarkedPosL;
- int zzEndReadL = zzEndRead;
- char [] zzBufferL = zzBuffer;
- char [] zzCMapL = ZZ_CMAP;
-
- int [] zzTransL = ZZ_TRANS;
- int [] zzRowMapL = ZZ_ROWMAP;
- int [] zzAttrL = ZZ_ATTRIBUTE;
-
- while (true) {
- zzMarkedPosL = zzMarkedPos;
-
- yychar+= zzMarkedPosL-zzStartRead;
-
- zzAction = -1;
-
- zzCurrentPosL = zzCurrentPos = zzStartRead = zzMarkedPosL;
-
- zzState = ZZ_LEXSTATE[zzLexicalState];
-
- // set up zzAction for empty match case:
- int zzAttributes = zzAttrL[zzState];
- if ( (zzAttributes & 1) == 1 ) {
- zzAction = zzState;
- }
-
-
- zzForAction: {
- while (true) {
-
- if (zzCurrentPosL < zzEndReadL) {
- zzInput = Character.codePointAt(zzBufferL, zzCurrentPosL, zzEndReadL);
- zzCurrentPosL += Character.charCount(zzInput);
- }
- else if (zzAtEOF) {
- zzInput = YYEOF;
- break zzForAction;
- }
- else {
- // store back cached positions
- zzCurrentPos = zzCurrentPosL;
- zzMarkedPos = zzMarkedPosL;
- boolean eof = zzRefill();
- // get translated positions and possibly new buffer
- zzCurrentPosL = zzCurrentPos;
- zzMarkedPosL = zzMarkedPos;
- zzBufferL = zzBuffer;
- zzEndReadL = zzEndRead;
- if (eof) {
- zzInput = YYEOF;
- break zzForAction;
- }
- else {
- zzInput = Character.codePointAt(zzBufferL, zzCurrentPosL, zzEndReadL);
- zzCurrentPosL += Character.charCount(zzInput);
- }
- }
- int zzNext = zzTransL[ zzRowMapL[zzState] + zzCMapL[zzInput] ];
- if (zzNext == -1) break zzForAction;
- zzState = zzNext;
-
- zzAttributes = zzAttrL[zzState];
- if ( (zzAttributes & 1) == 1 ) {
- zzAction = zzState;
- zzMarkedPosL = zzCurrentPosL;
- if ( (zzAttributes & 8) == 8 ) break zzForAction;
- }
-
- }
- }
-
- // store back cached position
- zzMarkedPos = zzMarkedPosL;
-
- switch (zzAction < 0 ? zzAction : ZZ_ACTION[zzAction]) {
- case 1:
- { /* Break so we don't hit fall-through warning: */ break; /* Not numeric, word, ideographic, hiragana, or SE Asian -- ignore it. */
- }
- case 9: break;
- case 2:
- { return WORD_TYPE;
- }
- case 10: break;
- case 3:
- { return HANGUL_TYPE;
- }
- case 11: break;
- case 4:
- { return NUMERIC_TYPE;
- }
- case 12: break;
- case 5:
- { return KATAKANA_TYPE;
- }
- case 13: break;
- case 6:
- { return IDEOGRAPHIC_TYPE;
- }
- case 14: break;
- case 7:
- { return HIRAGANA_TYPE;
- }
- case 15: break;
- case 8:
- { return SOUTH_EAST_ASIAN_TYPE;
- }
- case 16: break;
- default:
- if (zzInput == YYEOF && zzStartRead == zzCurrentPos) {
- zzAtEOF = true;
- {
- return YYEOF;
- }
- }
- else {
- zzScanError(ZZ_NO_MATCH);
- }
- }
- }
- }
-
-
-}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/87016b5f/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.jflex
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.jflex b/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.jflex
deleted file mode 100644
index 34f4ead..0000000
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.jflex
+++ /dev/null
@@ -1,201 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.lucene.analysis.standard;
-
-import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
-
-/**
- * This class implements Word Break rules from the Unicode Text Segmentation
- * algorithm, as specified in
- * <a href="http://unicode.org/reports/tr29/">Unicode Standard Annex #29</a>.
- * <p>
- * Tokens produced are of the following types:
- * <ul>
- * <li><ALPHANUM>: A sequence of alphabetic and numeric characters</li>
- * <li><NUM>: A number</li>
- * <li><SOUTHEAST_ASIAN>: A sequence of characters from South and Southeast
- * Asian languages, including Thai, Lao, Myanmar, and Khmer</li>
- * <li><IDEOGRAPHIC>: A single CJKV ideographic character</li>
- * <li><HIRAGANA>: A single hiragana character</li>
- * <li><KATAKANA>: A sequence of katakana characters</li>
- * <li><HANGUL>: A sequence of Hangul characters</li>
- * </ul>
- */
-@SuppressWarnings("fallthrough")
-%%
-
-%unicode 6.3
-%integer
-%final
-%public
-%class StandardTokenizerImpl
-%function getNextToken
-%char
-%buffer 255
-
-// UAX#29 WB4. X (Extend | Format)* --> X
-//
-HangulEx = [\p{Script:Hangul}&&[\p{WB:ALetter}\p{WB:Hebrew_Letter}]] [\p{WB:Format}\p{WB:Extend}]*
-HebrewOrALetterEx = [\p{WB:HebrewLetter}\p{WB:ALetter}] [\p{WB:Format}\p{WB:Extend}]*
-NumericEx = [\p{WB:Numeric}[\p{Blk:HalfAndFullForms}&&\p{Nd}]] [\p{WB:Format}\p{WB:Extend}]*
-KatakanaEx = \p{WB:Katakana} [\p{WB:Format}\p{WB:Extend}]*
-MidLetterEx = [\p{WB:MidLetter}\p{WB:MidNumLet}\p{WB:SingleQuote}] [\p{WB:Format}\p{WB:Extend}]*
-MidNumericEx = [\p{WB:MidNum}\p{WB:MidNumLet}\p{WB:SingleQuote}] [\p{WB:Format}\p{WB:Extend}]*
-ExtendNumLetEx = \p{WB:ExtendNumLet} [\p{WB:Format}\p{WB:Extend}]*
-HanEx = \p{Script:Han} [\p{WB:Format}\p{WB:Extend}]*
-HiraganaEx = \p{Script:Hiragana} [\p{WB:Format}\p{WB:Extend}]*
-SingleQuoteEx = \p{WB:Single_Quote} [\p{WB:Format}\p{WB:Extend}]*
-DoubleQuoteEx = \p{WB:Double_Quote} [\p{WB:Format}\p{WB:Extend}]*
-HebrewLetterEx = \p{WB:Hebrew_Letter} [\p{WB:Format}\p{WB:Extend}]*
-RegionalIndicatorEx = \p{WB:RegionalIndicator} [\p{WB:Format}\p{WB:Extend}]*
-ComplexContextEx = \p{LB:Complex_Context} [\p{WB:Format}\p{WB:Extend}]*
-
-%{
- /** Alphanumeric sequences */
- public static final int WORD_TYPE = StandardTokenizer.ALPHANUM;
-
- /** Numbers */
- public static final int NUMERIC_TYPE = StandardTokenizer.NUM;
-
- /**
- * Chars in class \p{Line_Break = Complex_Context} are from South East Asian
- * scripts (Thai, Lao, Myanmar, Khmer, etc.). Sequences of these are kept
- * together as as a single token rather than broken up, because the logic
- * required to break them at word boundaries is too complex for UAX#29.
- * <p>
- * See Unicode Line Breaking Algorithm: http://www.unicode.org/reports/tr14/#SA
- */
- public static final int SOUTH_EAST_ASIAN_TYPE = StandardTokenizer.SOUTHEAST_ASIAN;
-
- public static final int IDEOGRAPHIC_TYPE = StandardTokenizer.IDEOGRAPHIC;
-
- public static final int HIRAGANA_TYPE = StandardTokenizer.HIRAGANA;
-
- public static final int KATAKANA_TYPE = StandardTokenizer.KATAKANA;
-
- public static final int HANGUL_TYPE = StandardTokenizer.HANGUL;
-
- public final int yychar()
- {
- return yychar;
- }
-
- /**
- * Fills CharTermAttribute with the current token text.
- */
- public final void getText(CharTermAttribute t) {
- t.copyBuffer(zzBuffer, zzStartRead, zzMarkedPos-zzStartRead);
- }
-
- /**
- * Sets the scanner buffer size in chars
- */
- public final void setBufferSize(int numChars) {
- ZZ_BUFFERSIZE = numChars;
- char[] newZzBuffer = new char[ZZ_BUFFERSIZE];
- System.arraycopy(zzBuffer, 0, newZzBuffer, 0, Math.min(zzBuffer.length, ZZ_BUFFERSIZE));
- zzBuffer = newZzBuffer;
- }
-%}
-
-%%
-
-// UAX#29 WB1. sot ÷
-// WB2. ÷ eot
-//
-<<EOF>> { return YYEOF; }
-
-// UAX#29 WB8. Numeric × Numeric
-// WB11. Numeric (MidNum | MidNumLet | Single_Quote) × Numeric
-// WB12. Numeric × (MidNum | MidNumLet | Single_Quote) Numeric
-// WB13a. (ALetter | Hebrew_Letter | Numeric | Katakana | ExtendNumLet) × ExtendNumLet
-// WB13b. ExtendNumLet × (ALetter | Hebrew_Letter | Numeric | Katakana)
-//
-{ExtendNumLetEx}* {NumericEx} ( ( {ExtendNumLetEx}* | {MidNumericEx} ) {NumericEx} )* {ExtendNumLetEx}*
- { return NUMERIC_TYPE; }
-
-// subset of the below for typing purposes only!
-{HangulEx}+
- { return HANGUL_TYPE; }
-
-{KatakanaEx}+
- { return KATAKANA_TYPE; }
-
-// UAX#29 WB5. (ALetter | Hebrew_Letter) × (ALetter | Hebrew_Letter)
-// WB6. (ALetter | Hebrew_Letter) × (MidLetter | MidNumLet | Single_Quote) (ALetter | Hebrew_Letter)
-// WB7. (ALetter | Hebrew_Letter) (MidLetter | MidNumLet | Single_Quote) × (ALetter | Hebrew_Letter)
-// WB7a. Hebrew_Letter × Single_Quote
-// WB7b. Hebrew_Letter × Double_Quote Hebrew_Letter
-// WB7c. Hebrew_Letter Double_Quote × Hebrew_Letter
-// WB9. (ALetter | Hebrew_Letter) × Numeric
-// WB10. Numeric × (ALetter | Hebrew_Letter)
-// WB13. Katakana × Katakana
-// WB13a. (ALetter | Hebrew_Letter | Numeric | Katakana | ExtendNumLet) × ExtendNumLet
-// WB13b. ExtendNumLet × (ALetter | Hebrew_Letter | Numeric | Katakana)
-//
-{ExtendNumLetEx}* ( {KatakanaEx} ( {ExtendNumLetEx}* {KatakanaEx} )*
- | ( {HebrewLetterEx} ( {SingleQuoteEx} | {DoubleQuoteEx} {HebrewLetterEx} )
- | {NumericEx} ( ( {ExtendNumLetEx}* | {MidNumericEx} ) {NumericEx} )*
- | {HebrewOrALetterEx} ( ( {ExtendNumLetEx}* | {MidLetterEx} ) {HebrewOrALetterEx} )*
- )+
- )
-({ExtendNumLetEx}+ ( {KatakanaEx} ( {ExtendNumLetEx}* {KatakanaEx} )*
- | ( {HebrewLetterEx} ( {SingleQuoteEx} | {DoubleQuoteEx} {HebrewLetterEx} )
- | {NumericEx} ( ( {ExtendNumLetEx}* | {MidNumericEx} ) {NumericEx} )*
- | {HebrewOrALetterEx} ( ( {ExtendNumLetEx}* | {MidLetterEx} ) {HebrewOrALetterEx} )*
- )+
- )
-)*
-{ExtendNumLetEx}*
- { return WORD_TYPE; }
-
-
-// From UAX #29:
-//
-// [C]haracters with the Line_Break property values of Contingent_Break (CB),
-// Complex_Context (SA/South East Asian), and XX (Unknown) are assigned word
-// boundary property values based on criteria outside of the scope of this
-// annex. That means that satisfactory treatment of languages like Chinese
-// or Thai requires special handling.
-//
-// In Unicode 6.3, only one character has the \p{Line_Break = Contingent_Break}
-// property: U+FFFC ( \ufffc ) OBJECT REPLACEMENT CHARACTER.
-//
-// In the ICU implementation of UAX#29, \p{Line_Break = Complex_Context}
-// character sequences (from South East Asian scripts like Thai, Myanmar, Khmer,
-// Lao, etc.) are kept together. This grammar does the same below.
-//
-// See also the Unicode Line Breaking Algorithm:
-//
-// http://www.unicode.org/reports/tr14/#SA
-//
-{ComplexContextEx}+ { return SOUTH_EAST_ASIAN_TYPE; }
-
-// UAX#29 WB14. Any ÷ Any
-//
-{HanEx} { return IDEOGRAPHIC_TYPE; }
-{HiraganaEx} { return HIRAGANA_TYPE; }
-
-
-// UAX#29 WB3. CR × LF
-// WB3a. (Newline | CR | LF) ÷
-// WB3b. ÷ (Newline | CR | LF)
-// WB13c. Regional_Indicator × Regional_Indicator
-// WB14. Any ÷ Any
-//
-{RegionalIndicatorEx} {RegionalIndicatorEx}+ | [^]
- { /* Break so we don't hit fall-through warning: */ break; /* Not numeric, word, ideographic, hiragana, or SE Asian -- ignore it. */ }
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/87016b5f/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailAnalyzer.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailAnalyzer.java
index 1fc2d7c..9994884 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailAnalyzer.java
@@ -20,18 +20,18 @@ package org.apache.lucene.analysis.standard;
import java.io.IOException;
import java.io.Reader;
+import org.apache.lucene.analysis.CharArraySet;
+import org.apache.lucene.analysis.LowerCaseFilter;
+import org.apache.lucene.analysis.StopFilter;
+import org.apache.lucene.analysis.StopwordAnalyzerBase;
import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.core.LowerCaseFilter;
import org.apache.lucene.analysis.core.StopAnalyzer;
-import org.apache.lucene.analysis.core.StopFilter;
-import org.apache.lucene.analysis.util.CharArraySet;
-import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
/**
* Filters {@link org.apache.lucene.analysis.standard.UAX29URLEmailTokenizer}
* with {@link org.apache.lucene.analysis.standard.StandardFilter},
- * {@link org.apache.lucene.analysis.core.LowerCaseFilter} and
- * {@link org.apache.lucene.analysis.core.StopFilter}, using a list of
+ * {@link org.apache.lucene.analysis.LowerCaseFilter} and
+ * {@link org.apache.lucene.analysis.StopFilter}, using a list of
* English stop words.
*/
public final class UAX29URLEmailAnalyzer extends StopwordAnalyzerBase {
@@ -59,7 +59,7 @@ public final class UAX29URLEmailAnalyzer extends StopwordAnalyzerBase {
}
/** Builds an analyzer with the stop words from the given reader.
- * @see org.apache.lucene.analysis.util.WordlistLoader#getWordSet(java.io.Reader)
+ * @see org.apache.lucene.analysis.WordlistLoader#getWordSet(java.io.Reader)
* @param stopwords Reader to read stop words from */
public UAX29URLEmailAnalyzer(Reader stopwords) throws IOException {
this(loadStopwordSet(stopwords));