You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by mi...@apache.org on 2016/06/14 22:51:13 UTC
[01/12] lucene-solr:branch_6x: LUCENE-7318: graduate StandardAnalyzer
and make it the default for IndexWriterConfig
Repository: lucene-solr
Updated Branches:
refs/heads/branch_6x 45d2d2e7d -> ba9221483
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/test-framework/src/java/org/apache/lucene/analysis/standard/generateJavaUnicodeWordBreakTest.pl
----------------------------------------------------------------------
diff --git a/lucene/test-framework/src/java/org/apache/lucene/analysis/standard/generateJavaUnicodeWordBreakTest.pl b/lucene/test-framework/src/java/org/apache/lucene/analysis/standard/generateJavaUnicodeWordBreakTest.pl
new file mode 100644
index 0000000..3004035
--- /dev/null
+++ b/lucene/test-framework/src/java/org/apache/lucene/analysis/standard/generateJavaUnicodeWordBreakTest.pl
@@ -0,0 +1,232 @@
+#!/usr/bin/perl
+
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+use warnings;
+use strict;
+use File::Spec;
+use Getopt::Long;
+use LWP::UserAgent;
+
+# Split $0 so the generated .java file can later be written into the same
+# directory this script lives in (see File::Spec->catpath below).
+my ($volume, $directory, $script_name) = File::Spec->splitpath($0);
+
+# Required: the Unicode version (X.Y.Z) whose data files will be downloaded.
+my $version = '';
+unless (GetOptions("version=s" => \$version) && $version =~ /\d+\.\d+\.\d+/) {
+ print STDERR "Usage: $script_name -v <version>\n";
+ print STDERR "\tversion must be of the form X.Y.Z, e.g. 5.2.0\n"
+ if ($version);
+ exit 1;
+}
+# URLs of the Unicode Character Database files for the requested version.
+my $url_prefix = "http://www.unicode.org/Public/${version}/ucd";
+my $scripts_url = "${url_prefix}/Scripts.txt";
+my $line_break_url = "${url_prefix}/LineBreak.txt";
+my $word_break_url = "${url_prefix}/auxiliary/WordBreakProperty.txt";
+my $word_break_test_url = "${url_prefix}/auxiliary/WordBreakTest.txt";
+# The generated class embeds the version with dots replaced by underscores,
+# e.g. WordBreakTestUnicode_6_3_0 / WordBreakTestUnicode_6_3_0.java.
+my $underscore_version = $version;
+$underscore_version =~ s/\./_/g;
+my $class_name = "WordBreakTestUnicode_${underscore_version}";
+my $output_filename = "${class_name}.java";
+# Boilerplate header of the generated JUnit test class.  The ${...} and $...
+# placeholders are interpolated from the variables computed above; '\\u' and
+# '\@' are escaped so they survive into the generated Java source.
+my $header =<<"__HEADER__";
+package org.apache.lucene.analysis;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.junit.Ignore;
+
+/**
+ * This class was automatically generated by ${script_name}
+ * from: ${url_prefix}/auxiliary/WordBreakTest.txt
+ *
+ * WordBreakTest.txt indicates the points in the provided character sequences
+ * at which conforming implementations must and must not break words. This
+ * class tests for expected token extraction from each of the test sequences
+ * in WordBreakTest.txt, where the expected tokens are those character
+ * sequences bounded by word breaks and containing at least one character
+ * from one of the following character sets:
+ *
+ * \\p{Script = Han} (From $scripts_url)
+ * \\p{Script = Hiragana}
+ * \\p{LineBreak = Complex_Context} (From $line_break_url)
+ * \\p{WordBreak = ALetter} (From $word_break_url)
+ * \\p{WordBreak = Hebrew_Letter}
+ * \\p{WordBreak = Katakana}
+ * \\p{WordBreak = Numeric} (Excludes full-width Arabic digits)
+ * [\\uFF10-\\uFF19] (Full-width Arabic digits)
+ */
+\@Ignore
+public class ${class_name} extends BaseTokenStreamTestCase {
+
+ public void test(Analyzer analyzer) throws Exception {
+__HEADER__
+
+# Sparse array indexed by code point; a defined slot marks a "wanted" char,
+# i.e. one that causes its containing word-break segment to become a token.
+my $codepoints = [];
+# Seed with the full-width Arabic digits [FF10-FF19].
+# (Postfix for: 'map' in void context for side effects is an anti-pattern.)
+$codepoints->[$_] = 1 for (0xFF10..0xFF19);
+# Complex_Context is an alias for 'SA', which is used in LineBreak.txt
+# Using lowercase versions of property value names to allow for case-
+# insensitive comparison with the names in the Unicode data files.
+parse_Unicode_data_file($line_break_url, $codepoints, {'sa' => 1});
+parse_Unicode_data_file($scripts_url, $codepoints,
+ {'han' => 1, 'hiragana' => 1});
+parse_Unicode_data_file($word_break_url, $codepoints,
+ {'aletter' => 1, 'hebrew_letter' => 1, 'katakana' => 1, 'numeric' => 1});
+# One entry per line of WordBreakTest.txt (handles both \n and \r\n endings).
+my @tests = split /\r?\n/, get_URL_content($word_break_test_url);
+
+my $output_path = File::Spec->catpath($volume, $directory, $output_filename);
+# Three-arg open with a lexical filehandle, and low-precedence 'or' so a
+# failed open actually dies: with '||' the operator bound to the filename
+# string (always true), so the die could never fire.
+open my $out, '>', $output_path
+  or die "Error opening '$output_path' for writing: $!";
+
+print STDERR "Writing '$output_path'...";
+
+print {$out} $header;
+
+# WordBreakTest.txt marks each position with U+00F7 DIVISION SIGN (÷, break
+# allowed) or U+00D7 MULTIPLICATION SIGN (×, break prohibited); these
+# characters had been corrupted to U+FFFD and are restored here.
+for my $line (@tests) {
+  next if ($line =~ /^\s*(?:|\#.*)$/); # Skip blank or comment-only lines
+  # Example line: ÷ 0001 × 0300 ÷ # ÷ [0.2] <START OF HEADING> (Other) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
+  my ($sequence) = $line =~ /^(.*?)\s*\#/;
+  $line =~ s/\t/  /g; # Convert tabs to two spaces (no tabs allowed in Lucene source)
+  print {$out} " // $line\n";
+  $sequence =~ s/\s*÷\s*$//; # Trim trailing break character
+  # Build the Java string literal: every break marker becomes a \u escape
+  # prefix for the following 4-digit code point.
+  my $test_string = $sequence;
+  $test_string =~ s/\s*÷\s*/\\u/g;
+  $test_string =~ s/\s*×\s*/\\u/g;
+  # 5+ hex digits = above the BMP: expand to a UTF-16 surrogate pair.
+  $test_string =~ s/\\u([0-9A-F]{5,})/join('', map { "\\u$_" } above_BMP_char_to_surrogates($1))/ge;
+  # Escapes that Java source cannot carry as raw \uXXXX: newline, carriage
+  # return (would end the line), and double quote (would end the literal).
+  $test_string =~ s/\\u000A/\\n/g;
+  $test_string =~ s/\\u000D/\\r/g;
+  $test_string =~ s/\\u0022/\\\"/g;
+  $sequence =~ s/^\s*÷\s*//; # Trim leading break character
+  # Each ÷-delimited segment is a candidate token; keep it only if it
+  # contains at least one "wanted" code point (see $codepoints above).
+  my @tokens = ();
+  for my $candidate (split /\s*÷\s*/, $sequence) {
+    my @chars = ();
+    my $has_wanted_char = 0;
+    while ($candidate =~ /([0-9A-F]+)/gi) {
+      my $hexchar = $1;
+      if (4 == length($hexchar)) {
+        push @chars, $hexchar;
+      } else {
+        push @chars, above_BMP_char_to_surrogates($hexchar);
+      }
+      unless ($has_wanted_char) {
+        $has_wanted_char = 1 if (defined($codepoints->[hex($hexchar)]));
+      }
+    }
+    if ($has_wanted_char) {
+      push @tokens, '"'.join('', map { "\\u$_" } @chars).'"';
+    }
+  }
+  print {$out} " assertAnalyzesTo(analyzer, \"${test_string}\",\n";
+  print {$out} " new String[] { ";
+  print {$out} join(", ", @tokens), " });\n\n";
+}
+
+print {$out} " }\n}\n";
+# Check close on the write handle: buffered write errors surface here.
+close $out or die "Error closing '$output_path': $!";
+print STDERR "done.\n";
+
+
+# sub above_BMP_char_to_surrogates
+#
+# Converts hex references to chars above the BMP (i.e., greater than 0xFFFF)
+# to the corresponding UTF-16 surrogate pair
+#
+# Assumption: input string is a sequence of more than four hex digits
+#
+sub above_BMP_char_to_surrogates {
+ my $ch = hex(shift);
+ # Lead (high) surrogate: top 10 bits of (ch - 0x10000), offset from U+D800.
+ my $high_surrogate = 0xD800 + (($ch - 0x10000) >> 10);
+ # Trail (low) surrogate: bottom 10 bits, offset from U+DC00.
+ my $low_surrogate = 0xDC00 + ($ch & 0x3FF);
+ # Return both halves formatted as 4-digit uppercase hex strings.
+ return map { sprintf("%04X", $_) } ($high_surrogate, $low_surrogate);
+}
+
+
+# sub parse_Unicode_data_file
+#
+# Downloads the specified Unicode data file, parses it, and
+# extracts code points assigned any of the given property values, defining
+# the corresponding array position in the passed-in target array.
+#
+# Takes in the following parameters:
+#
+# - URL of the Unicode data file to download and parse
+# - Reference to target array
+# - Reference to hash of property values to get code points for
+#
+sub parse_Unicode_data_file {
+ my $url = shift;
+ my $target = shift;
+ my $wanted_property_values = shift;
+ my $content = get_URL_content($url);
+ print STDERR "Parsing '$url'...";
+ my @lines = split /\r?\n/, $content;
+ for (@lines) {
+ s/\s*#.*//; # Strip trailing comments
+ s/\s+$//; # Strip trailing space
+ next unless (/\S/); # Skip empty lines
+ my ($start, $end, $property_value);
+ # UCD lines are either a single code point or an inclusive range,
+ # followed by ';' and the property value.
+ if (/^([0-9A-F]{4,5})\s*;\s*(.+)/i) {
+ # 00AA ; LATIN
+ $start = $end = hex $1;
+ $property_value = lc $2; # Property value names are case-insensitive
+ } elsif (/^([0-9A-F]{4,5})..([0-9A-F]{4,5})\s*;\s*(.+)/i) {
+ # 0AE6..0AEF ; Gujarati
+ # NOTE(review): the '..' is unescaped, so it matches any two chars;
+ # benign because the UCD files are well-formed, but \.\. would be safer.
+ $start = hex $1;
+ $end = hex $2;
+ $property_value = lc $3; # Property value names are case-insensitive
+ } else {
+ next;
+ }
+ # Mark every code point in the range whose property value was requested.
+ if (defined($wanted_property_values->{$property_value})) {
+ for my $code_point ($start..$end) {
+ $target->[$code_point] = 1;
+ }
+ }
+ }
+ print STDERR "done.\n";
+}
+
+# sub get_URL_content
+#
+# Retrieves and returns the content of the given URL.
+#
+sub get_URL_content {
+ my $url = shift;
+ print STDERR "Retrieving '$url'...";
+ my $user_agent = LWP::UserAgent->new;
+ # HTTP::Request is loaded implicitly by LWP::UserAgent; no separate 'use'.
+ my $request = HTTP::Request->new(GET => $url);
+ my $response = $user_agent->request($request);
+ unless ($response->is_success) {
+ # Any download failure aborts the whole script, not just this call.
+ print STDERR "Failed to download '$url':\n\t",$response->status_line,"\n";
+ exit 1;
+ }
+ print STDERR "done.\n";
+ return $response->content;
+}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/test-framework/src/java/org/apache/lucene/analysis/standard/package.html
----------------------------------------------------------------------
diff --git a/lucene/test-framework/src/java/org/apache/lucene/analysis/standard/package.html b/lucene/test-framework/src/java/org/apache/lucene/analysis/standard/package.html
new file mode 100644
index 0000000..f7535b2
--- /dev/null
+++ b/lucene/test-framework/src/java/org/apache/lucene/analysis/standard/package.html
@@ -0,0 +1,26 @@
+<!doctype html public "-//w3c//dtd html 4.0 transitional//en">
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<!-- not a package-info.java, because we already defined this package in spatial/ -->
+<html>
+<head>
+ <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
+</head>
+<body>
+Classes to support <code>StandardAnalyzer</code> component testing
+</body>
+</html>
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/carrot2/SolrStopwordsCarrot2LexicalDataFactory.java
----------------------------------------------------------------------
diff --git a/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/carrot2/SolrStopwordsCarrot2LexicalDataFactory.java b/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/carrot2/SolrStopwordsCarrot2LexicalDataFactory.java
index 2b21103..3caaf54 100644
--- a/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/carrot2/SolrStopwordsCarrot2LexicalDataFactory.java
+++ b/solr/contrib/clustering/src/java/org/apache/solr/handler/clustering/carrot2/SolrStopwordsCarrot2LexicalDataFactory.java
@@ -20,10 +20,10 @@ import java.util.Collection;
import java.util.Set;
import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.util.CharArraySet;
-import org.apache.lucene.analysis.util.TokenFilterFactory;
+import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.commongrams.CommonGramsFilterFactory;
import org.apache.lucene.analysis.core.StopFilterFactory;
+import org.apache.lucene.analysis.util.TokenFilterFactory;
import org.apache.solr.analysis.TokenizerChain;
import org.apache.solr.core.SolrCore;
import org.carrot2.core.LanguageCode;
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/solr/core/src/java/org/apache/solr/core/SolrResourceLoader.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/core/SolrResourceLoader.java b/solr/core/src/java/org/apache/solr/core/SolrResourceLoader.java
index ae6739e..238d387 100644
--- a/solr/core/src/java/org/apache/solr/core/SolrResourceLoader.java
+++ b/solr/core/src/java/org/apache/solr/core/SolrResourceLoader.java
@@ -16,10 +16,6 @@
*/
package org.apache.solr.core;
-import javax.naming.Context;
-import javax.naming.InitialContext;
-import javax.naming.NamingException;
-import javax.naming.NoInitialContextException;
import java.io.Closeable;
import java.io.File;
import java.io.FileOutputStream;
@@ -48,13 +44,17 @@ import java.util.Properties;
import java.util.concurrent.ConcurrentHashMap;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
+import javax.naming.Context;
+import javax.naming.InitialContext;
+import javax.naming.NamingException;
+import javax.naming.NoInitialContextException;
+import org.apache.lucene.analysis.WordlistLoader;
import org.apache.lucene.analysis.util.CharFilterFactory;
import org.apache.lucene.analysis.util.ResourceLoader;
import org.apache.lucene.analysis.util.ResourceLoaderAware;
import org.apache.lucene.analysis.util.TokenFilterFactory;
import org.apache.lucene.analysis.util.TokenizerFactory;
-import org.apache.lucene.analysis.util.WordlistLoader;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.DocValuesFormat;
import org.apache.lucene.codecs.PostingsFormat;
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/solr/core/src/java/org/apache/solr/rest/schema/analysis/ManagedStopFilterFactory.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/rest/schema/analysis/ManagedStopFilterFactory.java b/solr/core/src/java/org/apache/solr/rest/schema/analysis/ManagedStopFilterFactory.java
index 853cf85..393f662 100644
--- a/solr/core/src/java/org/apache/solr/rest/schema/analysis/ManagedStopFilterFactory.java
+++ b/solr/core/src/java/org/apache/solr/rest/schema/analysis/ManagedStopFilterFactory.java
@@ -18,9 +18,9 @@ package org.apache.solr.rest.schema.analysis;
import java.util.Map;
import java.util.Set;
+import org.apache.lucene.analysis.CharArraySet;
+import org.apache.lucene.analysis.StopFilter;
import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.core.StopFilter;
-import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.rest.ManagedResource;
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/solr/core/src/test/org/apache/solr/handler/DocumentAnalysisRequestHandlerTest.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/handler/DocumentAnalysisRequestHandlerTest.java b/solr/core/src/test/org/apache/solr/handler/DocumentAnalysisRequestHandlerTest.java
index d3b0ab0..6e3d82c 100644
--- a/solr/core/src/test/org/apache/solr/handler/DocumentAnalysisRequestHandlerTest.java
+++ b/solr/core/src/test/org/apache/solr/handler/DocumentAnalysisRequestHandlerTest.java
@@ -278,11 +278,11 @@ public class DocumentAnalysisRequestHandlerTest extends AnalysisRequestHandlerTe
assertNotNull("Expecting the 'StandardFilter' to be applied on the query for the 'text' field", tokenList);
assertEquals("Query has only one token", 1, tokenList.size());
assertToken(tokenList.get(0), new TokenInfo("JUMPING", null, "<ALPHANUM>", 0, 7, 1, new int[]{1,1}, null, false));
- tokenList = (List<NamedList>) queryResult.get("org.apache.lucene.analysis.core.LowerCaseFilter");
+ tokenList = (List<NamedList>) queryResult.get("org.apache.lucene.analysis.LowerCaseFilter");
assertNotNull("Expecting the 'LowerCaseFilter' to be applied on the query for the 'text' field", tokenList);
assertEquals("Query has only one token", 1, tokenList.size());
assertToken(tokenList.get(0), new TokenInfo("jumping", null, "<ALPHANUM>", 0, 7, 1, new int[]{1,1,1}, null, false));
- tokenList = (List<NamedList>) queryResult.get("org.apache.lucene.analysis.core.StopFilter");
+ tokenList = (List<NamedList>) queryResult.get("org.apache.lucene.analysis.StopFilter");
assertNotNull("Expecting the 'StopFilter' to be applied on the query for the 'text' field", tokenList);
assertEquals("Query has only one token", 1, tokenList.size());
assertToken(tokenList.get(0), new TokenInfo("jumping", null, "<ALPHANUM>", 0, 7, 1, new int[]{1,1,1,1}, null, false));
@@ -311,7 +311,7 @@ public class DocumentAnalysisRequestHandlerTest extends AnalysisRequestHandlerTe
assertToken(tokenList.get(3), new TokenInfo("Over", null, "<ALPHANUM>", 15, 19, 4, new int[]{4,4}, null, false));
assertToken(tokenList.get(4), new TokenInfo("The", null, "<ALPHANUM>", 20, 23, 5, new int[]{5,5}, null, false));
assertToken(tokenList.get(5), new TokenInfo("Dogs", null, "<ALPHANUM>", 24, 28, 6, new int[]{6,6}, null, false));
- tokenList = valueResult.get("org.apache.lucene.analysis.core.LowerCaseFilter");
+ tokenList = valueResult.get("org.apache.lucene.analysis.LowerCaseFilter");
assertNotNull("Expecting the 'LowerCaseFilter' to be applied on the index for the 'text' field", tokenList);
assertEquals("Expecting 6 tokens", 6, tokenList.size());
assertToken(tokenList.get(0), new TokenInfo("the", null, "<ALPHANUM>", 0, 3, 1, new int[]{1,1,1}, null, false));
@@ -320,7 +320,7 @@ public class DocumentAnalysisRequestHandlerTest extends AnalysisRequestHandlerTe
assertToken(tokenList.get(3), new TokenInfo("over", null, "<ALPHANUM>", 15, 19, 4, new int[]{4,4,4}, null, false));
assertToken(tokenList.get(4), new TokenInfo("the", null, "<ALPHANUM>", 20, 23, 5, new int[]{5,5,5}, null, false));
assertToken(tokenList.get(5), new TokenInfo("dogs", null, "<ALPHANUM>", 24, 28, 6, new int[]{6,6,6}, null, false));
- tokenList = valueResult.get("org.apache.lucene.analysis.core.StopFilter");
+ tokenList = valueResult.get("org.apache.lucene.analysis.StopFilter");
assertNotNull("Expecting the 'StopFilter' to be applied on the index for the 'text' field", tokenList);
assertEquals("Expecting 4 tokens after stop word removal", 4, tokenList.size());
assertToken(tokenList.get(0), new TokenInfo("fox", null, "<ALPHANUM>", 4, 7, 2, new int[]{2,2,2,2}, null, false));
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/solr/core/src/test/org/apache/solr/handler/FieldAnalysisRequestHandlerTest.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/handler/FieldAnalysisRequestHandlerTest.java b/solr/core/src/test/org/apache/solr/handler/FieldAnalysisRequestHandlerTest.java
index d2ef555..2ed00cc 100644
--- a/solr/core/src/test/org/apache/solr/handler/FieldAnalysisRequestHandlerTest.java
+++ b/solr/core/src/test/org/apache/solr/handler/FieldAnalysisRequestHandlerTest.java
@@ -209,7 +209,7 @@ public class FieldAnalysisRequestHandlerTest extends AnalysisRequestHandlerTestB
assertToken(tokenList.get(7), new TokenInfo("lazy", null, "<ALPHANUM>", 34, 38, 8, new int[]{8,8}, null, false));
assertToken(tokenList.get(8), new TokenInfo("brown", null, "<ALPHANUM>", 39, 44, 9, new int[]{9,9}, null, true));
assertToken(tokenList.get(9), new TokenInfo("dogs", null, "<ALPHANUM>", 45, 49, 10, new int[]{10,10}, null, false));
- tokenList = indexPart.get("org.apache.lucene.analysis.core.LowerCaseFilter");
+ tokenList = indexPart.get("org.apache.lucene.analysis.LowerCaseFilter");
assertNotNull("Expcting LowerCaseFilter analysis breakdown", tokenList);
assertEquals(tokenList.size(), 10);
assertToken(tokenList.get(0), new TokenInfo("the", null, "<ALPHANUM>", 0, 3, 1, new int[]{1,1,1}, null, false));
@@ -222,7 +222,7 @@ public class FieldAnalysisRequestHandlerTest extends AnalysisRequestHandlerTestB
assertToken(tokenList.get(7), new TokenInfo("lazy", null, "<ALPHANUM>", 34, 38, 8, new int[]{8,8,8}, null, false));
assertToken(tokenList.get(8), new TokenInfo("brown", null, "<ALPHANUM>", 39, 44, 9, new int[]{9,9,9}, null, true));
assertToken(tokenList.get(9), new TokenInfo("dogs", null, "<ALPHANUM>", 45, 49, 10, new int[]{10,10,10}, null, false));
- tokenList = indexPart.get("org.apache.lucene.analysis.core.StopFilter");
+ tokenList = indexPart.get("org.apache.lucene.analysis.StopFilter");
assertNotNull("Expcting StopFilter analysis breakdown", tokenList);
assertEquals(tokenList.size(), 8);
assertToken(tokenList.get(0), new TokenInfo("quick", null, "<ALPHANUM>", 4, 9, 2, new int[]{2,2,2,2}, null, false));
@@ -258,12 +258,12 @@ public class FieldAnalysisRequestHandlerTest extends AnalysisRequestHandlerTestB
assertEquals(2, tokenList.size());
assertToken(tokenList.get(0), new TokenInfo("fox", null, "<ALPHANUM>", 0, 3, 1, new int[]{1,1}, null, false));
assertToken(tokenList.get(1), new TokenInfo("brown", null, "<ALPHANUM>", 4, 9, 2, new int[]{2,2}, null, false));
- tokenList = queryPart.get("org.apache.lucene.analysis.core.LowerCaseFilter");
+ tokenList = queryPart.get("org.apache.lucene.analysis.LowerCaseFilter");
assertNotNull("Expcting LowerCaseFilter analysis breakdown", tokenList);
assertEquals(2, tokenList.size());
assertToken(tokenList.get(0), new TokenInfo("fox", null, "<ALPHANUM>", 0, 3, 1, new int[]{1,1,1}, null, false));
assertToken(tokenList.get(1), new TokenInfo("brown", null, "<ALPHANUM>", 4, 9, 2, new int[]{2,2,2}, null, false));
- tokenList = queryPart.get("org.apache.lucene.analysis.core.StopFilter");
+ tokenList = queryPart.get("org.apache.lucene.analysis.StopFilter");
assertNotNull("Expcting StopFilter analysis breakdown", tokenList);
assertEquals(2, tokenList.size());
assertToken(tokenList.get(0), new TokenInfo("fox", null, "<ALPHANUM>", 0, 3, 1, new int[]{1,1,1,1}, null, false));
@@ -416,7 +416,7 @@ public class FieldAnalysisRequestHandlerTest extends AnalysisRequestHandlerTestB
assertToken(tokenList.get(3), new TokenInfo("12", null, "word", 9, 11, 3, new int[]{2,3}, null, false));
assertToken(tokenList.get(4), new TokenInfo("a", null, "word", 12, 13, 4, new int[]{3,4}, null, false));
assertToken(tokenList.get(5), new TokenInfo("Test", null, "word", 14, 18, 5, new int[]{4,5}, null, false));
- tokenList = indexPart.get("org.apache.lucene.analysis.core.LowerCaseFilter");
+ tokenList = indexPart.get("org.apache.lucene.analysis.LowerCaseFilter");
assertNotNull("Expcting LowerCaseFilter analysis breakdown", tokenList);
assertEquals(6, tokenList.size());
assertToken(tokenList.get(0), new TokenInfo("hi", null, "word", 0, 2, 1, new int[]{1,1,1}, null, false));
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/solr/core/src/test/org/apache/solr/spelling/TestSuggestSpellingConverter.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/spelling/TestSuggestSpellingConverter.java b/solr/core/src/test/org/apache/solr/spelling/TestSuggestSpellingConverter.java
index 65f3242..fdf64ff 100644
--- a/solr/core/src/test/org/apache/solr/spelling/TestSuggestSpellingConverter.java
+++ b/solr/core/src/test/org/apache/solr/spelling/TestSuggestSpellingConverter.java
@@ -23,13 +23,13 @@ import java.util.regex.Pattern;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.CannedTokenStream;
+import org.apache.lucene.analysis.LowerCaseFilter;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.KeywordTokenizer;
-import org.apache.lucene.analysis.core.LowerCaseFilter;
import org.apache.lucene.analysis.miscellaneous.TrimFilter;
import org.apache.lucene.analysis.pattern.PatternReplaceFilter;
[08/12] lucene-solr:branch_6x: LUCENE-7318: graduate StandardAnalyzer
and make it the default for IndexWriterConfig
Posted by mi...@apache.org.
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/common/src/test/org/apache/lucene/analysis/standard/TestStandardAnalyzer.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/standard/TestStandardAnalyzer.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/standard/TestStandardAnalyzer.java
deleted file mode 100644
index 6c6ddc8..0000000
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/standard/TestStandardAnalyzer.java
+++ /dev/null
@@ -1,390 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.lucene.analysis.standard;
-
-
-import java.io.IOException;
-import java.io.StringReader;
-import java.util.Arrays;
-import java.util.Random;
-
-import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.BaseTokenStreamTestCase;
-import org.apache.lucene.analysis.MockGraphTokenFilter;
-import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.Tokenizer;
-import org.apache.lucene.util.TestUtil;
-
-public class TestStandardAnalyzer extends BaseTokenStreamTestCase {
-
- // LUCENE-5897: slow tokenization of strings of the form (\p{WB:ExtendNumLet}[\p{WB:Format}\p{WB:Extend}]*)+
- @Slow
- public void testLargePartiallyMatchingToken() throws Exception {
- // TODO: get these lists of chars matching a property from ICU4J
- // http://www.unicode.org/Public/6.3.0/ucd/auxiliary/WordBreakProperty.txt
- char[] WordBreak_ExtendNumLet_chars = "_\u203f\u2040\u2054\ufe33\ufe34\ufe4d\ufe4e\ufe4f\uff3f".toCharArray();
-
- // http://www.unicode.org/Public/6.3.0/ucd/auxiliary/WordBreakProperty.txt
- int[] WordBreak_Format_chars // only the first char in ranges
- = { 0xAD, 0x600, 0x61C, 0x6DD, 0x70F, 0x180E, 0x200E, 0x202A, 0x2060, 0x2066, 0xFEFF,
- 0xFFF9, 0x110BD, 0x1D173, 0xE0001, 0xE0020 };
-
- // http://www.unicode.org/Public/6.3.0/ucd/auxiliary/WordBreakProperty.txt
- int[] WordBreak_Extend_chars // only the first char in ranges
- = { 0x300, 0x483, 0x591, 0x5bf, 0x5c1, 0x5c4, 0x5c7, 0x610, 0x64b, 0x670, 0x6d6, 0x6df,
- 0x6e7, 0x6ea, 0x711, 0x730, 0x7a6, 0x7eb, 0x816, 0x81b, 0x825, 0x829, 0x859, 0x8e4,
- 0x900, 0x93a, 0x93e, 0x951, 0x962, 0x981, 0x9bc, 0x9be, 0x9c7, 0x9cb, 0x9d7, 0x9e2,
- 0xa01, 0xa3c, 0xa3e, 0xa47, 0xa4b, 0xa51, 0xa70, 0xa75, 0xa81, 0xabc, 0xabe, 0xac7,
- 0xacb, 0xae2, 0xb01, 0xb3c, 0xb3e, 0xb47, 0xb4b, 0xb56, 0xb62, 0xb82, 0xbbe, 0xbc6,
- 0xbca, 0xbd7, 0xc01, 0xc3e, 0xc46, 0xc4a, 0xc55, 0xc62, 0xc82, 0xcbc, 0xcbe, 0xcc6,
- 0xcca, 0xcd5, 0xce2, 0xd02, 0xd3e, 0xd46, 0xd4a, 0xd57, 0xd62, 0xd82, 0xdca, 0xdcf,
- 0xdd6, 0xdd8, 0xdf2, 0xe31, 0xe34, 0xe47, 0xeb1, 0xeb4, 0xebb, 0xec8, 0xf18, 0xf35,
- 0xf37, 0xf39, 0xf3e, 0xf71, 0xf86, 0xf8d, 0xf99, 0xfc6, 0x102b, 0x1056, 0x105e, 0x1062,
- 0x1067, 0x1071, 0x1082, 0x108f, 0x109a, 0x135d, 0x1712, 0x1732, 0x1752, 0x1772, 0x17b4,
- 0x17dd, 0x180b, 0x18a9, 0x1920, 0x1930, 0x19b0, 0x19c8, 0x1a17, 0x1a55, 0x1a60, 0x1a7f,
- 0x1b00, 0x1b34, 0x1b6b, 0x1b80, 0x1ba1, 0x1be6, 0x1c24, 0x1cd0, 0x1cd4, 0x1ced, 0x1cf2,
- 0x1dc0, 0x1dfc, 0x200c, 0x20d0, 0x2cef, 0x2d7f, 0x2de0, 0x302a, 0x3099, 0xa66f, 0xa674,
- 0xa69f, 0xa6f0, 0xa802, 0xa806, 0xa80b, 0xa823, 0xa880, 0xa8b4, 0xa8e0, 0xa926, 0xa947,
- 0xa980, 0xa9b3, 0xaa29, 0xaa43, 0xaa4c, 0xaa7b, 0xaab0, 0xaab2, 0xaab7, 0xaabe, 0xaac1,
- 0xaaeb, 0xaaf5, 0xabe3, 0xabec, 0xfb1e, 0xfe00, 0xfe20, 0xff9e, 0x101fd, 0x10a01,
- 0x10a05, 0x10a0C, 0x10a38, 0x10a3F, 0x11000, 0x11001, 0x11038, 0x11080, 0x11082,
- 0x110b0, 0x110b3, 0x110b7, 0x110b9, 0x11100, 0x11127, 0x1112c, 0x11180, 0x11182,
- 0x111b3, 0x111b6, 0x111bF, 0x116ab, 0x116ac, 0x116b0, 0x116b6, 0x16f51, 0x16f8f,
- 0x1d165, 0x1d167, 0x1d16d, 0x1d17b, 0x1d185, 0x1d1aa, 0x1d242, 0xe0100 };
-
- StringBuilder builder = new StringBuilder();
- int numChars = TestUtil.nextInt(random(), 100 * 1024, 1024 * 1024);
- for (int i = 0 ; i < numChars ; ) {
- builder.append(WordBreak_ExtendNumLet_chars[random().nextInt(WordBreak_ExtendNumLet_chars.length)]);
- ++i;
- if (random().nextBoolean()) {
- int numFormatExtendChars = TestUtil.nextInt(random(), 1, 8);
- for (int j = 0; j < numFormatExtendChars; ++j) {
- int codepoint;
- if (random().nextBoolean()) {
- codepoint = WordBreak_Format_chars[random().nextInt(WordBreak_Format_chars.length)];
- } else {
- codepoint = WordBreak_Extend_chars[random().nextInt(WordBreak_Extend_chars.length)];
- }
- char[] chars = Character.toChars(codepoint);
- builder.append(chars);
- i += chars.length;
- }
- }
- }
- StandardTokenizer ts = new StandardTokenizer();
- ts.setReader(new StringReader(builder.toString()));
- ts.reset();
- while (ts.incrementToken()) { }
- ts.end();
- ts.close();
-
- int newBufferSize = TestUtil.nextInt(random(), 200, 8192);
- ts.setMaxTokenLength(newBufferSize); // try a different buffer size
- ts.setReader(new StringReader(builder.toString()));
- ts.reset();
- while (ts.incrementToken()) { }
- ts.end();
- ts.close();
- }
-
- public void testHugeDoc() throws IOException {
- StringBuilder sb = new StringBuilder();
- char whitespace[] = new char[4094];
- Arrays.fill(whitespace, ' ');
- sb.append(whitespace);
- sb.append("testing 1234");
- String input = sb.toString();
- StandardTokenizer tokenizer = new StandardTokenizer();
- tokenizer.setReader(new StringReader(input));
- BaseTokenStreamTestCase.assertTokenStreamContents(tokenizer, new String[] { "testing", "1234" });
- }
-
- private Analyzer a;
-
- @Override
- public void setUp() throws Exception {
- super.setUp();
- a = new Analyzer() {
- @Override
- protected TokenStreamComponents createComponents(String fieldName) {
- Tokenizer tokenizer = new StandardTokenizer(newAttributeFactory());
- return new TokenStreamComponents(tokenizer);
- }
- };
- }
-
- @Override
- public void tearDown() throws Exception {
- a.close();
- super.tearDown();
- }
-
- public void testArmenian() throws Exception {
- BaseTokenStreamTestCase.assertAnalyzesTo(a, "\u054e\u056b\u0584\u056b\u057a\u0565\u0564\u056b\u0561\u0575\u056b 13 \u0574\u056b\u056c\u056b\u0578\u0576 \u0570\u0578\u0564\u057e\u0561\u056e\u0576\u0565\u0580\u0568 (4,600` \u0570\u0561\u0575\u0565\u0580\u0565\u0576 \u057e\u056b\u0584\u056b\u057a\u0565\u0564\u056b\u0561\u0575\u0578\u0582\u0574) \u0563\u0580\u057e\u0565\u056c \u0565\u0576 \u056f\u0561\u0574\u0561\u057e\u0578\u0580\u0576\u0565\u0580\u056b \u056f\u0578\u0572\u0574\u056b\u0581 \u0578\u0582 \u0570\u0561\u0574\u0561\u0580\u0575\u0561 \u0562\u0578\u056c\u0578\u0580 \u0570\u0578\u0564\u057e\u0561\u056e\u0576\u0565\u0580\u0568 \u056f\u0561\u0580\u0578\u0572 \u0567 \u056d\u0574\u0562\u0561\u0563\u0580\u0565\u056c \u0581\u0561\u0576\u056f\u0561\u0581 \u0574\u0561\u0580\u0564 \u0578\u057e \u056f\u0561\u0580\u0578\u0572 \u0567 \u0562\u0561\u0581\u0565\u056c \u054e\u056b\u0584\u056b\u057a\u0565\u0564\u056b\u0561\u0575\u056b \u056f\u0561\u0575\u0584\u0568\u0589",
- new String[] { "\u054e\u056b\u0584\u056b\u057a\u0565\u0564\u056b\u0561\u0575\u056b", "13", "\u0574\u056b\u056c\u056b\u0578\u0576", "\u0570\u0578\u0564\u057e\u0561\u056e\u0576\u0565\u0580\u0568", "4,600", "\u0570\u0561\u0575\u0565\u0580\u0565\u0576", "\u057e\u056b\u0584\u056b\u057a\u0565\u0564\u056b\u0561\u0575\u0578\u0582\u0574", "\u0563\u0580\u057e\u0565\u056c", "\u0565\u0576", "\u056f\u0561\u0574\u0561\u057e\u0578\u0580\u0576\u0565\u0580\u056b", "\u056f\u0578\u0572\u0574\u056b\u0581",
- "\u0578\u0582", "\u0570\u0561\u0574\u0561\u0580\u0575\u0561", "\u0562\u0578\u056c\u0578\u0580", "\u0570\u0578\u0564\u057e\u0561\u056e\u0576\u0565\u0580\u0568", "\u056f\u0561\u0580\u0578\u0572", "\u0567", "\u056d\u0574\u0562\u0561\u0563\u0580\u0565\u056c", "\u0581\u0561\u0576\u056f\u0561\u0581", "\u0574\u0561\u0580\u0564", "\u0578\u057e", "\u056f\u0561\u0580\u0578\u0572", "\u0567", "\u0562\u0561\u0581\u0565\u056c", "\u054e\u056b\u0584\u056b\u057a\u0565\u0564\u056b\u0561\u0575\u056b", "\u056f\u0561\u0575\u0584\u0568" } );
- }
-
- public void testAmharic() throws Exception {
- BaseTokenStreamTestCase.assertAnalyzesTo(a, "\u12ca\u12aa\u1354\u12f5\u12eb \u12e8\u1263\u1208 \u1265\u12d9 \u124b\u1295\u124b \u12e8\u1270\u121f\u120b \u1275\u12ad\u12ad\u1208\u129b\u1293 \u1290\u133b \u1218\u12dd\u1308\u1260 \u12d5\u12cd\u1240\u1275 (\u12a2\u1295\u1233\u12ed\u12ad\u120e\u1352\u12f2\u12eb) \u1290\u12cd\u1362 \u121b\u1295\u129b\u12cd\u121d",
- new String[] { "\u12ca\u12aa\u1354\u12f5\u12eb", "\u12e8\u1263\u1208", "\u1265\u12d9", "\u124b\u1295\u124b", "\u12e8\u1270\u121f\u120b", "\u1275\u12ad\u12ad\u1208\u129b\u1293", "\u1290\u133b", "\u1218\u12dd\u1308\u1260", "\u12d5\u12cd\u1240\u1275", "\u12a2\u1295\u1233\u12ed\u12ad\u120e\u1352\u12f2\u12eb", "\u1290\u12cd", "\u121b\u1295\u129b\u12cd\u121d" } );
- }
-
- public void testArabic() throws Exception {
- BaseTokenStreamTestCase.assertAnalyzesTo(a, "\u0627\u0644\u0641\u064a\u0644\u0645 \u0627\u0644\u0648\u062b\u0627\u0626\u0642\u064a \u0627\u0644\u0623\u0648\u0644 \u0639\u0646 \u0648\u064a\u0643\u064a\u0628\u064a\u062f\u064a\u0627 \u064a\u0633\u0645\u0649 \"\u0627\u0644\u062d\u0642\u064a\u0642\u0629 \u0628\u0627\u0644\u0623\u0631\u0642\u0627\u0645: \u0642\u0635\u0629 \u0648\u064a\u0643\u064a\u0628\u064a\u062f\u064a\u0627\" (\u0628\u0627\u0644\u0625\u0646\u062c\u0644\u064a\u0632\u064a\u0629: Truth in Numbers: The Wikipedia Story)\u060c \u0633\u064a\u062a\u0645 \u0625\u0637\u0644\u0627\u0642\u0647 \u0641\u064a 2008.",
- new String[] { "\u0627\u0644\u0641\u064a\u0644\u0645", "\u0627\u0644\u0648\u062b\u0627\u0626\u0642\u064a", "\u0627\u0644\u0623\u0648\u0644", "\u0639\u0646", "\u0648\u064a\u0643\u064a\u0628\u064a\u062f\u064a\u0627", "\u064a\u0633\u0645\u0649", "\u0627\u0644\u062d\u0642\u064a\u0642\u0629", "\u0628\u0627\u0644\u0623\u0631\u0642\u0627\u0645", "\u0642\u0635\u0629", "\u0648\u064a\u0643\u064a\u0628\u064a\u062f\u064a\u0627",
- "\u0628\u0627\u0644\u0625\u0646\u062c\u0644\u064a\u0632\u064a\u0629", "Truth", "in", "Numbers", "The", "Wikipedia", "Story", "\u0633\u064a\u062a\u0645", "\u0625\u0637\u0644\u0627\u0642\u0647", "\u0641\u064a", "2008" } );
- }
-
- public void testAramaic() throws Exception {
- BaseTokenStreamTestCase.assertAnalyzesTo(a, "\u0718\u071d\u0729\u071d\u0726\u0715\u071d\u0710 (\u0710\u0722\u0713\u0720\u071d\u0710: Wikipedia) \u0717\u0718 \u0710\u071d\u0722\u0723\u0729\u0720\u0718\u0726\u0715\u071d\u0710 \u071a\u0710\u072a\u072c\u0710 \u0715\u0710\u0722\u071b\u072a\u0722\u071b \u0712\u0720\u072b\u0722\u0308\u0710 \u0723\u0713\u071d\u0710\u0308\u0710\u0702 \u072b\u0721\u0717 \u0710\u072c\u0710 \u0721\u0722 \u0721\u0308\u0720\u072c\u0710 \u0715\"\u0718\u071d\u0729\u071d\" \u0718\"\u0710\u071d\u0722\u0723\u0729\u0720\u0718\u0726\u0715\u071d\u0710\"\u0700",
- new String[] { "\u0718\u071d\u0729\u071d\u0726\u0715\u071d\u0710", "\u0710\u0722\u0713\u0720\u071d\u0710", "Wikipedia", "\u0717\u0718", "\u0710\u071d\u0722\u0723\u0729\u0720\u0718\u0726\u0715\u071d\u0710", "\u071a\u0710\u072a\u072c\u0710", "\u0715\u0710\u0722\u071b\u072a\u0722\u071b", "\u0712\u0720\u072b\u0722\u0308\u0710", "\u0723\u0713\u071d\u0710\u0308\u0710", "\u072b\u0721\u0717",
- "\u0710\u072c\u0710", "\u0721\u0722", "\u0721\u0308\u0720\u072c\u0710", "\u0715", "\u0718\u071d\u0729\u071d", "\u0718", "\u0710\u071d\u0722\u0723\u0729\u0720\u0718\u0726\u0715\u071d\u0710"});
- }
-
- public void testBengali() throws Exception {
- BaseTokenStreamTestCase.assertAnalyzesTo(a, "\u098f\u0987 \u09ac\u09bf\u09b6\u09cd\u09ac\u0995\u09cb\u09b7 \u09aa\u09b0\u09bf\u099a\u09be\u09b2\u09a8\u09be \u0995\u09b0\u09c7 \u0989\u0987\u0995\u09bf\u09ae\u09bf\u09a1\u09bf\u09af\u09bc\u09be \u09ab\u09be\u0989\u09a8\u09cd\u09a1\u09c7\u09b6\u09a8 (\u098f\u0995\u099f\u09bf \u0985\u09b2\u09be\u09ad\u099c\u09a8\u0995 \u09b8\u0982\u09b8\u09cd\u09a5\u09be)\u0964 \u0989\u0987\u0995\u09bf\u09aa\u09bf\u09a1\u09bf\u09af\u09bc\u09be\u09b0 \u09b6\u09c1\u09b0\u09c1 \u09e7\u09eb \u099c\u09be\u09a8\u09c1\u09af\u09bc\u09be\u09b0\u09bf, \u09e8\u09e6\u09e6\u09e7 \u09b8\u09be\u09b2\u09c7\u0964 \u098f\u0996\u09a8 \u09aa\u09b0\u09cd\u09af\u09a8\u09cd\u09a4 \u09e8\u09e6\u09e6\u099f\u09bf\u09b0\u0993 \u09ac\u09c7\u09b6\u09c0 \u09ad\u09be\u09b7\u09be\u09af\u09bc \u0989\u0987\u0995\u09bf\u09aa\u09bf\u09a1\u09bf\u09af\u09bc\u09be \u09b0\u09af\u09bc\u09c7\u099b\u09c7\u0964",
- new String[] { "\u098f\u0987", "\u09ac\u09bf\u09b6\u09cd\u09ac\u0995\u09cb\u09b7", "\u09aa\u09b0\u09bf\u099a\u09be\u09b2\u09a8\u09be", "\u0995\u09b0\u09c7", "\u0989\u0987\u0995\u09bf\u09ae\u09bf\u09a1\u09bf\u09af\u09bc\u09be", "\u09ab\u09be\u0989\u09a8\u09cd\u09a1\u09c7\u09b6\u09a8", "\u098f\u0995\u099f\u09bf", "\u0985\u09b2\u09be\u09ad\u099c\u09a8\u0995", "\u09b8\u0982\u09b8\u09cd\u09a5\u09be", "\u0989\u0987\u0995\u09bf\u09aa\u09bf\u09a1\u09bf\u09af\u09bc\u09be\u09b0",
- "\u09b6\u09c1\u09b0\u09c1", "\u09e7\u09eb", "\u099c\u09be\u09a8\u09c1\u09af\u09bc\u09be\u09b0\u09bf", "\u09e8\u09e6\u09e6\u09e7", "\u09b8\u09be\u09b2\u09c7", "\u098f\u0996\u09a8", "\u09aa\u09b0\u09cd\u09af\u09a8\u09cd\u09a4", "\u09e8\u09e6\u09e6\u099f\u09bf\u09b0\u0993", "\u09ac\u09c7\u09b6\u09c0", "\u09ad\u09be\u09b7\u09be\u09af\u09bc", "\u0989\u0987\u0995\u09bf\u09aa\u09bf\u09a1\u09bf\u09af\u09bc\u09be", "\u09b0\u09af\u09bc\u09c7\u099b\u09c7" });
- }
-
- public void testFarsi() throws Exception {
- BaseTokenStreamTestCase.assertAnalyzesTo(a, "\u0648\u06cc\u06a9\u06cc \u067e\u062f\u06cc\u0627\u06cc \u0627\u0646\u06af\u0644\u06cc\u0633\u06cc \u062f\u0631 \u062a\u0627\u0631\u06cc\u062e \u06f2\u06f5 \u062f\u06cc \u06f1\u06f3\u06f7\u06f9 \u0628\u0647 \u0635\u0648\u0631\u062a \u0645\u06a9\u0645\u0644\u06cc \u0628\u0631\u0627\u06cc \u062f\u0627\u0646\u0634\u0646\u0627\u0645\u0647\u0654 \u062a\u062e\u0635\u0635\u06cc \u0646\u0648\u067e\u062f\u06cc\u0627 \u0646\u0648\u0634\u062a\u0647 \u0634\u062f.",
- new String[] { "\u0648\u06cc\u06a9\u06cc", "\u067e\u062f\u06cc\u0627\u06cc", "\u0627\u0646\u06af\u0644\u06cc\u0633\u06cc", "\u062f\u0631", "\u062a\u0627\u0631\u06cc\u062e", "\u06f2\u06f5", "\u062f\u06cc", "\u06f1\u06f3\u06f7\u06f9", "\u0628\u0647", "\u0635\u0648\u0631\u062a", "\u0645\u06a9\u0645\u0644\u06cc",
- "\u0628\u0631\u0627\u06cc", "\u062f\u0627\u0646\u0634\u0646\u0627\u0645\u0647\u0654", "\u062a\u062e\u0635\u0635\u06cc", "\u0646\u0648\u067e\u062f\u06cc\u0627", "\u0646\u0648\u0634\u062a\u0647", "\u0634\u062f" });
- }
-
- public void testGreek() throws Exception {
- BaseTokenStreamTestCase.assertAnalyzesTo(a, "\u0393\u03c1\u03ac\u03c6\u03b5\u03c4\u03b1\u03b9 \u03c3\u03b5 \u03c3\u03c5\u03bd\u03b5\u03c1\u03b3\u03b1\u03c3\u03af\u03b1 \u03b1\u03c0\u03cc \u03b5\u03b8\u03b5\u03bb\u03bf\u03bd\u03c4\u03ad\u03c2 \u03bc\u03b5 \u03c4\u03bf \u03bb\u03bf\u03b3\u03b9\u03c3\u03bc\u03b9\u03ba\u03cc wiki, \u03ba\u03ac\u03c4\u03b9 \u03c0\u03bf\u03c5 \u03c3\u03b7\u03bc\u03b1\u03af\u03bd\u03b5\u03b9 \u03cc\u03c4\u03b9 \u03ac\u03c1\u03b8\u03c1\u03b1 \u03bc\u03c0\u03bf\u03c1\u03b5\u03af \u03bd\u03b1 \u03c0\u03c1\u03bf\u03c3\u03c4\u03b5\u03b8\u03bf\u03cd\u03bd \u03ae \u03bd\u03b1 \u03b1\u03bb\u03bb\u03ac\u03be\u03bf\u03c5\u03bd \u03b1\u03c0\u03cc \u03c4\u03bf\u03bd \u03ba\u03b1\u03b8\u03ad\u03bd\u03b1.",
- new String[] { "\u0393\u03c1\u03ac\u03c6\u03b5\u03c4\u03b1\u03b9", "\u03c3\u03b5", "\u03c3\u03c5\u03bd\u03b5\u03c1\u03b3\u03b1\u03c3\u03af\u03b1", "\u03b1\u03c0\u03cc", "\u03b5\u03b8\u03b5\u03bb\u03bf\u03bd\u03c4\u03ad\u03c2", "\u03bc\u03b5", "\u03c4\u03bf", "\u03bb\u03bf\u03b3\u03b9\u03c3\u03bc\u03b9\u03ba\u03cc", "wiki", "\u03ba\u03ac\u03c4\u03b9", "\u03c0\u03bf\u03c5",
- "\u03c3\u03b7\u03bc\u03b1\u03af\u03bd\u03b5\u03b9", "\u03cc\u03c4\u03b9", "\u03ac\u03c1\u03b8\u03c1\u03b1", "\u03bc\u03c0\u03bf\u03c1\u03b5\u03af", "\u03bd\u03b1", "\u03c0\u03c1\u03bf\u03c3\u03c4\u03b5\u03b8\u03bf\u03cd\u03bd", "\u03ae", "\u03bd\u03b1", "\u03b1\u03bb\u03bb\u03ac\u03be\u03bf\u03c5\u03bd", "\u03b1\u03c0\u03cc", "\u03c4\u03bf\u03bd", "\u03ba\u03b1\u03b8\u03ad\u03bd\u03b1" });
- }
-
- public void testThai() throws Exception {
- BaseTokenStreamTestCase.assertAnalyzesTo(a, "\u0e01\u0e32\u0e23\u0e17\u0e35\u0e48\u0e44\u0e14\u0e49\u0e15\u0e49\u0e2d\u0e07\u0e41\u0e2a\u0e14\u0e07\u0e27\u0e48\u0e32\u0e07\u0e32\u0e19\u0e14\u0e35. \u0e41\u0e25\u0e49\u0e27\u0e40\u0e18\u0e2d\u0e08\u0e30\u0e44\u0e1b\u0e44\u0e2b\u0e19? \u0e51\u0e52\u0e53\u0e54",
- new String[] { "\u0e01\u0e32\u0e23\u0e17\u0e35\u0e48\u0e44\u0e14\u0e49\u0e15\u0e49\u0e2d\u0e07\u0e41\u0e2a\u0e14\u0e07\u0e27\u0e48\u0e32\u0e07\u0e32\u0e19\u0e14\u0e35", "\u0e41\u0e25\u0e49\u0e27\u0e40\u0e18\u0e2d\u0e08\u0e30\u0e44\u0e1b\u0e44\u0e2b\u0e19", "\u0e51\u0e52\u0e53\u0e54" });
- }
-
- public void testLao() throws Exception {
- BaseTokenStreamTestCase.assertAnalyzesTo(a, "\u0eaa\u0eb2\u0e97\u0eb2\u0ea5\u0eb0\u0e99\u0eb0\u0ea5\u0eb1\u0e94 \u0e9b\u0eb0\u0e8a\u0eb2\u0e97\u0eb4\u0e9b\u0eb0\u0ec4\u0e95 \u0e9b\u0eb0\u0e8a\u0eb2\u0e8a\u0ebb\u0e99\u0ea5\u0eb2\u0ea7",
- new String[] { "\u0eaa\u0eb2\u0e97\u0eb2\u0ea5\u0eb0\u0e99\u0eb0\u0ea5\u0eb1\u0e94", "\u0e9b\u0eb0\u0e8a\u0eb2\u0e97\u0eb4\u0e9b\u0eb0\u0ec4\u0e95", "\u0e9b\u0eb0\u0e8a\u0eb2\u0e8a\u0ebb\u0e99\u0ea5\u0eb2\u0ea7" });
- }
-
- public void testTibetan() throws Exception {
- BaseTokenStreamTestCase.assertAnalyzesTo(a, "\u0f66\u0fa3\u0f7c\u0f53\u0f0b\u0f58\u0f5b\u0f7c\u0f51\u0f0b\u0f51\u0f44\u0f0b\u0f63\u0f66\u0f0b\u0f60\u0f51\u0f72\u0f66\u0f0b\u0f56\u0f7c\u0f51\u0f0b\u0f61\u0f72\u0f42\u0f0b\u0f58\u0f72\u0f0b\u0f49\u0f58\u0f66\u0f0b\u0f42\u0f7c\u0f44\u0f0b\u0f60\u0f55\u0f7a\u0f63\u0f0b\u0f51\u0f74\u0f0b\u0f42\u0f4f\u0f7c\u0f44\u0f0b\u0f56\u0f62\u0f0b\u0f67\u0f0b\u0f45\u0f44\u0f0b\u0f51\u0f42\u0f7a\u0f0b\u0f58\u0f5a\u0f53\u0f0b\u0f58\u0f46\u0f72\u0f66\u0f0b\u0f66\u0f7c\u0f0d \u0f0d",
- new String[] { "\u0f66\u0fa3\u0f7c\u0f53", "\u0f58\u0f5b\u0f7c\u0f51", "\u0f51\u0f44", "\u0f63\u0f66", "\u0f60\u0f51\u0f72\u0f66", "\u0f56\u0f7c\u0f51", "\u0f61\u0f72\u0f42",
- "\u0f58\u0f72", "\u0f49\u0f58\u0f66", "\u0f42\u0f7c\u0f44", "\u0f60\u0f55\u0f7a\u0f63", "\u0f51\u0f74", "\u0f42\u0f4f\u0f7c\u0f44", "\u0f56\u0f62",
- "\u0f67", "\u0f45\u0f44", "\u0f51\u0f42\u0f7a", "\u0f58\u0f5a\u0f53", "\u0f58\u0f46\u0f72\u0f66", "\u0f66\u0f7c" });
- }
-
- /*
- * For chinese, tokenize as char (these can later form bigrams or whatever)
- */
- public void testChinese() throws Exception {
- BaseTokenStreamTestCase.assertAnalyzesTo(a, "\u6211\u662f\u4e2d\u56fd\u4eba\u3002 \uff11\uff12\uff13\uff14 \uff34\uff45\uff53\uff54\uff53 ",
- new String[] { "\u6211", "\u662f", "\u4e2d", "\u56fd", "\u4eba", "\uff11\uff12\uff13\uff14", "\uff34\uff45\uff53\uff54\uff53"});
- }
-
- public void testEmpty() throws Exception {
- BaseTokenStreamTestCase.assertAnalyzesTo(a, "", new String[] {});
- BaseTokenStreamTestCase.assertAnalyzesTo(a, ".", new String[] {});
- BaseTokenStreamTestCase.assertAnalyzesTo(a, " ", new String[] {});
- }
-
- /* test various jira issues this analyzer is related to */
-
- public void testLUCENE1545() throws Exception {
- /*
- * Standard analyzer does not correctly tokenize combining character U+0364 COMBINING LATIN SMALL LETTER E.
- * The word "mo\u0364chte" is incorrectly tokenized into "mo" "chte", the combining character is lost.
- * Expected result is only one token "mo\u0364chte".
- */
- BaseTokenStreamTestCase.assertAnalyzesTo(a, "mo\u0364chte", new String[] { "mo\u0364chte" });
- }
-
- /* Tests from StandardAnalyzer, just to show behavior is similar */
- public void testAlphanumericSA() throws Exception {
- // alphanumeric tokens
- BaseTokenStreamTestCase.assertAnalyzesTo(a, "B2B", new String[]{"B2B"});
- BaseTokenStreamTestCase.assertAnalyzesTo(a, "2B", new String[]{"2B"});
- }
-
- public void testDelimitersSA() throws Exception {
- // other delimiters: "-", "/", ","
- BaseTokenStreamTestCase.assertAnalyzesTo(a, "some-dashed-phrase", new String[]{"some", "dashed", "phrase"});
- BaseTokenStreamTestCase.assertAnalyzesTo(a, "dogs,chase,cats", new String[]{"dogs", "chase", "cats"});
- BaseTokenStreamTestCase.assertAnalyzesTo(a, "ac/dc", new String[]{"ac", "dc"});
- }
-
- public void testApostrophesSA() throws Exception {
- // internal apostrophes: O'Reilly, you're, O'Reilly's
- BaseTokenStreamTestCase.assertAnalyzesTo(a, "O'Reilly", new String[]{"O'Reilly"});
- BaseTokenStreamTestCase.assertAnalyzesTo(a, "you're", new String[]{"you're"});
- BaseTokenStreamTestCase.assertAnalyzesTo(a, "she's", new String[]{"she's"});
- BaseTokenStreamTestCase.assertAnalyzesTo(a, "Jim's", new String[]{"Jim's"});
- BaseTokenStreamTestCase.assertAnalyzesTo(a, "don't", new String[]{"don't"});
- BaseTokenStreamTestCase.assertAnalyzesTo(a, "O'Reilly's", new String[]{"O'Reilly's"});
- }
-
- public void testNumericSA() throws Exception {
- // floating point, serial, model numbers, ip addresses, etc.
- BaseTokenStreamTestCase.assertAnalyzesTo(a, "21.35", new String[]{"21.35"});
- BaseTokenStreamTestCase.assertAnalyzesTo(a, "R2D2 C3PO", new String[]{"R2D2", "C3PO"});
- BaseTokenStreamTestCase.assertAnalyzesTo(a, "216.239.63.104", new String[]{"216.239.63.104"});
- BaseTokenStreamTestCase.assertAnalyzesTo(a, "216.239.63.104", new String[]{"216.239.63.104"});
- }
-
- public void testTextWithNumbersSA() throws Exception {
- // numbers
- BaseTokenStreamTestCase.assertAnalyzesTo(a, "David has 5000 bones", new String[]{"David", "has", "5000", "bones"});
- }
-
- public void testVariousTextSA() throws Exception {
- // various
- BaseTokenStreamTestCase.assertAnalyzesTo(a, "C embedded developers wanted", new String[]{"C", "embedded", "developers", "wanted"});
- BaseTokenStreamTestCase.assertAnalyzesTo(a, "foo bar FOO BAR", new String[]{"foo", "bar", "FOO", "BAR"});
- BaseTokenStreamTestCase.assertAnalyzesTo(a, "foo bar . FOO <> BAR", new String[]{"foo", "bar", "FOO", "BAR"});
- BaseTokenStreamTestCase.assertAnalyzesTo(a, "\"QUOTED\" word", new String[]{"QUOTED", "word"});
- }
-
- public void testKoreanSA() throws Exception {
- // Korean words
- BaseTokenStreamTestCase.assertAnalyzesTo(a, "\uc548\ub155\ud558\uc138\uc694 \ud55c\uae00\uc785\ub2c8\ub2e4", new String[]{"\uc548\ub155\ud558\uc138\uc694", "\ud55c\uae00\uc785\ub2c8\ub2e4"});
- }
-
- public void testOffsets() throws Exception {
- BaseTokenStreamTestCase.assertAnalyzesTo(a, "David has 5000 bones",
- new String[] {"David", "has", "5000", "bones"},
- new int[] {0, 6, 10, 15},
- new int[] {5, 9, 14, 20});
- }
-
- public void testTypes() throws Exception {
- BaseTokenStreamTestCase.assertAnalyzesTo(a, "David has 5000 bones",
- new String[] {"David", "has", "5000", "bones"},
- new String[] { "<ALPHANUM>", "<ALPHANUM>", "<NUM>", "<ALPHANUM>" });
- }
-
- public void testUnicodeWordBreaks() throws Exception {
- WordBreakTestUnicode_6_3_0 wordBreakTest = new WordBreakTestUnicode_6_3_0();
- wordBreakTest.test(a);
- }
-
- public void testSupplementary() throws Exception {
- BaseTokenStreamTestCase.assertAnalyzesTo(a, "\U00029b05\u8271\u935f\u41f9\u612f\u701b",
- new String[] {"\U00029b05", "\u8271", "\u935f", "\u41f9", "\u612f", "\u701b"},
- new String[] { "<IDEOGRAPHIC>", "<IDEOGRAPHIC>", "<IDEOGRAPHIC>", "<IDEOGRAPHIC>", "<IDEOGRAPHIC>", "<IDEOGRAPHIC>" });
- }
-
- public void testKorean() throws Exception {
- BaseTokenStreamTestCase.assertAnalyzesTo(a, "\ud6c8\ubbfc\uc815\uc74c",
- new String[] { "\ud6c8\ubbfc\uc815\uc74c" },
- new String[] { "<HANGUL>" });
- }
-
- public void testJapanese() throws Exception {
- BaseTokenStreamTestCase.assertAnalyzesTo(a, "\u4eee\u540d\u9063\u3044 \u30ab\u30bf\u30ab\u30ca",
- new String[] { "\u4eee", "\u540d", "\u9063", "\u3044", "\u30ab\u30bf\u30ab\u30ca" },
- new String[] { "<IDEOGRAPHIC>", "<IDEOGRAPHIC>", "<IDEOGRAPHIC>", "<HIRAGANA>", "<KATAKANA>" });
- }
-
- public void testCombiningMarks() throws Exception {
- checkOneTerm(a, "\u3055\u3099", "\u3055\u3099"); // hiragana
- checkOneTerm(a, "\u30b5\u3099", "\u30b5\u3099"); // katakana
- checkOneTerm(a, "\u58f9\u3099", "\u58f9\u3099"); // ideographic
- checkOneTerm(a, "\uc544\u3099", "\uc544\u3099"); // hangul
- }
-
- /**
- * Multiple consecutive chars in \p{WB:MidLetter}, \p{WB:MidNumLet},
- * and/or \p{MidNum} should trigger a token split.
- */
- public void testMid() throws Exception {
- // ':' is in \p{WB:MidLetter}, which should trigger a split unless there is a Letter char on both sides
- BaseTokenStreamTestCase.assertAnalyzesTo(a, "A:B", new String[] { "A:B" });
- BaseTokenStreamTestCase.assertAnalyzesTo(a, "A::B", new String[] { "A", "B" });
-
- // '.' is in \p{WB:MidNumLet}, which should trigger a split unless there is a Letter or Numeric char on both sides
- BaseTokenStreamTestCase.assertAnalyzesTo(a, "1.2", new String[] { "1.2" });
- BaseTokenStreamTestCase.assertAnalyzesTo(a, "A.B", new String[] { "A.B" });
- BaseTokenStreamTestCase.assertAnalyzesTo(a, "1..2", new String[] { "1", "2" });
- BaseTokenStreamTestCase.assertAnalyzesTo(a, "A..B", new String[] { "A", "B" });
-
- // ',' is in \p{WB:MidNum}, which should trigger a split unless there is a Numeric char on both sides
- BaseTokenStreamTestCase.assertAnalyzesTo(a, "1,2", new String[] { "1,2" });
- BaseTokenStreamTestCase.assertAnalyzesTo(a, "1,,2", new String[] { "1", "2" });
-
- // Mixed consecutive \p{WB:MidLetter} and \p{WB:MidNumLet} should trigger a split
- BaseTokenStreamTestCase.assertAnalyzesTo(a, "A.:B", new String[] { "A", "B" });
- BaseTokenStreamTestCase.assertAnalyzesTo(a, "A:.B", new String[] { "A", "B" });
-
- // Mixed consecutive \p{WB:MidNum} and \p{WB:MidNumLet} should trigger a split
- BaseTokenStreamTestCase.assertAnalyzesTo(a, "1,.2", new String[] { "1", "2" });
- BaseTokenStreamTestCase.assertAnalyzesTo(a, "1.,2", new String[] { "1", "2" });
-
- // '_' is in \p{WB:ExtendNumLet}
-
- BaseTokenStreamTestCase.assertAnalyzesTo(a, "A:B_A:B", new String[] { "A:B_A:B" });
- BaseTokenStreamTestCase.assertAnalyzesTo(a, "A:B_A::B", new String[] { "A:B_A", "B" });
-
- BaseTokenStreamTestCase.assertAnalyzesTo(a, "1.2_1.2", new String[] { "1.2_1.2" });
- BaseTokenStreamTestCase.assertAnalyzesTo(a, "A.B_A.B", new String[] { "A.B_A.B" });
- BaseTokenStreamTestCase.assertAnalyzesTo(a, "1.2_1..2", new String[] { "1.2_1", "2" });
- BaseTokenStreamTestCase.assertAnalyzesTo(a, "A.B_A..B", new String[] { "A.B_A", "B" });
-
- BaseTokenStreamTestCase.assertAnalyzesTo(a, "1,2_1,2", new String[] { "1,2_1,2" });
- BaseTokenStreamTestCase.assertAnalyzesTo(a, "1,2_1,,2", new String[] { "1,2_1", "2" });
-
- BaseTokenStreamTestCase.assertAnalyzesTo(a, "C_A.:B", new String[] { "C_A", "B" });
- BaseTokenStreamTestCase.assertAnalyzesTo(a, "C_A:.B", new String[] { "C_A", "B" });
-
- BaseTokenStreamTestCase.assertAnalyzesTo(a, "3_1,.2", new String[] { "3_1", "2" });
- BaseTokenStreamTestCase.assertAnalyzesTo(a, "3_1.,2", new String[] { "3_1", "2" });
- }
-
-
-
- /** blast some random strings through the analyzer */
- public void testRandomStrings() throws Exception {
- Analyzer analyzer = new StandardAnalyzer();
- checkRandomData(random(), analyzer, 1000*RANDOM_MULTIPLIER);
- analyzer.close();
- }
-
- /** blast some random large strings through the analyzer */
- public void testRandomHugeStrings() throws Exception {
- Analyzer analyzer = new StandardAnalyzer();
- checkRandomData(random(), analyzer, 100*RANDOM_MULTIPLIER, 8192);
- analyzer.close();
- }
-
- // Adds random graph after:
- public void testRandomHugeStringsGraphAfter() throws Exception {
- Random random = random();
- Analyzer analyzer = new Analyzer() {
- @Override
- protected TokenStreamComponents createComponents(String fieldName) {
- Tokenizer tokenizer = new StandardTokenizer(newAttributeFactory());
- TokenStream tokenStream = new MockGraphTokenFilter(random(), tokenizer);
- return new TokenStreamComponents(tokenizer, tokenStream);
- }
- };
- checkRandomData(random, analyzer, 100*RANDOM_MULTIPLIER, 8192);
- analyzer.close();
- }
-}
[11/12] lucene-solr:branch_6x: LUCENE-7318: graduate StandardAnalyzer
and make it the default for IndexWriterConfig
Posted by mi...@apache.org.
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/CapitalizationFilter.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/CapitalizationFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/CapitalizationFilter.java
index d14ad44..f6ba905 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/CapitalizationFilter.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/CapitalizationFilter.java
@@ -20,10 +20,10 @@ package org.apache.lucene.analysis.miscellaneous;
import java.io.IOException;
import java.util.Collection;
+import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
-import org.apache.lucene.analysis.util.CharArraySet;
/**
* A filter to apply normal capitalization rules to Tokens. It will make the first letter
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/CapitalizationFilterFactory.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/CapitalizationFilterFactory.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/CapitalizationFilterFactory.java
index 0301fa5..0397de7 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/CapitalizationFilterFactory.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/CapitalizationFilterFactory.java
@@ -17,16 +17,16 @@
package org.apache.lucene.analysis.miscellaneous;
-import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.util.CharArraySet;
-import org.apache.lucene.analysis.util.TokenFilterFactory;
-
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Map;
import java.util.Set;
+import org.apache.lucene.analysis.CharArraySet;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.util.TokenFilterFactory;
+
/**
* Factory for {@link CapitalizationFilter}.
* <p>
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/CodepointCountFilter.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/CodepointCountFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/CodepointCountFilter.java
index 40cd210..b086c62 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/CodepointCountFilter.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/CodepointCountFilter.java
@@ -17,8 +17,8 @@
package org.apache.lucene.analysis.miscellaneous;
+import org.apache.lucene.analysis.FilteringTokenFilter;
import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.util.FilteringTokenFilter;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
/**
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/DateRecognizerFilter.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/DateRecognizerFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/DateRecognizerFilter.java
index df82ff1..bde0e59 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/DateRecognizerFilter.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/DateRecognizerFilter.java
@@ -21,9 +21,9 @@ import java.text.DateFormat;
import java.text.ParseException;
import java.util.Locale;
+import org.apache.lucene.analysis.FilteringTokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
-import org.apache.lucene.analysis.util.FilteringTokenFilter;
/** Filters all tokens that cannot be parsed to a date, using the provided {@link DateFormat}. */
public class DateRecognizerFilter extends FilteringTokenFilter {
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/FingerprintFilter.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/FingerprintFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/FingerprintFilter.java
index 4c8a5c7..7cbd6f8 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/FingerprintFilter.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/FingerprintFilter.java
@@ -20,6 +20,7 @@ import java.io.IOException;
import java.util.Arrays;
import java.util.Comparator;
+import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
@@ -27,7 +28,6 @@ import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
-import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.util.AttributeSource;
/**
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/KeepWordFilter.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/KeepWordFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/KeepWordFilter.java
index cb3e331..2255283 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/KeepWordFilter.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/KeepWordFilter.java
@@ -16,10 +16,10 @@
*/
package org.apache.lucene.analysis.miscellaneous;
+import org.apache.lucene.analysis.CharArraySet;
+import org.apache.lucene.analysis.FilteringTokenFilter;
import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.util.FilteringTokenFilter;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
-import org.apache.lucene.analysis.util.CharArraySet;
/**
* A TokenFilter that only keeps tokens with text contained in the
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/KeepWordFilterFactory.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/KeepWordFilterFactory.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/KeepWordFilterFactory.java
index 7ff7834..8967c5b 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/KeepWordFilterFactory.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/KeepWordFilterFactory.java
@@ -17,15 +17,15 @@
package org.apache.lucene.analysis.miscellaneous;
+import java.io.IOException;
+import java.util.Map;
+
+import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.analysis.util.ResourceLoader;
import org.apache.lucene.analysis.util.ResourceLoaderAware;
import org.apache.lucene.analysis.util.TokenFilterFactory;
-import java.util.Map;
-import java.io.IOException;
-
/**
* Factory for {@link KeepWordFilter}.
* <pre class="prettyprint">
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/KeywordMarkerFilterFactory.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/KeywordMarkerFilterFactory.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/KeywordMarkerFilterFactory.java
index 69c1aad..5b9f48d 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/KeywordMarkerFilterFactory.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/KeywordMarkerFilterFactory.java
@@ -21,8 +21,8 @@ import java.io.IOException;
import java.util.Map;
import java.util.regex.Pattern;
+import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.analysis.util.ResourceLoader;
import org.apache.lucene.analysis.util.ResourceLoaderAware;
import org.apache.lucene.analysis.util.TokenFilterFactory;
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/LengthFilter.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/LengthFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/LengthFilter.java
index 0594c63..a18711c 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/LengthFilter.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/LengthFilter.java
@@ -18,7 +18,7 @@ package org.apache.lucene.analysis.miscellaneous;
import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.util.FilteringTokenFilter;
+import org.apache.lucene.analysis.FilteringTokenFilter;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
/**
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/RemoveDuplicatesTokenFilter.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/RemoveDuplicatesTokenFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/RemoveDuplicatesTokenFilter.java
index a7ef58e..457087c 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/RemoveDuplicatesTokenFilter.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/RemoveDuplicatesTokenFilter.java
@@ -20,7 +20,7 @@ import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
-import org.apache.lucene.analysis.util.CharArraySet;
+import org.apache.lucene.analysis.CharArraySet;
import java.io.IOException;
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/SetKeywordMarkerFilter.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/SetKeywordMarkerFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/SetKeywordMarkerFilter.java
index c4dbf78..b0d079b 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/SetKeywordMarkerFilter.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/SetKeywordMarkerFilter.java
@@ -18,7 +18,7 @@ package org.apache.lucene.analysis.miscellaneous;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
-import org.apache.lucene.analysis.util.CharArraySet;
+import org.apache.lucene.analysis.CharArraySet;
/**
* Marks terms as keywords via the {@link KeywordAttribute}. Each token
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/WordDelimiterFilter.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/WordDelimiterFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/WordDelimiterFilter.java
index 20e013d..f80ed8a 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/WordDelimiterFilter.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/WordDelimiterFilter.java
@@ -16,22 +16,22 @@
*/
package org.apache.lucene.analysis.miscellaneous;
+import java.io.IOException;
+import java.util.Arrays;
+
+import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.analysis.standard.StandardTokenizer;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
-import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
-import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.InPlaceMergeSorter;
-import java.io.IOException;
-import java.util.Arrays;
-
/**
* Splits words into subwords and performs optional transformations on subword
* groups. Words are split into subwords with the following rules:
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/WordDelimiterFilterFactory.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/WordDelimiterFilterFactory.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/WordDelimiterFilterFactory.java
index 2f51a2b..6a15b55 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/WordDelimiterFilterFactory.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/WordDelimiterFilterFactory.java
@@ -16,13 +16,7 @@
*/
package org.apache.lucene.analysis.miscellaneous;
-
-import org.apache.lucene.analysis.TokenFilter;
-import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.util.CharArraySet;
-import org.apache.lucene.analysis.util.ResourceLoader;
-import org.apache.lucene.analysis.util.ResourceLoaderAware;
-import org.apache.lucene.analysis.util.TokenFilterFactory;
+import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
@@ -30,7 +24,13 @@ import java.util.SortedMap;
import java.util.TreeMap;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
-import java.io.IOException;
+
+import org.apache.lucene.analysis.CharArraySet;
+import org.apache.lucene.analysis.TokenFilter;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.util.ResourceLoader;
+import org.apache.lucene.analysis.util.ResourceLoaderAware;
+import org.apache.lucene.analysis.util.TokenFilterFactory;
import static org.apache.lucene.analysis.miscellaneous.WordDelimiterFilter.*;
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenizer.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenizer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenizer.java
index da104c9..87465b7 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenizer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenizer.java
@@ -24,7 +24,7 @@ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute;
-import org.apache.lucene.analysis.util.CharacterUtils;
+import org.apache.lucene.analysis.CharacterUtils;
import org.apache.lucene.util.AttributeFactory;
/**
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/common/src/java/org/apache/lucene/analysis/nl/DutchAnalyzer.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/nl/DutchAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/nl/DutchAnalyzer.java
index e8b152d..0391425 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/nl/DutchAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/nl/DutchAnalyzer.java
@@ -17,27 +17,27 @@
package org.apache.lucene.analysis.nl;
+import java.io.IOException;
+import java.io.Reader;
+import java.nio.charset.StandardCharsets;
+
import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.core.LowerCaseFilter;
-import org.apache.lucene.analysis.core.StopFilter;
-import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
-import org.apache.lucene.analysis.miscellaneous.StemmerOverrideFilter.StemmerOverrideMap;
+import org.apache.lucene.analysis.CharArrayMap;
+import org.apache.lucene.analysis.CharArraySet;
+import org.apache.lucene.analysis.LowerCaseFilter;
+import org.apache.lucene.analysis.StopFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.WordlistLoader;
+import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
+import org.apache.lucene.analysis.miscellaneous.StemmerOverrideFilter.StemmerOverrideMap;
import org.apache.lucene.analysis.miscellaneous.StemmerOverrideFilter;
import org.apache.lucene.analysis.snowball.SnowballFilter;
import org.apache.lucene.analysis.standard.StandardFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
-import org.apache.lucene.analysis.util.CharArrayMap;
-import org.apache.lucene.analysis.util.CharArraySet;
-import org.apache.lucene.analysis.util.WordlistLoader;
import org.apache.lucene.util.CharsRefBuilder;
import org.apache.lucene.util.IOUtils;
-import java.io.IOException;
-import java.io.Reader;
-import java.nio.charset.StandardCharsets;
-
/**
* {@link Analyzer} for Dutch language.
* <p>
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/common/src/java/org/apache/lucene/analysis/no/NorwegianAnalyzer.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/no/NorwegianAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/no/NorwegianAnalyzer.java
index 4110da3..c413793 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/no/NorwegianAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/no/NorwegianAnalyzer.java
@@ -22,17 +22,17 @@ import java.io.Reader;
import java.nio.charset.StandardCharsets;
import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.core.LowerCaseFilter;
-import org.apache.lucene.analysis.core.StopFilter;
-import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
+import org.apache.lucene.analysis.CharArraySet;
+import org.apache.lucene.analysis.LowerCaseFilter;
+import org.apache.lucene.analysis.StopFilter;
+import org.apache.lucene.analysis.StopwordAnalyzerBase;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.WordlistLoader;
+import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
import org.apache.lucene.analysis.snowball.SnowballFilter;
import org.apache.lucene.analysis.standard.StandardFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
-import org.apache.lucene.analysis.util.CharArraySet;
-import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
-import org.apache.lucene.analysis.util.WordlistLoader;
import org.apache.lucene.util.IOUtils;
import org.tartarus.snowball.ext.NorwegianStemmer;
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseAnalyzer.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseAnalyzer.java
index ecdb944..769e142 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseAnalyzer.java
@@ -22,17 +22,17 @@ import java.io.Reader;
import java.nio.charset.StandardCharsets;
import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.core.LowerCaseFilter;
-import org.apache.lucene.analysis.core.StopFilter;
-import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
+import org.apache.lucene.analysis.CharArraySet;
+import org.apache.lucene.analysis.LowerCaseFilter;
+import org.apache.lucene.analysis.StopFilter;
+import org.apache.lucene.analysis.StopwordAnalyzerBase;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.WordlistLoader;
+import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
import org.apache.lucene.analysis.snowball.SnowballFilter;
import org.apache.lucene.analysis.standard.StandardFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
-import org.apache.lucene.analysis.util.CharArraySet;
-import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
-import org.apache.lucene.analysis.util.WordlistLoader;
import org.apache.lucene.util.IOUtils;
/**
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/RSLPStemmerBase.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/RSLPStemmerBase.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/RSLPStemmerBase.java
index f24cf2a..9fdb73e 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/RSLPStemmerBase.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/pt/RSLPStemmerBase.java
@@ -30,7 +30,7 @@ import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
-import org.apache.lucene.analysis.util.CharArraySet;
+import org.apache.lucene.analysis.CharArraySet;
import static org.apache.lucene.analysis.util.StemmerUtil.*;
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/common/src/java/org/apache/lucene/analysis/query/QueryAutoStopWordAnalyzer.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/query/QueryAutoStopWordAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/query/QueryAutoStopWordAnalyzer.java
index 61475d2..37f044a 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/query/QueryAutoStopWordAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/query/QueryAutoStopWordAnalyzer.java
@@ -21,8 +21,8 @@ import java.util.*;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.AnalyzerWrapper;
-import org.apache.lucene.analysis.core.StopFilter;
-import org.apache.lucene.analysis.util.CharArraySet;
+import org.apache.lucene.analysis.CharArraySet;
+import org.apache.lucene.analysis.StopFilter;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.index.Term;
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/common/src/java/org/apache/lucene/analysis/ro/RomanianAnalyzer.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/ro/RomanianAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/ro/RomanianAnalyzer.java
index 7436243..06ff999 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/ro/RomanianAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/ro/RomanianAnalyzer.java
@@ -21,16 +21,16 @@ import java.io.IOException;
import java.io.Reader;
import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.core.LowerCaseFilter;
-import org.apache.lucene.analysis.core.StopFilter;
-import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
+import org.apache.lucene.analysis.CharArraySet;
+import org.apache.lucene.analysis.LowerCaseFilter;
+import org.apache.lucene.analysis.StopFilter;
+import org.apache.lucene.analysis.StopwordAnalyzerBase;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
import org.apache.lucene.analysis.snowball.SnowballFilter;
import org.apache.lucene.analysis.standard.StandardFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
-import org.apache.lucene.analysis.util.CharArraySet;
-import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
import org.tartarus.snowball.ext.RomanianStemmer;
/**
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/common/src/java/org/apache/lucene/analysis/ru/RussianAnalyzer.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/ru/RussianAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/ru/RussianAnalyzer.java
index db2df8a..dfe8ef3 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/ru/RussianAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/ru/RussianAnalyzer.java
@@ -22,17 +22,17 @@ import java.io.Reader;
import java.nio.charset.StandardCharsets;
import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.CharArraySet;
+import org.apache.lucene.analysis.LowerCaseFilter;
+import org.apache.lucene.analysis.StopFilter;
+import org.apache.lucene.analysis.StopwordAnalyzerBase;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.WordlistLoader;
+import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
import org.apache.lucene.analysis.snowball.SnowballFilter;
import org.apache.lucene.analysis.standard.StandardFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
-import org.apache.lucene.analysis.util.CharArraySet;
-import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
-import org.apache.lucene.analysis.util.WordlistLoader;
-import org.apache.lucene.analysis.core.LowerCaseFilter;
-import org.apache.lucene.analysis.core.StopFilter;
-import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
-import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.util.IOUtils;
/**
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/common/src/java/org/apache/lucene/analysis/snowball/SnowballFilter.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/snowball/SnowballFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/snowball/SnowballFilter.java
index 1c11e48..06aed49 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/snowball/SnowballFilter.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/snowball/SnowballFilter.java
@@ -19,11 +19,11 @@ package org.apache.lucene.analysis.snowball;
import java.io.IOException;
+import org.apache.lucene.analysis.LowerCaseFilter;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.core.LowerCaseFilter;
-import org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+import org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
import org.apache.lucene.analysis.tr.TurkishLowerCaseFilter; // javadoc @link
import org.tartarus.snowball.SnowballProgram;
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/common/src/java/org/apache/lucene/analysis/snowball/SnowballPorterFilterFactory.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/snowball/SnowballPorterFilterFactory.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/snowball/SnowballPorterFilterFactory.java
index 93cf7a4..d598a09 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/snowball/SnowballPorterFilterFactory.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/snowball/SnowballPorterFilterFactory.java
@@ -17,13 +17,13 @@
package org.apache.lucene.analysis.snowball;
-import java.util.Map;
import java.io.IOException;
+import java.util.Map;
-import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
+import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.util.CharArraySet;
+import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
import org.apache.lucene.analysis.util.ResourceLoader;
import org.apache.lucene.analysis.util.ResourceLoaderAware;
import org.apache.lucene.analysis.util.TokenFilterFactory;
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicAnalyzer.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicAnalyzer.java
index 43c7dad..dc6c118 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicAnalyzer.java
@@ -20,13 +20,13 @@ package org.apache.lucene.analysis.standard;
import java.io.IOException;
import java.io.Reader;
+import org.apache.lucene.analysis.CharArraySet;
+import org.apache.lucene.analysis.LowerCaseFilter;
+import org.apache.lucene.analysis.StopFilter;
+import org.apache.lucene.analysis.StopwordAnalyzerBase;
import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.core.LowerCaseFilter;
+import org.apache.lucene.analysis.WordlistLoader;
import org.apache.lucene.analysis.core.StopAnalyzer;
-import org.apache.lucene.analysis.core.StopFilter;
-import org.apache.lucene.analysis.util.CharArraySet;
-import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
-import org.apache.lucene.analysis.util.WordlistLoader;
/**
* Filters {@link ClassicTokenizer} with {@link ClassicFilter}, {@link
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardAnalyzer.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardAnalyzer.java
deleted file mode 100644
index ae23dc6..0000000
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardAnalyzer.java
+++ /dev/null
@@ -1,98 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.lucene.analysis.standard;
-
-
-import java.io.IOException;
-import java.io.Reader;
-
-import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.core.LowerCaseFilter;
-import org.apache.lucene.analysis.core.StopAnalyzer;
-import org.apache.lucene.analysis.core.StopFilter;
-import org.apache.lucene.analysis.util.CharArraySet;
-import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
-import org.apache.lucene.analysis.util.WordlistLoader;
-
-/**
- * Filters {@link StandardTokenizer} with {@link StandardFilter}, {@link
- * LowerCaseFilter} and {@link StopFilter}, using a list of
- * English stop words.
- */
-public final class StandardAnalyzer extends StopwordAnalyzerBase {
-
- /** Default maximum allowed token length */
- public static final int DEFAULT_MAX_TOKEN_LENGTH = 255;
-
- private int maxTokenLength = DEFAULT_MAX_TOKEN_LENGTH;
-
- /** An unmodifiable set containing some common English words that are usually not
- useful for searching. */
- public static final CharArraySet STOP_WORDS_SET = StopAnalyzer.ENGLISH_STOP_WORDS_SET;
-
- /** Builds an analyzer with the given stop words.
- * @param stopWords stop words */
- public StandardAnalyzer(CharArraySet stopWords) {
- super(stopWords);
- }
-
- /** Builds an analyzer with the default stop words ({@link #STOP_WORDS_SET}).
- */
- public StandardAnalyzer() {
- this(STOP_WORDS_SET);
- }
-
- /** Builds an analyzer with the stop words from the given reader.
- * @see WordlistLoader#getWordSet(Reader)
- * @param stopwords Reader to read stop words from */
- public StandardAnalyzer(Reader stopwords) throws IOException {
- this(loadStopwordSet(stopwords));
- }
-
- /**
- * Set maximum allowed token length. If a token is seen
- * that exceeds this length then it is discarded. This
- * setting only takes effect the next time tokenStream or
- * tokenStream is called.
- */
- public void setMaxTokenLength(int length) {
- maxTokenLength = length;
- }
-
- /**
- * @see #setMaxTokenLength
- */
- public int getMaxTokenLength() {
- return maxTokenLength;
- }
-
- @Override
- protected TokenStreamComponents createComponents(final String fieldName) {
- final StandardTokenizer src = new StandardTokenizer();
- src.setMaxTokenLength(maxTokenLength);
- TokenStream tok = new StandardFilter(src);
- tok = new LowerCaseFilter(tok);
- tok = new StopFilter(tok, stopwords);
- return new TokenStreamComponents(src, tok) {
- @Override
- protected void setReader(final Reader reader) {
- src.setMaxTokenLength(StandardAnalyzer.this.maxTokenLength);
- super.setReader(reader);
- }
- };
- }
-}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardFilter.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardFilter.java
deleted file mode 100644
index a470a83..0000000
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardFilter.java
+++ /dev/null
@@ -1,38 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.lucene.analysis.standard;
-
-
-import java.io.IOException;
-
-import org.apache.lucene.analysis.TokenFilter;
-import org.apache.lucene.analysis.TokenStream;
-
-/**
- * Normalizes tokens extracted with {@link StandardTokenizer}.
- */
-public class StandardFilter extends TokenFilter {
-
- public StandardFilter(TokenStream in) {
- super(in);
- }
-
- @Override
- public final boolean incrementToken() throws IOException {
- return input.incrementToken(); // TODO: add some niceties for the new grammar
- }
-}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizer.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizer.java
deleted file mode 100644
index 1e143a3..0000000
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizer.java
+++ /dev/null
@@ -1,201 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.lucene.analysis.standard;
-
-import java.io.IOException;
-
-import org.apache.lucene.analysis.Tokenizer;
-import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
-import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
-import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
-import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
-import org.apache.lucene.util.AttributeFactory;
-
-/** A grammar-based tokenizer constructed with JFlex.
- * <p>
- * This class implements the Word Break rules from the
- * Unicode Text Segmentation algorithm, as specified in
- * <a href="http://unicode.org/reports/tr29/">Unicode Standard Annex #29</a>.
- * <p>Many applications have specific tokenizer needs. If this tokenizer does
- * not suit your application, please consider copying this source code
- * directory to your project and maintaining your own grammar-based tokenizer.
- */
-
-public final class StandardTokenizer extends Tokenizer {
- /** A private instance of the JFlex-constructed scanner */
- private StandardTokenizerImpl scanner;
-
- // TODO: how can we remove these old types?!
- public static final int ALPHANUM = 0;
- /** @deprecated (3.1) */
- @Deprecated
- public static final int APOSTROPHE = 1;
- /** @deprecated (3.1) */
- @Deprecated
- public static final int ACRONYM = 2;
- /** @deprecated (3.1) */
- @Deprecated
- public static final int COMPANY = 3;
- public static final int EMAIL = 4;
- /** @deprecated (3.1) */
- @Deprecated
- public static final int HOST = 5;
- public static final int NUM = 6;
- /** @deprecated (3.1) */
- @Deprecated
- public static final int CJ = 7;
-
- /** @deprecated (3.1) */
- @Deprecated
- public static final int ACRONYM_DEP = 8;
-
- public static final int SOUTHEAST_ASIAN = 9;
- public static final int IDEOGRAPHIC = 10;
- public static final int HIRAGANA = 11;
- public static final int KATAKANA = 12;
- public static final int HANGUL = 13;
-
- /** String token types that correspond to token type int constants */
- public static final String [] TOKEN_TYPES = new String [] {
- "<ALPHANUM>",
- "<APOSTROPHE>",
- "<ACRONYM>",
- "<COMPANY>",
- "<EMAIL>",
- "<HOST>",
- "<NUM>",
- "<CJ>",
- "<ACRONYM_DEP>",
- "<SOUTHEAST_ASIAN>",
- "<IDEOGRAPHIC>",
- "<HIRAGANA>",
- "<KATAKANA>",
- "<HANGUL>"
- };
-
- public static final int MAX_TOKEN_LENGTH_LIMIT = 1024 * 1024;
-
- private int skippedPositions;
-
- private int maxTokenLength = StandardAnalyzer.DEFAULT_MAX_TOKEN_LENGTH;
-
- /**
- * Set the max allowed token length. No tokens longer than this are emitted.
- *
- * @throws IllegalArgumentException if the given length is outside of the
- * range [1, {@value #MAX_TOKEN_LENGTH_LIMIT}].
- */
- public void setMaxTokenLength(int length) {
- if (length < 1) {
- throw new IllegalArgumentException("maxTokenLength must be greater than zero");
- } else if (length > MAX_TOKEN_LENGTH_LIMIT) {
- throw new IllegalArgumentException("maxTokenLength may not exceed " + MAX_TOKEN_LENGTH_LIMIT);
- }
- if (length != maxTokenLength) {
- maxTokenLength = length;
- scanner.setBufferSize(length);
- }
- }
-
- /** @see #setMaxTokenLength */
- public int getMaxTokenLength() {
- return maxTokenLength;
- }
-
- /**
- * Creates a new instance of the {@link org.apache.lucene.analysis.standard.StandardTokenizer}. Attaches
- * the <code>input</code> to the newly created JFlex scanner.
-
- * See http://issues.apache.org/jira/browse/LUCENE-1068
- */
- public StandardTokenizer() {
- init();
- }
-
- /**
- * Creates a new StandardTokenizer with a given {@link org.apache.lucene.util.AttributeFactory}
- */
- public StandardTokenizer(AttributeFactory factory) {
- super(factory);
- init();
- }
-
- private void init() {
- this.scanner = new StandardTokenizerImpl(input);
- }
-
- // this tokenizer generates three attributes:
- // term offset, positionIncrement and type
- private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
- private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
- private final PositionIncrementAttribute posIncrAtt = addAttribute(PositionIncrementAttribute.class);
- private final TypeAttribute typeAtt = addAttribute(TypeAttribute.class);
-
- /*
- * (non-Javadoc)
- *
- * @see org.apache.lucene.analysis.TokenStream#next()
- */
- @Override
- public final boolean incrementToken() throws IOException {
- clearAttributes();
- skippedPositions = 0;
-
- while(true) {
- int tokenType = scanner.getNextToken();
-
- if (tokenType == StandardTokenizerImpl.YYEOF) {
- return false;
- }
-
- if (scanner.yylength() <= maxTokenLength) {
- posIncrAtt.setPositionIncrement(skippedPositions+1);
- scanner.getText(termAtt);
- final int start = scanner.yychar();
- offsetAtt.setOffset(correctOffset(start), correctOffset(start+termAtt.length()));
- typeAtt.setType(StandardTokenizer.TOKEN_TYPES[tokenType]);
- return true;
- } else
- // When we skip a too-long term, we still increment the
- // position increment
- skippedPositions++;
- }
- }
-
- @Override
- public final void end() throws IOException {
- super.end();
- // set final offset
- int finalOffset = correctOffset(scanner.yychar() + scanner.yylength());
- offsetAtt.setOffset(finalOffset, finalOffset);
- // adjust any skipped tokens
- posIncrAtt.setPositionIncrement(posIncrAtt.getPositionIncrement()+skippedPositions);
- }
-
- @Override
- public void close() throws IOException {
- super.close();
- scanner.yyreset(input);
- }
-
- @Override
- public void reset() throws IOException {
- super.reset();
- scanner.yyreset(input);
- skippedPositions = 0;
- }
-}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.java
deleted file mode 100644
index c8bf9e9..0000000
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.java
+++ /dev/null
@@ -1,818 +0,0 @@
-/* The following code was generated by JFlex 1.6.0 */
-
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.lucene.analysis.standard;
-
-import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
-
-/**
- * This class implements Word Break rules from the Unicode Text Segmentation
- * algorithm, as specified in
- * <a href="http://unicode.org/reports/tr29/">Unicode Standard Annex #29</a>.
- * <p>
- * Tokens produced are of the following types:
- * <ul>
- * <li><ALPHANUM>: A sequence of alphabetic and numeric characters</li>
- * <li><NUM>: A number</li>
- * <li><SOUTHEAST_ASIAN>: A sequence of characters from South and Southeast
- * Asian languages, including Thai, Lao, Myanmar, and Khmer</li>
- * <li><IDEOGRAPHIC>: A single CJKV ideographic character</li>
- * <li><HIRAGANA>: A single hiragana character</li>
- * <li><KATAKANA>: A sequence of katakana characters</li>
- * <li><HANGUL>: A sequence of Hangul characters</li>
- * </ul>
- */
-@SuppressWarnings("fallthrough")
-
-public final class StandardTokenizerImpl {
-
- /** This character denotes the end of file */
- public static final int YYEOF = -1;
-
- /** initial size of the lookahead buffer */
- private int ZZ_BUFFERSIZE = 255;
-
- /** lexical states */
- public static final int YYINITIAL = 0;
-
- /**
- * ZZ_LEXSTATE[l] is the state in the DFA for the lexical state l
- * ZZ_LEXSTATE[l+1] is the state in the DFA for the lexical state l
- * at the beginning of a line
- * l is of the form l = 2*k, k a non negative integer
- */
- private static final int ZZ_LEXSTATE[] = {
- 0, 0
- };
-
- /**
- * Translates characters to character classes
- */
- private static final String ZZ_CMAP_PACKED =
- "\42\0\1\15\4\0\1\14\4\0\1\7\1\0\1\10\1\0\12\4"+
- "\1\6\1\7\5\0\32\1\4\0\1\11\1\0\32\1\57\0\1\1"+
- "\2\0\1\3\7\0\1\1\1\0\1\6\2\0\1\1\5\0\27\1"+
- "\1\0\37\1\1\0\u01ca\1\4\0\14\1\5\0\1\6\10\0\5\1"+
- "\7\0\1\1\1\0\1\1\21\0\160\3\5\1\1\0\2\1\2\0"+
- "\4\1\1\7\7\0\1\1\1\6\3\1\1\0\1\1\1\0\24\1"+
- "\1\0\123\1\1\0\213\1\1\0\7\3\236\1\11\0\46\1\2\0"+
- "\1\1\7\0\47\1\1\0\1\7\7\0\55\3\1\0\1\3\1\0"+
- "\2\3\1\0\2\3\1\0\1\3\10\0\33\16\5\0\3\16\1\1"+
- "\1\6\13\0\5\3\7\0\2\7\2\0\13\3\1\0\1\3\3\0"+
- "\53\1\25\3\12\4\1\0\1\4\1\7\1\0\2\1\1\3\143\1"+
- "\1\0\1\1\10\3\1\0\6\3\2\1\2\3\1\0\4\3\2\1"+
- "\12\4\3\1\2\0\1\1\17\0\1\3\1\1\1\3\36\1\33\3"+
- "\2\0\131\1\13\3\1\1\16\0\12\4\41\1\11\3\2\1\2\0"+
- "\1\7\1\0\1\1\5\0\26\1\4\3\1\1\11\3\1\1\3\3"+
- "\1\1\5\3\22\0\31\1\3\3\104\0\1\1\1\0\13\1\67\0"+
- "\33\3\1\0\4\3\66\1\3\3\1\1\22\3\1\1\7\3\12\1"+
- "\2\3\2\0\12\4\1\0\7\1\1\0\7\1\1\0\3\3\1\0"+
- "\10\1\2\0\2\1\2\0\26\1\1\0\7\1\1\0\1\1\3\0"+
- "\4\1\2\0\1\3\1\1\7\3\2\0\2\3\2\0\3\3\1\1"+
- "\10\0\1\3\4\0\2\1\1\0\3\1\2\3\2\0\12\4\2\1"+
- "\17\0\3\3\1\0\6\1\4\0\2\1\2\0\26\1\1\0\7\1"+
- "\1\0\2\1\1\0\2\1\1\0\2\1\2\0\1\3\1\0\5\3"+
- "\4\0\2\3\2\0\3\3\3\0\1\3\7\0\4\1\1\0\1\1"+
- "\7\0\12\4\2\3\3\1\1\3\13\0\3\3\1\0\11\1\1\0"+
- "\3\1\1\0\26\1\1\0\7\1\1\0\2\1\1\0\5\1\2\0"+
- "\1\3\1\1\10\3\1\0\3\3\1\0\3\3\2\0\1\1\17\0"+
- "\2\1\2\3\2\0\12\4\21\0\3\3\1\0\10\1\2\0\2\1"+
- "\2\0\26\1\1\0\7\1\1\0\2\1\1\0\5\1\2\0\1\3"+
- "\1\1\7\3\2\0\2\3\2\0\3\3\10\0\2\3\4\0\2\1"+
- "\1\0\3\1\2\3\2\0\12\4\1\0\1\1\20\0\1\3\1\1"+
- "\1\0\6\1\3\0\3\1\1\0\4\1\3\0\2\1\1\0\1\1"+
- "\1\0\2\1\3\0\2\1\3\0\3\1\3\0\14\1\4\0\5\3"+
- "\3\0\3\3\1\0\4\3\2\0\1\1\6\0\1\3\16\0\12\4"+
- "\21\0\3\3\1\0\10\1\1\0\3\1\1\0\27\1\1\0\12\1"+
- "\1\0\5\1\3\0\1\1\7\3\1\0\3\3\1\0\4\3\7\0"+
- "\2\3\1\0\2\1\6\0\2\1\2\3\2\0\12\4\22\0\2\3"+
- "\1\0\10\1\1\0\3\1\1\0\27\1\1\0\12\1\1\0\5\1"+
- "\2\0\1\3\1\1\7\3\1\0\3\3\1\0\4\3\7\0\2\3"+
- "\7\0\1\1\1\0\2\1\2\3\2\0\12\4\1\0\2\1\17\0"+
- "\2\3\1\0\10\1\1\0\3\1\1\0\51\1\2\0\1\1\7\3"+
- "\1\0\3\3\1\0\4\3\1\1\10\0\1\3\10\0\2\1\2\3"+
- "\2\0\12\4\12\0\6\1\2\0\2\3\1\0\22\1\3\0\30\1"+
- "\1\0\11\1\1\0\1\1\2\0\7\1\3\0\1\3\4\0\6\3"+
- "\1\0\1\3\1\0\10\3\22\0\2\3\15\0\60\20\1\21\2\20"+
- "\7\21\5\0\7\20\10\21\1\0\12\4\47\0\2\20\1\0\1\20"+
- "\2\0\2\20\1\0\1\20\2\0\1\20\6\0\4\20\1\0\7\20"+
- "\1\0\3\20\1\0\1\20\1\0\1\20\2\0\2\20\1\0\4\20"+
- "\1\21\2\20\6\21\1\0\2\21\1\20\2\0\5\20\1\0\1\20"+
- "\1\0\6\21\2\0\12\4\2\0\4\20\40\0\1\1\27\0\2\3"+
- "\6\0\12\4\13\0\1\3\1\0\1\3\1\0\1\3\4\0\2\3"+
- "\10\1\1\0\44\1\4\0\24\3\1\0\2\3\5\1\13\3\1\0"+
- "\44\3\11\0\1\3\71\0\53\20\24\21\1\20\12\4\6\0\6\20"+
- "\4\21\4\20\3\21\1\20\3\21\2\20\7\21\3\20\4\21\15\20"+
- "\14\21\1\20\1\21\12\4\4\21\2\20\46\1\1\0\1\1\5\0"+
- "\1\1\2\0\53\1\1\0\4\1\u0100\2\111\1\1\0\4\1\2\0"+
- "\7\1\1\0\1\1\1\0\4\1\2\0\51\1\1\0\4\1\2\0"+
- "\41\1\1\0\4\1\2\0\7\1\1\0\1\1\1\0\4\1\2\0"+
- "\17\1\1\0\71\1\1\0\4\1\2\0\103\1\2\0\3\3\40\0"+
- "\20\1\20\0\125\1\14\0\u026c\1\2\0\21\1\1\0\32\1\5\0"+
- "\113\1\3\0\3\1\17\0\15\1\1\0\4\1\3\3\13\0\22\1"+
- "\3\3\13\0\22\1\2\3\14\0\15\1\1\0\3\1\1\0\2\3"+
- "\14\0\64\20\40\21\3\0\1\20\4\0\1\20\1\21\2\0\12\4"+
- "\41\0\4\3\1\0\12\4\6\0\130\1\10\0\51\1\1\3\1\1"+
- "\5\0\106\1\12\0\35\1\3\0\14\3\4\0\14\3\12\0\12\4"+
- "\36\20\2\0\5\20\13\0\54\20\4\0\21\21\7\20\2\21\6\0"+
- "\12\4\1\20\3\0\2\20\40\0\27\1\5\3\4\0\65\20\12\21"+
- "\1\0\35\21\2\0\1\3\12\4\6\0\12\4\6\0\16\20\122\0"+
- "\5\3\57\1\21\3\7\1\4\0\12\4\21\0\11\3\14\0\3\3"+
- "\36\1\15\3\2\1\12\4\54\1\16\3\14\0\44\1\24\3\10\0"+
- "\12\4\3\0\3\1\12\4\44\1\122\0\3\3\1\0\25\3\4\1"+
- "\1\3\4\1\3\3\2\1\11\0\300\1\47\3\25\0\4\3\u0116\1"+
- "\2\0\6\1\2\0\46\1\2\0\6\1\2\0\10\1\1\0\1\1"+
- "\1\0\1\1\1\0\1\1\1\0\37\1\2\0\65\1\1\0\7\1"+
- "\1\0\1\1\3\0\3\1\1\0\7\1\3\0\4\1\2\0\6\1"+
- "\4\0\15\1\5\0\3\1\1\0\7\1\17\0\4\3\10\0\2\10"+
- "\12\0\1\10\2\0\1\6\2\0\5\3\20\0\2\11\3\0\1\7"+
- "\17\0\1\11\13\0\5\3\1\0\12\3\1\0\1\1\15\0\1\1"+
- "\20\0\15\1\63\0\41\3\21\0\1\1\4\0\1\1\2\0\12\1"+
- "\1\0\1\1\3\0\5\1\6\0\1\1\1\0\1\1\1\0\1\1"+
- "\1\0\4\1\1\0\13\1\2\0\4\1\5\0\5\1\4\0\1\1"+
- "\21\0\51\1\u032d\0\64\1\u0716\0\57\1\1\0\57\1\1\0\205\1"+
- "\6\0\4\1\3\3\2\1\14\0\46\1\1\0\1\1\5\0\1\1"+
- "\2\0\70\1\7\0\1\1\17\0\1\3\27\1\11\0\7\1\1\0"+
- "\7\1\1\0\7\1\1\0\7\1\1\0\7\1\1\0\7\1\1\0"+
- "\7\1\1\0\7\1\1\0\40\3\57\0\1\1\120\0\32\12\1\0"+
- "\131\12\14\0\326\12\57\0\1\1\1\0\1\12\31\0\11\12\6\3"+
- "\1\0\5\5\2\0\3\12\1\1\1\1\4\0\126\13\2\0\2\3"+
- "\2\5\3\13\133\5\1\0\4\5\5\0\51\1\3\0\136\2\21\0"+
- "\33\1\65\0\20\5\320\0\57\5\1\0\130\5\250\0\u19b6\12\112\0"+
- "\u51cd\12\63\0\u048d\1\103\0\56\1\2\0\u010d\1\3\0\20\1\12\4"+
- "\2\1\24\0\57\1\4\3\1\0\12\3\1\0\31\1\7\0\1\3"+
- "\120\1\2\3\45\0\11\1\2\0\147\1\2\0\4\1\1\0\4\1"+
- "\14\0\13\1\115\0\12\1\1\3\3\1\1\3\4\1\1\3\27\1"+
- "\5\3\30\0\64\1\14\0\2\3\62\1\21\3\13\0\12\4\6\0"+
- "\22\3\6\1\3\0\1\1\4\0\12\4\34\1\10\3\2\0\27\1"+
- "\15\3\14\0\35\2\3\0\4\3\57\1\16\3\16\0\1\1\12\4"+
- "\46\0\51\1\16\3\11\0\3\1\1\3\10\1\2\3\2\0\12\4"+
- "\6\0\33\20\1\21\4\0\60\20\1\21\1\20\3\21\2\20\2\21"+
- "\5\20\2\21\1\20\1\21\1\20\30\0\5\20\13\1\5\3\2\0"+
- "\3\1\2\3\12\0\6\1\2\0\6\1\2\0\6\1\11\0\7\1"+
- "\1\0\7\1\221\0\43\1\10\3\1\0\2\3\2\0\12\4\6\0"+
- "\u2ba4\2\14\0\27\2\4\0\61\2\u2104\0\u016e\12\2\0\152\12\46\0"+
- "\7\1\14\0\5\1\5\0\1\16\1\3\12\16\1\0\15\16\1\0"+
- "\5\16\1\0\1\16\1\0\2\16\1\0\2\16\1\0\12\16\142\1"+
- "\41\0\u016b\1\22\0\100\1\2\0\66\1\50\0\14\1\4\0\20\3"+
- "\1\7\2\0\1\6\1\7\13\0\7\3\14\0\2\11\30\0\3\11"+
- "\1\7\1\0\1\10\1\0\1\7\1\6\32\0\5\1\1\0\207\1"+
- "\2\0\1\3\7\0\1\10\4\0\1\7\1\0\1\10\1\0\12\4"+
- "\1\6\1\7\5\0\32\1\4\0\1\11\1\0\32\1\13\0\70\5"+
- "\2\3\37\2\3\0\6\2\2\0\6\2\2\0\6\2\2\0\3\2"+
- "\34\0\3\3\4\0\14\1\1\0\32\1\1\0\23\1\1\0\2\1"+
- "\1\0\17\1\2\0\16\1\42\0\173\1\105\0\65\1\210\0\1\3"+
- "\202\0\35\1\3\0\61\1\57\0\37\1\21\0\33\1\65\0\36\1"+
- "\2\0\44\1\4\0\10\1\1\0\5\1\52\0\236\1\2\0\12\4"+
- "\u0356\0\6\1\2\0\1\1\1\0\54\1\1\0\2\1\3\0\1\1"+
- "\2\0\27\1\252\0\26\1\12\0\32\1\106\0\70\1\6\0\2\1"+
- "\100\0\1\1\3\3\1\0\2\3\5\0\4\3\4\1\1\0\3\1"+
- "\1\0\33\1\4\0\3\3\4\0\1\3\40\0\35\1\203\0\66\1"+
- "\12\0\26\1\12\0\23\1\215\0\111\1\u03b7\0\3\3\65\1\17\3"+
- "\37\0\12\4\20\0\3\3\55\1\13\3\2\0\1\3\22\0\31\1"+
- "\7\0\12\4\6\0\3\3\44\1\16\3\1\0\12\4\100\0\3\3"+
- "\60\1\16\3\4\1\13\0\12\4\u04a6\0\53\1\15\3\10\0\12\4"+
- "\u0936\0\u036f\1\221\0\143\1\u0b9d\0\u042f\1\u33d1\0\u0239\1\u04c7\0\105\1"+
- "\13\0\1\1\56\3\20\0\4\3\15\1\u4060\0\1\5\1\13\u2163\0"+
- "\5\3\3\0\26\3\2\0\7\3\36\0\4\3\224\0\3\3\u01bb\0"+
- "\125\1\1\0\107\1\1\0\2\1\2\0\1\1\2\0\2\1\2\0"+
- "\4\1\1\0\14\1\1\0\1\1\1\0\7\1\1\0\101\1\1\0"+
- "\4\1\2\0\10\1\1\0\7\1\1\0\34\1\1\0\4\1\1\0"+
- "\5\1\1\0\1\1\3\0\7\1\1\0\u0154\1\2\0\31\1\1\0"+
- "\31\1\1\0\37\1\1\0\31\1\1\0\37\1\1\0\31\1\1\0"+
- "\37\1\1\0\31\1\1\0\37\1\1\0\31\1\1\0\10\1\2\0"+
- "\62\4\u1600\0\4\1\1\0\33\1\1\0\2\1\1\0\1\1\2\0"+
- "\1\1\1\0\12\1\1\0\4\1\1\0\1\1\1\0\1\1\6\0"+
- "\1\1\4\0\1\1\1\0\1\1\1\0\1\1\1\0\3\1\1\0"+
- "\2\1\1\0\1\1\2\0\1\1\1\0\1\1\1\0\1\1\1\0"+
- "\1\1\1\0\1\1\1\0\2\1\1\0\1\1\2\0\4\1\1\0"+
- "\7\1\1\0\4\1\1\0\4\1\1\0\1\1\1\0\12\1\1\0"+
- "\21\1\5\0\3\1\1\0\5\1\1\0\21\1\u032a\0\32\17\1\13"+
- "\u0dff\0\ua6d7\12\51\0\u1035\12\13\0\336\12\u3fe2\0\u021e\12\uffff\0\uffff\0\uffff\0\uffff\0\uffff\0\uffff\0\uffff\0\uffff\0\uffff\0\uffff\0\uffff\0\u05ee\0"+
- "\1\3\36\0\140\3\200\0\360\3\uffff\0\uffff\0\ufe12\0";
-
- /**
- * Translates characters to character classes
- */
- private static final char [] ZZ_CMAP = zzUnpackCMap(ZZ_CMAP_PACKED);
-
- /**
- * Translates DFA states to action switch labels.
- */
- private static final int [] ZZ_ACTION = zzUnpackAction();
-
- private static final String ZZ_ACTION_PACKED_0 =
- "\1\0\1\1\1\2\1\3\1\4\1\5\1\1\1\6"+
- "\1\7\1\2\1\1\1\10\1\2\1\0\1\2\1\0"+
- "\1\4\1\0\2\2\2\0\1\1\1\0";
-
- private static int [] zzUnpackAction() {
- int [] result = new int[24];
- int offset = 0;
- offset = zzUnpackAction(ZZ_ACTION_PACKED_0, offset, result);
- return result;
- }
-
- private static int zzUnpackAction(String packed, int offset, int [] result) {
- int i = 0; /* index in packed string */
- int j = offset; /* index in unpacked array */
- int l = packed.length();
- while (i < l) {
- int count = packed.charAt(i++);
- int value = packed.charAt(i++);
- do result[j++] = value; while (--count > 0);
- }
- return j;
- }
-
-
- /**
- * Translates a state to a row index in the transition table
- */
- private static final int [] ZZ_ROWMAP = zzUnpackRowMap();
-
- private static final String ZZ_ROWMAP_PACKED_0 =
- "\0\0\0\22\0\44\0\66\0\110\0\132\0\154\0\176"+
- "\0\220\0\242\0\264\0\306\0\330\0\352\0\374\0\u010e"+
- "\0\u0120\0\154\0\u0132\0\u0144\0\u0156\0\264\0\u0168\0\u017a";
-
- private static int [] zzUnpackRowMap() {
- int [] result = new int[24];
- int offset = 0;
- offset = zzUnpackRowMap(ZZ_ROWMAP_PACKED_0, offset, result);
- return result;
- }
-
- private static int zzUnpackRowMap(String packed, int offset, int [] result) {
- int i = 0; /* index in packed string */
- int j = offset; /* index in unpacked array */
- int l = packed.length();
- while (i < l) {
- int high = packed.charAt(i++) << 16;
- result[j++] = high | packed.charAt(i++);
- }
- return j;
- }
-
- /**
- * The transition table of the DFA
- */
- private static final int [] ZZ_TRANS = zzUnpackTrans();
-
- private static final String ZZ_TRANS_PACKED_0 =
- "\1\2\1\3\1\4\1\2\1\5\1\6\3\2\1\7"+
- "\1\10\1\11\2\2\1\12\1\13\2\14\23\0\3\3"+
- "\1\15\1\0\1\16\1\0\1\16\1\17\2\0\1\16"+
- "\1\0\1\12\2\0\1\3\1\0\1\3\2\4\1\15"+
- "\1\0\1\16\1\0\1\16\1\17\2\0\1\16\1\0"+
- "\1\12\2\0\1\4\1\0\2\3\2\5\2\0\2\20"+
- "\1\21\2\0\1\20\1\0\1\12\2\0\1\5\3\0"+
- "\1\6\1\0\1\6\3\0\1\17\7\0\1\6\1\0"+
- "\2\3\1\22\1\5\1\23\3\0\1\22\4\0\1\12"+
- "\2\0\1\22\3\0\1\10\15\0\1\10\3\0\1\11"+
- "\15\0\1\11\1\0\2\3\1\12\1\15\1\0\1\16"+
- "\1\0\1\16\1\17\2\0\1\24\1\25\1\12\2\0"+
- "\1\12\3\0\1\26\13\0\1\27\1\0\1\26\3\0"+
- "\1\14\14\0\2\14\1\0\2\3\2\15\2\0\2\30"+
- "\1\17\2\0\1\30\1\0\1\12\2\0\1\15\1\0"+
- "\2\3\1\16\12\0\1\3\2\0\1\16\1\0\2\3"+
- "\1\17\1\15\1\23\3\0\1\17\4\0\1\12\2\0"+
- "\1\17\3\0\1\20\1\5\14\0\1\20\1\0\2\3"+
- "\1\21\1\5\1\23\3\0\1\21\4\0\1\12\2\0"+
- "\1\21\3\0\1\23\1\0\1\23\3\0\1\17\7\0"+
- "\1\23\1\0\2\3\1\24\1\15\4\0\1\17\4\0"+
- "\1\12\2\0\1\24\3\0\1\25\12\0\1\24\2\0"+
- "\1\25\3\0\1\27\13\0\1\27\1\0\1\27\3\0"+
- "\1\30\1\15\14\0\1\30";
-
- private static int [] zzUnpackTrans() {
- int [] result = new int[396];
- int offset = 0;
- offset = zzUnpackTrans(ZZ_TRANS_PACKED_0, offset, result);
- return result;
- }
-
- private static int zzUnpackTrans(String packed, int offset, int [] result) {
- int i = 0; /* index in packed string */
- int j = offset; /* index in unpacked array */
- int l = packed.length();
- while (i < l) {
- int count = packed.charAt(i++);
- int value = packed.charAt(i++);
- value--;
- do result[j++] = value; while (--count > 0);
- }
- return j;
- }
-
-
- /* error codes */
- private static final int ZZ_UNKNOWN_ERROR = 0;
- private static final int ZZ_NO_MATCH = 1;
- private static final int ZZ_PUSHBACK_2BIG = 2;
-
- /* error messages for the codes above */
- private static final String ZZ_ERROR_MSG[] = {
- "Unkown internal scanner error",
- "Error: could not match input",
- "Error: pushback value was too large"
- };
-
- /**
- * ZZ_ATTRIBUTE[aState] contains the attributes of state <code>aState</code>
- */
- private static final int [] ZZ_ATTRIBUTE = zzUnpackAttribute();
-
- private static final String ZZ_ATTRIBUTE_PACKED_0 =
- "\1\0\1\11\13\1\1\0\1\1\1\0\1\1\1\0"+
- "\2\1\2\0\1\1\1\0";
-
- private static int [] zzUnpackAttribute() {
- int [] result = new int[24];
- int offset = 0;
- offset = zzUnpackAttribute(ZZ_ATTRIBUTE_PACKED_0, offset, result);
- return result;
- }
-
- private static int zzUnpackAttribute(String packed, int offset, int [] result) {
- int i = 0; /* index in packed string */
- int j = offset; /* index in unpacked array */
- int l = packed.length();
- while (i < l) {
- int count = packed.charAt(i++);
- int value = packed.charAt(i++);
- do result[j++] = value; while (--count > 0);
- }
- return j;
- }
-
- /** the input device */
- private java.io.Reader zzReader;
-
- /** the current state of the DFA */
- private int zzState;
-
- /** the current lexical state */
- private int zzLexicalState = YYINITIAL;
-
- /** this buffer contains the current text to be matched and is
- the source of the yytext() string */
- private char zzBuffer[] = new char[ZZ_BUFFERSIZE];
-
- /** the textposition at the last accepting state */
- private int zzMarkedPos;
-
- /** the current text position in the buffer */
- private int zzCurrentPos;
-
- /** startRead marks the beginning of the yytext() string in the buffer */
- private int zzStartRead;
-
- /** endRead marks the last character in the buffer, that has been read
- from input */
- private int zzEndRead;
-
- /** number of newlines encountered up to the start of the matched text */
- private int yyline;
-
- /** the number of characters up to the start of the matched text */
- private int yychar;
-
- /**
- * the number of characters from the last newline up to the start of the
- * matched text
- */
- private int yycolumn;
-
- /**
- * zzAtBOL == true <=> the scanner is currently at the beginning of a line
- */
- private boolean zzAtBOL = true;
-
- /** zzAtEOF == true <=> the scanner is at the EOF */
- private boolean zzAtEOF;
-
- /** denotes if the user-EOF-code has already been executed */
- private boolean zzEOFDone;
-
- /**
- * The number of occupied positions in zzBuffer beyond zzEndRead.
- * When a lead/high surrogate has been read from the input stream
- * into the final zzBuffer position, this will have a value of 1;
- * otherwise, it will have a value of 0.
- */
- private int zzFinalHighSurrogate = 0;
-
- /* user code: */
- /** Alphanumeric sequences */
- public static final int WORD_TYPE = StandardTokenizer.ALPHANUM;
-
- /** Numbers */
- public static final int NUMERIC_TYPE = StandardTokenizer.NUM;
-
- /**
- * Chars in class \p{Line_Break = Complex_Context} are from South East Asian
- * scripts (Thai, Lao, Myanmar, Khmer, etc.). Sequences of these are kept
- * together as a single token rather than broken up, because the logic
- * required to break them at word boundaries is too complex for UAX#29.
- * <p>
- * See Unicode Line Breaking Algorithm: http://www.unicode.org/reports/tr14/#SA
- */
- public static final int SOUTH_EAST_ASIAN_TYPE = StandardTokenizer.SOUTHEAST_ASIAN;
-
- public static final int IDEOGRAPHIC_TYPE = StandardTokenizer.IDEOGRAPHIC;
-
- public static final int HIRAGANA_TYPE = StandardTokenizer.HIRAGANA;
-
- public static final int KATAKANA_TYPE = StandardTokenizer.KATAKANA;
-
- public static final int HANGUL_TYPE = StandardTokenizer.HANGUL;
-
- public final int yychar()
- {
- return yychar;
- }
-
- /**
- * Fills CharTermAttribute with the current token text.
- */
- public final void getText(CharTermAttribute t) {
- t.copyBuffer(zzBuffer, zzStartRead, zzMarkedPos-zzStartRead);
- }
-
- /**
- * Sets the scanner buffer size in chars
- */
- public final void setBufferSize(int numChars) {
- ZZ_BUFFERSIZE = numChars;
- char[] newZzBuffer = new char[ZZ_BUFFERSIZE];
- System.arraycopy(zzBuffer, 0, newZzBuffer, 0, Math.min(zzBuffer.length, ZZ_BUFFERSIZE));
- zzBuffer = newZzBuffer;
- }
-
-
- /**
- * Creates a new scanner
- *
- * @param in the java.io.Reader to read input from.
- */
- public StandardTokenizerImpl(java.io.Reader in) {
- this.zzReader = in;
- }
-
-
- /**
- * Unpacks the compressed character translation table.
- *
- * @param packed the packed character translation table
- * @return the unpacked character translation table
- */
- private static char [] zzUnpackCMap(String packed) {
- char [] map = new char[0x110000];
- int i = 0; /* index in packed string */
- int j = 0; /* index in unpacked array */
- while (i < 2836) {
- int count = packed.charAt(i++);
- char value = packed.charAt(i++);
- do map[j++] = value; while (--count > 0);
- }
- return map;
- }
-
-
- /**
- * Refills the input buffer.
- *
- * @return <code>false</code>, iff there was new input.
- *
- * @exception java.io.IOException if any I/O-Error occurs
- */
- private boolean zzRefill() throws java.io.IOException {
-
- /* first: make room (if you can) */
- if (zzStartRead > 0) {
- zzEndRead += zzFinalHighSurrogate;
- zzFinalHighSurrogate = 0;
- System.arraycopy(zzBuffer, zzStartRead,
- zzBuffer, 0,
- zzEndRead-zzStartRead);
-
- /* translate stored positions */
- zzEndRead-= zzStartRead;
- zzCurrentPos-= zzStartRead;
- zzMarkedPos-= zzStartRead;
- zzStartRead = 0;
- }
-
-
- /* fill the buffer with new input */
- int requested = zzBuffer.length - zzEndRead - zzFinalHighSurrogate;
- int totalRead = 0;
- while (totalRead < requested) {
- int numRead = zzReader.read(zzBuffer, zzEndRead + totalRead, requested - totalRead);
- if (numRead == -1) {
- break;
- }
- totalRead += numRead;
- }
-
- if (totalRead > 0) {
- zzEndRead += totalRead;
- if (totalRead == requested) { /* possibly more input available */
- if (Character.isHighSurrogate(zzBuffer[zzEndRead - 1])) {
- --zzEndRead;
- zzFinalHighSurrogate = 1;
- if (totalRead == 1) { return true; }
- }
- }
- return false;
- }
-
- // totalRead = 0: End of stream
- return true;
- }
-
-
- /**
- * Closes the input stream.
- */
- public final void yyclose() throws java.io.IOException {
- zzAtEOF = true; /* indicate end of file */
- zzEndRead = zzStartRead; /* invalidate buffer */
-
- if (zzReader != null)
- zzReader.close();
- }
-
-
- /**
- * Resets the scanner to read from a new input stream.
- * Does not close the old reader.
- *
- * All internal variables are reset, the old input stream
- * <b>cannot</b> be reused (internal buffer is discarded and lost).
- * Lexical state is set to <tt>ZZ_INITIAL</tt>.
- *
- * Internal scan buffer is resized down to its initial length, if it has grown.
- *
- * @param reader the new input stream
- */
- public final void yyreset(java.io.Reader reader) {
- zzReader = reader;
- zzAtBOL = true;
- zzAtEOF = false;
- zzEOFDone = false;
- zzEndRead = zzStartRead = 0;
- zzCurrentPos = zzMarkedPos = 0;
- zzFinalHighSurrogate = 0;
- yyline = yychar = yycolumn = 0;
- zzLexicalState = YYINITIAL;
- if (zzBuffer.length > ZZ_BUFFERSIZE)
- zzBuffer = new char[ZZ_BUFFERSIZE];
- }
-
-
- /**
- * Returns the current lexical state.
- */
- public final int yystate() {
- return zzLexicalState;
- }
-
-
- /**
- * Enters a new lexical state
- *
- * @param newState the new lexical state
- */
- public final void yybegin(int newState) {
- zzLexicalState = newState;
- }
-
-
- /**
- * Returns the text matched by the current regular expression.
- */
- public final String yytext() {
- return new String( zzBuffer, zzStartRead, zzMarkedPos-zzStartRead );
- }
-
-
- /**
- * Returns the character at position <tt>pos</tt> from the
- * matched text.
- *
- * It is equivalent to yytext().charAt(pos), but faster
- *
- * @param pos the position of the character to fetch.
- * A value from 0 to yylength()-1.
- *
- * @return the character at position pos
- */
- public final char yycharat(int pos) {
- return zzBuffer[zzStartRead+pos];
- }
-
-
- /**
- * Returns the length of the matched text region.
- */
- public final int yylength() {
- return zzMarkedPos-zzStartRead;
- }
-
-
- /**
- * Reports an error that occured while scanning.
- *
- * In a wellformed scanner (no or only correct usage of
- * yypushback(int) and a match-all fallback rule) this method
- * will only be called with things that "Can't Possibly Happen".
- * If this method is called, something is seriously wrong
- * (e.g. a JFlex bug producing a faulty scanner etc.).
- *
- * Usual syntax/scanner level error handling should be done
- * in error fallback rules.
- *
- * @param errorCode the code of the errormessage to display
- */
- private void zzScanError(int errorCode) {
- String message;
- try {
- message = ZZ_ERROR_MSG[errorCode];
- }
- catch (ArrayIndexOutOfBoundsException e) {
- message = ZZ_ERROR_MSG[ZZ_UNKNOWN_ERROR];
- }
-
- throw new Error(message);
- }
-
-
- /**
- * Pushes the specified amount of characters back into the input stream.
- *
- * They will be read again by then next call of the scanning method
- *
- * @param number the number of characters to be read again.
- * This number must not be greater than yylength()!
- */
- public void yypushback(int number) {
- if ( number > yylength() )
- zzScanError(ZZ_PUSHBACK_2BIG);
-
- zzMarkedPos -= number;
- }
-
-
- /**
- * Resumes scanning until the next regular expression is matched,
- * the end of input is encountered or an I/O-Error occurs.
- *
- * @return the next token
- * @exception java.io.IOException if any I/O-Error occurs
- */
- public int getNextToken() throws java.io.IOException {
- int zzInput;
- int zzAction;
-
- // cached fields:
- int zzCurrentPosL;
- int zzMarkedPosL;
- int zzEndReadL = zzEndRead;
- char [] zzBufferL = zzBuffer;
- char [] zzCMapL = ZZ_CMAP;
-
- int [] zzTransL = ZZ_TRANS;
- int [] zzRowMapL = ZZ_ROWMAP;
- int [] zzAttrL = ZZ_ATTRIBUTE;
-
- while (true) {
- zzMarkedPosL = zzMarkedPos;
-
- yychar+= zzMarkedPosL-zzStartRead;
-
- zzAction = -1;
-
- zzCurrentPosL = zzCurrentPos = zzStartRead = zzMarkedPosL;
-
- zzState = ZZ_LEXSTATE[zzLexicalState];
-
- // set up zzAction for empty match case:
- int zzAttributes = zzAttrL[zzState];
- if ( (zzAttributes & 1) == 1 ) {
- zzAction = zzState;
- }
-
-
- zzForAction: {
- while (true) {
-
- if (zzCurrentPosL < zzEndReadL) {
- zzInput = Character.codePointAt(zzBufferL, zzCurrentPosL, zzEndReadL);
- zzCurrentPosL += Character.charCount(zzInput);
- }
- else if (zzAtEOF) {
- zzInput = YYEOF;
- break zzForAction;
- }
- else {
- // store back cached positions
- zzCurrentPos = zzCurrentPosL;
- zzMarkedPos = zzMarkedPosL;
- boolean eof = zzRefill();
- // get translated positions and possibly new buffer
- zzCurrentPosL = zzCurrentPos;
- zzMarkedPosL = zzMarkedPos;
- zzBufferL = zzBuffer;
- zzEndReadL = zzEndRead;
- if (eof) {
- zzInput = YYEOF;
- break zzForAction;
- }
- else {
- zzInput = Character.codePointAt(zzBufferL, zzCurrentPosL, zzEndReadL);
- zzCurrentPosL += Character.charCount(zzInput);
- }
- }
- int zzNext = zzTransL[ zzRowMapL[zzState] + zzCMapL[zzInput] ];
- if (zzNext == -1) break zzForAction;
- zzState = zzNext;
-
- zzAttributes = zzAttrL[zzState];
- if ( (zzAttributes & 1) == 1 ) {
- zzAction = zzState;
- zzMarkedPosL = zzCurrentPosL;
- if ( (zzAttributes & 8) == 8 ) break zzForAction;
- }
-
- }
- }
-
- // store back cached position
- zzMarkedPos = zzMarkedPosL;
-
- switch (zzAction < 0 ? zzAction : ZZ_ACTION[zzAction]) {
- case 1:
- { /* Break so we don't hit fall-through warning: */ break; /* Not numeric, word, ideographic, hiragana, or SE Asian -- ignore it. */
- }
- case 9: break;
- case 2:
- { return WORD_TYPE;
- }
- case 10: break;
- case 3:
- { return HANGUL_TYPE;
- }
- case 11: break;
- case 4:
- { return NUMERIC_TYPE;
- }
- case 12: break;
- case 5:
- { return KATAKANA_TYPE;
- }
- case 13: break;
- case 6:
- { return IDEOGRAPHIC_TYPE;
- }
- case 14: break;
- case 7:
- { return HIRAGANA_TYPE;
- }
- case 15: break;
- case 8:
- { return SOUTH_EAST_ASIAN_TYPE;
- }
- case 16: break;
- default:
- if (zzInput == YYEOF && zzStartRead == zzCurrentPos) {
- zzAtEOF = true;
- {
- return YYEOF;
- }
- }
- else {
- zzScanError(ZZ_NO_MATCH);
- }
- }
- }
- }
-
-
-}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.jflex
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.jflex b/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.jflex
deleted file mode 100644
index 34f4ead..0000000
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.jflex
+++ /dev/null
@@ -1,201 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.lucene.analysis.standard;
-
-import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
-
-/**
- * This class implements Word Break rules from the Unicode Text Segmentation
- * algorithm, as specified in
- * <a href="http://unicode.org/reports/tr29/">Unicode Standard Annex #29</a>.
- * <p>
- * Tokens produced are of the following types:
- * <ul>
- * <li><ALPHANUM>: A sequence of alphabetic and numeric characters</li>
- * <li><NUM>: A number</li>
- * <li><SOUTHEAST_ASIAN>: A sequence of characters from South and Southeast
- * Asian languages, including Thai, Lao, Myanmar, and Khmer</li>
- * <li><IDEOGRAPHIC>: A single CJKV ideographic character</li>
- * <li><HIRAGANA>: A single hiragana character</li>
- * <li><KATAKANA>: A sequence of katakana characters</li>
- * <li><HANGUL>: A sequence of Hangul characters</li>
- * </ul>
- */
-@SuppressWarnings("fallthrough")
-%%
-
-%unicode 6.3
-%integer
-%final
-%public
-%class StandardTokenizerImpl
-%function getNextToken
-%char
-%buffer 255
-
-// UAX#29 WB4. X (Extend | Format)* --> X
-//
-HangulEx = [\p{Script:Hangul}&&[\p{WB:ALetter}\p{WB:Hebrew_Letter}]] [\p{WB:Format}\p{WB:Extend}]*
-HebrewOrALetterEx = [\p{WB:HebrewLetter}\p{WB:ALetter}] [\p{WB:Format}\p{WB:Extend}]*
-NumericEx = [\p{WB:Numeric}[\p{Blk:HalfAndFullForms}&&\p{Nd}]] [\p{WB:Format}\p{WB:Extend}]*
-KatakanaEx = \p{WB:Katakana} [\p{WB:Format}\p{WB:Extend}]*
-MidLetterEx = [\p{WB:MidLetter}\p{WB:MidNumLet}\p{WB:SingleQuote}] [\p{WB:Format}\p{WB:Extend}]*
-MidNumericEx = [\p{WB:MidNum}\p{WB:MidNumLet}\p{WB:SingleQuote}] [\p{WB:Format}\p{WB:Extend}]*
-ExtendNumLetEx = \p{WB:ExtendNumLet} [\p{WB:Format}\p{WB:Extend}]*
-HanEx = \p{Script:Han} [\p{WB:Format}\p{WB:Extend}]*
-HiraganaEx = \p{Script:Hiragana} [\p{WB:Format}\p{WB:Extend}]*
-SingleQuoteEx = \p{WB:Single_Quote} [\p{WB:Format}\p{WB:Extend}]*
-DoubleQuoteEx = \p{WB:Double_Quote} [\p{WB:Format}\p{WB:Extend}]*
-HebrewLetterEx = \p{WB:Hebrew_Letter} [\p{WB:Format}\p{WB:Extend}]*
-RegionalIndicatorEx = \p{WB:RegionalIndicator} [\p{WB:Format}\p{WB:Extend}]*
-ComplexContextEx = \p{LB:Complex_Context} [\p{WB:Format}\p{WB:Extend}]*
-
-%{
- /** Alphanumeric sequences */
- public static final int WORD_TYPE = StandardTokenizer.ALPHANUM;
-
- /** Numbers */
- public static final int NUMERIC_TYPE = StandardTokenizer.NUM;
-
- /**
- * Chars in class \p{Line_Break = Complex_Context} are from South East Asian
- * scripts (Thai, Lao, Myanmar, Khmer, etc.). Sequences of these are kept
- * together as a single token rather than broken up, because the logic
- * required to break them at word boundaries is too complex for UAX#29.
- * <p>
- * See Unicode Line Breaking Algorithm: http://www.unicode.org/reports/tr14/#SA
- */
- public static final int SOUTH_EAST_ASIAN_TYPE = StandardTokenizer.SOUTHEAST_ASIAN;
-
- public static final int IDEOGRAPHIC_TYPE = StandardTokenizer.IDEOGRAPHIC;
-
- public static final int HIRAGANA_TYPE = StandardTokenizer.HIRAGANA;
-
- public static final int KATAKANA_TYPE = StandardTokenizer.KATAKANA;
-
- public static final int HANGUL_TYPE = StandardTokenizer.HANGUL;
-
- public final int yychar()
- {
- return yychar;
- }
-
- /**
- * Fills CharTermAttribute with the current token text.
- */
- public final void getText(CharTermAttribute t) {
- t.copyBuffer(zzBuffer, zzStartRead, zzMarkedPos-zzStartRead);
- }
-
- /**
- * Sets the scanner buffer size in chars
- */
- public final void setBufferSize(int numChars) {
- ZZ_BUFFERSIZE = numChars;
- char[] newZzBuffer = new char[ZZ_BUFFERSIZE];
- System.arraycopy(zzBuffer, 0, newZzBuffer, 0, Math.min(zzBuffer.length, ZZ_BUFFERSIZE));
- zzBuffer = newZzBuffer;
- }
-%}
-
-%%
-
-// UAX#29 WB1. sot ÷
-// WB2. ÷ eot
-//
-<<EOF>> { return YYEOF; }
-
-// UAX#29 WB8. Numeric × Numeric
-// WB11. Numeric (MidNum | MidNumLet | Single_Quote) × Numeric
-// WB12. Numeric × (MidNum | MidNumLet | Single_Quote) Numeric
-// WB13a. (ALetter | Hebrew_Letter | Numeric | Katakana | ExtendNumLet) × ExtendNumLet
-// WB13b. ExtendNumLet × (ALetter | Hebrew_Letter | Numeric | Katakana)
-//
-{ExtendNumLetEx}* {NumericEx} ( ( {ExtendNumLetEx}* | {MidNumericEx} ) {NumericEx} )* {ExtendNumLetEx}*
- { return NUMERIC_TYPE; }
-
-// subset of the below for typing purposes only!
-{HangulEx}+
- { return HANGUL_TYPE; }
-
-{KatakanaEx}+
- { return KATAKANA_TYPE; }
-
-// UAX#29 WB5. (ALetter | Hebrew_Letter) × (ALetter | Hebrew_Letter)
-// WB6. (ALetter | Hebrew_Letter) × (MidLetter | MidNumLet | Single_Quote) (ALetter | Hebrew_Letter)
-// WB7. (ALetter | Hebrew_Letter) (MidLetter | MidNumLet | Single_Quote) × (ALetter | Hebrew_Letter)
-// WB7a. Hebrew_Letter × Single_Quote
-// WB7b. Hebrew_Letter × Double_Quote Hebrew_Letter
-// WB7c. Hebrew_Letter Double_Quote × Hebrew_Letter
-// WB9. (ALetter | Hebrew_Letter) × Numeric
-// WB10. Numeric × (ALetter | Hebrew_Letter)
-// WB13. Katakana × Katakana
-// WB13a. (ALetter | Hebrew_Letter | Numeric | Katakana | ExtendNumLet) × ExtendNumLet
-// WB13b. ExtendNumLet × (ALetter | Hebrew_Letter | Numeric | Katakana)
-//
-{ExtendNumLetEx}* ( {KatakanaEx} ( {ExtendNumLetEx}* {KatakanaEx} )*
- | ( {HebrewLetterEx} ( {SingleQuoteEx} | {DoubleQuoteEx} {HebrewLetterEx} )
- | {NumericEx} ( ( {ExtendNumLetEx}* | {MidNumericEx} ) {NumericEx} )*
- | {HebrewOrALetterEx} ( ( {ExtendNumLetEx}* | {MidLetterEx} ) {HebrewOrALetterEx} )*
- )+
- )
-({ExtendNumLetEx}+ ( {KatakanaEx} ( {ExtendNumLetEx}* {KatakanaEx} )*
- | ( {HebrewLetterEx} ( {SingleQuoteEx} | {DoubleQuoteEx} {HebrewLetterEx} )
- | {NumericEx} ( ( {ExtendNumLetEx}* | {MidNumericEx} ) {NumericEx} )*
- | {HebrewOrALetterEx} ( ( {ExtendNumLetEx}* | {MidLetterEx} ) {HebrewOrALetterEx} )*
- )+
- )
-)*
-{ExtendNumLetEx}*
- { return WORD_TYPE; }
-
-
-// From UAX #29:
-//
-// [C]haracters with the Line_Break property values of Contingent_Break (CB),
-// Complex_Context (SA/South East Asian), and XX (Unknown) are assigned word
-// boundary property values based on criteria outside of the scope of this
-// annex. That means that satisfactory treatment of languages like Chinese
-// or Thai requires special handling.
-//
-// In Unicode 6.3, only one character has the \p{Line_Break = Contingent_Break}
-// property: U+FFFC ( \ufffc ) OBJECT REPLACEMENT CHARACTER.
-//
-// In the ICU implementation of UAX#29, \p{Line_Break = Complex_Context}
-// character sequences (from South East Asian scripts like Thai, Myanmar, Khmer,
-// Lao, etc.) are kept together. This grammar does the same below.
-//
-// See also the Unicode Line Breaking Algorithm:
-//
-// http://www.unicode.org/reports/tr14/#SA
-//
-{ComplexContextEx}+ { return SOUTH_EAST_ASIAN_TYPE; }
-
-// UAX#29 WB14. Any ÷ Any
-//
-{HanEx} { return IDEOGRAPHIC_TYPE; }
-{HiraganaEx} { return HIRAGANA_TYPE; }
-
-
-// UAX#29 WB3. CR × LF
-// WB3a. (Newline | CR | LF) ÷
-// WB3b. ÷ (Newline | CR | LF)
-// WB13c. Regional_Indicator × Regional_Indicator
-// WB14. Any ÷ Any
-//
-{RegionalIndicatorEx} {RegionalIndicatorEx}+ | [^]
- { /* Break so we don't hit fall-through warning: */ break; /* Not numeric, word, ideographic, hiragana, or SE Asian -- ignore it. */ }
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailAnalyzer.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailAnalyzer.java
index 1fc2d7c..9994884 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailAnalyzer.java
@@ -20,18 +20,18 @@ package org.apache.lucene.analysis.standard;
import java.io.IOException;
import java.io.Reader;
+import org.apache.lucene.analysis.CharArraySet;
+import org.apache.lucene.analysis.LowerCaseFilter;
+import org.apache.lucene.analysis.StopFilter;
+import org.apache.lucene.analysis.StopwordAnalyzerBase;
import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.core.LowerCaseFilter;
import org.apache.lucene.analysis.core.StopAnalyzer;
-import org.apache.lucene.analysis.core.StopFilter;
-import org.apache.lucene.analysis.util.CharArraySet;
-import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
/**
* Filters {@link org.apache.lucene.analysis.standard.UAX29URLEmailTokenizer}
* with {@link org.apache.lucene.analysis.standard.StandardFilter},
- * {@link org.apache.lucene.analysis.core.LowerCaseFilter} and
- * {@link org.apache.lucene.analysis.core.StopFilter}, using a list of
+ * {@link org.apache.lucene.analysis.LowerCaseFilter} and
+ * {@link org.apache.lucene.analysis.StopFilter}, using a list of
* English stop words.
*/
public final class UAX29URLEmailAnalyzer extends StopwordAnalyzerBase {
@@ -59,7 +59,7 @@ public final class UAX29URLEmailAnalyzer extends StopwordAnalyzerBase {
}
/** Builds an analyzer with the stop words from the given reader.
- * @see org.apache.lucene.analysis.util.WordlistLoader#getWordSet(java.io.Reader)
+ * @see org.apache.lucene.analysis.WordlistLoader#getWordSet(java.io.Reader)
* @param stopwords Reader to read stop words from */
public UAX29URLEmailAnalyzer(Reader stopwords) throws IOException {
this(loadStopwordSet(stopwords));
[02/12] lucene-solr:branch_6x: LUCENE-7318: graduate StandardAnalyzer
and make it the default for IndexWriterConfig
Posted by mi...@apache.org.
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/test-framework/src/java/org/apache/lucene/analysis/standard/WordBreakTestUnicode_6_3_0.java
----------------------------------------------------------------------
diff --git a/lucene/test-framework/src/java/org/apache/lucene/analysis/standard/WordBreakTestUnicode_6_3_0.java b/lucene/test-framework/src/java/org/apache/lucene/analysis/standard/WordBreakTestUnicode_6_3_0.java
new file mode 100644
index 0000000..4a3731e
--- /dev/null
+++ b/lucene/test-framework/src/java/org/apache/lucene/analysis/standard/WordBreakTestUnicode_6_3_0.java
@@ -0,0 +1,5537 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.analysis.standard;
+
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.junit.Ignore;
+
+/**
+ * This class was automatically generated by generateJavaUnicodeWordBreakTest.pl
+ * from: http://www.unicode.org/Public/6.3.0/ucd/auxiliary/WordBreakTest.txt
+ *
+ * WordBreakTest.txt indicates the points in the provided character sequences
+ * at which conforming implementations must and must not break words. This
+ * class tests for expected token extraction from each of the test sequences
+ * in WordBreakTest.txt, where the expected tokens are those character
+ * sequences bounded by word breaks and containing at least one character
+ * from one of the following character sets:
+ *
+ * \p{Script = Han} (From http://www.unicode.org/Public/6.3.0/ucd/Scripts.txt)
+ * \p{Script = Hiragana}
+ * \p{LineBreak = Complex_Context} (From http://www.unicode.org/Public/6.3.0/ucd/LineBreak.txt)
+ * \p{WordBreak = ALetter} (From http://www.unicode.org/Public/6.3.0/ucd/auxiliary/WordBreakProperty.txt)
+ * \p{WordBreak = Hebrew_Letter}
+ * \p{WordBreak = Katakana}
+ * \p{WordBreak = Numeric} (Excludes full-width Arabic digits)
+ * [\uFF10-\uFF19] (Full-width Arabic digits)
+ */
+@Ignore
+public class WordBreakTestUnicode_6_3_0 extends BaseTokenStreamTestCase {
+
+ public void test(Analyzer analyzer) throws Exception {
+ // ÷ 0001 ÷ 0001 ÷ # ÷ [0.2] <START OF HEADING> (Other) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3]
+ assertAnalyzesTo(analyzer, "\u0001\u0001",
+ new String[] { });
+
+ // ÷ 0001 × 0308 ÷ 0001 ÷ # ÷ [0.2] <START OF HEADING> (Other) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3]
+ assertAnalyzesTo(analyzer, "\u0001\u0308\u0001",
+ new String[] { });
+
+ // � 0001 � 000D � # � [0.2] <START OF HEADING> (Other) � [3.2] <CARRIAGE RETURN (CR)> (CR) � [0.3]
+ assertAnalyzesTo(analyzer, "\u0001\r",
+ new String[] { });
+
+ // � 0001 � 0308 � 000D � # � [0.2] <START OF HEADING> (Other) � [4.0] COMBINING DIAERESIS (Extend_FE) � [3.2] <CARRIAGE RETURN (CR)> (CR) � [0.3]
+ assertAnalyzesTo(analyzer, "\u0001\u0308\r",
+ new String[] { });
+
+ // � 0001 � 000A � # � [0.2] <START OF HEADING> (Other) � [3.2] <LINE FEED (LF)> (LF) � [0.3]
+ assertAnalyzesTo(analyzer, "\u0001\n",
+ new String[] { });
+
+ // � 0001 � 0308 � 000A � # � [0.2] <START OF HEADING> (Other) � [4.0] COMBINING DIAERESIS (Extend_FE) � [3.2] <LINE FEED (LF)> (LF) � [0.3]
+ assertAnalyzesTo(analyzer, "\u0001\u0308\n",
+ new String[] { });
+
+ // � 0001 � 000B � # � [0.2] <START OF HEADING> (Other) � [3.2] <LINE TABULATION> (Newline) � [0.3]
+ assertAnalyzesTo(analyzer, "\u0001\u000B",
+ new String[] { });
+
+ // � 0001 � 0308 � 000B � # � [0.2] <START OF HEADING> (Other) � [4.0] COMBINING DIAERESIS (Extend_FE) � [3.2] <LINE TABULATION> (Newline) � [0.3]
+ assertAnalyzesTo(analyzer, "\u0001\u0308\u000B",
+ new String[] { });
+
+ // � 0001 � 3031 � # � [0.2] <START OF HEADING> (Other) � [999.0] VERTICAL KANA REPEAT MARK (Katakana) � [0.3]
+ assertAnalyzesTo(analyzer, "\u0001\u3031",
+ new String[] { "\u3031" });
+
+ // � 0001 � 0308 � 3031 � # � [0.2] <START OF HEADING> (Other) � [4.0] COMBINING DIAERESIS (Extend_FE) � [999.0] VERTICAL KANA REPEAT MARK (Katakana) � [0.3]
+ assertAnalyzesTo(analyzer, "\u0001\u0308\u3031",
+ new String[] { "\u3031" });
+
+ // � 0001 � 0041 � # � [0.2] <START OF HEADING> (Other) � [999.0] LATIN CAPITAL LETTER A (ALetter) � [0.3]
+ assertAnalyzesTo(analyzer, "\u0001\u0041",
+ new String[] { "\u0041" });
+
+ // � 0001 � 0308 � 0041 � # � [0.2] <START OF HEADING> (Other) � [4.0] COMBINING DIAERESIS (Extend_FE) � [999.0] LATIN CAPITAL LETTER A (ALetter) � [0.3]
+ assertAnalyzesTo(analyzer, "\u0001\u0308\u0041",
+ new String[] { "\u0041" });
+
+ // � 0001 � 003A � # � [0.2] <START OF HEADING> (Other) � [999.0] COLON (MidLetter) � [0.3]
+ assertAnalyzesTo(analyzer, "\u0001\u003A",
+ new String[] { });
+
+ // � 0001 � 0308 � 003A � # � [0.2] <START OF HEADING> (Other) � [4.0] COMBINING DIAERESIS (Extend_FE) � [999.0] COLON (MidLetter) � [0.3]
+ assertAnalyzesTo(analyzer, "\u0001\u0308\u003A",
+ new String[] { });
+
+ // � 0001 � 002C � # � [0.2] <START OF HEADING> (Other) � [999.0] COMMA (MidNum) � [0.3]
+ assertAnalyzesTo(analyzer, "\u0001\u002C",
+ new String[] { });
+
+ // � 0001 � 0308 � 002C � # � [0.2] <START OF HEADING> (Other) � [4.0] COMBINING DIAERESIS (Extend_FE) � [999.0] COMMA (MidNum) � [0.3]
+ assertAnalyzesTo(analyzer, "\u0001\u0308\u002C",
+ new String[] { });
+
+ // � 0001 � 002E � # � [0.2] <START OF HEADING> (Other) � [999.0] FULL STOP (MidNumLet) � [0.3]
+ assertAnalyzesTo(analyzer, "\u0001\u002E",
+ new String[] { });
+
+ // � 0001 � 0308 � 002E � # � [0.2] <START OF HEADING> (Other) � [4.0] COMBINING DIAERESIS (Extend_FE) � [999.0] FULL STOP (MidNumLet) � [0.3]
+ assertAnalyzesTo(analyzer, "\u0001\u0308\u002E",
+ new String[] { });
+
+ // � 0001 � 0030 � # � [0.2] <START OF HEADING> (Other) � [999.0] DIGIT ZERO (Numeric) � [0.3]
+ assertAnalyzesTo(analyzer, "\u0001\u0030",
+ new String[] { "\u0030" });
+
+ // � 0001 � 0308 � 0030 � # � [0.2] <START OF HEADING> (Other) � [4.0] COMBINING DIAERESIS (Extend_FE) � [999.0] DIGIT ZERO (Numeric) � [0.3]
+ assertAnalyzesTo(analyzer, "\u0001\u0308\u0030",
+ new String[] { "\u0030" });
+
+ // � 0001 � 005F � # � [0.2] <START OF HEADING> (Other) � [999.0] LOW LINE (ExtendNumLet) � [0.3]
+ assertAnalyzesTo(analyzer, "\u0001\u005F",
+ new String[] { });
+
+ // � 0001 � 0308 � 005F � # � [0.2] <START OF HEADING> (Other) � [4.0] COMBINING DIAERESIS (Extend_FE) � [999.0] LOW LINE (ExtendNumLet) � [0.3]
+ assertAnalyzesTo(analyzer, "\u0001\u0308\u005F",
+ new String[] { });
+
+ // � 0001 � 1F1E6 � # � [0.2] <START OF HEADING> (Other) � [999.0] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) � [0.3]
+ assertAnalyzesTo(analyzer, "\u0001\uD83C\uDDE6",
+ new String[] { });
+
+ // � 0001 � 0308 � 1F1E6 � # � [0.2] <START OF HEADING> (Other) � [4.0] COMBINING DIAERESIS (Extend_FE) � [999.0] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) � [0.3]
+ assertAnalyzesTo(analyzer, "\u0001\u0308\uD83C\uDDE6",
+ new String[] { });
+
+ // � 0001 � 05D0 � # � [0.2] <START OF HEADING> (Other) � [999.0] HEBREW LETTER ALEF (Hebrew_Letter) � [0.3]
+ assertAnalyzesTo(analyzer, "\u0001\u05D0",
+ new String[] { "\u05D0" });
+
+ // � 0001 � 0308 � 05D0 � # � [0.2] <START OF HEADING> (Other) � [4.0] COMBINING DIAERESIS (Extend_FE) � [999.0] HEBREW LETTER ALEF (Hebrew_Letter) � [0.3]
+ assertAnalyzesTo(analyzer, "\u0001\u0308\u05D0",
+ new String[] { "\u05D0" });
+
+ // � 0001 � 0022 � # � [0.2] <START OF HEADING> (Other) � [999.0] QUOTATION MARK (Double_Quote) � [0.3]
+ assertAnalyzesTo(analyzer, "\u0001\"",
+ new String[] { });
+
+ // � 0001 � 0308 � 0022 � # � [0.2] <START OF HEADING> (Other) � [4.0] COMBINING DIAERESIS (Extend_FE) � [999.0] QUOTATION MARK (Double_Quote) � [0.3]
+ assertAnalyzesTo(analyzer, "\u0001\u0308\"",
+ new String[] { });
+
+ // � 0001 � 0027 � # � [0.2] <START OF HEADING> (Other) � [999.0] APOSTROPHE (Single_Quote) � [0.3]
+ assertAnalyzesTo(analyzer, "\u0001\u0027",
+ new String[] { });
+
+ // � 0001 � 0308 � 0027 � # � [0.2] <START OF HEADING> (Other) � [4.0] COMBINING DIAERESIS (Extend_FE) � [999.0] APOSTROPHE (Single_Quote) � [0.3]
+ assertAnalyzesTo(analyzer, "\u0001\u0308\u0027",
+ new String[] { });
+
+ // � 0001 � 00AD � # � [0.2] <START OF HEADING> (Other) � [4.0] SOFT HYPHEN (Format_FE) � [0.3]
+ assertAnalyzesTo(analyzer, "\u0001\u00AD",
+ new String[] { });
+
+ // � 0001 � 0308 � 00AD � # � [0.2] <START OF HEADING> (Other) � [4.0] COMBINING DIAERESIS (Extend_FE) � [4.0] SOFT HYPHEN (Format_FE) � [0.3]
+ assertAnalyzesTo(analyzer, "\u0001\u0308\u00AD",
+ new String[] { });
+
+ // � 0001 � 0300 � # � [0.2] <START OF HEADING> (Other) � [4.0] COMBINING GRAVE ACCENT (Extend_FE) � [0.3]
+ assertAnalyzesTo(analyzer, "\u0001\u0300",
+ new String[] { });
+
+ // � 0001 � 0308 � 0300 � # � [0.2] <START OF HEADING> (Other) � [4.0] COMBINING DIAERESIS (Extend_FE) � [4.0] COMBINING GRAVE ACCENT (Extend_FE) � [0.3]
+ assertAnalyzesTo(analyzer, "\u0001\u0308\u0300",
+ new String[] { });
+
+ // � 0001 � 0061 � 2060 � # � [0.2] <START OF HEADING> (Other) � [999.0] LATIN SMALL LETTER A (ALetter) � [4.0] WORD JOINER (Format_FE) � [0.3]
+ assertAnalyzesTo(analyzer, "\u0001\u0061\u2060",
+ new String[] { "\u0061\u2060" });
+
+ // � 0001 � 0308 � 0061 � 2060 � # � [0.2] <START OF HEADING> (Other) � [4.0] COMBINING DIAERESIS (Extend_FE) � [999.0] LATIN SMALL LETTER A (ALetter) � [4.0] WORD JOINER (Format_FE) � [0.3]
+ assertAnalyzesTo(analyzer, "\u0001\u0308\u0061\u2060",
+ new String[] { "\u0061\u2060" });
+
+ // � 0001 � 0061 � 003A � # � [0.2] <START OF HEADING> (Other) � [999.0] LATIN SMALL LETTER A (ALetter) � [999.0] COLON (MidLetter) � [0.3]
+ assertAnalyzesTo(analyzer, "\u0001\u0061\u003A",
+ new String[] { "\u0061" });
+
+ // � 0001 � 0308 � 0061 � 003A � # � [0.2] <START OF HEADING> (Other) � [4.0] COMBINING DIAERESIS (Extend_FE) � [999.0] LATIN SMALL LETTER A (ALetter) � [999.0] COLON (MidLetter) � [0.3]
+ assertAnalyzesTo(analyzer, "\u0001\u0308\u0061\u003A",
+ new String[] { "\u0061" });
+
+ // � 0001 � 0061 � 0027 � # � [0.2] <START OF HEADING> (Other) � [999.0] LATIN SMALL LETTER A (ALetter) � [999.0] APOSTROPHE (Single_Quote) � [0.3]
+ assertAnalyzesTo(analyzer, "\u0001\u0061\u0027",
+ new String[] { "\u0061" });
+
+ // � 0001 � 0308 � 0061 � 0027 � # � [0.2] <START OF HEADING> (Other) � [4.0] COMBINING DIAERESIS (Extend_FE) � [999.0] LATIN SMALL LETTER A (ALetter) � [999.0] APOSTROPHE (Single_Quote) � [0.3]
+ assertAnalyzesTo(analyzer, "\u0001\u0308\u0061\u0027",
+ new String[] { "\u0061" });
+
+ // � 0001 � 0061 � 0027 � 2060 � # � [0.2] <START OF HEADING> (Other) � [999.0] LATIN SMALL LETTER A (ALetter) � [999.0] APOSTROPHE (Single_Quote) � [4.0] WORD JOINER (Format_FE) � [0.3]
+ assertAnalyzesTo(analyzer, "\u0001\u0061\u0027\u2060",
+ new String[] { "\u0061" });
+
+ // � 0001 � 0308 � 0061 � 0027 � 2060 � # � [0.2] <START OF HEADING> (Other) � [4.0] COMBINING DIAERESIS (Extend_FE) � [999.0] LATIN SMALL LETTER A (ALetter) � [999.0] APOSTROPHE (Single_Quote) � [4.0] WORD JOINER (Format_FE) � [0.3]
+ assertAnalyzesTo(analyzer, "\u0001\u0308\u0061\u0027\u2060",
+ new String[] { "\u0061" });
+
+ // � 0001 � 0061 � 002C � # � [0.2] <START OF HEADING> (Other) � [999.0] LATIN SMALL LETTER A (ALetter) � [999.0] COMMA (MidNum) � [0.3]
+ assertAnalyzesTo(analyzer, "\u0001\u0061\u002C",
+ new String[] { "\u0061" });
+
+ // � 0001 � 0308 � 0061 � 002C � # � [0.2] <START OF HEADING> (Other) � [4.0] COMBINING DIAERESIS (Extend_FE) � [999.0] LATIN SMALL LETTER A (ALetter) � [999.0] COMMA (MidNum) � [0.3]
+ assertAnalyzesTo(analyzer, "\u0001\u0308\u0061\u002C",
+ new String[] { "\u0061" });
+
+ // � 0001 � 0031 � 003A � # � [0.2] <START OF HEADING> (Other) � [999.0] DIGIT ONE (Numeric) � [999.0] COLON (MidLetter) � [0.3]
+ assertAnalyzesTo(analyzer, "\u0001\u0031\u003A",
+ new String[] { "\u0031" });
+
+ // � 0001 � 0308 � 0031 � 003A � # � [0.2] <START OF HEADING> (Other) � [4.0] COMBINING DIAERESIS (Extend_FE) � [999.0] DIGIT ONE (Numeric) � [999.0] COLON (MidLetter) � [0.3]
+ assertAnalyzesTo(analyzer, "\u0001\u0308\u0031\u003A",
+ new String[] { "\u0031" });
+
+ // � 0001 � 0031 � 0027 � # � [0.2] <START OF HEADING> (Other) � [999.0] DIGIT ONE (Numeric) � [999.0] APOSTROPHE (Single_Quote) � [0.3]
+ assertAnalyzesTo(analyzer, "\u0001\u0031\u0027",
+ new String[] { "\u0031" });
+
+ // � 0001 � 0308 � 0031 � 0027 � # � [0.2] <START OF HEADING> (Other) � [4.0] COMBINING DIAERESIS (Extend_FE) � [999.0] DIGIT ONE (Numeric) � [999.0] APOSTROPHE (Single_Quote) � [0.3]
+ assertAnalyzesTo(analyzer, "\u0001\u0308\u0031\u0027",
+ new String[] { "\u0031" });
+
+ // � 0001 � 0031 � 002C � # � [0.2] <START OF HEADING> (Other) � [999.0] DIGIT ONE (Numeric) � [999.0] COMMA (MidNum) � [0.3]
+ assertAnalyzesTo(analyzer, "\u0001\u0031\u002C",
+ new String[] { "\u0031" });
+
+ // � 0001 � 0308 � 0031 � 002C � # � [0.2] <START OF HEADING> (Other) � [4.0] COMBINING DIAERESIS (Extend_FE) � [999.0] DIGIT ONE (Numeric) � [999.0] COMMA (MidNum) � [0.3]
+ assertAnalyzesTo(analyzer, "\u0001\u0308\u0031\u002C",
+ new String[] { "\u0031" });
+
+ // � 0001 � 0031 � 002E � 2060 � # � [0.2] <START OF HEADING> (Other) � [999.0] DIGIT ONE (Numeric) � [999.0] FULL STOP (MidNumLet) � [4.0] WORD JOINER (Format_FE) � [0.3]
+ assertAnalyzesTo(analyzer, "\u0001\u0031\u002E\u2060",
+ new String[] { "\u0031" });
+
+ // � 0001 � 0308 � 0031 � 002E � 2060 � # � [0.2] <START OF HEADING> (Other) � [4.0] COMBINING DIAERESIS (Extend_FE) � [999.0] DIGIT ONE (Numeric) � [999.0] FULL STOP (MidNumLet) � [4.0] WORD JOINER (Format_FE) � [0.3]
+ assertAnalyzesTo(analyzer, "\u0001\u0308\u0031\u002E\u2060",
+ new String[] { "\u0031" });
+
+ // � 000D � 0001 � # � [0.2] <CARRIAGE RETURN (CR)> (CR) � [3.1] <START OF HEADING> (Other) � [0.3]
+ assertAnalyzesTo(analyzer, "\r\u0001",
+ new String[] { });
+
+ // � 000D � 0308 � 0001 � # � [0.2] <CARRIAGE RETURN (CR)> (CR) � [3.1] COMBINING DIAERESIS (Extend_FE) � [999.0] <START OF HEADING> (Other) � [0.3]
+ assertAnalyzesTo(analyzer, "\r\u0308\u0001",
+ new String[] { });
+
+ // � 000D � 000D � # � [0.2] <CARRIAGE RETURN (CR)> (CR) � [3.1] <CARRIAGE RETURN (CR)> (CR) � [0.3]
+ assertAnalyzesTo(analyzer, "\r\r",
+ new String[] { });
+
+ // � 000D � 0308 � 000D � # � [0.2] <CARRIAGE RETURN (CR)> (CR) � [3.1] COMBINING DIAERESIS (Extend_FE) � [3.2] <CARRIAGE RETURN (CR)> (CR) � [0.3]
+ assertAnalyzesTo(analyzer, "\r\u0308\r",
+ new String[] { });
+
+ // ÷ 000D × 000A ÷ # ÷ [0.2] <CARRIAGE RETURN (CR)> (CR) × [3.0] <LINE FEED (LF)> (LF) ÷ [0.3]
+ assertAnalyzesTo(analyzer, "\r\n",
+ new String[] { });
+
+ // � 000D � 0308 � 000A � # � [0.2] <CARRIAGE RETURN (CR)> (CR) � [3.1] COMBINING DIAERESIS (Extend_FE) � [3.2] <LINE FEED (LF)> (LF) � [0.3]
+ assertAnalyzesTo(analyzer, "\r\u0308\n",
+ new String[] { });
+
+ // � 000D � 000B � # � [0.2] <CARRIAGE RETURN (CR)> (CR) � [3.1] <LINE TABULATION> (Newline) � [0.3]
+ assertAnalyzesTo(analyzer, "\r\u000B",
+ new String[] { });
+
+ // � 000D � 0308 � 000B � # � [0.2] <CARRIAGE RETURN (CR)> (CR) � [3.1] COMBINING DIAERESIS (Extend_FE) � [3.2] <LINE TABULATION> (Newline) � [0.3]
+ assertAnalyzesTo(analyzer, "\r\u0308\u000B",
+ new String[] { });
+
+ // � 000D � 3031 � # � [0.2] <CARRIAGE RETURN (CR)> (CR) � [3.1] VERTICAL KANA REPEAT MARK (Katakana) � [0.3]
+ assertAnalyzesTo(analyzer, "\r\u3031",
+ new String[] { "\u3031" });
+
+ // � 000D � 0308 � 3031 � # � [0.2] <CARRIAGE RETURN (CR)> (CR) � [3.1] COMBINING DIAERESIS (Extend_FE) � [999.0] VERTICAL KANA REPEAT MARK (Katakana) � [0.3]
+ assertAnalyzesTo(analyzer, "\r\u0308\u3031",
+ new String[] { "\u3031" });
+
+ // � 000D � 0041 � # � [0.2] <CARRIAGE RETURN (CR)> (CR) � [3.1] LATIN CAPITAL LETTER A (ALetter) � [0.3]
+ assertAnalyzesTo(analyzer, "\r\u0041",
+ new String[] { "\u0041" });
+
+ // � 000D � 0308 � 0041 � # � [0.2] <CARRIAGE RETURN (CR)> (CR) � [3.1] COMBINING DIAERESIS (Extend_FE) � [999.0] LATIN CAPITAL LETTER A (ALetter) � [0.3]
+ assertAnalyzesTo(analyzer, "\r\u0308\u0041",
+ new String[] { "\u0041" });
+
+ // � 000D � 003A � # � [0.2] <CARRIAGE RETURN (CR)> (CR) � [3.1] COLON (MidLetter) � [0.3]
+ assertAnalyzesTo(analyzer, "\r\u003A",
+ new String[] { });
+
+ // � 000D � 0308 � 003A � # � [0.2] <CARRIAGE RETURN (CR)> (CR) � [3.1] COMBINING DIAERESIS (Extend_FE) � [999.0] COLON (MidLetter) � [0.3]
+ assertAnalyzesTo(analyzer, "\r\u0308\u003A",
+ new String[] { });
+
+ // � 000D � 002C � # � [0.2] <CARRIAGE RETURN (CR)> (CR) � [3.1] COMMA (MidNum) � [0.3]
+ assertAnalyzesTo(analyzer, "\r\u002C",
+ new String[] { });
+
+ // � 000D � 0308 � 002C � # � [0.2] <CARRIAGE RETURN (CR)> (CR) � [3.1] COMBINING DIAERESIS (Extend_FE) � [999.0] COMMA (MidNum) � [0.3]
+ assertAnalyzesTo(analyzer, "\r\u0308\u002C",
+ new String[] { });
+
+ // � 000D � 002E � # � [0.2] <CARRIAGE RETURN (CR)> (CR) � [3.1] FULL STOP (MidNumLet) � [0.3]
+ assertAnalyzesTo(analyzer, "\r\u002E",
+ new String[] { });
+
+ // � 000D � 0308 � 002E � # � [0.2] <CARRIAGE RETURN (CR)> (CR) � [3.1] COMBINING DIAERESIS (Extend_FE) � [999.0] FULL STOP (MidNumLet) � [0.3]
+ assertAnalyzesTo(analyzer, "\r\u0308\u002E",
+ new String[] { });
+
+ // � 000D � 0030 � # � [0.2] <CARRIAGE RETURN (CR)> (CR) � [3.1] DIGIT ZERO (Numeric) � [0.3]
+ assertAnalyzesTo(analyzer, "\r\u0030",
+ new String[] { "\u0030" });
+
+ // � 000D � 0308 � 0030 � # � [0.2] <CARRIAGE RETURN (CR)> (CR) � [3.1] COMBINING DIAERESIS (Extend_FE) � [999.0] DIGIT ZERO (Numeric) � [0.3]
+ assertAnalyzesTo(analyzer, "\r\u0308\u0030",
+ new String[] { "\u0030" });
+
+ // � 000D � 005F � # � [0.2] <CARRIAGE RETURN (CR)> (CR) � [3.1] LOW LINE (ExtendNumLet) � [0.3]
+ assertAnalyzesTo(analyzer, "\r\u005F",
+ new String[] { });
+
+ // � 000D � 0308 � 005F � # � [0.2] <CARRIAGE RETURN (CR)> (CR) � [3.1] COMBINING DIAERESIS (Extend_FE) � [999.0] LOW LINE (ExtendNumLet) � [0.3]
+ assertAnalyzesTo(analyzer, "\r\u0308\u005F",
+ new String[] { });
+
+ // � 000D � 1F1E6 � # � [0.2] <CARRIAGE RETURN (CR)> (CR) � [3.1] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) � [0.3]
+ assertAnalyzesTo(analyzer, "\r\uD83C\uDDE6",
+ new String[] { });
+
+ // � 000D � 0308 � 1F1E6 � # � [0.2] <CARRIAGE RETURN (CR)> (CR) � [3.1] COMBINING DIAERESIS (Extend_FE) � [999.0] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) � [0.3]
+ assertAnalyzesTo(analyzer, "\r\u0308\uD83C\uDDE6",
+ new String[] { });
+
+ // � 000D � 05D0 � # � [0.2] <CARRIAGE RETURN (CR)> (CR) � [3.1] HEBREW LETTER ALEF (Hebrew_Letter) � [0.3]
+ assertAnalyzesTo(analyzer, "\r\u05D0",
+ new String[] { "\u05D0" });
+
+ // � 000D � 0308 � 05D0 � # � [0.2] <CARRIAGE RETURN (CR)> (CR) � [3.1] COMBINING DIAERESIS (Extend_FE) � [999.0] HEBREW LETTER ALEF (Hebrew_Letter) � [0.3]
+ assertAnalyzesTo(analyzer, "\r\u0308\u05D0",
+ new String[] { "\u05D0" });
+
+ // � 000D � 0022 � # � [0.2] <CARRIAGE RETURN (CR)> (CR) � [3.1] QUOTATION MARK (Double_Quote) � [0.3]
+ assertAnalyzesTo(analyzer, "\r\"",
+ new String[] { });
+
+ // � 000D � 0308 � 0022 � # � [0.2] <CARRIAGE RETURN (CR)> (CR) � [3.1] COMBINING DIAERESIS (Extend_FE) � [999.0] QUOTATION MARK (Double_Quote) � [0.3]
+ assertAnalyzesTo(analyzer, "\r\u0308\"",
+ new String[] { });
+
+ // � 000D � 0027 � # � [0.2] <CARRIAGE RETURN (CR)> (CR) � [3.1] APOSTROPHE (Single_Quote) � [0.3]
+ assertAnalyzesTo(analyzer, "\r\u0027",
+ new String[] { });
+
+ // � 000D � 0308 � 0027 � # � [0.2] <CARRIAGE RETURN (CR)> (CR) � [3.1] COMBINING DIAERESIS (Extend_FE) � [999.0] APOSTROPHE (Single_Quote) � [0.3]
+ assertAnalyzesTo(analyzer, "\r\u0308\u0027",
+ new String[] { });
+
+ // � 000D � 00AD � # � [0.2] <CARRIAGE RETURN (CR)> (CR) � [3.1] SOFT HYPHEN (Format_FE) � [0.3]
+ assertAnalyzesTo(analyzer, "\r\u00AD",
+ new String[] { });
+
+ // � 000D � 0308 � 00AD � # � [0.2] <CARRIAGE RETURN (CR)> (CR) � [3.1] COMBINING DIAERESIS (Extend_FE) � [4.0] SOFT HYPHEN (Format_FE) � [0.3]
+ assertAnalyzesTo(analyzer, "\r\u0308\u00AD",
+ new String[] { });
+
+ // � 000D � 0300 � # � [0.2] <CARRIAGE RETURN (CR)> (CR) � [3.1] COMBINING GRAVE ACCENT (Extend_FE) � [0.3]
+ assertAnalyzesTo(analyzer, "\r\u0300",
+ new String[] { });
+
+ // � 000D � 0308 � 0300 � # � [0.2] <CARRIAGE RETURN (CR)> (CR) � [3.1] COMBINING DIAERESIS (Extend_FE) � [4.0] COMBINING GRAVE ACCENT (Extend_FE) � [0.3]
+ assertAnalyzesTo(analyzer, "\r\u0308\u0300",
+ new String[] { });
+
+ // � 000D � 0061 � 2060 � # � [0.2] <CARRIAGE RETURN (CR)> (CR) � [3.1] LATIN SMALL LETTER A (ALetter) � [4.0] WORD JOINER (Format_FE) � [0.3]
+ assertAnalyzesTo(analyzer, "\r\u0061\u2060",
+ new String[] { "\u0061\u2060" });
+
+ // � 000D � 0308 � 0061 � 2060 � # � [0.2] <CARRIAGE RETURN (CR)> (CR) � [3.1] COMBINING DIAERESIS (Extend_FE) � [999.0] LATIN SMALL LETTER A (ALetter) � [4.0] WORD JOINER (Format_FE) � [0.3]
+ assertAnalyzesTo(analyzer, "\r\u0308\u0061\u2060",
+ new String[] { "\u0061\u2060" });
+
+ // � 000D � 0061 � 003A � # � [0.2] <CARRIAGE RETURN (CR)> (CR) � [3.1] LATIN SMALL LETTER A (ALetter) � [999.0] COLON (MidLetter) � [0.3]
+ assertAnalyzesTo(analyzer, "\r\u0061\u003A",
+ new String[] { "\u0061" });
+
+ // � 000D � 0308 � 0061 � 003A � # � [0.2] <CARRIAGE RETURN (CR)> (CR) � [3.1] COMBINING DIAERESIS (Extend_FE) � [999.0] LATIN SMALL LETTER A (ALetter) � [999.0] COLON (MidLetter) � [0.3]
+ assertAnalyzesTo(analyzer, "\r\u0308\u0061\u003A",
+ new String[] { "\u0061" });
+
+ // � 000D � 0061 � 0027 � # � [0.2] <CARRIAGE RETURN (CR)> (CR) � [3.1] LATIN SMALL LETTER A (ALetter) � [999.0] APOSTROPHE (Single_Quote) � [0.3]
+ assertAnalyzesTo(analyzer, "\r\u0061\u0027",
+ new String[] { "\u0061" });
+
+ // � 000D � 0308 � 0061 � 0027 � # � [0.2] <CARRIAGE RETURN (CR)> (CR) � [3.1] COMBINING DIAERESIS (Extend_FE) � [999.0] LATIN SMALL LETTER A (ALetter) � [999.0] APOSTROPHE (Single_Quote) � [0.3]
+ assertAnalyzesTo(analyzer, "\r\u0308\u0061\u0027",
+ new String[] { "\u0061" });
+
+ // � 000D � 0061 � 0027 � 2060 � # � [0.2] <CARRIAGE RETURN (CR)> (CR) � [3.1] LATIN SMALL LETTER A (ALetter) � [999.0] APOSTROPHE (Single_Quote) � [4.0] WORD JOINER (Format_FE) � [0.3]
+ assertAnalyzesTo(analyzer, "\r\u0061\u0027\u2060",
+ new String[] { "\u0061" });
+
+ // � 000D � 0308 � 0061 � 0027 � 2060 � # � [0.2] <CARRIAGE RETURN (CR)> (CR) � [3.1] COMBINING DIAERESIS (Extend_FE) � [999.0] LATIN SMALL LETTER A (ALetter) � [999.0] APOSTROPHE (Single_Quote) � [4.0] WORD JOINER (Format_FE) � [0.3]
+ assertAnalyzesTo(analyzer, "\r\u0308\u0061\u0027\u2060",
+ new String[] { "\u0061" });
+
+ // � 000D � 0061 � 002C � # � [0.2] <CARRIAGE RETURN (CR)> (CR) � [3.1] LATIN SMALL LETTER A (ALetter) � [999.0] COMMA (MidNum) � [0.3]
+ assertAnalyzesTo(analyzer, "\r\u0061\u002C",
+ new String[] { "\u0061" });
+
+ // � 000D � 0308 � 0061 � 002C � # � [0.2] <CARRIAGE RETURN (CR)> (CR) � [3.1] COMBINING DIAERESIS (Extend_FE) � [999.0] LATIN SMALL LETTER A (ALetter) � [999.0] COMMA (MidNum) � [0.3]
+ assertAnalyzesTo(analyzer, "\r\u0308\u0061\u002C",
+ new String[] { "\u0061" });
+
+ // � 000D � 0031 � 003A � # � [0.2] <CARRIAGE RETURN (CR)> (CR) � [3.1] DIGIT ONE (Numeric) � [999.0] COLON (MidLetter) � [0.3]
+ assertAnalyzesTo(analyzer, "\r\u0031\u003A",
+ new String[] { "\u0031" });
+
+ // � 000D � 0308 � 0031 � 003A � # � [0.2] <CARRIAGE RETURN (CR)> (CR) � [3.1] COMBINING DIAERESIS (Extend_FE) � [999.0] DIGIT ONE (Numeric) � [999.0] COLON (MidLetter) � [0.3]
+ assertAnalyzesTo(analyzer, "\r\u0308\u0031\u003A",
+ new String[] { "\u0031" });
+
+ // � 000D � 0031 � 0027 � # � [0.2] <CARRIAGE RETURN (CR)> (CR) � [3.1] DIGIT ONE (Numeric) � [999.0] APOSTROPHE (Single_Quote) � [0.3]
+ assertAnalyzesTo(analyzer, "\r\u0031\u0027",
+ new String[] { "\u0031" });
+
+ // � 000D � 0308 � 0031 � 0027 � # � [0.2] <CARRIAGE RETURN (CR)> (CR) � [3.1] COMBINING DIAERESIS (Extend_FE) � [999.0] DIGIT ONE (Numeric) � [999.0] APOSTROPHE (Single_Quote) � [0.3]
+ assertAnalyzesTo(analyzer, "\r\u0308\u0031\u0027",
+ new String[] { "\u0031" });
+
+ // � 000D � 0031 � 002C � # � [0.2] <CARRIAGE RETURN (CR)> (CR) � [3.1] DIGIT ONE (Numeric) � [999.0] COMMA (MidNum) � [0.3]
+ assertAnalyzesTo(analyzer, "\r\u0031\u002C",
+ new String[] { "\u0031" });
+
+ // � 000D � 0308 � 0031 � 002C � # � [0.2] <CARRIAGE RETURN (CR)> (CR) � [3.1] COMBINING DIAERESIS (Extend_FE) � [999.0] DIGIT ONE (Numeric) � [999.0] COMMA (MidNum) � [0.3]
+ assertAnalyzesTo(analyzer, "\r\u0308\u0031\u002C",
+ new String[] { "\u0031" });
+
+ // � 000D � 0031 � 002E � 2060 � # � [0.2] <CARRIAGE RETURN (CR)> (CR) � [3.1] DIGIT ONE (Numeric) � [999.0] FULL STOP (MidNumLet) � [4.0] WORD JOINER (Format_FE) � [0.3]
+ assertAnalyzesTo(analyzer, "\r\u0031\u002E\u2060",
+ new String[] { "\u0031" });
+
+ // � 000D � 0308 � 0031 � 002E � 2060 � # � [0.2] <CARRIAGE RETURN (CR)> (CR) � [3.1] COMBINING DIAERESIS (Extend_FE) � [999.0] DIGIT ONE (Numeric) � [999.0] FULL STOP (MidNumLet) � [4.0] WORD JOINER (Format_FE) � [0.3]
+ assertAnalyzesTo(analyzer, "\r\u0308\u0031\u002E\u2060",
+ new String[] { "\u0031" });
+
+ // � 000A � 0001 � # � [0.2] <LINE FEED (LF)> (LF) � [3.1] <START OF HEADING> (Other) � [0.3]
+ assertAnalyzesTo(analyzer, "\n\u0001",
+ new String[] { });
+
+ // � 000A � 0308 � 0001 � # � [0.2] <LINE FEED (LF)> (LF) � [3.1] COMBINING DIAERESIS (Extend_FE) � [999.0] <START OF HEADING> (Other) � [0.3]
+ assertAnalyzesTo(analyzer, "\n\u0308\u0001",
+ new String[] { });
+
+ // � 000A � 000D � # � [0.2] <LINE FEED (LF)> (LF) � [3.1] <CARRIAGE RETURN (CR)> (CR) � [0.3]
+ assertAnalyzesTo(analyzer, "\n\r",
+ new String[] { });
+
+ // � 000A � 0308 � 000D � # � [0.2] <LINE FEED (LF)> (LF) � [3.1] COMBINING DIAERESIS (Extend_FE) � [3.2] <CARRIAGE RETURN (CR)> (CR) � [0.3]
+ assertAnalyzesTo(analyzer, "\n\u0308\r",
+ new String[] { });
+
+ // � 000A � 000A � # � [0.2] <LINE FEED (LF)> (LF) � [3.1] <LINE FEED (LF)> (LF) � [0.3]
+ assertAnalyzesTo(analyzer, "\n\n",
+ new String[] { });
+
+ // � 000A � 0308 � 000A � # � [0.2] <LINE FEED (LF)> (LF) � [3.1] COMBINING DIAERESIS (Extend_FE) � [3.2] <LINE FEED (LF)> (LF) � [0.3]
+ assertAnalyzesTo(analyzer, "\n\u0308\n",
+ new String[] { });
+
+ // � 000A � 000B � # � [0.2] <LINE FEED (LF)> (LF) � [3.1] <LINE TABULATION> (Newline) � [0.3]
+ assertAnalyzesTo(analyzer, "\n\u000B",
+ new String[] { });
+
+ // � 000A � 0308 � 000B � # � [0.2] <LINE FEED (LF)> (LF) � [3.1] COMBINING DIAERESIS (Extend_FE) � [3.2] <LINE TABULATION> (Newline) � [0.3]
+ assertAnalyzesTo(analyzer, "\n\u0308\u000B",
+ new String[] { });
+
+ // � 000A � 3031 � # � [0.2] <LINE FEED (LF)> (LF) � [3.1] VERTICAL KANA REPEAT MARK (Katakana) � [0.3]
+ assertAnalyzesTo(analyzer, "\n\u3031",
+ new String[] { "\u3031" });
+
+ // � 000A � 0308 � 3031 � # � [0.2] <LINE FEED (LF)> (LF) � [3.1] COMBINING DIAERESIS (Extend_FE) � [999.0] VERTICAL KANA REPEAT MARK (Katakana) � [0.3]
+ assertAnalyzesTo(analyzer, "\n\u0308\u3031",
+ new String[] { "\u3031" });
+
+ // � 000A � 0041 � # � [0.2] <LINE FEED (LF)> (LF) � [3.1] LATIN CAPITAL LETTER A (ALetter) � [0.3]
+ assertAnalyzesTo(analyzer, "\n\u0041",
+ new String[] { "\u0041" });
+
+ // � 000A � 0308 � 0041 � # � [0.2] <LINE FEED (LF)> (LF) � [3.1] COMBINING DIAERESIS (Extend_FE) � [999.0] LATIN CAPITAL LETTER A (ALetter) � [0.3]
+ assertAnalyzesTo(analyzer, "\n\u0308\u0041",
+ new String[] { "\u0041" });
+
+ // � 000A � 003A � # � [0.2] <LINE FEED (LF)> (LF) � [3.1] COLON (MidLetter) � [0.3]
+ assertAnalyzesTo(analyzer, "\n\u003A",
+ new String[] { });
+
+ // � 000A � 0308 � 003A � # � [0.2] <LINE FEED (LF)> (LF) � [3.1] COMBINING DIAERESIS (Extend_FE) � [999.0] COLON (MidLetter) � [0.3]
+ assertAnalyzesTo(analyzer, "\n\u0308\u003A",
+ new String[] { });
+
+ // � 000A � 002C � # � [0.2] <LINE FEED (LF)> (LF) � [3.1] COMMA (MidNum) � [0.3]
+ assertAnalyzesTo(analyzer, "\n\u002C",
+ new String[] { });
+
+ // � 000A � 0308 � 002C � # � [0.2] <LINE FEED (LF)> (LF) � [3.1] COMBINING DIAERESIS (Extend_FE) � [999.0] COMMA (MidNum) � [0.3]
+ assertAnalyzesTo(analyzer, "\n\u0308\u002C",
+ new String[] { });
+
+ // � 000A � 002E � # � [0.2] <LINE FEED (LF)> (LF) � [3.1] FULL STOP (MidNumLet) � [0.3]
+ assertAnalyzesTo(analyzer, "\n\u002E",
+ new String[] { });
+
+ // � 000A � 0308 � 002E � # � [0.2] <LINE FEED (LF)> (LF) � [3.1] COMBINING DIAERESIS (Extend_FE) � [999.0] FULL STOP (MidNumLet) � [0.3]
+ assertAnalyzesTo(analyzer, "\n\u0308\u002E",
+ new String[] { });
+
+ // � 000A � 0030 � # � [0.2] <LINE FEED (LF)> (LF) � [3.1] DIGIT ZERO (Numeric) � [0.3]
+ assertAnalyzesTo(analyzer, "\n\u0030",
+ new String[] { "\u0030" });
+
+ // � 000A � 0308 � 0030 � # � [0.2] <LINE FEED (LF)> (LF) � [3.1] COMBINING DIAERESIS (Extend_FE) � [999.0] DIGIT ZERO (Numeric) � [0.3]
+ assertAnalyzesTo(analyzer, "\n\u0308\u0030",
+ new String[] { "\u0030" });
+
+ // � 000A � 005F � # � [0.2] <LINE FEED (LF)> (LF) � [3.1] LOW LINE (ExtendNumLet) � [0.3]
+ assertAnalyzesTo(analyzer, "\n\u005F",
+ new String[] { });
+
+ // � 000A � 0308 � 005F � # � [0.2] <LINE FEED (LF)> (LF) � [3.1] COMBINING DIAERESIS (Extend_FE) � [999.0] LOW LINE (ExtendNumLet) � [0.3]
+ assertAnalyzesTo(analyzer, "\n\u0308\u005F",
+ new String[] { });
+
+ // � 000A � 1F1E6 � # � [0.2] <LINE FEED (LF)> (LF) � [3.1] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) � [0.3]
+ assertAnalyzesTo(analyzer, "\n\uD83C\uDDE6",
+ new String[] { });
+
+ // � 000A � 0308 � 1F1E6 � # � [0.2] <LINE FEED (LF)> (LF) � [3.1] COMBINING DIAERESIS (Extend_FE) � [999.0] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) � [0.3]
+ assertAnalyzesTo(analyzer, "\n\u0308\uD83C\uDDE6",
+ new String[] { });
+
+ // � 000A � 05D0 � # � [0.2] <LINE FEED (LF)> (LF) � [3.1] HEBREW LETTER ALEF (Hebrew_Letter) � [0.3]
+ assertAnalyzesTo(analyzer, "\n\u05D0",
+ new String[] { "\u05D0" });
+
+ // ÷ 000A ÷ 0308 ÷ 05D0 ÷ # ÷ [0.2] <LINE FEED (LF)> (LF) ÷ [3.1] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3]
+ assertAnalyzesTo(analyzer, "\n\u0308\u05D0",
+ new String[] { "\u05D0" });
+
+ // � 000A � 0022 � # � [0.2] <LINE FEED (LF)> (LF) � [3.1] QUOTATION MARK (Double_Quote) � [0.3]
+ assertAnalyzesTo(analyzer, "\n\"",
+ new String[] { });
+
+ // � 000A � 0308 � 0022 � # � [0.2] <LINE FEED (LF)> (LF) � [3.1] COMBINING DIAERESIS (Extend_FE) � [999.0] QUOTATION MARK (Double_Quote) � [0.3]
+ assertAnalyzesTo(analyzer, "\n\u0308\"",
+ new String[] { });
+
+ // � 000A � 0027 � # � [0.2] <LINE FEED (LF)> (LF) � [3.1] APOSTROPHE (Single_Quote) � [0.3]
+ assertAnalyzesTo(analyzer, "\n\u0027",
+ new String[] { });
+
+ // � 000A � 0308 � 0027 � # � [0.2] <LINE FEED (LF)> (LF) � [3.1] COMBINING DIAERESIS (Extend_FE) � [999.0] APOSTROPHE (Single_Quote) � [0.3]
+ assertAnalyzesTo(analyzer, "\n\u0308\u0027",
+ new String[] { });
+
+ // � 000A � 00AD � # � [0.2] <LINE FEED (LF)> (LF) � [3.1] SOFT HYPHEN (Format_FE) � [0.3]
+ assertAnalyzesTo(analyzer, "\n\u00AD",
+ new String[] { });
+
+ // � 000A � 0308 � 00AD � # � [0.2] <LINE FEED (LF)> (LF) � [3.1] COMBINING DIAERESIS (Extend_FE) � [4.0] SOFT HYPHEN (Format_FE) � [0.3]
+ assertAnalyzesTo(analyzer, "\n\u0308\u00AD",
+ new String[] { });
+
+ // � 000A � 0300 � # � [0.2] <LINE FEED (LF)> (LF) � [3.1] COMBINING GRAVE ACCENT (Extend_FE) � [0.3]
+ assertAnalyzesTo(analyzer, "\n\u0300",
+ new String[] { });
+
+ // � 000A � 0308 � 0300 � # � [0.2] <LINE FEED (LF)> (LF) � [3.1] COMBINING DIAERESIS (Extend_FE) � [4.0] COMBINING GRAVE ACCENT (Extend_FE) � [0.3]
+ assertAnalyzesTo(analyzer, "\n\u0308\u0300",
+ new String[] { });
+
+ // � 000A � 0061 � 2060 � # � [0.2] <LINE FEED (LF)> (LF) � [3.1] LATIN SMALL LETTER A (ALetter) � [4.0] WORD JOINER (Format_FE) � [0.3]
+ assertAnalyzesTo(analyzer, "\n\u0061\u2060",
+ new String[] { "\u0061\u2060" });
+
+ // � 000A � 0308 � 0061 � 2060 � # � [0.2] <LINE FEED (LF)> (LF) � [3.1] COMBINING DIAERESIS (Extend_FE) � [999.0] LATIN SMALL LETTER A (ALetter) � [4.0] WORD JOINER (Format_FE) � [0.3]
+ assertAnalyzesTo(analyzer, "\n\u0308\u0061\u2060",
+ new String[] { "\u0061\u2060" });
+
+ // � 000A � 0061 � 003A � # � [0.2] <LINE FEED (LF)> (LF) � [3.1] LATIN SMALL LETTER A (ALetter) � [999.0] COLON (MidLetter) � [0.3]
+ assertAnalyzesTo(analyzer, "\n\u0061\u003A",
+ new String[] { "\u0061" });
+
+ // � 000A � 0308 � 0061 � 003A � # � [0.2] <LINE FEED (LF)> (LF) � [3.1] COMBINING DIAERESIS (Extend_FE) � [999.0] LATIN SMALL LETTER A (ALetter) � [999.0] COLON (MidLetter) � [0.3]
+ assertAnalyzesTo(analyzer, "\n\u0308\u0061\u003A",
+ new String[] { "\u0061" });
+
+ // � 000A � 0061 � 0027 � # � [0.2] <LINE FEED (LF)> (LF) � [3.1] LATIN SMALL LETTER A (ALetter) � [999.0] APOSTROPHE (Single_Quote) � [0.3]
+ assertAnalyzesTo(analyzer, "\n\u0061\u0027",
+ new String[] { "\u0061" });
+
+ // � 000A � 0308 � 0061 � 0027 � # � [0.2] <LINE FEED (LF)> (LF) � [3.1] COMBINING DIAERESIS (Extend_FE) � [999.0] LATIN SMALL LETTER A (ALetter) � [999.0] APOSTROPHE (Single_Quote) � [0.3]
+ assertAnalyzesTo(analyzer, "\n\u0308\u0061\u0027",
+ new String[] { "\u0061" });
+
+ // � 000A � 0061 � 0027 � 2060 � # � [0.2] <LINE FEED (LF)> (LF) � [3.1] LATIN SMALL LETTER A (ALetter) � [999.0] APOSTROPHE (Single_Quote) � [4.0] WORD JOINER (Format_FE) � [0.3]
+ assertAnalyzesTo(analyzer, "\n\u0061\u0027\u2060",
+ new String[] { "\u0061" });
+
+ // � 000A � 0308 � 0061 � 0027 � 2060 � # � [0.2] <LINE FEED (LF)> (LF) � [3.1] COMBINING DIAERESIS (Extend_FE) � [999.0] LATIN SMALL LETTER A (ALetter) � [999.0] APOSTROPHE (Single_Quote) � [4.0] WORD JOINER (Format_FE) � [0.3]
+ assertAnalyzesTo(analyzer, "\n\u0308\u0061\u0027\u2060",
+ new String[] { "\u0061" });
+
+ // � 000A � 0061 � 002C � # � [0.2] <LINE FEED (LF)> (LF) � [3.1] LATIN SMALL LETTER A (ALetter) � [999.0] COMMA (MidNum) � [0.3]
+ assertAnalyzesTo(analyzer, "\n\u0061\u002C",
+ new String[] { "\u0061" });
+
+ // � 000A � 0308 � 0061 � 002C � # � [0.2] <LINE FEED (LF)> (LF) � [3.1] COMBINING DIAERESIS (Extend_FE) � [999.0] LATIN SMALL LETTER A (ALetter) � [999.0] COMMA (MidNum) � [0.3]
+ assertAnalyzesTo(analyzer, "\n\u0308\u0061\u002C",
+ new String[] { "\u0061" });
+
+ // � 000A � 0031 � 003A � # � [0.2] <LINE FEED (LF)> (LF) � [3.1] DIGIT ONE (Numeric) � [999.0] COLON (MidLetter) � [0.3]
+ assertAnalyzesTo(analyzer, "\n\u0031\u003A",
+ new String[] { "\u0031" });
+
+ // � 000A � 0308 � 0031 � 003A � # � [0.2] <LINE FEED (LF)> (LF) � [3.1] COMBINING DIAERESIS (Extend_FE) � [999.0] DIGIT ONE (Numeric) � [999.0] COLON (MidLetter) � [0.3]
+ assertAnalyzesTo(analyzer, "\n\u0308\u0031\u003A",
+ new String[] { "\u0031" });
+
+ // � 000A � 0031 � 0027 � # � [0.2] <LINE FEED (LF)> (LF) � [3.1] DIGIT ONE (Numeric) � [999.0] APOSTROPHE (Single_Quote) � [0.3]
+ assertAnalyzesTo(analyzer, "\n\u0031\u0027",
+ new String[] { "\u0031" });
+
+ // � 000A � 0308 � 0031 � 0027 � # � [0.2] <LINE FEED (LF)> (LF) � [3.1] COMBINING DIAERESIS (Extend_FE) � [999.0] DIGIT ONE (Numeric) � [999.0] APOSTROPHE (Single_Quote) � [0.3]
+ assertAnalyzesTo(analyzer, "\n\u0308\u0031\u0027",
+ new String[] { "\u0031" });
+
+ // � 000A � 0031 � 002C � # � [0.2] <LINE FEED (LF)> (LF) � [3.1] DIGIT ONE (Numeric) � [999.0] COMMA (MidNum) � [0.3]
+ assertAnalyzesTo(analyzer, "\n\u0031\u002C",
+ new String[] { "\u0031" });
+
+ // � 000A � 0308 � 0031 � 002C � # � [0.2] <LINE FEED (LF)> (LF) � [3.1] COMBINING DIAERESIS (Extend_FE) � [999.0] DIGIT ONE (Numeric) � [999.0] COMMA (MidNum) � [0.3]
+ assertAnalyzesTo(analyzer, "\n\u0308\u0031\u002C",
+ new String[] { "\u0031" });
+
+ // � 000A � 0031 � 002E � 2060 � # � [0.2] <LINE FEED (LF)> (LF) � [3.1] DIGIT ONE (Numeric) � [999.0] FULL STOP (MidNumLet) � [4.0] WORD JOINER (Format_FE) � [0.3]
+ assertAnalyzesTo(analyzer, "\n\u0031\u002E\u2060",
+ new String[] { "\u0031" });
+
+ // � 000A � 0308 � 0031 � 002E � 2060 � # � [0.2] <LINE FEED (LF)> (LF) � [3.1] COMBINING DIAERESIS (Extend_FE) � [999.0] DIGIT ONE (Numeric) � [999.0] FULL STOP (MidNumLet) � [4.0] WORD JOINER (Format_FE) � [0.3]
+ assertAnalyzesTo(analyzer, "\n\u0308\u0031\u002E\u2060",
+ new String[] { "\u0031" });
+
+ // � 000B � 0001 � # � [0.2] <LINE TABULATION> (Newline) � [3.1] <START OF HEADING> (Other) � [0.3]
+ assertAnalyzesTo(analyzer, "\u000B\u0001",
+ new String[] { });
+
+ // � 000B � 0308 � 0001 � # � [0.2] <LINE TABULATION> (Newline) � [3.1] COMBINING DIAERESIS (Extend_FE) � [999.0] <START OF HEADING> (Other) � [0.3]
+ assertAnalyzesTo(analyzer, "\u000B\u0308\u0001",
+ new String[] { });
+
+ // � 000B � 000D � # � [0.2] <LINE TABULATION> (Newline) � [3.1] <CARRIAGE RETURN (CR)> (CR) � [0.3]
+ assertAnalyzesTo(analyzer, "\u000B\r",
+ new String[] { });
+
+ // � 000B � 0308 � 000D � # � [0.2] <LINE TABULATION> (Newline) � [3.1] COMBINING DIAERESIS (Extend_FE) � [3.2] <CARRIAGE RETURN (CR)> (CR) � [0.3]
+ assertAnalyzesTo(analyzer, "\u000B\u0308\r",
+ new String[] { });
+
+ // � 000B � 000A � # � [0.2] <LINE TABULATION> (Newline) � [3.1] <LINE FEED (LF)> (LF) � [0.3]
+ assertAnalyzesTo(analyzer, "\u000B\n",
+ new String[] { });
+
+ // � 000B � 0308 � 000A � # � [0.2] <LINE TABULATION> (Newline) � [3.1] COMBINING DIAERESIS (Extend_FE) � [3.2] <LINE FEED (LF)> (LF) � [0.3]
+ assertAnalyzesTo(analyzer, "\u000B\u0308\n",
+ new String[] { });
+
+ // � 000B � 000B � # � [0.2] <LINE TABULATION> (Newline) � [3.1] <LINE TABULATION> (Newline) � [0.3]
+ assertAnalyzesTo(analyzer, "\u000B\u000B",
+ new String[] { });
+
+ // � 000B � 0308 � 000B � # � [0.2] <LINE TABULATION> (Newline) � [3.1] COMBINING DIAERESIS (Extend_FE) � [3.2] <LINE TABULATION> (Newline) � [0.3]
+ assertAnalyzesTo(analyzer, "\u000B\u0308\u000B",
+ new String[] { });
+
+ // � 000B � 3031 � # � [0.2] <LINE TABULATION> (Newline) � [3.1] VERTICAL KANA REPEAT MARK (Katakana) � [0.3]
+ assertAnalyzesTo(analyzer, "\u000B\u3031",
+ new String[] { "\u3031" });
+
+ // � 000B � 0308 � 3031 � # � [0.2] <LINE TABULATION> (Newline) � [3.1] COMBINING DIAERESIS (Extend_FE) � [999.0] VERTICAL KANA REPEAT MARK (Katakana) � [0.3]
+ assertAnalyzesTo(analyzer, "\u000B\u0308\u3031",
+ new String[] { "\u3031" });
+
+ // � 000B � 0041 � # � [0.2] <LINE TABULATION> (Newline) � [3.1] LATIN CAPITAL LETTER A (ALetter) � [0.3]
+ assertAnalyzesTo(analyzer, "\u000B\u0041",
+ new String[] { "\u0041" });
+
+ // � 000B � 0308 � 0041 � # � [0.2] <LINE TABULATION> (Newline) � [3.1] COMBINING DIAERESIS (Extend_FE) � [999.0] LATIN CAPITAL LETTER A (ALetter) � [0.3]
+ assertAnalyzesTo(analyzer, "\u000B\u0308\u0041",
+ new String[] { "\u0041" });
+
+ // � 000B � 003A � # � [0.2] <LINE TABULATION> (Newline) � [3.1] COLON (MidLetter) � [0.3]
+ assertAnalyzesTo(analyzer, "\u000B\u003A",
+ new String[] { });
+
+ // � 000B � 0308 � 003A � # � [0.2] <LINE TABULATION> (Newline) � [3.1] COMBINING DIAERESIS (Extend_FE) � [999.0] COLON (MidLetter) � [0.3]
+ assertAnalyzesTo(analyzer, "\u000B\u0308\u003A",
+ new String[] { });
+
+ // � 000B � 002C � # � [0.2] <LINE TABULATION> (Newline) � [3.1] COMMA (MidNum) � [0.3]
+ assertAnalyzesTo(analyzer, "\u000B\u002C",
+ new String[] { });
+
+ // � 000B � 0308 � 002C � # � [0.2] <LINE TABULATION> (Newline) � [3.1] COMBINING DIAERESIS (Extend_FE) � [999.0] COMMA (MidNum) � [0.3]
+ assertAnalyzesTo(analyzer, "\u000B\u0308\u002C",
+ new String[] { });
+
+ // � 000B � 002E � # � [0.2] <LINE TABULATION> (Newline) � [3.1] FULL STOP (MidNumLet) � [0.3]
+ assertAnalyzesTo(analyzer, "\u000B\u002E",
+ new String[] { });
+
+ // � 000B � 0308 � 002E � # � [0.2] <LINE TABULATION> (Newline) � [3.1] COMBINING DIAERESIS (Extend_FE) � [999.0] FULL STOP (MidNumLet) � [0.3]
+ assertAnalyzesTo(analyzer, "\u000B\u0308\u002E",
+ new String[] { });
+
+ // � 000B � 0030 � # � [0.2] <LINE TABULATION> (Newline) � [3.1] DIGIT ZERO (Numeric) � [0.3]
+ assertAnalyzesTo(analyzer, "\u000B\u0030",
+ new String[] { "\u0030" });
+
+ // � 000B � 0308 � 0030 � # � [0.2] <LINE TABULATION> (Newline) � [3.1] COMBINING DIAERESIS (Extend_FE) � [999.0] DIGIT ZERO (Numeric) � [0.3]
+ assertAnalyzesTo(analyzer, "\u000B\u0308\u0030",
+ new String[] { "\u0030" });
+
+ // � 000B � 005F � # � [0.2] <LINE TABULATION> (Newline) � [3.1] LOW LINE (ExtendNumLet) � [0.3]
+ assertAnalyzesTo(analyzer, "\u000B\u005F",
+ new String[] { });
+
+ // � 000B � 0308 � 005F � # � [0.2] <LINE TABULATION> (Newline) � [3.1] COMBINING DIAERESIS (Extend_FE) � [999.0] LOW LINE (ExtendNumLet) � [0.3]
+ assertAnalyzesTo(analyzer, "\u000B\u0308\u005F",
+ new String[] { });
+
+ // � 000B � 1F1E6 � # � [0.2] <LINE TABULATION> (Newline) � [3.1] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) � [0.3]
+ assertAnalyzesTo(analyzer, "\u000B\uD83C\uDDE6",
+ new String[] { });
+
+ // � 000B � 0308 � 1F1E6 � # � [0.2] <LINE TABULATION> (Newline) � [3.1] COMBINING DIAERESIS (Extend_FE) � [999.0] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) � [0.3]
+ assertAnalyzesTo(analyzer, "\u000B\u0308\uD83C\uDDE6",
+ new String[] { });
+
+ // � 000B � 05D0 � # � [0.2] <LINE TABULATION> (Newline) � [3.1] HEBREW LETTER ALEF (Hebrew_Letter) � [0.3]
+ assertAnalyzesTo(analyzer, "\u000B\u05D0",
+ new String[] { "\u05D0" });
+
+ // � 000B � 0308 � 05D0 � # � [0.2] <LINE TABULATION> (Newline) � [3.1] COMBINING DIAERESIS (Extend_FE) � [999.0] HEBREW LETTER ALEF (Hebrew_Letter) � [0.3]
+ assertAnalyzesTo(analyzer, "\u000B\u0308\u05D0",
+ new String[] { "\u05D0" });
+
+ // � 000B � 0022 � # � [0.2] <LINE TABULATION> (Newline) � [3.1] QUOTATION MARK (Double_Quote) � [0.3]
+ assertAnalyzesTo(analyzer, "\u000B\"",
+ new String[] { });
+
+ // � 000B � 0308 � 0022 � # � [0.2] <LINE TABULATION> (Newline) � [3.1] COMBINING DIAERESIS (Extend_FE) � [999.0] QUOTATION MARK (Double_Quote) � [0.3]
+ assertAnalyzesTo(analyzer, "\u000B\u0308\"",
+ new String[] { });
+
+ // � 000B � 0027 � # � [0.2] <LINE TABULATION> (Newline) � [3.1] APOSTROPHE (Single_Quote) � [0.3]
+ assertAnalyzesTo(analyzer, "\u000B\u0027",
+ new String[] { });
+
+ // � 000B � 0308 � 0027 � # � [0.2] <LINE TABULATION> (Newline) � [3.1] COMBINING DIAERESIS (Extend_FE) � [999.0] APOSTROPHE (Single_Quote) � [0.3]
+ assertAnalyzesTo(analyzer, "\u000B\u0308\u0027",
+ new String[] { });
+
+ // � 000B � 00AD � # � [0.2] <LINE TABULATION> (Newline) � [3.1] SOFT HYPHEN (Format_FE) � [0.3]
+ assertAnalyzesTo(analyzer, "\u000B\u00AD",
+ new String[] { });
+
+ // � 000B � 0308 � 00AD � # � [0.2] <LINE TABULATION> (Newline) � [3.1] COMBINING DIAERESIS (Extend_FE) � [4.0] SOFT HYPHEN (Format_FE) � [0.3]
+ assertAnalyzesTo(analyzer, "\u000B\u0308\u00AD",
+ new String[] { });
+
+ // � 000B � 0300 � # � [0.2] <LINE TABULATION> (Newline) � [3.1] COMBINING GRAVE ACCENT (Extend_FE) � [0.3]
+ assertAnalyzesTo(analyzer, "\u000B\u0300",
+ new String[] { });
+
+ // � 000B � 0308 � 0300 � # � [0.2] <LINE TABULATION> (Newline) � [3.1] COMBINING DIAERESIS (Extend_FE) � [4.0] COMBINING GRAVE ACCENT (Extend_FE) � [0.3]
+ assertAnalyzesTo(analyzer, "\u000B\u0308\u0300",
+ new String[] { });
+
+ // � 000B � 0061 � 2060 � # � [0.2] <LINE TABULATION> (Newline) � [3.1] LATIN SMALL LETTER A (ALetter) � [4.0] WORD JOINER (Format_FE) � [0.3]
+ assertAnalyzesTo(analyzer, "\u000B\u0061\u2060",
+ new String[] { "\u0061\u2060" });
+
+ // � 000B � 0308 � 0061 � 2060 � # � [0.2] <LINE TABULATION> (Newline) � [3.1] COMBINING DIAERESIS (Extend_FE) � [999.0] LATIN SMALL LETTER A (ALetter) � [4.0] WORD JOINER (Format_FE) � [0.3]
+ assertAnalyzesTo(analyzer, "\u000B\u0308\u0061\u2060",
+ new String[] { "\u0061\u2060" });
+
+ // � 000B � 0061 � 003A � # � [0.2] <LINE TABULATION> (Newline) � [3.1] LATIN SMALL LETTER A (ALetter) � [999.0] COLON (MidLetter) � [0.3]
+ assertAnalyzesTo(analyzer, "\u000B\u0061\u003A",
+ new String[] { "\u0061" });
+
+ // � 000B � 0308 � 0061 � 003A � # � [0.2] <LINE TABULATION> (Newline) � [3.1] COMBINING DIAERESIS (Extend_FE) � [999.0] LATIN SMALL LETTER A (ALetter) � [999.0] COLON (MidLetter) � [0.3]
+ assertAnalyzesTo(analyzer, "\u000B\u0308\u0061\u003A",
+ new String[] { "\u0061" });
+
+ // � 000B � 0061 � 0027 � # � [0.2] <LINE TABULATION> (Newline) � [3.1] LATIN SMALL LETTER A (ALetter) � [999.0] APOSTROPHE (Single_Quote) � [0.3]
+ assertAnalyzesTo(analyzer, "\u000B\u0061\u0027",
+ new String[] { "\u0061" });
+
+ // � 000B � 0308 � 0061 � 0027 � # � [0.2] <LINE TABULATION> (Newline) � [3.1] COMBINING DIAERESIS (Extend_FE) � [999.0] LATIN SMALL LETTER A (ALetter) � [999.0] APOSTROPHE (Single_Quote) � [0.3]
+ assertAnalyzesTo(analyzer, "\u000B\u0308\u0061\u0027",
+ new String[] { "\u0061" });
+
+ // � 000B � 0061 � 0027 � 2060 � # � [0.2] <LINE TABULATION> (Newline) � [3.1] LATIN SMALL LETTER A (ALetter) � [999.0] APOSTROPHE (Single_Quote) � [4.0] WORD JOINER (Format_FE) � [0.3]
+ assertAnalyzesTo(analyzer, "\u000B\u0061\u0027\u2060",
+ new String[] { "\u0061" });
+
+ // � 000B � 0308 � 0061 � 0027 � 2060 � # � [0.2] <LINE TABULATION> (Newline) � [3.1] COMBINING DIAERESIS (Extend_FE) � [999.0] LATIN SMALL LETTER A (ALetter) � [999.0] APOSTROPHE (Single_Quote) � [4.0] WORD JOINER (Format_FE) � [0.3]
+ assertAnalyzesTo(analyzer, "\u000B\u0308\u0061\u0027\u2060",
+ new String[] { "\u0061" });
+
+ // � 000B � 0061 � 002C � # � [0.2] <LINE TABULATION> (Newline) � [3.1] LATIN SMALL LETTER A (ALetter) � [999.0] COMMA (MidNum) � [0.3]
+ assertAnalyzesTo(analyzer, "\u000B\u0061\u002C",
+ new String[] { "\u0061" });
+
+ // � 000B � 0308 � 0061 � 002C � # � [0.2] <LINE TABULATION> (Newline) � [3.1] COMBINING DIAERESIS (Extend_FE) � [999.0] LATIN SMALL LETTER A (ALetter) � [999.0] COMMA (MidNum) � [0.3]
+ assertAnalyzesTo(analyzer, "\u000B\u0308\u0061\u002C",
+ new String[] { "\u0061" });
+
+ // � 000B � 0031 � 003A � # � [0.2] <LINE TABULATION> (Newline) � [3.1] DIGIT ONE (Numeric) � [999.0] COLON (MidLetter) � [0.3]
+ assertAnalyzesTo(analyzer, "\u000B\u0031\u003A",
+ new String[] { "\u0031" });
+
+ // � 000B � 0308 � 0031 � 003A � # � [0.2] <LINE TABULATION> (Newline) � [3.1] COMBINING DIAERESIS (Extend_FE) � [999.0] DIGIT ONE (Numeric) � [999.0] COLON (MidLetter) � [0.3]
+ assertAnalyzesTo(analyzer, "\u000B\u0308\u0031\u003A",
+ new String[] { "\u0031" });
+
+ // � 000B � 0031 � 0027 � # � [0.2] <LINE TABULATION> (Newline) � [3.1] DIGIT ONE (Numeric) � [999.0] APOSTROPHE (Single_Quote) � [0.3]
+ assertAnalyzesTo(analyzer, "\u000B\u0031\u0027",
+ new String[] { "\u0031" });
+
+ // � 000B � 0308 � 0031 � 0027 � # � [0.2] <LINE TABULATION> (Newline) � [3.1] COMBINING DIAERESIS (Extend_FE) � [999.0] DIGIT ONE (Numeric) � [999.0] APOSTROPHE (Single_Quote) � [0.3]
+ assertAnalyzesTo(analyzer, "\u000B\u0308\u0031\u0027",
+ new String[] { "\u0031" });
+
+ // � 000B � 0031 � 002C � # � [0.2] <LINE TABULATION> (Newline) � [3.1] DIGIT ONE (Numeric) � [999.0] COMMA (MidNum) � [0.3]
+ assertAnalyzesTo(analyzer, "\u000B\u0031\u002C",
+ new String[] { "\u0031" });
+
+ // � 000B � 0308 � 0031 � 002C � # � [0.2] <LINE TABULATION> (Newline) � [3.1] COMBINING DIAERESIS (Extend_FE) � [999.0] DIGIT ONE (Numeric) � [999.0] COMMA (MidNum) � [0.3]
+ assertAnalyzesTo(analyzer, "\u000B\u0308\u0031\u002C",
+ new String[] { "\u0031" });
+
+ // � 000B � 0031 � 002E � 2060 � # � [0.2] <LINE TABULATION> (Newline) � [3.1] DIGIT ONE (Numeric) � [999.0] FULL STOP (MidNumLet) � [4.0] WORD JOINER (Format_FE) � [0.3]
+ assertAnalyzesTo(analyzer, "\u000B\u0031\u002E\u2060",
+ new String[] { "\u0031" });
+
+ // � 000B � 0308 � 0031 � 002E � 2060 � # � [0.2] <LINE TABULATION> (Newline) � [3.1] COMBINING DIAERESIS (Extend_FE) � [999.0] DIGIT ONE (Numeric) � [999.0] FULL STOP (MidNumLet) � [4.0] WORD JOINER (Format_FE) � [0.3]
+ assertAnalyzesTo(analyzer, "\u000B\u0308\u0031\u002E\u2060",
+ new String[] { "\u0031" });
+
+ // ÷ 3031 ÷ 0001 ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3]
+ assertAnalyzesTo(analyzer, "\u3031\u0001",
+ new String[] { "\u3031" });
+
+ // ÷ 3031 × 0308 ÷ 0001 ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3]
+ assertAnalyzesTo(analyzer, "\u3031\u0308\u0001",
+ new String[] { "\u3031\u0308" });
+
+ // ÷ 3031 ÷ 000D ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+ assertAnalyzesTo(analyzer, "\u3031\r",
+ new String[] { "\u3031" });
+
+ // ÷ 3031 × 0308 ÷ 000D ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <CARRIAGE RETURN (CR)> (CR) ÷ [0.3]
+ assertAnalyzesTo(analyzer, "\u3031\u0308\r",
+ new String[] { "\u3031\u0308" });
+
+ // ÷ 3031 ÷ 000A ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3]
+ assertAnalyzesTo(analyzer, "\u3031\n",
+ new String[] { "\u3031" });
+
+ // ÷ 3031 × 0308 ÷ 000A ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE FEED (LF)> (LF) ÷ [0.3]
+ assertAnalyzesTo(analyzer, "\u3031\u0308\n",
+ new String[] { "\u3031\u0308" });
+
+ // ÷ 3031 ÷ 000B ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3]
+ assertAnalyzesTo(analyzer, "\u3031\u000B",
+ new String[] { "\u3031" });
+
+ // ÷ 3031 × 0308 ÷ 000B ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [3.2] <LINE TABULATION> (Newline) ÷ [0.3]
+ assertAnalyzesTo(analyzer, "\u3031\u0308\u000B",
+ new String[] { "\u3031\u0308" });
+
+ // ÷ 3031 × 3031 ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [13.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3]
+ assertAnalyzesTo(analyzer, "\u3031\u3031",
+ new String[] { "\u3031\u3031" });
+
+ // ÷ 3031 × 0308 × 3031 ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] COMBINING DIAERESIS (Extend_FE) × [13.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3]
+ assertAnalyzesTo(analyzer, "\u3031\u0308\u3031",
+ new String[] { "\u3031\u0308\u3031" });
+
+ // � 3031 � 0041 � # � [0.2] VERTICAL KANA REPEAT MARK (Katakana) � [999.0] LATIN CAPITAL LETTER A (ALetter) � [0.3]
+ assertAnalyzesTo(analyzer, "\u3031\u0041",
+ new String[] { "\u3031", "\u0041" });
+
+ // � 3031 � 0308 � 0041 � # � [0.2] VERTICAL KANA REPEAT MARK (Katakana) � [4.0] COMBINING DIAERESIS (Extend_FE) � [999.0] LATIN CAPITAL LETTER A (ALetter) � [0.3]
+ assertAnalyzesTo(analyzer, "\u3031\u0308\u0041",
+ new String[] { "\u3031\u0308", "\u0041" });
+
+ // � 3031 � 003A � # � [0.2] VERTICAL KANA REPEAT MARK (Katakana) � [999.0] COLON (MidLetter) � [0.3]
+ assertAnalyzesTo(analyzer, "\u3031\u003A",
+ new String[] { "\u3031" });
+
+ // � 3031 � 0308 � 003A � # � [0.2] VERTICAL KANA REPEAT MARK (Katakana) � [4.0] COMBINING DIAERESIS (Extend_FE) � [999.0] COLON (MidLetter) � [0.3]
+ assertAnalyzesTo(analyzer, "\u3031\u0308\u003A",
+ new String[] { "\u3031\u0308" });
+
+ // � 3031 � 002C � # � [0.2] VERTICAL KANA REPEAT MARK (Katakana) � [999.0] COMMA (MidNum) � [0.3]
+ assertAnalyzesTo(analyzer, "\u3031\u002C",
+ new String[] { "\u3031" });
+
+ // � 3031 � 0308 � 002C � # � [0.2] VERTICAL KANA REPEAT MARK (Katakana) � [4.0] COMBINING DIAERESIS (Extend_FE) � [999.0] COMMA (MidNum) � [0.3]
+ assertAnalyzesTo(analyzer, "\u3031\u0308\u002C",
+ new String[] { "\u3031\u0308" });
+
+ // � 3031 � 002E � # � [0.2] VERTICAL KANA REPEAT MARK (Katakana) � [999.0] FULL STOP (MidNumLet) � [0.3]
+ assertAnalyzesTo(analyzer, "\u3031\u002E",
+ new String[] { "\u3031" });
+
+ // � 3031 � 0308 � 002E � # � [0.2] VERTICAL KANA REPEAT MARK (Katakana) � [4.0] COMBINING DIAERESIS (Extend_FE) � [999.0] FULL STOP (MidNumLet) � [0.3]
+ assertAnalyzesTo(analyzer, "\u3031\u0308\u002E",
+ new String[] { "\u3031\u0308" });
+
+ // � 3031 � 0030 � # � [0.2] VERTICAL KANA REPEAT MARK (Katakana) � [999.0] DIGIT ZERO (Numeric) � [0.3]
+ assertAnalyzesTo(analyzer, "\u3031\u0030",
+ new String[] { "\u3031", "\u0030" });
+
+ // � 3031 � 0308 � 0030 � # � [0.2] VERTICAL KANA REPEAT MARK (Katakana) � [4.0] COMBINING DIAERESIS (Extend_FE) � [999.0] DIGIT ZERO (Numeric) � [0.3]
+ assertAnalyzesTo(analyzer, "\u3031\u0308\u0030",
+ new String[] { "\u3031\u0308", "\u0030" });
+
+ // ÷ 3031 × 005F ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [13.1] LOW LINE (ExtendNumLet) ÷ [0.3]
+ assertAnalyzesTo(analyzer, "\u3031\u005F",
+ new String[] { "\u3031\u005F" });
+
+ // � 3031 � 0308 � 005F � # � [0.2] VERTICAL KANA REPEAT MARK (Katakana) � [4.0] COMBINING DIAERESIS (Extend_FE) � [13.1] LOW LINE (ExtendNumLet) � [0.3]
+ assertAnalyzesTo(analyzer, "\u3031\u0308\u005F",
+ new String[] { "\u3031\u0308\u005F" });
+
+ // � 3031 � 1F1E6 � # � [0.2] VERTICAL KANA REPEAT MARK (Katakana) � [999.0] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) � [0.3]
+ assertAnalyzesTo(analyzer, "\u3031\uD83C\uDDE6",
+ new String[] { "\u3031" });
+
+ // � 3031 � 0308 � 1F1E6 � # � [0.2] VERTICAL KANA REPEAT MARK (Katakana) � [4.0] COMBINING DIAERESIS (Extend_FE) � [999.0] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) � [0.3]
+ assertAnalyzesTo(analyzer, "\u3031\u0308\uD83C\uDDE6",
+ new String[] { "\u3031\u0308" });
+
+ // � 3031 � 05D0 � # � [0.2] VERTICAL KANA REPEAT MARK (Katakana) � [999.0] HEBREW LETTER ALEF (Hebrew_Letter) � [0.3]
+ assertAnalyzesTo(analyzer, "\u3031\u05D0",
+ new String[] { "\u3031", "\u05D0" });
+
+ // � 3031 � 0308 � 05D0 � # � [0.2] VERTICAL KANA REPEAT MARK (Katakana) � [4.0] COMBINING DIAERESIS (Extend_FE) � [999.0] HEBREW LETTER ALEF (Hebrew_Letter) � [0.3]
+ assertAnalyzesTo(analyzer, "\u3031\u0308\u05D0",
+ new String[] { "\u3031\u0308", "\u05D0" });
+
+ // � 3031 � 0022 � # � [0.2] VERTICAL KANA REPEAT MARK (Katakana) � [999.0] QUOTATION MARK (Double_Quote) � [0.3]
+ assertAnalyzesTo(analyzer, "\u3031\"",
+ new String[] { "\u3031" });
+
+ // � 3031 � 0308 � 0022 � # � [0.2] VERTICAL KANA REPEAT MARK (Katakana) � [4.0] COMBINING DIAERESIS (Extend_FE) � [999.0] QUOTATION MARK (Double_Quote) � [0.3]
+ assertAnalyzesTo(analyzer, "\u3031\u0308\"",
+ new String[] { "\u3031\u0308" });
+
+ // � 3031 � 0027 � # � [0.2] VERTICAL KANA REPEAT MARK (Katakana) � [999.0] APOSTROPHE (Single_Quote) � [0.3]
+ assertAnalyzesTo(analyzer, "\u3031\u0027",
+ new String[] { "\u3031" });
+
+ // � 3031 � 0308 � 0027 � # � [0.2] VERTICAL KANA REPEAT MARK (Katakana) � [4.0] COMBINING DIAERESIS (Extend_FE) � [999.0] APOSTROPHE (Single_Quote) � [0.3]
+ assertAnalyzesTo(analyzer, "\u3031\u0308\u0027",
+ new String[] { "\u3031\u0308" });
+
+ // ÷ 3031 × 00AD ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+ assertAnalyzesTo(analyzer, "\u3031\u00AD",
+ new String[] { "\u3031\u00AD" });
+
+ // � 3031 � 0308 � 00AD � # � [0.2] VERTICAL KANA REPEAT MARK (Katakana) � [4.0] COMBINING DIAERESIS (Extend_FE) � [4.0] SOFT HYPHEN (Format_FE) � [0.3]
+ assertAnalyzesTo(analyzer, "\u3031\u0308\u00AD",
+ new String[] { "\u3031\u0308\u00AD" });
+
+ // � 3031 � 0300 � # � [0.2] VERTICAL KANA REPEAT MARK (Katakana) � [4.0] COMBINING GRAVE ACCENT (Extend_FE) � [0.3]
+ assertAnalyzesTo(analyzer, "\u3031\u0300",
+ new String[] { "\u3031\u0300" });
+
+ // � 3031 � 0308 � 0300 � # � [0.2] VERTICAL KANA REPEAT MARK (Katakana) � [4.0] COMBINING DIAERESIS (Extend_FE) � [4.0] COMBINING GRAVE ACCENT (Extend_FE) � [0.3]
+ assertAnalyzesTo(analyzer, "\u3031\u0308\u0300",
+ new String[] { "\u3031\u0308\u0300" });
+
+ // � 3031 � 0061 � 2060 � # � [0.2] VERTICAL KANA REPEAT MARK (Katakana) � [999.0] LATIN SMALL LETTER A (ALetter) � [4.0] WORD JOINER (Format_FE) � [0.3]
+ assertAnalyzesTo(analyzer, "\u3031\u0061\u2060",
+ new String[] { "\u3031", "\u0061\u2060" });
+
+ // � 3031 � 0308 � 0061 � 2060 � # � [0.2] VERTICAL KANA REPEAT MARK (Katakana) � [4.0] COMBINING DIAERESIS (Extend_FE) � [999.0] LATIN SMALL LETTER A (ALetter) � [4.0] WORD JOINER (Format_FE) � [0.3]
+ assertAnalyzesTo(analyzer, "\u3031\u0308\u0061\u2060",
+ new String[] { "\u3031\u0308", "\u0061\u2060" });
+
+ // � 3031 � 0061 � 003A � # � [0.2] VERTICAL KANA REPEAT MARK (Katakana) � [999.0] LATIN SMALL LETTER A (ALetter) � [999.0] COLON (MidLetter) � [0.3]
+ assertAnalyzesTo(analyzer, "\u3031\u0061\u003A",
+ new String[] { "\u3031", "\u0061" });
+
+ // � 3031 � 0308 � 0061 � 003A � # � [0.2] VERTICAL KANA REPEAT MARK (Katakana) � [4.0] COMBINING DIAERESIS (Extend_FE) � [999.0] LATIN SMALL LETTER A (ALetter) � [999.0] COLON (MidLetter) � [0.3]
+ assertAnalyzesTo(analyzer, "\u3031\u0308\u0061\u003A",
+ new String[] { "\u3031\u0308", "\u0061" });
+
+ // � 3031 � 0061 � 0027 � # � [0.2] VERTICAL KANA REPEAT MARK (Katakana) � [999.0] LATIN SMALL LETTER A (ALetter) � [999.0] APOSTROPHE (Single_Quote) � [0.3]
+ assertAnalyzesTo(analyzer, "\u3031\u0061\u0027",
+ new String[] { "\u3031", "\u0061" });
+
+ // � 3031 � 0308 � 0061 � 0027 � # � [0.2] VERTICAL KANA REPEAT MARK (Katakana) � [4.0] COMBINING DIAERESIS (Extend_FE) � [999.0] LATIN SMALL LETTER A (ALetter) � [999.0] APOSTROPHE (Single_Quote) � [0.3]
+ assertAnalyzesTo(analyzer, "\u3031\u0308\u0061\u0027",
+ new String[] { "\u3031\u0308", "\u0061" });
+
+ // � 3031 � 0061 � 0027 � 2060 � # � [0.2] VERTICAL KANA REPEAT MARK (Katakana) � [999.0] LATIN SMALL LETTER A (ALetter) � [999.0] APOSTROPHE (Single_Quote) � [4.0] WORD JOINER (Format_FE) � [0.3]
+ assertAnalyzesTo(analyzer, "\u3031\u0061\u0027\u2060",
+ new String[] { "\u3031", "\u0061" });
+
+ // � 3031 � 0308 � 0061 � 0027 � 2060 � # � [0.2] VERTICAL KANA REPEAT MARK (Katakana) � [4.0] COMBINING DIAERESIS (Extend_FE) � [999.0] LATIN SMALL LETTER A (ALetter) � [999.0] APOSTROPHE (Single_Quote) � [4.0] WORD JOINER (Format_FE) � [0.3]
+ assertAnalyzesTo(analyzer, "\u3031\u0308\u0061\u0027\u2060",
+ new String[] { "\u3031\u0308", "\u0061" });
+
+ // � 3031 � 0061 � 002C � # � [0.2] VERTICAL KANA REPEAT MARK (Katakana) � [999.0] LATIN SMALL LETTER A (ALetter) � [999.0] COMMA (MidNum) � [0.3]
+ assertAnalyzesTo(analyzer, "\u3031\u0061\u002C",
+ new String[] { "\u3031", "\u0061" });
+
+ // � 3031 � 0308 � 0061 � 002C � # � [0.2] VERTICAL KANA REPEAT MARK (Katakana) � [4.0] COMBINING DIAERESIS (Extend_FE) � [999.0] LATIN SMALL LETTER A (ALetter) � [999.0] COMMA (MidNum) � [0.3]
+ assertAnalyzesTo(analyzer, "\u3031\u0308\u0061\u002C",
+ new String[] { "\u3031\u0308", "\u0061" });
+
+ // � 3031 � 0031 � 003A � # � [0.2] VERTICAL KANA REPEAT MARK (Katakana) � [999.0] DIGIT ONE (Numeric) � [999.0] COLON (MidLetter) � [0.3]
+ assertAnalyzesTo(analyzer, "\u3031\u0031\u003A",
+ new String[] { "\u3031", "\u0031" });
+
+ // � 3031 � 0308 � 0031 � 003A � # � [0.2] VERTICAL KANA REPEAT MARK (Katakana) � [4.0] COMBINING DIAERESIS (Extend_FE) � [999.0] DIGIT ONE (Numeric) � [999.0] COLON (MidLetter) � [0.3]
+ assertAnalyzesTo(analyzer, "\u3031\u0308\u0031\u003A",
+ new String[] { "\u3031\u0308", "\u0031" });
+
+ // � 3031 � 0031 � 0027 � # � [0.2] VERTICAL KANA REPEAT MARK (Katakana) � [999.0] DIGIT ONE (Numeric) � [999.0] APOSTROPHE (Single_Quote) � [0.3]
+ assertAnalyzesTo(analyzer, "\u3031\u0031\u0027",
+ new String[] { "\u3031", "\u0031" });
+
+ // � 3031 � 0308 � 0031 � 0027 � # � [0.2] VERTICAL KANA REPEAT MARK (Katakana) � [4.0] COMBINING DIAERESIS (Extend_FE) � [999.0] DIGIT ONE (Numeric) � [999.0] APOSTROPHE (Single_Quote) � [0.3]
+ assertAnalyzesTo(analyzer, "\u3031\u0308\u0031\u0027",
+ new String[] { "\u3031\u0308", "\u0031" });
+
+ // � 3031 � 0031 � 002C � # � [0.2] VERTICAL KANA REPEAT MARK (Katakana) � [999.0] DIGIT ONE (Numeric) � [999.0] COMMA (MidNum) � [0.3]
+ assertAnalyzesTo(analyzer, "\u3031\u0031\u002C",
+ new String[] { "\u3031", "\u0031" });
+
+ // � 3031 � 0308 � 0031 � 002C � # � [0.2] VERTICAL KANA REPEAT MARK (Katakana) � [4.0] COMBINING DIAERESIS (Extend_FE) � [999.0] DIGIT ONE (Numeric) � [999.0] COMMA (MidNum) � [0.3]
+ assertAnalyzesTo(analyzer, "\u3031\u0308\u0031\u002C",
+ new String[] { "\u3031\u0308", "\u0031" });
+
+ // � 3031 � 0031 � 002E � 2060 � # � [0.2] VERTICAL KANA REPEAT MARK (Katakana) � [999.0] DIGIT ONE (Numeric) � [999.0] FULL STOP (MidNumLet) � [4.0] WORD JOINER (Format_FE) � [0.3]
+ assertAnalyzesTo(analyzer, "\u3031\u0031\u002E\u2060",
+ new String[] { "\u3031", "\u0031" });
+
+ // � 3031 � 0308 � 0031 � 002E � 2060 � # � [0.2] VERTICAL KANA REPEAT MARK (Katakana) � [4.0] COMBINING DIAERESIS (Extend_FE) � [999.0] DIGIT ONE (Numeric) � [999.0] FULL STOP (MidNumLet) � [4.0] WORD JOINER (Format_FE) � [0.3]
+ assertAnalyzesTo(analyzer, "\u3031\u0308\u0031\u002E\u2060",
+ new String[] { "\u3031\u0308", "\u0031" });
+
+ // ÷ 0041 ÷ 0001 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3]
+ assertAnalyzesTo(analyzer, "\u0041\u0001",
+ new String[] { "\u0041" });
+
+ // ÷ 0041 × 0308 ÷ 0001 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3]
+ assertAnalyzesTo(analyzer, "\u0041\u0308\u0001",
+ new String[] { "\u0041\u0308" });
+
+ // � 0041 � 000D � # � [0.2] LATIN CAPITAL LETTER A (ALetter) � [3.2] <CARRIAGE RETURN (CR)> (CR) � [0.3]
+ assertAnalyzesTo(analyzer, "\u0041\r",
+ new String[] { "\u0041" });
+
+ // � 0041 � 0308 � 000D � # � [0.2] LATIN CAPITAL LETTER A (ALetter) � [4.0] COMBINING DIAERESIS (Extend_FE) � [3.2] <CARRIAGE RETURN (CR)> (CR) � [0.3]
+ assertAnalyzesTo(analyzer, "\u0041\u0308\r",
+ new String[] { "\u0041\u0308" });
+
+ // � 0041 � 000A � # � [0.2] LATIN CAPITAL LETTER A (ALetter) � [3.2] <LINE FEED (LF)> (LF) � [0.3]
+ assertAnalyzesTo(analyzer, "\u0041\n",
+ new String[] { "\u0041" });
+
+ // � 0041 � 0308 � 000A � # � [0.2] LATIN CAPITAL LETTER A (ALetter) � [4.0] COMBINING DIAERESIS (Extend_FE) � [3.2] <LINE FEED (LF)> (LF) � [0.3]
+ assertAnalyzesTo(analyzer, "\u0041\u0308\n",
+ new String[] { "\u0041\u0308" });
+
+ // � 0041 � 000B � # � [0.2] LATIN CAPITAL LETTER A (ALetter) � [3.2] <LINE TABULATION> (Newline) � [0.3]
+ assertAnalyzesTo(analyzer, "\u0041\u000B",
+ new String[] { "\u0041" });
+
+ // � 0041 � 0308 � 000B � # � [0.2] LATIN CAPITAL LETTER A (ALetter) � [4.0] COMBINING DIAERESIS (Extend_FE) � [3.2] <LINE TABULATION> (Newline) � [0.3]
+ assertAnalyzesTo(analyzer, "\u0041\u0308\u000B",
+ new String[] { "\u0041\u0308" });
+
+ // ÷ 0041 ÷ 3031 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) ÷ [999.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3]
+ assertAnalyzesTo(analyzer, "\u0041\u3031",
+ new String[] { "\u0041", "\u3031" });
+
+ // � 0041 � 0308 � 3031 � # � [0.2] LATIN CAPITAL LETTER A (ALetter) � [4.0] COMBINING DIAERESIS (Extend_FE) � [999.0] VERTICAL KANA REPEAT MARK (Katakana) � [0.3]
+ assertAnalyzesTo(analyzer, "\u0041\u0308\u3031",
+ new String[] { "\u0041\u0308", "\u3031" });
+
+ // ÷ 0041 × 0041 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [5.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3]
+ assertAnalyzesTo(analyzer, "\u0041\u0041",
+ new String[] { "\u0041\u0041" });
+
+ // � 0041 � 0308 � 0041 � # � [0.2] LATIN CAPITAL LETTER A (ALetter) � [4.0] COMBINING DIAERESIS (Extend_FE) � [5.0] LATIN CAPITAL LETTER A (ALetter) � [0.3]
+ assertAnalyzesTo(analyzer, "\u0041\u0308\u0041",
+ new String[] { "\u0041\u0308\u0041" });
+
+ // � 0041 � 003A � # � [0.2] LATIN CAPITAL LETTER A (ALetter) � [999.0] COLON (MidLetter) � [0.3]
+ assertAnalyzesTo(analyzer, "\u0041\u003A",
+ new String[] { "\u0041" });
+
+ // � 0041 � 0308 � 003A � # � [0.2] LATIN CAPITAL LETTER A (ALetter) � [4.0] COMBINING DIAERESIS (Extend_FE) � [999.0] COLON (MidLetter) � [0.3]
+ assertAnalyzesTo(analyzer, "\u0041\u0308\u003A",
+ new String[] { "\u0041\u0308" });
+
+ // � 0041 � 002C � # � [0.2] LATIN CAPITAL LETTER A (ALetter) � [999.0] COMMA (MidNum) � [0.3]
+ assertAnalyzesTo(analyzer, "\u0041\u002C",
+ new String[] { "\u0041" });
+
+ // � 0041 � 0308 � 002C � # � [0.2] LATIN CAPITAL LETTER A (ALetter) � [4.0] COMBINING DIAERESIS (Extend_FE) � [999.0] COMMA (MidNum) � [0.3]
+ assertAnalyzesTo(analyzer, "\u0041\u0308\u002C",
+ new String[] { "\u0041\u0308" });
+
+ // � 0041 � 002E � # � [0.2] LATIN CAPITAL LETTER A (ALetter) � [999.0] FULL STOP (MidNumLet) � [0.3]
+ assertAnalyzesTo(analyzer, "\u0041\u002E",
+ new String[] { "\u0041" });
+
+ // � 0041 � 0308 � 002E � # � [0.2] LATIN CAPITAL LETTER A (ALetter) � [4.0] COMBINING DIAERESIS (Extend_FE) � [999.0] FULL STOP (MidNumLet) � [0.3]
+ assertAnalyzesTo(analyzer, "\u0041\u0308\u002E",
+ new String[] { "\u0041\u0308" });
+
+ // � 0041 � 0030 � # � [0.2] LATIN CAPITAL LETTER A (ALetter) � [9.0] DIGIT ZERO (Numeric) � [0.3]
+ assertAnalyzesTo(analyzer, "\u0041\u0030",
+ new String[] { "\u0041\u0030" });
+
+ // � 0041 � 0308 � 0030 � # � [0.2] LATIN CAPITAL LETTER A (ALetter) � [4.0] COMBINING DIAERESIS (Extend_FE) � [9.0] DIGIT ZERO (Numeric) � [0.3]
+ assertAnalyzesTo(analyzer, "\u0041\u0308\u0030",
+ new String[] { "\u0041\u0308\u0030" });
+
+ // � 0041 � 005F � # � [0.2] LATIN CAPITAL LETTER A (ALetter) � [13.1] LOW LINE (ExtendNumLet) � [0.3]
+ assertAnalyzesTo(analyzer, "\u0041\u005F",
+ new String[] { "\u0041\u005F" });
+
+ // � 0041 � 0308 � 005F � # � [0.2] LATIN CAPITAL LETTER A (ALetter) � [4.0] COMBINING DIAERESIS (Extend_FE) � [13.1] LOW LINE (ExtendNumLet) � [0.3]
+ assertAnalyzesTo(analyzer, "\u0041\u0308\u005F",
+ new String[] { "\u0041\u0308\u005F" });
+
+ // ÷ 0041 ÷ 1F1E6 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) ÷ [999.0] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) ÷ [0.3]
+ assertAnalyzesTo(analyzer, "\u0041\uD83C\uDDE6",
+ new String[] { "\u0041" });
+
+ // � 0041 � 0308 � 1F1E6 � # � [0.2] LATIN CAPITAL LETTER A (ALetter) � [4.0] COMBINING DIAERESIS (Extend_FE) � [999.0] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) � [0.3]
+ assertAnalyzesTo(analyzer, "\u0041\u0308\uD83C\uDDE6",
+ new String[] { "\u0041\u0308" });
+
+ // � 0041 � 05D0 � # � [0.2] LATIN CAPITAL LETTER A (ALetter) � [5.0] HEBREW LETTER ALEF (Hebrew_Letter) � [0.3]
+ assertAnalyzesTo(analyzer, "\u0041\u05D0",
+ new String[] { "\u0041\u05D0" });
+
+ // � 0041 � 0308 � 05D0 � # � [0.2] LATIN CAPITAL LETTER A (ALetter) � [4.0] COMBINING DIAERESIS (Extend_FE) � [5.0] HEBREW LETTER ALEF (Hebrew_Letter) � [0.3]
+ assertAnalyzesTo(analyzer, "\u0041\u0308\u05D0",
+ new String[] { "\u0041\u0308\u05D0" });
+
+ // � 0041 � 0022 � # � [0.2] LATIN CAPITAL LETTER A (ALetter) � [999.0] QUOTATION MARK (Double_Quote) � [0.3]
+ assertAnalyzesTo(analyzer, "\u0041\"",
+ new String[] { "\u0041" });
+
+ // � 0041 � 0308 � 0022 � # � [0.2] LATIN CAPITAL LETTER A (ALetter) � [4.0] COMBINING DIAERESIS (Extend_FE) � [999.0] QUOTATION MARK (Double_Quote) � [0.3]
+ assertAnalyzesTo(analyzer, "\u0041\u0308\"",
+ new String[] { "\u0041\u0308" });
+
+ // � 0041 � 0027 � # � [0.2] LATIN CAPITAL LETTER A (ALetter) � [999.0] APOSTROPHE (Single_Quote) � [0.3]
+ assertAnalyzesTo(analyzer, "\u0041\u0027",
+ new String[] { "\u0041" });
+
+ // � 0041 � 0308 � 0027 � # � [0.2] LATIN CAPITAL LETTER A (ALetter) � [4.0] COMBINING DIAERESIS (Extend_FE) � [999.0] APOSTROPHE (Single_Quote) � [0.3]
+ assertAnalyzesTo(analyzer, "\u0041\u0308\u0027",
+ new String[] { "\u0041\u0308" });
+
+ // � 0041 � 00AD � # � [0.2] LATIN CAPITAL LETTER A (ALetter) � [4.0] SOFT HYPHEN (Format_FE) � [0.3]
+ assertAnalyzesTo(analyzer, "\u0041\u00AD",
+ new String[] { "\u0041\u00AD" });
+
+ // � 0041 � 0308 � 00AD � # � [0.2] LATIN CAPITAL LETTER A (ALetter) � [4.0] COMBINING DIAERESIS (Extend_FE) � [4.0] SOFT HYPHEN (Format_FE) � [0.3]
+ assertAnalyzesTo(analyzer, "\u0041\u0308\u00AD",
+ new String[] { "\u0041\u0308\u00AD" });
+
+ // � 0041 � 0300 � # � [0.2] LATIN CAPITAL LETTER A (ALetter) � [4.0] COMBINING GRAVE ACCENT (Extend_FE) � [0.3]
+ assertAnalyzesTo(analyzer, "\u0041\u0300",
+ new String[] { "\u0041\u0300" });
+
+ // � 0041 � 0308 � 0300 � # � [0.2] LATIN CAPITAL LETTER A (ALetter) � [4.0] COMBINING DIAERESIS (Extend_FE) � [4.0] COMBINING GRAVE ACCENT (Extend_FE) � [0.3]
+ assertAnalyzesTo(analyzer, "\u0041\u0308\u0300",
+ new String[] { "\u0041\u0308\u0300" });
+
+ // � 0041 � 0061 � 2060 � # � [0.2] LATIN CAPITAL LETTER A (ALetter) � [5.0] LATIN SMALL LETTER A (ALetter) � [4.0] WORD JOINER (Format_FE) � [0.3]
+ assertAnalyzesTo(analyzer, "\u0041\u0061\u2060",
+ new String[] { "\u0041\u0061\u2060" });
+
+ // � 0041 � 0308 � 0061 � 2060 � # � [0.2] LATIN CAPITAL LETTER A (ALetter) � [4.0] COMBINING DIAERESIS (Extend_FE) � [5.0] LATIN SMALL LETTER A (ALetter) � [4.0] WORD JOINER (Format_FE) � [0.3]
+ assertAnalyzesTo(analyzer, "\u0041\u0308\u0061\u2060",
+ new String[] { "\u0041\u0308\u0061\u2060" });
+
+ // � 0041 � 0061 � 003A � # � [0.2] LATIN CAPITAL LETTER A (ALetter) � [5.0] LATIN SMALL LETTER A (ALetter) � [999.0] COLON (MidLetter) � [0.3]
+ assertAnalyzesTo(analyzer, "\u0041\u0061\u003A",
+ new String[] { "\u0041\u0061" });
+
+ // � 0041 � 0308 � 0061 � 003A � # � [0.2] LATIN CAPITAL LETTER A (ALetter) � [4.0] COMBINING DIAERESIS (Extend_FE) � [5.0] LATIN SMALL LETTER A (ALetter) � [999.0] COLON (MidLetter) � [0.3]
+ assertAnalyzesTo(analyzer, "\u0041\u0308\u0061\u003A",
+ new String[] { "\u0041\u0308\u0061" });
+
+ // � 0041 � 0061 � 0027 � # � [0.2] LATIN CAPITAL LETTER A (ALetter) � [5.0] LATIN SMALL LETTER A (ALetter) � [999.0] APOSTROPHE (Single_Quote) � [0.3]
+ assertAnalyzesTo(analyzer, "\u0041\u0061\u0027",
+ new String[] { "\u0041\u0061" });
+
+ // � 0041 � 0308 � 0061 � 0027 � # � [0.2] LATIN CAPITAL LETTER A (ALetter) � [4.0] COMBINING DIAERESIS (Extend_FE) � [5.0] LATIN SMALL LETTER A (ALetter) � [999.0] APOSTROPHE (Single_Quote) � [0.3]
+ assertAnalyzesTo(analyzer, "\u0041\u0308\u0061\u0027",
+ new String[] { "\u0041\u0308\u0061" });
+
+ // � 0041 � 0061 � 0027 � 2060 � # � [0.2] LATIN CAPITAL LETTER A (ALetter) � [5.0] LATIN SMALL LETTER A (ALetter) � [999.0] APOSTROPHE (Single_Quote) � [4.0] WORD JOINER (Format_FE) � [0.3]
+ assertAnalyzesTo(analyzer, "\u0041\u0061\u0027\u2060",
+ new String[] { "\u0041\u0061" });
+
+ // � 0041 � 0308 � 0061 � 0027 � 2060 � # � [0.2] LATIN CAPITAL LETTER A (ALetter) � [4.0] COMBINING DIAERESIS (Extend_FE) � [5.0] LATIN SMALL LETTER A (ALetter) � [999.0] APOSTROPHE (Single_Quote) � [4.0] WORD JOINER (Format_FE) � [0.3]
+ assertAnalyzesTo(analyzer, "\u0041\u0308\u0061\u0027\u2060",
+ new String[] { "\u0041\u0308\u0061" });
+
+ // � 0041 � 0061 � 002C � # � [0.2] LATIN CAPITAL LETTER A (ALetter) � [5.0] LATIN SMALL LETTER A (ALetter) � [999.0] COMMA (MidNum) � [0.3]
+ assertAnalyzesTo(analyzer, "\u0041\u0061\u002C",
+ new String[] { "\u0041\u0061" });
+
+ // � 0041 � 0308 � 0061 � 002C � # � [0.2] LATIN CAPITAL LETTER A (ALetter) � [4.0] COMBINING DIAERESIS (Extend_FE) � [5.0] LATIN SMALL LETTER A (ALetter) � [999.0] COMMA (MidNum) � [0.3]
+ assertAnalyzesTo(analyzer, "\u0041\u0308\u0061\u002C",
+ new String[] { "\u0041\u0308\u0061" });
+
+ // � 0041 � 0031 � 003A � # � [0.2] LATIN CAPITAL LETTER A (ALetter) � [9.0] DIGIT ONE (Numeric) � [999.0] COLON (MidLetter) � [0.3]
+ assertAnalyzesTo(analyzer, "\u0041\u0031\u003A",
+ new String[] { "\u0041\u0031" });
+
+ // � 0041 � 0308 � 0031 � 003A � # � [0.2] LATIN CAPITAL LETTER A (ALetter) � [4.0] COMBINING DIAERESIS (Extend_FE) � [9.0] DIGIT ONE (Numeric) � [999.0] COLON (MidLetter) � [0.3]
+ assertAnalyzesTo(analyzer, "\u0041\u0308\u0031\u003A",
+ new String[] { "\u0041\u0308\u0031" });
+
+ // � 0041 � 0031 � 0027 � # � [0.2] LATIN CAPITAL LETTER A (ALetter) � [9.0] DIGIT ONE (Numeric) � [999.0] APOSTROPHE (Single_Quote) � [0.3]
+ assertAnalyzesTo(analyzer, "\u0041\u0031\u0027",
+ new String[] { "\u0041\u0031" });
+
+ // � 0041 � 0308 � 0031 � 0027 � # � [0.2] LATIN CAPITAL LETTER A (ALetter) � [4.0] COMBINING DIAERESIS (Extend_FE) � [9.0] DIGIT ONE (Numeric) � [999.0] APOSTROPHE (Single_Quote) � [0.3]
+ assertAnalyzesTo(analyzer, "\u0041\u0308\u0031\u0027",
+ new String[] { "\u0041\u0308\u0031" });
+
+ // � 0041 � 0031 � 002C � # � [0.2] LATIN CAPITAL LETTER A (ALetter) � [9.0] DIGIT ONE (Numeric) � [999.0] COMMA (MidNum) � [0.3]
+ assertAnalyzesTo(analyzer, "\u0041\u0031\u002C",
+ new String[] { "\u0041\u0031" });
+
+ // � 0041 � 0308 � 0031 � 002C � # � [0.2] LATIN CAPITAL LETTER A (ALetter) � [4.0] COMBINING DIAERESIS (Extend_FE) � [9.0] DIGIT ONE (Numeric) � [999.0] COMMA (MidNum) � [0.3]
+ assertAnalyzesTo(analyzer, "\u0041\u0308\u0031\u002C",
+ new String[] { "\u0041\u0308\u0031" });
+
+ // � 0041 � 0031 � 002E � 2060 � # � [0.2] LATIN CAPITAL LETTER A (ALetter) � [9.0] DIGIT ONE (Numeric) � [999.0] FULL STOP (MidNumLet) � [4.0] WORD JOINER (Format_FE) � [0.3]
+ assertAnalyzesTo(analyzer, "\u0041\u0031\u002E\u2060",
+ new String[] { "\u0041\u0031" });
+
+ // � 0041 � 0308 � 0031 � 002E � 2060 � # � [0.2] LATIN CAPITAL LETTER A (ALetter) � [4.0] COMBINING DIAERESIS (Extend_FE) � [9.0] DIGIT ONE (Numeric) � [999.0] FULL STOP (MidNumLet) � [4.0] WORD JOINER (Format_FE) � [0.3]
+ assertAnalyzesTo(analyzer, "\u0041\u0308\u0031\u002E\u2060",
+ new String[] { "\u0041\u0308\u0031" });
+
+ // ÷ 003A ÷ 0001 ÷ # ÷ [0.2] COLON (MidLetter) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3]
+ assertAnalyzesTo(analyzer, "\u003A\u0001",
+ new String[] { });
+
+ // � 003A � 0308 � 0001 � # � [0.2] COLON (MidLetter) � [4.0] COMBINING DIAERESIS (Extend_FE) � [999.0] <START OF HEADING> (Other) � [0.3]
+ assertAnalyzesTo(analyzer, "\u003A\u0308\u0001",
+ new String[] { });
+
+ // � 003A � 000D � # � [0.2] COLON (MidLetter) � [3.2] <CARRIAGE RETURN (CR)> (CR) � [0.3]
+ assertAnalyzesTo(analyzer, "\u003A\r",
+ new String[] { });
+
+ // � 003A � 0308 � 000D � # � [0.2] COLON (MidLetter) � [4.0] COMBINING DIAERESIS (Extend_FE) � [3.2] <CARRIAGE RETURN (CR)> (CR) � [0.3]
+ assertAnalyzesTo(analyzer, "\u003A\u0308\r",
+ new String[] { });
+
+ // � 003A � 000A � # � [0.2] COLON (MidLetter) � [3.2] <LINE FEED (LF)> (LF) � [0.3]
+ assertAnalyzesTo(analyzer, "\u003A\n",
+ new String[] { });
+
+ // � 003A � 0308 � 000A � # � [0.2] COLON (MidLetter) � [4.0] COMBINING DIAERESIS (Extend_FE) � [3.2] <LINE FEED (LF)> (LF) � [0.3]
+ assertAnalyzesTo(analyzer, "\u003A\u0308\n",
+ new String[] { });
+
+ // � 003A � 000B � # � [0.2] COLON (MidLetter) � [3.2] <LINE TABULATION> (Newline) � [0.3]
+ assertAnalyzesTo(analyzer, "\u003A\u000B",
+ new String[] { });
+
+ // � 003A � 0308 � 000B � # � [0.2] COLON (MidLetter) � [4.0] COMBINING DIAERESIS (Extend_FE) � [3.2] <LINE TABULATION> (Newline) � [0.3]
+ assertAnalyzesTo(analyzer, "\u003A\u0308\u000B",
+ new String[] { });
+
+ // � 003A � 3031 � # � [0.2] COLON (MidLetter) � [999.0] VERTICAL KANA REPEAT MARK (Katakana) � [0.3]
+ assertAnalyzesTo(analyzer, "\u003A\u3031",
+ new String[] { "\u3031" });
+
+ // � 003A � 0308 � 3031 � # � [0.2] COLON (MidLetter) � [4.0] COMBINING DIAERESIS (Extend_FE) � [999.0] VERTICAL KANA REPEAT MARK (Katakana) � [0.3]
+ assertAnalyzesTo(analyzer, "\u003A\u0308\u3031",
+ new String[] { "\u3031" });
+
+ // � 003A � 0041 � # � [0.2] COLON (MidLetter) � [999.0] LATIN CAPITAL LETTER A (ALetter) � [0.3]
+ assertAnalyzesTo(analyzer, "\u003A\u0041",
+ new String[] { "\u0041" });
+
+ // � 003A � 0308 � 0041 � # � [0.2] COLON (MidLetter) � [4.0] COMBINING DIAERESIS (Extend_FE) � [999.0] LATIN CAPITAL LETTER A (ALetter) � [0.3]
+ assertAnalyzesTo(analyzer, "\u003A\u0308\u0041",
+ new String[] { "\u0041" });
+
+ // � 003A � 003A � # � [0.2] COLON (MidLetter) � [999.0] COLON (MidLetter) � [0.3]
+ assertAnalyzesTo(analyzer, "\u003A\u003A",
+ new String[] { });
+
+ // � 003A � 0308 � 003A � # � [0.2] COLON (MidLetter) � [4.0] COMBINING DIAERESIS (Extend_FE) � [999.0] COLON (MidLetter) � [0.3]
+ assertAnalyzesTo(analyzer, "\u003A\u0308\u003A",
+ new String[] { });
+
+ // � 003A � 002C � # � [0.2] COLON (MidLetter) � [999.0] COMMA (MidNum) � [0.3]
+ assertAnalyzesTo(analyzer, "\u003A\u002C",
+ new String[] { });
+
+ // � 003A � 0308 � 002C � # � [0.2] COLON (MidLetter) � [4.0] COMBINING DIAERESIS (Extend_FE) � [999.0] COMMA (MidNum) � [0.3]
+ assertAnalyzesTo(analyzer, "\u003A\u0308\u002C",
+ new String[] { });
+
+ // � 003A � 002E � # � [0.2] COLON (MidLetter) � [999.0] FULL STOP (MidNumLet) � [0.3]
+ assertAnalyzesTo(analyzer, "\u003A\u002E",
+ new String[] { });
+
+ // � 003A � 0308 � 002E � # � [0.2] COLON (MidLetter) � [4.0] COMBINING DIAERESIS (Extend_FE) � [999.0] FULL STOP (MidNumLet) � [0.3]
+ assertAnalyzesTo(analyzer, "\u003A\u0308\u002E",
+ new String[] { });
+
+ // � 003A � 0030 � # � [0.2] COLON (MidLetter) � [999.0] DIGIT ZERO (Numeric) � [0.3]
+ assertAnalyzesTo(analyzer, "\u003A\u0030",
+ new String[] { "\u0030" });
+
+ // � 003A � 0308 � 0030 � # � [0.2] COLON (MidLetter) � [4.0] COMBINING DIAERESIS (Extend_FE) � [999.0] DIGIT ZERO (Numeric) � [0.3]
+ assertAnalyzesTo(analyzer, "\u003A\u0308\u0030",
+ new String[] { "\u0030" });
+
+ // � 003A � 005F � # � [0.2] COLON (MidLetter) � [999.0] LOW LINE (ExtendNumLet) � [0.3]
+ assertAnalyzesTo(analyzer, "\u003A\u005F",
+ new String[] { });
+
+ // � 003A � 0308 � 005F � # � [0.2] COLON (MidLetter) � [4.0] COMBINING DIAERESIS (Extend_FE) � [999.0] LOW LINE (ExtendNumLet) � [0.3]
+ assertAnalyzesTo(analyzer, "\u003A\u0308\u005F",
+ new String[] { });
+
+ // � 003A � 1F1E6 � # � [0.2] COLON (MidLetter) � [999.0] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) � [0.3]
+ assertAnalyzesTo(analyzer, "\u003A\uD83C\uDDE6",
+ new String[] { });
+
+ // � 003A � 0308 � 1F1E6 � # � [0.2] COLON (MidLetter) � [4.0] COMBINING DIAERESIS (Extend_FE) � [999.0] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) � [0.3]
+ assertAnalyzesTo(analyzer, "\u003A\u0308\uD83C\uDDE6",
+ new String[] { });
+
+ // � 003A � 05D0 � # � [0.2] COLON (MidLetter) � [999.0] HEBREW LETTER ALEF (Hebrew_Letter) � [0.3]
+ assertAnalyzesTo(analyzer, "\u003A\u05D0",
+ new String[] { "\u05D0" });
+
+ // � 003A � 0308 � 05D0 � # � [0.2] COLON (MidLetter) � [4.0] COMBINING DIAERESIS (Extend_FE) � [999.0] HEBREW LETTER ALEF (Hebrew_Letter) � [0.3]
+ assertAnalyzesTo(analyzer, "\u003A\u0308\u05D0",
+ new String[] { "\u05D0" });
+
+ // � 003A � 0022 � # � [0.2] COLON (MidLetter) � [999.0] QUOTATION MARK (Double_Quote) � [0.3]
+ assertAnalyzesTo(analyzer, "\u003A\"",
+ new String[] { });
+
+ // � 003A � 0308 � 0022 � # � [0.2] COLON (MidLetter) � [4.0] COMBINING DIAERESIS (Extend_FE) � [999.0] QUOTATION MARK (Double_Quote) � [0.3]
+ assertAnalyzesTo(analyzer, "\u003A\u0308\"",
+ new String[] { });
+
+ // � 003A � 0027 � # � [0.2] COLON (MidLetter) � [999.0] APOSTROPHE (Single_Quote) � [0.3]
+ assertAnalyzesTo(analyzer, "\u003A\u0027",
+ new String[] { });
+
+ // � 003A � 0308 � 0027 � # � [0.2] COLON (MidLetter) � [4.0] COMBINING DIAERESIS (Extend_FE) � [999.0] APOSTROPHE (Single_Quote) � [0.3]
+ assertAnalyzesTo(analyzer, "\u003A\u0308\u0027",
+ new String[] { });
+
+ // ÷ 003A × 00AD ÷ # ÷ [0.2] COLON (MidLetter) × [4.0] SOFT HYPHEN (Format_FE) ÷ [0.3]
+ assertAnalyzesTo(analyzer, "\u003A\u00AD",
+ new String[] { });
+
+ // � 003A � 0308 � 00AD � # � [0.2] COLON (MidLetter) � [4.0] COMBINING DIAERESIS (Extend_FE) � [4.0] SOFT HYPHEN (Format_FE) � [0.3]
+ assertAnalyzesTo(analyzer, "\u003A\u0308\u00AD",
+ new String[] { });
+
+ // � 003A � 0300 � # � [0.2] COLON (MidLetter) � [4.0] COMBINING GRAVE ACCENT (Extend_FE) � [0.3]
+ assertAnalyzesTo(analyzer, "\u003A\u0300",
+ new String[] { });
+
+ // � 003A � 0308 � 0300 � # � [0.2] COLON (MidLetter) � [4.0] COMBINING DIAERESIS (Extend_FE) � [4.0] COMBINING GRAVE ACCENT (Extend_FE) � [0.3]
+ assertAnalyzesTo(analyzer, "\u003A\u0308\u0300",
+ new String[
<TRUNCATED>
[09/12] lucene-solr:branch_6x: LUCENE-7318: graduate StandardAnalyzer
and make it the default for IndexWriterConfig
Posted by mi...@apache.org.
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestAnalyzers.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestAnalyzers.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestAnalyzers.java
index 9772203..8f7f2cd 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestAnalyzers.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestAnalyzers.java
@@ -22,6 +22,7 @@ import java.io.StringReader;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.LowerCaseFilter;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestBugInSomething.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestBugInSomething.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestBugInSomething.java
index 783811a..1d17237 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestBugInSomething.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestBugInSomething.java
@@ -25,6 +25,7 @@ import java.util.HashSet;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.CharFilter;
import org.apache.lucene.analysis.MockCharFilter;
import org.apache.lucene.analysis.MockTokenFilter;
@@ -39,7 +40,6 @@ import org.apache.lucene.analysis.miscellaneous.WordDelimiterFilter;
import org.apache.lucene.analysis.ngram.EdgeNGramTokenizer;
import org.apache.lucene.analysis.ngram.NGramTokenFilter;
import org.apache.lucene.analysis.shingle.ShingleFilter;
-import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.analysis.wikipedia.WikipediaTokenizer;
import org.apache.lucene.util.LuceneTestCase.SuppressCodecs;
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestRandomChains.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestRandomChains.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestRandomChains.java
index bf02ccd..4effc79 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestRandomChains.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestRandomChains.java
@@ -50,6 +50,8 @@ import java.util.regex.Pattern;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.CachingTokenFilter;
+import org.apache.lucene.analysis.CharArrayMap;
+import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.CharFilter;
import org.apache.lucene.analysis.CrankyTokenFilter;
import org.apache.lucene.analysis.MockGraphTokenFilter;
@@ -73,8 +75,8 @@ import org.apache.lucene.analysis.miscellaneous.HyphenatedWordsFilter;
import org.apache.lucene.analysis.miscellaneous.LimitTokenCountFilter;
import org.apache.lucene.analysis.miscellaneous.LimitTokenOffsetFilter;
import org.apache.lucene.analysis.miscellaneous.LimitTokenPositionFilter;
-import org.apache.lucene.analysis.miscellaneous.StemmerOverrideFilter;
import org.apache.lucene.analysis.miscellaneous.StemmerOverrideFilter.StemmerOverrideMap;
+import org.apache.lucene.analysis.miscellaneous.StemmerOverrideFilter;
import org.apache.lucene.analysis.miscellaneous.WordDelimiterFilter;
import org.apache.lucene.analysis.path.PathHierarchyTokenizer;
import org.apache.lucene.analysis.path.ReversePathHierarchyTokenizer;
@@ -83,8 +85,6 @@ import org.apache.lucene.analysis.payloads.PayloadEncoder;
import org.apache.lucene.analysis.snowball.TestSnowball;
import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.analysis.synonym.SynonymMap;
-import org.apache.lucene.analysis.util.CharArrayMap;
-import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.analysis.wikipedia.WikipediaTokenizer;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.AttributeFactory;
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestStopAnalyzer.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestStopAnalyzer.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestStopAnalyzer.java
index f7552c8..bbf9502 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestStopAnalyzer.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestStopAnalyzer.java
@@ -17,16 +17,16 @@
package org.apache.lucene.analysis.core;
-import org.apache.lucene.analysis.BaseTokenStreamTestCase;
-import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
-import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
-import org.apache.lucene.analysis.util.CharArraySet;
-
import java.io.IOException;
+import java.util.HashSet;
import java.util.Iterator;
import java.util.Set;
-import java.util.HashSet;
+
+import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.CharArraySet;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
public class TestStopAnalyzer extends BaseTokenStreamTestCase {
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestStopFilter.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestStopFilter.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestStopFilter.java
deleted file mode 100644
index 25b89d9..0000000
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestStopFilter.java
+++ /dev/null
@@ -1,176 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.lucene.analysis.core;
-
-import java.io.IOException;
-import java.io.StringReader;
-import java.util.ArrayList;
-
-import org.apache.lucene.analysis.BaseTokenStreamTestCase;
-import org.apache.lucene.analysis.MockTokenizer;
-import org.apache.lucene.analysis.TokenFilter;
-import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
-import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
-import org.apache.lucene.analysis.util.CharArraySet;
-import org.apache.lucene.util.English;
-
-public class TestStopFilter extends BaseTokenStreamTestCase {
-
- // other StopFilter functionality is already tested by TestStopAnalyzer
-
- public void testExactCase() throws IOException {
- StringReader reader = new StringReader("Now is The Time");
- CharArraySet stopWords = new CharArraySet(asSet("is", "the", "Time"), false);
- final MockTokenizer in = new MockTokenizer(MockTokenizer.WHITESPACE, false);
- in.setReader(reader);
- TokenStream stream = new StopFilter(in, stopWords);
- assertTokenStreamContents(stream, new String[] { "Now", "The" });
- }
-
- public void testStopFilt() throws IOException {
- StringReader reader = new StringReader("Now is The Time");
- String[] stopWords = new String[] { "is", "the", "Time" };
- CharArraySet stopSet = StopFilter.makeStopSet(stopWords);
- final MockTokenizer in = new MockTokenizer(MockTokenizer.WHITESPACE, false);
- in.setReader(reader);
- TokenStream stream = new StopFilter(in, stopSet);
- assertTokenStreamContents(stream, new String[] { "Now", "The" });
- }
-
- /**
- * Test Position increments applied by StopFilter with and without enabling this option.
- */
- public void testStopPositons() throws IOException {
- StringBuilder sb = new StringBuilder();
- ArrayList<String> a = new ArrayList<>();
- for (int i=0; i<20; i++) {
- String w = English.intToEnglish(i).trim();
- sb.append(w).append(" ");
- if (i%3 != 0) a.add(w);
- }
- log(sb.toString());
- String stopWords[] = a.toArray(new String[0]);
- for (int i=0; i<a.size(); i++) log("Stop: "+stopWords[i]);
- CharArraySet stopSet = StopFilter.makeStopSet(stopWords);
- // with increments
- StringReader reader = new StringReader(sb.toString());
- final MockTokenizer in = new MockTokenizer(MockTokenizer.WHITESPACE, false);
- in.setReader(reader);
- StopFilter stpf = new StopFilter(in, stopSet);
- doTestStopPositons(stpf);
- // with increments, concatenating two stop filters
- ArrayList<String> a0 = new ArrayList<>();
- ArrayList<String> a1 = new ArrayList<>();
- for (int i=0; i<a.size(); i++) {
- if (i%2==0) {
- a0.add(a.get(i));
- } else {
- a1.add(a.get(i));
- }
- }
- String stopWords0[] = a0.toArray(new String[0]);
- for (int i=0; i<a0.size(); i++) log("Stop0: "+stopWords0[i]);
- String stopWords1[] = a1.toArray(new String[0]);
- for (int i=0; i<a1.size(); i++) log("Stop1: "+stopWords1[i]);
- CharArraySet stopSet0 = StopFilter.makeStopSet(stopWords0);
- CharArraySet stopSet1 = StopFilter.makeStopSet(stopWords1);
- reader = new StringReader(sb.toString());
- final MockTokenizer in1 = new MockTokenizer(MockTokenizer.WHITESPACE, false);
- in1.setReader(reader);
- StopFilter stpf0 = new StopFilter(in1, stopSet0); // first part of the set
- StopFilter stpf01 = new StopFilter(stpf0, stopSet1); // two stop filters concatenated!
- doTestStopPositons(stpf01);
- }
-
- // LUCENE-3849: make sure after .end() we see the "ending" posInc
- public void testEndStopword() throws Exception {
- CharArraySet stopSet = StopFilter.makeStopSet("of");
- final MockTokenizer in = new MockTokenizer(MockTokenizer.WHITESPACE, false);
- in.setReader(new StringReader("test of"));
- StopFilter stpf = new StopFilter(in, stopSet);
- assertTokenStreamContents(stpf, new String[] { "test" },
- new int[] {0},
- new int[] {4},
- null,
- new int[] {1},
- null,
- 7,
- 1,
- null,
- true);
- }
-
- private void doTestStopPositons(StopFilter stpf) throws IOException {
- CharTermAttribute termAtt = stpf.getAttribute(CharTermAttribute.class);
- PositionIncrementAttribute posIncrAtt = stpf.getAttribute(PositionIncrementAttribute.class);
- stpf.reset();
- for (int i=0; i<20; i+=3) {
- assertTrue(stpf.incrementToken());
- log("Token "+i+": "+stpf);
- String w = English.intToEnglish(i).trim();
- assertEquals("expecting token "+i+" to be "+w,w,termAtt.toString());
- assertEquals("all but first token must have position increment of 3",i==0?1:3,posIncrAtt.getPositionIncrement());
- }
- assertFalse(stpf.incrementToken());
- stpf.end();
- stpf.close();
- }
-
- // print debug info depending on VERBOSE
- private static void log(String s) {
- if (VERBOSE) {
- System.out.println(s);
- }
- }
-
- // stupid filter that inserts synonym of 'hte' for 'the'
- private class MockSynonymFilter extends TokenFilter {
- State bufferedState;
- CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
- PositionIncrementAttribute posIncAtt = addAttribute(PositionIncrementAttribute.class);
-
- MockSynonymFilter(TokenStream input) {
- super(input);
- }
-
- @Override
- public boolean incrementToken() throws IOException {
- if (bufferedState != null) {
- restoreState(bufferedState);
- posIncAtt.setPositionIncrement(0);
- termAtt.setEmpty().append("hte");
- bufferedState = null;
- return true;
- } else if (input.incrementToken()) {
- if (termAtt.toString().equals("the")) {
- bufferedState = captureState();
- }
- return true;
- } else {
- return false;
- }
- }
-
- @Override
- public void reset() throws IOException {
- super.reset();
- bufferedState = null;
- }
- }
-
-}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestStopFilterFactory.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestStopFilterFactory.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestStopFilterFactory.java
index 9fca6b9..f2d6fe3 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestStopFilterFactory.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestStopFilterFactory.java
@@ -17,8 +17,8 @@
package org.apache.lucene.analysis.core;
+import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.util.BaseTokenStreamFactoryTestCase;
-import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.analysis.util.ClasspathResourceLoader;
import org.apache.lucene.analysis.util.ResourceLoader;
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/common/src/test/org/apache/lucene/analysis/cz/TestCzechAnalyzer.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/cz/TestCzechAnalyzer.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/cz/TestCzechAnalyzer.java
index 75ec358..966b1fd 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/cz/TestCzechAnalyzer.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/cz/TestCzechAnalyzer.java
@@ -19,9 +19,9 @@ package org.apache.lucene.analysis.cz;
import java.io.IOException;
-import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.util.CharArraySet;
+import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.CharArraySet;
/**
* Test the CzechAnalyzer
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/common/src/test/org/apache/lucene/analysis/cz/TestCzechStemmer.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/cz/TestCzechStemmer.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/cz/TestCzechStemmer.java
index 7463f1d..3d45d57 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/cz/TestCzechStemmer.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/cz/TestCzechStemmer.java
@@ -22,11 +22,11 @@ import java.io.StringReader;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.KeywordTokenizer;
import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
-import org.apache.lucene.analysis.util.CharArraySet;
/**
* Test the Czech Stemmer.
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/common/src/test/org/apache/lucene/analysis/da/TestDanishAnalyzer.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/da/TestDanishAnalyzer.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/da/TestDanishAnalyzer.java
index 918962b..199981e 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/da/TestDanishAnalyzer.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/da/TestDanishAnalyzer.java
@@ -21,7 +21,7 @@ import java.io.IOException;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
-import org.apache.lucene.analysis.util.CharArraySet;
+import org.apache.lucene.analysis.CharArraySet;
public class TestDanishAnalyzer extends BaseTokenStreamTestCase {
/** This test fails with NPE when the
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanAnalyzer.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanAnalyzer.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanAnalyzer.java
index 0e1f093..4c52c0e 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanAnalyzer.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanAnalyzer.java
@@ -22,9 +22,9 @@ import java.io.StringReader;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.core.LowerCaseTokenizer;
import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
-import org.apache.lucene.analysis.util.CharArraySet;
public class TestGermanAnalyzer extends BaseTokenStreamTestCase {
public void testReusableTokenStream() throws Exception {
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanLightStemFilter.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanLightStemFilter.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanLightStemFilter.java
index 75c4499..cb67e93 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanLightStemFilter.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanLightStemFilter.java
@@ -21,12 +21,12 @@ import java.io.IOException;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.KeywordTokenizer;
import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
-import org.apache.lucene.analysis.util.CharArraySet;
import static org.apache.lucene.analysis.VocabularyAssert.*;
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanMinimalStemFilter.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanMinimalStemFilter.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanMinimalStemFilter.java
index 80228f7..35a8004 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanMinimalStemFilter.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanMinimalStemFilter.java
@@ -21,12 +21,12 @@ import java.io.IOException;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.KeywordTokenizer;
import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
-import org.apache.lucene.analysis.util.CharArraySet;
import static org.apache.lucene.analysis.VocabularyAssert.*;
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanStemFilter.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanStemFilter.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanStemFilter.java
index c0c522f..c9d3140 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanStemFilter.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/de/TestGermanStemFilter.java
@@ -22,13 +22,13 @@ import java.io.InputStream;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.CharArraySet;
+import org.apache.lucene.analysis.LowerCaseFilter;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.KeywordTokenizer;
-import org.apache.lucene.analysis.core.LowerCaseFilter;
import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
-import org.apache.lucene.analysis.util.CharArraySet;
import static org.apache.lucene.analysis.VocabularyAssert.*;
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/common/src/test/org/apache/lucene/analysis/en/TestEnglishAnalyzer.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/en/TestEnglishAnalyzer.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/en/TestEnglishAnalyzer.java
index 1fcbbbc..60aa6a8 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/en/TestEnglishAnalyzer.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/en/TestEnglishAnalyzer.java
@@ -21,7 +21,7 @@ import java.io.IOException;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
-import org.apache.lucene.analysis.util.CharArraySet;
+import org.apache.lucene.analysis.CharArraySet;
public class TestEnglishAnalyzer extends BaseTokenStreamTestCase {
/** This test fails with NPE when the
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/common/src/test/org/apache/lucene/analysis/en/TestPorterStemFilter.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/en/TestPorterStemFilter.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/en/TestPorterStemFilter.java
index 9563d00..be3e9c4 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/en/TestPorterStemFilter.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/en/TestPorterStemFilter.java
@@ -20,14 +20,14 @@ package org.apache.lucene.analysis.en;
import java.io.IOException;
import java.io.StringReader;
-import org.apache.lucene.analysis.BaseTokenStreamTestCase;
-import org.apache.lucene.analysis.core.KeywordTokenizer;
-import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
-import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.core.KeywordTokenizer;
+import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
import static org.apache.lucene.analysis.VocabularyAssert.*;
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/common/src/test/org/apache/lucene/analysis/es/TestSpanishAnalyzer.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/es/TestSpanishAnalyzer.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/es/TestSpanishAnalyzer.java
index 4bffffa..39d40f4 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/es/TestSpanishAnalyzer.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/es/TestSpanishAnalyzer.java
@@ -21,7 +21,7 @@ import java.io.IOException;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
-import org.apache.lucene.analysis.util.CharArraySet;
+import org.apache.lucene.analysis.CharArraySet;
public class TestSpanishAnalyzer extends BaseTokenStreamTestCase {
/** This test fails with NPE when the
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/common/src/test/org/apache/lucene/analysis/eu/TestBasqueAnalyzer.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/eu/TestBasqueAnalyzer.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/eu/TestBasqueAnalyzer.java
index d1f64b2..b2a3d68 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/eu/TestBasqueAnalyzer.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/eu/TestBasqueAnalyzer.java
@@ -21,7 +21,7 @@ import java.io.IOException;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
-import org.apache.lucene.analysis.util.CharArraySet;
+import org.apache.lucene.analysis.CharArraySet;
public class TestBasqueAnalyzer extends BaseTokenStreamTestCase {
/** This test fails with NPE when the
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/common/src/test/org/apache/lucene/analysis/fa/TestPersianAnalyzer.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/fa/TestPersianAnalyzer.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/fa/TestPersianAnalyzer.java
index 67982a2..8cad085 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/fa/TestPersianAnalyzer.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/fa/TestPersianAnalyzer.java
@@ -19,7 +19,7 @@ package org.apache.lucene.analysis.fa;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
-import org.apache.lucene.analysis.util.CharArraySet;
+import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.util.Version;
/**
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/common/src/test/org/apache/lucene/analysis/fi/TestFinnishAnalyzer.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/fi/TestFinnishAnalyzer.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/fi/TestFinnishAnalyzer.java
index e9880c0..83d6dba3 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/fi/TestFinnishAnalyzer.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/fi/TestFinnishAnalyzer.java
@@ -21,7 +21,7 @@ import java.io.IOException;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
-import org.apache.lucene.analysis.util.CharArraySet;
+import org.apache.lucene.analysis.CharArraySet;
public class TestFinnishAnalyzer extends BaseTokenStreamTestCase {
/** This test fails with NPE when the
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/common/src/test/org/apache/lucene/analysis/fi/TestFinnishLightStemFilter.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/fi/TestFinnishLightStemFilter.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/fi/TestFinnishLightStemFilter.java
index 09c2b4e..1313aaf 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/fi/TestFinnishLightStemFilter.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/fi/TestFinnishLightStemFilter.java
@@ -21,12 +21,12 @@ import java.io.IOException;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.KeywordTokenizer;
import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
-import org.apache.lucene.analysis.util.CharArraySet;
import static org.apache.lucene.analysis.VocabularyAssert.*;
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/common/src/test/org/apache/lucene/analysis/fr/TestFrenchAnalyzer.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/fr/TestFrenchAnalyzer.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/fr/TestFrenchAnalyzer.java
index 36fb0dc..9834621 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/fr/TestFrenchAnalyzer.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/fr/TestFrenchAnalyzer.java
@@ -21,7 +21,7 @@ import java.io.IOException;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
-import org.apache.lucene.analysis.util.CharArraySet;
+import org.apache.lucene.analysis.CharArraySet;
/**
* Test case for FrenchAnalyzer.
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/common/src/test/org/apache/lucene/analysis/fr/TestFrenchLightStemFilter.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/fr/TestFrenchLightStemFilter.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/fr/TestFrenchLightStemFilter.java
index 8a526f5..a8e18b0 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/fr/TestFrenchLightStemFilter.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/fr/TestFrenchLightStemFilter.java
@@ -21,12 +21,12 @@ import java.io.IOException;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.KeywordTokenizer;
import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
-import org.apache.lucene.analysis.util.CharArraySet;
import static org.apache.lucene.analysis.VocabularyAssert.*;
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/common/src/test/org/apache/lucene/analysis/fr/TestFrenchMinimalStemFilter.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/fr/TestFrenchMinimalStemFilter.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/fr/TestFrenchMinimalStemFilter.java
index d55fe51..99f9566 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/fr/TestFrenchMinimalStemFilter.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/fr/TestFrenchMinimalStemFilter.java
@@ -21,12 +21,12 @@ import java.io.IOException;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.KeywordTokenizer;
import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
-import org.apache.lucene.analysis.util.CharArraySet;
import static org.apache.lucene.analysis.VocabularyAssert.*;
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/common/src/test/org/apache/lucene/analysis/ga/TestIrishAnalyzer.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/ga/TestIrishAnalyzer.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/ga/TestIrishAnalyzer.java
index 54d7254..50a6294 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/ga/TestIrishAnalyzer.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/ga/TestIrishAnalyzer.java
@@ -21,7 +21,7 @@ import java.io.IOException;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
-import org.apache.lucene.analysis.util.CharArraySet;
+import org.apache.lucene.analysis.CharArraySet;
public class TestIrishAnalyzer extends BaseTokenStreamTestCase {
/** This test fails with NPE when the
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/common/src/test/org/apache/lucene/analysis/gl/TestGalicianAnalyzer.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/gl/TestGalicianAnalyzer.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/gl/TestGalicianAnalyzer.java
index a215121..78e1719 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/gl/TestGalicianAnalyzer.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/gl/TestGalicianAnalyzer.java
@@ -21,7 +21,7 @@ import java.io.IOException;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
-import org.apache.lucene.analysis.util.CharArraySet;
+import org.apache.lucene.analysis.CharArraySet;
public class TestGalicianAnalyzer extends BaseTokenStreamTestCase {
/** This test fails with NPE when the
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/common/src/test/org/apache/lucene/analysis/gl/TestGalicianMinimalStemFilter.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/gl/TestGalicianMinimalStemFilter.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/gl/TestGalicianMinimalStemFilter.java
index d1ffe89..e57f6cd 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/gl/TestGalicianMinimalStemFilter.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/gl/TestGalicianMinimalStemFilter.java
@@ -21,12 +21,12 @@ import java.io.IOException;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.KeywordTokenizer;
import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
-import org.apache.lucene.analysis.util.CharArraySet;
/**
* Simple tests for {@link GalicianMinimalStemmer}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/common/src/test/org/apache/lucene/analysis/hi/TestHindiAnalyzer.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hi/TestHindiAnalyzer.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hi/TestHindiAnalyzer.java
index 63321d5..f95c455 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hi/TestHindiAnalyzer.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hi/TestHindiAnalyzer.java
@@ -18,7 +18,7 @@ package org.apache.lucene.analysis.hi;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
-import org.apache.lucene.analysis.util.CharArraySet;
+import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.util.Version;
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/common/src/test/org/apache/lucene/analysis/hu/TestHungarianAnalyzer.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hu/TestHungarianAnalyzer.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hu/TestHungarianAnalyzer.java
index 1ce8d38..cf591db 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hu/TestHungarianAnalyzer.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hu/TestHungarianAnalyzer.java
@@ -21,7 +21,7 @@ import java.io.IOException;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
-import org.apache.lucene.analysis.util.CharArraySet;
+import org.apache.lucene.analysis.CharArraySet;
public class TestHungarianAnalyzer extends BaseTokenStreamTestCase {
/** This test fails with NPE when the
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/common/src/test/org/apache/lucene/analysis/hu/TestHungarianLightStemFilter.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hu/TestHungarianLightStemFilter.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hu/TestHungarianLightStemFilter.java
index 3b8951c..67399d0 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hu/TestHungarianLightStemFilter.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hu/TestHungarianLightStemFilter.java
@@ -21,12 +21,12 @@ import java.io.IOException;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.KeywordTokenizer;
import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
-import org.apache.lucene.analysis.util.CharArraySet;
import static org.apache.lucene.analysis.VocabularyAssert.*;
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestHunspellStemFilter.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestHunspellStemFilter.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestHunspellStemFilter.java
index 5f39926..677351e 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestHunspellStemFilter.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hunspell/TestHunspellStemFilter.java
@@ -24,11 +24,11 @@ import java.util.Collections;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.KeywordTokenizer;
import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
-import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.IOUtils;
import org.junit.AfterClass;
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/common/src/test/org/apache/lucene/analysis/hy/TestArmenianAnalyzer.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hy/TestArmenianAnalyzer.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hy/TestArmenianAnalyzer.java
index b9934c8..704187b 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/hy/TestArmenianAnalyzer.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/hy/TestArmenianAnalyzer.java
@@ -21,7 +21,7 @@ import java.io.IOException;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
-import org.apache.lucene.analysis.util.CharArraySet;
+import org.apache.lucene.analysis.CharArraySet;
public class TestArmenianAnalyzer extends BaseTokenStreamTestCase {
/** This test fails with NPE when the
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/common/src/test/org/apache/lucene/analysis/id/TestIndonesianAnalyzer.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/id/TestIndonesianAnalyzer.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/id/TestIndonesianAnalyzer.java
index 424f117..366bad7 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/id/TestIndonesianAnalyzer.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/id/TestIndonesianAnalyzer.java
@@ -21,7 +21,7 @@ import java.io.IOException;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
-import org.apache.lucene.analysis.util.CharArraySet;
+import org.apache.lucene.analysis.CharArraySet;
public class TestIndonesianAnalyzer extends BaseTokenStreamTestCase {
/** This test fails with NPE when the
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/common/src/test/org/apache/lucene/analysis/it/TestItalianAnalyzer.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/it/TestItalianAnalyzer.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/it/TestItalianAnalyzer.java
index bba4947..c7c51b8 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/it/TestItalianAnalyzer.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/it/TestItalianAnalyzer.java
@@ -21,7 +21,7 @@ import java.io.IOException;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
-import org.apache.lucene.analysis.util.CharArraySet;
+import org.apache.lucene.analysis.CharArraySet;
public class TestItalianAnalyzer extends BaseTokenStreamTestCase {
/** This test fails with NPE when the
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/common/src/test/org/apache/lucene/analysis/lt/TestLithuanianAnalyzer.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/lt/TestLithuanianAnalyzer.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/lt/TestLithuanianAnalyzer.java
index dda018c..86c3f16 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/lt/TestLithuanianAnalyzer.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/lt/TestLithuanianAnalyzer.java
@@ -21,7 +21,7 @@ import java.io.IOException;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
-import org.apache.lucene.analysis.util.CharArraySet;
+import org.apache.lucene.analysis.CharArraySet;
public class TestLithuanianAnalyzer extends BaseTokenStreamTestCase {
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/common/src/test/org/apache/lucene/analysis/lv/TestLatvianAnalyzer.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/lv/TestLatvianAnalyzer.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/lv/TestLatvianAnalyzer.java
index 4c6e432..5f400b5 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/lv/TestLatvianAnalyzer.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/lv/TestLatvianAnalyzer.java
@@ -21,7 +21,7 @@ import java.io.IOException;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
-import org.apache.lucene.analysis.util.CharArraySet;
+import org.apache.lucene.analysis.CharArraySet;
public class TestLatvianAnalyzer extends BaseTokenStreamTestCase {
/** This test fails with NPE when the
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestCapitalizationFilter.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestCapitalizationFilter.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestCapitalizationFilter.java
index ef1c30e..5590f04 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestCapitalizationFilter.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestCapitalizationFilter.java
@@ -25,10 +25,10 @@ import java.util.List;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.KeywordTokenizer;
-import org.apache.lucene.analysis.util.CharArraySet;
import org.junit.Test;
import static org.apache.lucene.analysis.miscellaneous.CapitalizationFilter.*;
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestKeepFilterFactory.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestKeepFilterFactory.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestKeepFilterFactory.java
index 6110e2b..dde6f94 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestKeepFilterFactory.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestKeepFilterFactory.java
@@ -17,8 +17,8 @@
package org.apache.lucene.analysis.miscellaneous;
+import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.util.BaseTokenStreamFactoryTestCase;
-import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.analysis.util.ClasspathResourceLoader;
import org.apache.lucene.analysis.util.ResourceLoader;
@@ -49,4 +49,4 @@ public class TestKeepFilterFactory extends BaseTokenStreamFactoryTestCase {
});
assertTrue(expected.getMessage().contains("Unknown parameters"));
}
-}
\ No newline at end of file
+}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestKeepWordFilter.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestKeepWordFilter.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestKeepWordFilter.java
index 847b26c..19e77b0 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestKeepWordFilter.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestKeepWordFilter.java
@@ -21,10 +21,10 @@ import java.util.Set;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
-import org.apache.lucene.analysis.util.CharArraySet;
/** Test {@link KeepWordFilter} */
public class TestKeepWordFilter extends BaseTokenStreamTestCase {
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestKeywordMarkerFilter.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestKeywordMarkerFilter.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestKeywordMarkerFilter.java
index c5b2481..67a421b 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestKeywordMarkerFilter.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestKeywordMarkerFilter.java
@@ -21,11 +21,11 @@ import java.util.Locale;
import java.util.regex.Pattern;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
-import org.apache.lucene.analysis.util.CharArraySet;
+import org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
import org.junit.Test;
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestStemmerOverrideFilter.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestStemmerOverrideFilter.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestStemmerOverrideFilter.java
index ef4856c..1e4fce0 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestStemmerOverrideFilter.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestStemmerOverrideFilter.java
@@ -26,13 +26,13 @@ import java.util.Map;
import java.util.Set;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.CharacterUtils;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.KeywordTokenizer;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.analysis.en.PorterStemFilter;
import org.apache.lucene.analysis.miscellaneous.StemmerOverrideFilter.StemmerOverrideMap;
-import org.apache.lucene.analysis.util.CharacterUtils;
import org.apache.lucene.util.TestUtil;
/**
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestWordDelimiterFilter.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestWordDelimiterFilter.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestWordDelimiterFilter.java
index c7dfa7d..a22d9c9 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestWordDelimiterFilter.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestWordDelimiterFilter.java
@@ -16,19 +16,19 @@
*/
package org.apache.lucene.analysis.miscellaneous;
+import java.io.IOException;
+import java.util.*;
+
import org.apache.lucene.analysis.*;
+import org.apache.lucene.analysis.CharArraySet;
+import org.apache.lucene.analysis.StopFilter;
import org.apache.lucene.analysis.core.KeywordTokenizer;
-import org.apache.lucene.analysis.core.StopFilter;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
-import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
-import org.apache.lucene.analysis.util.CharArraySet;
+import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.util.IOUtils;
import org.junit.Test;
-import java.io.IOException;
-import java.util.*;
-
import static org.apache.lucene.analysis.miscellaneous.WordDelimiterFilter.*;
import static org.apache.lucene.analysis.miscellaneous.WordDelimiterIterator.DEFAULT_WORD_DELIM_TABLE;
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/common/src/test/org/apache/lucene/analysis/nl/TestDutchAnalyzer.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/nl/TestDutchAnalyzer.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/nl/TestDutchAnalyzer.java
index b7f3ebc..8055660 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/nl/TestDutchAnalyzer.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/nl/TestDutchAnalyzer.java
@@ -19,10 +19,10 @@ package org.apache.lucene.analysis.nl;
import java.io.IOException;
-import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.util.CharArrayMap;
-import org.apache.lucene.analysis.util.CharArraySet;
+import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.CharArrayMap;
+import org.apache.lucene.analysis.CharArraySet;
/**
* Test the Dutch Stem Filter, which only modifies the term text.
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/common/src/test/org/apache/lucene/analysis/no/TestNorwegianAnalyzer.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/no/TestNorwegianAnalyzer.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/no/TestNorwegianAnalyzer.java
index 1dd9217..9cb494d 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/no/TestNorwegianAnalyzer.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/no/TestNorwegianAnalyzer.java
@@ -21,7 +21,7 @@ import java.io.IOException;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
-import org.apache.lucene.analysis.util.CharArraySet;
+import org.apache.lucene.analysis.CharArraySet;
public class TestNorwegianAnalyzer extends BaseTokenStreamTestCase {
/** This test fails with NPE when the
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/common/src/test/org/apache/lucene/analysis/no/TestNorwegianLightStemFilter.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/no/TestNorwegianLightStemFilter.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/no/TestNorwegianLightStemFilter.java
index 38fe12b..89e52af 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/no/TestNorwegianLightStemFilter.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/no/TestNorwegianLightStemFilter.java
@@ -23,12 +23,12 @@ import java.util.Random;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.KeywordTokenizer;
import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
-import org.apache.lucene.analysis.util.CharArraySet;
import static org.apache.lucene.analysis.VocabularyAssert.*;
import static org.apache.lucene.analysis.no.NorwegianLightStemmer.BOKMAAL;
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/common/src/test/org/apache/lucene/analysis/no/TestNorwegianMinimalStemFilter.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/no/TestNorwegianMinimalStemFilter.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/no/TestNorwegianMinimalStemFilter.java
index d0593dc..69b5b0c 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/no/TestNorwegianMinimalStemFilter.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/no/TestNorwegianMinimalStemFilter.java
@@ -23,12 +23,12 @@ import java.util.Random;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.KeywordTokenizer;
import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
-import org.apache.lucene.analysis.util.CharArraySet;
import static org.apache.lucene.analysis.VocabularyAssert.*;
import static org.apache.lucene.analysis.no.NorwegianLightStemmer.BOKMAAL;
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseAnalyzer.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseAnalyzer.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseAnalyzer.java
index 7e4dba7..d948c30 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseAnalyzer.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseAnalyzer.java
@@ -21,7 +21,7 @@ import java.io.IOException;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
-import org.apache.lucene.analysis.util.CharArraySet;
+import org.apache.lucene.analysis.CharArraySet;
public class TestPortugueseAnalyzer extends BaseTokenStreamTestCase {
/** This test fails with NPE when the
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseLightStemFilter.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseLightStemFilter.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseLightStemFilter.java
index 00a6d0f..95d3ff7 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseLightStemFilter.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseLightStemFilter.java
@@ -21,12 +21,12 @@ import java.io.IOException;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.KeywordTokenizer;
import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
-import org.apache.lucene.analysis.util.CharArraySet;
import static org.apache.lucene.analysis.VocabularyAssert.*;
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseMinimalStemFilter.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseMinimalStemFilter.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseMinimalStemFilter.java
index e9dd584..b44460f 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseMinimalStemFilter.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseMinimalStemFilter.java
@@ -21,12 +21,12 @@ import java.io.IOException;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.KeywordTokenizer;
import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
-import org.apache.lucene.analysis.util.CharArraySet;
import static org.apache.lucene.analysis.VocabularyAssert.*;
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseStemFilter.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseStemFilter.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseStemFilter.java
index 5209923..7bdaac8 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseStemFilter.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/pt/TestPortugueseStemFilter.java
@@ -17,18 +17,18 @@
package org.apache.lucene.analysis.pt;
-import static org.apache.lucene.analysis.VocabularyAssert.assertVocabulary;
-
import java.io.IOException;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.KeywordTokenizer;
import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
-import org.apache.lucene.analysis.util.CharArraySet;
+
+import static org.apache.lucene.analysis.VocabularyAssert.assertVocabulary;
/**
* Simple tests for {@link PortugueseStemFilter}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/common/src/test/org/apache/lucene/analysis/ro/TestRomanianAnalyzer.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/ro/TestRomanianAnalyzer.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/ro/TestRomanianAnalyzer.java
index 1d4e2f5..15c0286 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/ro/TestRomanianAnalyzer.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/ro/TestRomanianAnalyzer.java
@@ -21,7 +21,7 @@ import java.io.IOException;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
-import org.apache.lucene.analysis.util.CharArraySet;
+import org.apache.lucene.analysis.CharArraySet;
public class TestRomanianAnalyzer extends BaseTokenStreamTestCase {
/** This test fails with NPE when the
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/common/src/test/org/apache/lucene/analysis/ru/TestRussianAnalyzer.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/ru/TestRussianAnalyzer.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/ru/TestRussianAnalyzer.java
index 60e9fb4..174feb1 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/ru/TestRussianAnalyzer.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/ru/TestRussianAnalyzer.java
@@ -19,9 +19,9 @@ package org.apache.lucene.analysis.ru;
import java.io.IOException;
-import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.util.CharArraySet;
+import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.CharArraySet;
/**
* Test case for RussianAnalyzer.
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/common/src/test/org/apache/lucene/analysis/ru/TestRussianLightStemFilter.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/ru/TestRussianLightStemFilter.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/ru/TestRussianLightStemFilter.java
index 19b9309..604b230 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/ru/TestRussianLightStemFilter.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/ru/TestRussianLightStemFilter.java
@@ -21,12 +21,12 @@ import java.io.IOException;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.KeywordTokenizer;
import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
-import org.apache.lucene.analysis.util.CharArraySet;
import static org.apache.lucene.analysis.VocabularyAssert.*;
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/common/src/test/org/apache/lucene/analysis/shingle/ShingleAnalyzerWrapperTest.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/shingle/ShingleAnalyzerWrapperTest.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/shingle/ShingleAnalyzerWrapperTest.java
index c97ec03..bcdefed 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/shingle/ShingleAnalyzerWrapperTest.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/shingle/ShingleAnalyzerWrapperTest.java
@@ -19,15 +19,15 @@ package org.apache.lucene.analysis.shingle;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.analysis.MockTokenizer;
+import org.apache.lucene.analysis.StopFilter;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
-import org.apache.lucene.analysis.core.StopFilter;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
-import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/common/src/test/org/apache/lucene/analysis/sinks/TestTeeSinkTokenFilter.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/sinks/TestTeeSinkTokenFilter.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/sinks/TestTeeSinkTokenFilter.java
index 735f12e..c0127a3 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/sinks/TestTeeSinkTokenFilter.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/sinks/TestTeeSinkTokenFilter.java
@@ -23,16 +23,16 @@ import java.util.Locale;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.CachingTokenFilter;
+import org.apache.lucene.analysis.FilteringTokenFilter;
+import org.apache.lucene.analysis.LowerCaseFilter;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.core.LowerCaseFilter;
import org.apache.lucene.analysis.standard.StandardFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
-import org.apache.lucene.analysis.util.FilteringTokenFilter;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
[03/12] lucene-solr:branch_6x: LUCENE-7318: graduate StandardAnalyzer
and make it the default for IndexWriterConfig
Posted by mi...@apache.org.
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/core/src/test/org/apache/lucene/analysis/TestWordlistLoader.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/test/org/apache/lucene/analysis/TestWordlistLoader.java b/lucene/core/src/test/org/apache/lucene/analysis/TestWordlistLoader.java
new file mode 100644
index 0000000..943e427
--- /dev/null
+++ b/lucene/core/src/test/org/apache/lucene/analysis/TestWordlistLoader.java
@@ -0,0 +1,79 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.analysis;
+
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.StringReader;
+
+import org.apache.lucene.util.LuceneTestCase;
+
+import org.apache.lucene.analysis.WordlistLoader;
+
+public class TestWordlistLoader extends LuceneTestCase {
+
+ public void testWordlistLoading() throws IOException {
+ String s = "ONE\n two \nthree";
+ CharArraySet wordSet1 = WordlistLoader.getWordSet(new StringReader(s));
+ checkSet(wordSet1);
+ CharArraySet wordSet2 = WordlistLoader.getWordSet(new BufferedReader(new StringReader(s)));
+ checkSet(wordSet2);
+ }
+
+ public void testComments() throws Exception {
+ String s = "ONE\n two \nthree\n#comment";
+ CharArraySet wordSet1 = WordlistLoader.getWordSet(new StringReader(s), "#");
+ checkSet(wordSet1);
+ assertFalse(wordSet1.contains("#comment"));
+ assertFalse(wordSet1.contains("comment"));
+ }
+
+
+ private void checkSet(CharArraySet wordset) {
+ assertEquals(3, wordset.size());
+ assertTrue(wordset.contains("ONE")); // case is not modified
+ assertTrue(wordset.contains("two")); // surrounding whitespace is removed
+ assertTrue(wordset.contains("three"));
+ assertFalse(wordset.contains("four"));
+ }
+
+ /**
+ * Test stopwords in snowball format
+ */
+ public void testSnowballListLoading() throws IOException {
+ String s =
+ "|comment\n" + // commented line
+ " |comment\n" + // commented line with leading whitespace
+ "\n" + // blank line
+ " \t\n" + // line with only whitespace
+ " |comment | comment\n" + // commented line with comment
+ "ONE\n" + // stopword, in uppercase
+ " two \n" + // stopword with leading/trailing space
+ " three four five \n" + // multiple stopwords
+ "six seven | comment\n"; //multiple stopwords + comment
+ CharArraySet wordset = WordlistLoader.getSnowballWordSet(new StringReader(s));
+ assertEquals(7, wordset.size());
+ assertTrue(wordset.contains("ONE"));
+ assertTrue(wordset.contains("two"));
+ assertTrue(wordset.contains("three"));
+ assertTrue(wordset.contains("four"));
+ assertTrue(wordset.contains("five"));
+ assertTrue(wordset.contains("six"));
+ assertTrue(wordset.contains("seven"));
+ }
+}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/core/src/test/org/apache/lucene/analysis/standard/TestStandardAnalyzer.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/test/org/apache/lucene/analysis/standard/TestStandardAnalyzer.java b/lucene/core/src/test/org/apache/lucene/analysis/standard/TestStandardAnalyzer.java
new file mode 100644
index 0000000..6c6ddc8
--- /dev/null
+++ b/lucene/core/src/test/org/apache/lucene/analysis/standard/TestStandardAnalyzer.java
@@ -0,0 +1,390 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.analysis.standard;
+
+
+import java.io.IOException;
+import java.io.StringReader;
+import java.util.Arrays;
+import java.util.Random;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.MockGraphTokenFilter;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.util.TestUtil;
+
+public class TestStandardAnalyzer extends BaseTokenStreamTestCase {
+
+ // LUCENE-5897: slow tokenization of strings of the form (\p{WB:ExtendNumLet}[\p{WB:Format}\p{WB:Extend}]*)+
+ @Slow
+ public void testLargePartiallyMatchingToken() throws Exception {
+ // TODO: get these lists of chars matching a property from ICU4J
+ // http://www.unicode.org/Public/6.3.0/ucd/auxiliary/WordBreakProperty.txt
+ char[] WordBreak_ExtendNumLet_chars = "_\u203f\u2040\u2054\ufe33\ufe34\ufe4d\ufe4e\ufe4f\uff3f".toCharArray();
+
+ // http://www.unicode.org/Public/6.3.0/ucd/auxiliary/WordBreakProperty.txt
+ int[] WordBreak_Format_chars // only the first char in ranges
+ = { 0xAD, 0x600, 0x61C, 0x6DD, 0x70F, 0x180E, 0x200E, 0x202A, 0x2060, 0x2066, 0xFEFF,
+ 0xFFF9, 0x110BD, 0x1D173, 0xE0001, 0xE0020 };
+
+ // http://www.unicode.org/Public/6.3.0/ucd/auxiliary/WordBreakProperty.txt
+ int[] WordBreak_Extend_chars // only the first char in ranges
+ = { 0x300, 0x483, 0x591, 0x5bf, 0x5c1, 0x5c4, 0x5c7, 0x610, 0x64b, 0x670, 0x6d6, 0x6df,
+ 0x6e7, 0x6ea, 0x711, 0x730, 0x7a6, 0x7eb, 0x816, 0x81b, 0x825, 0x829, 0x859, 0x8e4,
+ 0x900, 0x93a, 0x93e, 0x951, 0x962, 0x981, 0x9bc, 0x9be, 0x9c7, 0x9cb, 0x9d7, 0x9e2,
+ 0xa01, 0xa3c, 0xa3e, 0xa47, 0xa4b, 0xa51, 0xa70, 0xa75, 0xa81, 0xabc, 0xabe, 0xac7,
+ 0xacb, 0xae2, 0xb01, 0xb3c, 0xb3e, 0xb47, 0xb4b, 0xb56, 0xb62, 0xb82, 0xbbe, 0xbc6,
+ 0xbca, 0xbd7, 0xc01, 0xc3e, 0xc46, 0xc4a, 0xc55, 0xc62, 0xc82, 0xcbc, 0xcbe, 0xcc6,
+ 0xcca, 0xcd5, 0xce2, 0xd02, 0xd3e, 0xd46, 0xd4a, 0xd57, 0xd62, 0xd82, 0xdca, 0xdcf,
+ 0xdd6, 0xdd8, 0xdf2, 0xe31, 0xe34, 0xe47, 0xeb1, 0xeb4, 0xebb, 0xec8, 0xf18, 0xf35,
+ 0xf37, 0xf39, 0xf3e, 0xf71, 0xf86, 0xf8d, 0xf99, 0xfc6, 0x102b, 0x1056, 0x105e, 0x1062,
+ 0x1067, 0x1071, 0x1082, 0x108f, 0x109a, 0x135d, 0x1712, 0x1732, 0x1752, 0x1772, 0x17b4,
+ 0x17dd, 0x180b, 0x18a9, 0x1920, 0x1930, 0x19b0, 0x19c8, 0x1a17, 0x1a55, 0x1a60, 0x1a7f,
+ 0x1b00, 0x1b34, 0x1b6b, 0x1b80, 0x1ba1, 0x1be6, 0x1c24, 0x1cd0, 0x1cd4, 0x1ced, 0x1cf2,
+ 0x1dc0, 0x1dfc, 0x200c, 0x20d0, 0x2cef, 0x2d7f, 0x2de0, 0x302a, 0x3099, 0xa66f, 0xa674,
+ 0xa69f, 0xa6f0, 0xa802, 0xa806, 0xa80b, 0xa823, 0xa880, 0xa8b4, 0xa8e0, 0xa926, 0xa947,
+ 0xa980, 0xa9b3, 0xaa29, 0xaa43, 0xaa4c, 0xaa7b, 0xaab0, 0xaab2, 0xaab7, 0xaabe, 0xaac1,
+ 0xaaeb, 0xaaf5, 0xabe3, 0xabec, 0xfb1e, 0xfe00, 0xfe20, 0xff9e, 0x101fd, 0x10a01,
+ 0x10a05, 0x10a0C, 0x10a38, 0x10a3F, 0x11000, 0x11001, 0x11038, 0x11080, 0x11082,
+ 0x110b0, 0x110b3, 0x110b7, 0x110b9, 0x11100, 0x11127, 0x1112c, 0x11180, 0x11182,
+ 0x111b3, 0x111b6, 0x111bF, 0x116ab, 0x116ac, 0x116b0, 0x116b6, 0x16f51, 0x16f8f,
+ 0x1d165, 0x1d167, 0x1d16d, 0x1d17b, 0x1d185, 0x1d1aa, 0x1d242, 0xe0100 };
+
+ StringBuilder builder = new StringBuilder();
+ int numChars = TestUtil.nextInt(random(), 100 * 1024, 1024 * 1024);
+ for (int i = 0 ; i < numChars ; ) {
+ builder.append(WordBreak_ExtendNumLet_chars[random().nextInt(WordBreak_ExtendNumLet_chars.length)]);
+ ++i;
+ if (random().nextBoolean()) {
+ int numFormatExtendChars = TestUtil.nextInt(random(), 1, 8);
+ for (int j = 0; j < numFormatExtendChars; ++j) {
+ int codepoint;
+ if (random().nextBoolean()) {
+ codepoint = WordBreak_Format_chars[random().nextInt(WordBreak_Format_chars.length)];
+ } else {
+ codepoint = WordBreak_Extend_chars[random().nextInt(WordBreak_Extend_chars.length)];
+ }
+ char[] chars = Character.toChars(codepoint);
+ builder.append(chars);
+ i += chars.length;
+ }
+ }
+ }
+ StandardTokenizer ts = new StandardTokenizer();
+ ts.setReader(new StringReader(builder.toString()));
+ ts.reset();
+ while (ts.incrementToken()) { }
+ ts.end();
+ ts.close();
+
+ int newBufferSize = TestUtil.nextInt(random(), 200, 8192);
+ ts.setMaxTokenLength(newBufferSize); // try a different buffer size
+ ts.setReader(new StringReader(builder.toString()));
+ ts.reset();
+ while (ts.incrementToken()) { }
+ ts.end();
+ ts.close();
+ }
+
+ public void testHugeDoc() throws IOException {
+ StringBuilder sb = new StringBuilder();
+ char whitespace[] = new char[4094];
+ Arrays.fill(whitespace, ' ');
+ sb.append(whitespace);
+ sb.append("testing 1234");
+ String input = sb.toString();
+ StandardTokenizer tokenizer = new StandardTokenizer();
+ tokenizer.setReader(new StringReader(input));
+ BaseTokenStreamTestCase.assertTokenStreamContents(tokenizer, new String[] { "testing", "1234" });
+ }
+
+ private Analyzer a;
+
+ @Override
+ public void setUp() throws Exception {
+ super.setUp();
+ a = new Analyzer() {
+ @Override
+ protected TokenStreamComponents createComponents(String fieldName) {
+ Tokenizer tokenizer = new StandardTokenizer(newAttributeFactory());
+ return new TokenStreamComponents(tokenizer);
+ }
+ };
+ }
+
+ @Override
+ public void tearDown() throws Exception {
+ a.close();
+ super.tearDown();
+ }
+
+ public void testArmenian() throws Exception {
+ BaseTokenStreamTestCase.assertAnalyzesTo(a, "\u054e\u056b\u0584\u056b\u057a\u0565\u0564\u056b\u0561\u0575\u056b 13 \u0574\u056b\u056c\u056b\u0578\u0576 \u0570\u0578\u0564\u057e\u0561\u056e\u0576\u0565\u0580\u0568 (4,600` \u0570\u0561\u0575\u0565\u0580\u0565\u0576 \u057e\u056b\u0584\u056b\u057a\u0565\u0564\u056b\u0561\u0575\u0578\u0582\u0574) \u0563\u0580\u057e\u0565\u056c \u0565\u0576 \u056f\u0561\u0574\u0561\u057e\u0578\u0580\u0576\u0565\u0580\u056b \u056f\u0578\u0572\u0574\u056b\u0581 \u0578\u0582 \u0570\u0561\u0574\u0561\u0580\u0575\u0561 \u0562\u0578\u056c\u0578\u0580 \u0570\u0578\u0564\u057e\u0561\u056e\u0576\u0565\u0580\u0568 \u056f\u0561\u0580\u0578\u0572 \u0567 \u056d\u0574\u0562\u0561\u0563\u0580\u0565\u056c \u0581\u0561\u0576\u056f\u0561\u0581 \u0574\u0561\u0580\u0564 \u0578\u057e \u056f\u0561\u0580\u0578\u0572 \u0567 \u0562\u0561\u0581\u0565\u056c \u054e\u056b\u0584\u056b\u057a\u0565\u0564\u056b\u0561\u0575\u056b \u056f\u0561\u0575\u0584\u0568\u0589",
+ new String[] { "\u054e\u056b\u0584\u056b\u057a\u0565\u0564\u056b\u0561\u0575\u056b", "13", "\u0574\u056b\u056c\u056b\u0578\u0576", "\u0570\u0578\u0564\u057e\u0561\u056e\u0576\u0565\u0580\u0568", "4,600", "\u0570\u0561\u0575\u0565\u0580\u0565\u0576", "\u057e\u056b\u0584\u056b\u057a\u0565\u0564\u056b\u0561\u0575\u0578\u0582\u0574", "\u0563\u0580\u057e\u0565\u056c", "\u0565\u0576", "\u056f\u0561\u0574\u0561\u057e\u0578\u0580\u0576\u0565\u0580\u056b", "\u056f\u0578\u0572\u0574\u056b\u0581",
+ "\u0578\u0582", "\u0570\u0561\u0574\u0561\u0580\u0575\u0561", "\u0562\u0578\u056c\u0578\u0580", "\u0570\u0578\u0564\u057e\u0561\u056e\u0576\u0565\u0580\u0568", "\u056f\u0561\u0580\u0578\u0572", "\u0567", "\u056d\u0574\u0562\u0561\u0563\u0580\u0565\u056c", "\u0581\u0561\u0576\u056f\u0561\u0581", "\u0574\u0561\u0580\u0564", "\u0578\u057e", "\u056f\u0561\u0580\u0578\u0572", "\u0567", "\u0562\u0561\u0581\u0565\u056c", "\u054e\u056b\u0584\u056b\u057a\u0565\u0564\u056b\u0561\u0575\u056b", "\u056f\u0561\u0575\u0584\u0568" } );
+ }
+
+ public void testAmharic() throws Exception {
+ BaseTokenStreamTestCase.assertAnalyzesTo(a, "\u12ca\u12aa\u1354\u12f5\u12eb \u12e8\u1263\u1208 \u1265\u12d9 \u124b\u1295\u124b \u12e8\u1270\u121f\u120b \u1275\u12ad\u12ad\u1208\u129b\u1293 \u1290\u133b \u1218\u12dd\u1308\u1260 \u12d5\u12cd\u1240\u1275 (\u12a2\u1295\u1233\u12ed\u12ad\u120e\u1352\u12f2\u12eb) \u1290\u12cd\u1362 \u121b\u1295\u129b\u12cd\u121d",
+ new String[] { "\u12ca\u12aa\u1354\u12f5\u12eb", "\u12e8\u1263\u1208", "\u1265\u12d9", "\u124b\u1295\u124b", "\u12e8\u1270\u121f\u120b", "\u1275\u12ad\u12ad\u1208\u129b\u1293", "\u1290\u133b", "\u1218\u12dd\u1308\u1260", "\u12d5\u12cd\u1240\u1275", "\u12a2\u1295\u1233\u12ed\u12ad\u120e\u1352\u12f2\u12eb", "\u1290\u12cd", "\u121b\u1295\u129b\u12cd\u121d" } );
+ }
+
+ public void testArabic() throws Exception {
+ BaseTokenStreamTestCase.assertAnalyzesTo(a, "\u0627\u0644\u0641\u064a\u0644\u0645 \u0627\u0644\u0648\u062b\u0627\u0626\u0642\u064a \u0627\u0644\u0623\u0648\u0644 \u0639\u0646 \u0648\u064a\u0643\u064a\u0628\u064a\u062f\u064a\u0627 \u064a\u0633\u0645\u0649 \"\u0627\u0644\u062d\u0642\u064a\u0642\u0629 \u0628\u0627\u0644\u0623\u0631\u0642\u0627\u0645: \u0642\u0635\u0629 \u0648\u064a\u0643\u064a\u0628\u064a\u062f\u064a\u0627\" (\u0628\u0627\u0644\u0625\u0646\u062c\u0644\u064a\u0632\u064a\u0629: Truth in Numbers: The Wikipedia Story)\u060c \u0633\u064a\u062a\u0645 \u0625\u0637\u0644\u0627\u0642\u0647 \u0641\u064a 2008.",
+ new String[] { "\u0627\u0644\u0641\u064a\u0644\u0645", "\u0627\u0644\u0648\u062b\u0627\u0626\u0642\u064a", "\u0627\u0644\u0623\u0648\u0644", "\u0639\u0646", "\u0648\u064a\u0643\u064a\u0628\u064a\u062f\u064a\u0627", "\u064a\u0633\u0645\u0649", "\u0627\u0644\u062d\u0642\u064a\u0642\u0629", "\u0628\u0627\u0644\u0623\u0631\u0642\u0627\u0645", "\u0642\u0635\u0629", "\u0648\u064a\u0643\u064a\u0628\u064a\u062f\u064a\u0627",
+ "\u0628\u0627\u0644\u0625\u0646\u062c\u0644\u064a\u0632\u064a\u0629", "Truth", "in", "Numbers", "The", "Wikipedia", "Story", "\u0633\u064a\u062a\u0645", "\u0625\u0637\u0644\u0627\u0642\u0647", "\u0641\u064a", "2008" } );
+ }
+
+ public void testAramaic() throws Exception {
+ BaseTokenStreamTestCase.assertAnalyzesTo(a, "\u0718\u071d\u0729\u071d\u0726\u0715\u071d\u0710 (\u0710\u0722\u0713\u0720\u071d\u0710: Wikipedia) \u0717\u0718 \u0710\u071d\u0722\u0723\u0729\u0720\u0718\u0726\u0715\u071d\u0710 \u071a\u0710\u072a\u072c\u0710 \u0715\u0710\u0722\u071b\u072a\u0722\u071b \u0712\u0720\u072b\u0722\u0308\u0710 \u0723\u0713\u071d\u0710\u0308\u0710\u0702 \u072b\u0721\u0717 \u0710\u072c\u0710 \u0721\u0722 \u0721\u0308\u0720\u072c\u0710 \u0715\"\u0718\u071d\u0729\u071d\" \u0718\"\u0710\u071d\u0722\u0723\u0729\u0720\u0718\u0726\u0715\u071d\u0710\"\u0700",
+ new String[] { "\u0718\u071d\u0729\u071d\u0726\u0715\u071d\u0710", "\u0710\u0722\u0713\u0720\u071d\u0710", "Wikipedia", "\u0717\u0718", "\u0710\u071d\u0722\u0723\u0729\u0720\u0718\u0726\u0715\u071d\u0710", "\u071a\u0710\u072a\u072c\u0710", "\u0715\u0710\u0722\u071b\u072a\u0722\u071b", "\u0712\u0720\u072b\u0722\u0308\u0710", "\u0723\u0713\u071d\u0710\u0308\u0710", "\u072b\u0721\u0717",
+ "\u0710\u072c\u0710", "\u0721\u0722", "\u0721\u0308\u0720\u072c\u0710", "\u0715", "\u0718\u071d\u0729\u071d", "\u0718", "\u0710\u071d\u0722\u0723\u0729\u0720\u0718\u0726\u0715\u071d\u0710"});
+ }
+
+ public void testBengali() throws Exception {
+ BaseTokenStreamTestCase.assertAnalyzesTo(a, "\u098f\u0987 \u09ac\u09bf\u09b6\u09cd\u09ac\u0995\u09cb\u09b7 \u09aa\u09b0\u09bf\u099a\u09be\u09b2\u09a8\u09be \u0995\u09b0\u09c7 \u0989\u0987\u0995\u09bf\u09ae\u09bf\u09a1\u09bf\u09af\u09bc\u09be \u09ab\u09be\u0989\u09a8\u09cd\u09a1\u09c7\u09b6\u09a8 (\u098f\u0995\u099f\u09bf \u0985\u09b2\u09be\u09ad\u099c\u09a8\u0995 \u09b8\u0982\u09b8\u09cd\u09a5\u09be)\u0964 \u0989\u0987\u0995\u09bf\u09aa\u09bf\u09a1\u09bf\u09af\u09bc\u09be\u09b0 \u09b6\u09c1\u09b0\u09c1 \u09e7\u09eb \u099c\u09be\u09a8\u09c1\u09af\u09bc\u09be\u09b0\u09bf, \u09e8\u09e6\u09e6\u09e7 \u09b8\u09be\u09b2\u09c7\u0964 \u098f\u0996\u09a8 \u09aa\u09b0\u09cd\u09af\u09a8\u09cd\u09a4 \u09e8\u09e6\u09e6\u099f\u09bf\u09b0\u0993 \u09ac\u09c7\u09b6\u09c0 \u09ad\u09be\u09b7\u09be\u09af\u09bc \u0989\u0987\u0995\u09bf\u09aa\u09bf\u09a1\u09bf\u09af\u09bc\u09be \u09b0\u09af\u09bc\u09c7\u099b\u09c7\u0964",
+ new String[] { "\u098f\u0987", "\u09ac\u09bf\u09b6\u09cd\u09ac\u0995\u09cb\u09b7", "\u09aa\u09b0\u09bf\u099a\u09be\u09b2\u09a8\u09be", "\u0995\u09b0\u09c7", "\u0989\u0987\u0995\u09bf\u09ae\u09bf\u09a1\u09bf\u09af\u09bc\u09be", "\u09ab\u09be\u0989\u09a8\u09cd\u09a1\u09c7\u09b6\u09a8", "\u098f\u0995\u099f\u09bf", "\u0985\u09b2\u09be\u09ad\u099c\u09a8\u0995", "\u09b8\u0982\u09b8\u09cd\u09a5\u09be", "\u0989\u0987\u0995\u09bf\u09aa\u09bf\u09a1\u09bf\u09af\u09bc\u09be\u09b0",
+ "\u09b6\u09c1\u09b0\u09c1", "\u09e7\u09eb", "\u099c\u09be\u09a8\u09c1\u09af\u09bc\u09be\u09b0\u09bf", "\u09e8\u09e6\u09e6\u09e7", "\u09b8\u09be\u09b2\u09c7", "\u098f\u0996\u09a8", "\u09aa\u09b0\u09cd\u09af\u09a8\u09cd\u09a4", "\u09e8\u09e6\u09e6\u099f\u09bf\u09b0\u0993", "\u09ac\u09c7\u09b6\u09c0", "\u09ad\u09be\u09b7\u09be\u09af\u09bc", "\u0989\u0987\u0995\u09bf\u09aa\u09bf\u09a1\u09bf\u09af\u09bc\u09be", "\u09b0\u09af\u09bc\u09c7\u099b\u09c7" });
+ }
+
+ public void testFarsi() throws Exception {
+ BaseTokenStreamTestCase.assertAnalyzesTo(a, "\u0648\u06cc\u06a9\u06cc \u067e\u062f\u06cc\u0627\u06cc \u0627\u0646\u06af\u0644\u06cc\u0633\u06cc \u062f\u0631 \u062a\u0627\u0631\u06cc\u062e \u06f2\u06f5 \u062f\u06cc \u06f1\u06f3\u06f7\u06f9 \u0628\u0647 \u0635\u0648\u0631\u062a \u0645\u06a9\u0645\u0644\u06cc \u0628\u0631\u0627\u06cc \u062f\u0627\u0646\u0634\u0646\u0627\u0645\u0647\u0654 \u062a\u062e\u0635\u0635\u06cc \u0646\u0648\u067e\u062f\u06cc\u0627 \u0646\u0648\u0634\u062a\u0647 \u0634\u062f.",
+ new String[] { "\u0648\u06cc\u06a9\u06cc", "\u067e\u062f\u06cc\u0627\u06cc", "\u0627\u0646\u06af\u0644\u06cc\u0633\u06cc", "\u062f\u0631", "\u062a\u0627\u0631\u06cc\u062e", "\u06f2\u06f5", "\u062f\u06cc", "\u06f1\u06f3\u06f7\u06f9", "\u0628\u0647", "\u0635\u0648\u0631\u062a", "\u0645\u06a9\u0645\u0644\u06cc",
+ "\u0628\u0631\u0627\u06cc", "\u062f\u0627\u0646\u0634\u0646\u0627\u0645\u0647\u0654", "\u062a\u062e\u0635\u0635\u06cc", "\u0646\u0648\u067e\u062f\u06cc\u0627", "\u0646\u0648\u0634\u062a\u0647", "\u0634\u062f" });
+ }
+
+ public void testGreek() throws Exception {
+ BaseTokenStreamTestCase.assertAnalyzesTo(a, "\u0393\u03c1\u03ac\u03c6\u03b5\u03c4\u03b1\u03b9 \u03c3\u03b5 \u03c3\u03c5\u03bd\u03b5\u03c1\u03b3\u03b1\u03c3\u03af\u03b1 \u03b1\u03c0\u03cc \u03b5\u03b8\u03b5\u03bb\u03bf\u03bd\u03c4\u03ad\u03c2 \u03bc\u03b5 \u03c4\u03bf \u03bb\u03bf\u03b3\u03b9\u03c3\u03bc\u03b9\u03ba\u03cc wiki, \u03ba\u03ac\u03c4\u03b9 \u03c0\u03bf\u03c5 \u03c3\u03b7\u03bc\u03b1\u03af\u03bd\u03b5\u03b9 \u03cc\u03c4\u03b9 \u03ac\u03c1\u03b8\u03c1\u03b1 \u03bc\u03c0\u03bf\u03c1\u03b5\u03af \u03bd\u03b1 \u03c0\u03c1\u03bf\u03c3\u03c4\u03b5\u03b8\u03bf\u03cd\u03bd \u03ae \u03bd\u03b1 \u03b1\u03bb\u03bb\u03ac\u03be\u03bf\u03c5\u03bd \u03b1\u03c0\u03cc \u03c4\u03bf\u03bd \u03ba\u03b1\u03b8\u03ad\u03bd\u03b1.",
+ new String[] { "\u0393\u03c1\u03ac\u03c6\u03b5\u03c4\u03b1\u03b9", "\u03c3\u03b5", "\u03c3\u03c5\u03bd\u03b5\u03c1\u03b3\u03b1\u03c3\u03af\u03b1", "\u03b1\u03c0\u03cc", "\u03b5\u03b8\u03b5\u03bb\u03bf\u03bd\u03c4\u03ad\u03c2", "\u03bc\u03b5", "\u03c4\u03bf", "\u03bb\u03bf\u03b3\u03b9\u03c3\u03bc\u03b9\u03ba\u03cc", "wiki", "\u03ba\u03ac\u03c4\u03b9", "\u03c0\u03bf\u03c5",
+ "\u03c3\u03b7\u03bc\u03b1\u03af\u03bd\u03b5\u03b9", "\u03cc\u03c4\u03b9", "\u03ac\u03c1\u03b8\u03c1\u03b1", "\u03bc\u03c0\u03bf\u03c1\u03b5\u03af", "\u03bd\u03b1", "\u03c0\u03c1\u03bf\u03c3\u03c4\u03b5\u03b8\u03bf\u03cd\u03bd", "\u03ae", "\u03bd\u03b1", "\u03b1\u03bb\u03bb\u03ac\u03be\u03bf\u03c5\u03bd", "\u03b1\u03c0\u03cc", "\u03c4\u03bf\u03bd", "\u03ba\u03b1\u03b8\u03ad\u03bd\u03b1" });
+ }
+
+ public void testThai() throws Exception {
+ BaseTokenStreamTestCase.assertAnalyzesTo(a, "\u0e01\u0e32\u0e23\u0e17\u0e35\u0e48\u0e44\u0e14\u0e49\u0e15\u0e49\u0e2d\u0e07\u0e41\u0e2a\u0e14\u0e07\u0e27\u0e48\u0e32\u0e07\u0e32\u0e19\u0e14\u0e35. \u0e41\u0e25\u0e49\u0e27\u0e40\u0e18\u0e2d\u0e08\u0e30\u0e44\u0e1b\u0e44\u0e2b\u0e19? \u0e51\u0e52\u0e53\u0e54",
+ new String[] { "\u0e01\u0e32\u0e23\u0e17\u0e35\u0e48\u0e44\u0e14\u0e49\u0e15\u0e49\u0e2d\u0e07\u0e41\u0e2a\u0e14\u0e07\u0e27\u0e48\u0e32\u0e07\u0e32\u0e19\u0e14\u0e35", "\u0e41\u0e25\u0e49\u0e27\u0e40\u0e18\u0e2d\u0e08\u0e30\u0e44\u0e1b\u0e44\u0e2b\u0e19", "\u0e51\u0e52\u0e53\u0e54" });
+ }
+
+ public void testLao() throws Exception {
+ BaseTokenStreamTestCase.assertAnalyzesTo(a, "\u0eaa\u0eb2\u0e97\u0eb2\u0ea5\u0eb0\u0e99\u0eb0\u0ea5\u0eb1\u0e94 \u0e9b\u0eb0\u0e8a\u0eb2\u0e97\u0eb4\u0e9b\u0eb0\u0ec4\u0e95 \u0e9b\u0eb0\u0e8a\u0eb2\u0e8a\u0ebb\u0e99\u0ea5\u0eb2\u0ea7",
+ new String[] { "\u0eaa\u0eb2\u0e97\u0eb2\u0ea5\u0eb0\u0e99\u0eb0\u0ea5\u0eb1\u0e94", "\u0e9b\u0eb0\u0e8a\u0eb2\u0e97\u0eb4\u0e9b\u0eb0\u0ec4\u0e95", "\u0e9b\u0eb0\u0e8a\u0eb2\u0e8a\u0ebb\u0e99\u0ea5\u0eb2\u0ea7" });
+ }
+
+ public void testTibetan() throws Exception {
+ BaseTokenStreamTestCase.assertAnalyzesTo(a, "\u0f66\u0fa3\u0f7c\u0f53\u0f0b\u0f58\u0f5b\u0f7c\u0f51\u0f0b\u0f51\u0f44\u0f0b\u0f63\u0f66\u0f0b\u0f60\u0f51\u0f72\u0f66\u0f0b\u0f56\u0f7c\u0f51\u0f0b\u0f61\u0f72\u0f42\u0f0b\u0f58\u0f72\u0f0b\u0f49\u0f58\u0f66\u0f0b\u0f42\u0f7c\u0f44\u0f0b\u0f60\u0f55\u0f7a\u0f63\u0f0b\u0f51\u0f74\u0f0b\u0f42\u0f4f\u0f7c\u0f44\u0f0b\u0f56\u0f62\u0f0b\u0f67\u0f0b\u0f45\u0f44\u0f0b\u0f51\u0f42\u0f7a\u0f0b\u0f58\u0f5a\u0f53\u0f0b\u0f58\u0f46\u0f72\u0f66\u0f0b\u0f66\u0f7c\u0f0d \u0f0d",
+ new String[] { "\u0f66\u0fa3\u0f7c\u0f53", "\u0f58\u0f5b\u0f7c\u0f51", "\u0f51\u0f44", "\u0f63\u0f66", "\u0f60\u0f51\u0f72\u0f66", "\u0f56\u0f7c\u0f51", "\u0f61\u0f72\u0f42",
+ "\u0f58\u0f72", "\u0f49\u0f58\u0f66", "\u0f42\u0f7c\u0f44", "\u0f60\u0f55\u0f7a\u0f63", "\u0f51\u0f74", "\u0f42\u0f4f\u0f7c\u0f44", "\u0f56\u0f62",
+ "\u0f67", "\u0f45\u0f44", "\u0f51\u0f42\u0f7a", "\u0f58\u0f5a\u0f53", "\u0f58\u0f46\u0f72\u0f66", "\u0f66\u0f7c" });
+ }
+
+ /*
+ * For chinese, tokenize as char (these can later form bigrams or whatever)
+ */
+ public void testChinese() throws Exception {
+ BaseTokenStreamTestCase.assertAnalyzesTo(a, "\u6211\u662f\u4e2d\u56fd\u4eba\u3002 \uff11\uff12\uff13\uff14 \uff34\uff45\uff53\uff54\uff53 ",
+ new String[] { "\u6211", "\u662f", "\u4e2d", "\u56fd", "\u4eba", "\uff11\uff12\uff13\uff14", "\uff34\uff45\uff53\uff54\uff53"});
+ }
+
+ public void testEmpty() throws Exception {
+ BaseTokenStreamTestCase.assertAnalyzesTo(a, "", new String[] {});
+ BaseTokenStreamTestCase.assertAnalyzesTo(a, ".", new String[] {});
+ BaseTokenStreamTestCase.assertAnalyzesTo(a, " ", new String[] {});
+ }
+
+ /* test various jira issues this analyzer is related to */
+
+ public void testLUCENE1545() throws Exception {
+ /*
+ * Standard analyzer does not correctly tokenize combining character U+0364 COMBINING LATIN SMALL LETTER E.
+ * The word "mo\u0364chte" is incorrectly tokenized into "mo" "chte", the combining character is lost.
+ * Expected result is only one token "mo\u0364chte".
+ */
+ BaseTokenStreamTestCase.assertAnalyzesTo(a, "mo\u0364chte", new String[] { "mo\u0364chte" });
+ }
+
+ /* Tests from StandardAnalyzer, just to show behavior is similar */
+ public void testAlphanumericSA() throws Exception {
+ // alphanumeric tokens
+ BaseTokenStreamTestCase.assertAnalyzesTo(a, "B2B", new String[]{"B2B"});
+ BaseTokenStreamTestCase.assertAnalyzesTo(a, "2B", new String[]{"2B"});
+ }
+
+ public void testDelimitersSA() throws Exception {
+ // other delimiters: "-", "/", ","
+ BaseTokenStreamTestCase.assertAnalyzesTo(a, "some-dashed-phrase", new String[]{"some", "dashed", "phrase"});
+ BaseTokenStreamTestCase.assertAnalyzesTo(a, "dogs,chase,cats", new String[]{"dogs", "chase", "cats"});
+ BaseTokenStreamTestCase.assertAnalyzesTo(a, "ac/dc", new String[]{"ac", "dc"});
+ }
+
+ public void testApostrophesSA() throws Exception {
+ // internal apostrophes: O'Reilly, you're, O'Reilly's
+ BaseTokenStreamTestCase.assertAnalyzesTo(a, "O'Reilly", new String[]{"O'Reilly"});
+ BaseTokenStreamTestCase.assertAnalyzesTo(a, "you're", new String[]{"you're"});
+ BaseTokenStreamTestCase.assertAnalyzesTo(a, "she's", new String[]{"she's"});
+ BaseTokenStreamTestCase.assertAnalyzesTo(a, "Jim's", new String[]{"Jim's"});
+ BaseTokenStreamTestCase.assertAnalyzesTo(a, "don't", new String[]{"don't"});
+ BaseTokenStreamTestCase.assertAnalyzesTo(a, "O'Reilly's", new String[]{"O'Reilly's"});
+ }
+
+ public void testNumericSA() throws Exception {
+ // floating point, serial, model numbers, ip addresses, etc.
+ BaseTokenStreamTestCase.assertAnalyzesTo(a, "21.35", new String[]{"21.35"});
+ BaseTokenStreamTestCase.assertAnalyzesTo(a, "R2D2 C3PO", new String[]{"R2D2", "C3PO"});
+ BaseTokenStreamTestCase.assertAnalyzesTo(a, "216.239.63.104", new String[]{"216.239.63.104"});
+ BaseTokenStreamTestCase.assertAnalyzesTo(a, "216.239.63.104", new String[]{"216.239.63.104"});
+ }
+
+ public void testTextWithNumbersSA() throws Exception {
+ // numbers
+ BaseTokenStreamTestCase.assertAnalyzesTo(a, "David has 5000 bones", new String[]{"David", "has", "5000", "bones"});
+ }
+
+ public void testVariousTextSA() throws Exception {
+ // various
+ BaseTokenStreamTestCase.assertAnalyzesTo(a, "C embedded developers wanted", new String[]{"C", "embedded", "developers", "wanted"});
+ BaseTokenStreamTestCase.assertAnalyzesTo(a, "foo bar FOO BAR", new String[]{"foo", "bar", "FOO", "BAR"});
+ BaseTokenStreamTestCase.assertAnalyzesTo(a, "foo bar . FOO <> BAR", new String[]{"foo", "bar", "FOO", "BAR"});
+ BaseTokenStreamTestCase.assertAnalyzesTo(a, "\"QUOTED\" word", new String[]{"QUOTED", "word"});
+ }
+
+ public void testKoreanSA() throws Exception {
+ // Korean words
+ BaseTokenStreamTestCase.assertAnalyzesTo(a, "\uc548\ub155\ud558\uc138\uc694 \ud55c\uae00\uc785\ub2c8\ub2e4", new String[]{"\uc548\ub155\ud558\uc138\uc694", "\ud55c\uae00\uc785\ub2c8\ub2e4"});
+ }
+
+ public void testOffsets() throws Exception {
+ BaseTokenStreamTestCase.assertAnalyzesTo(a, "David has 5000 bones",
+ new String[] {"David", "has", "5000", "bones"},
+ new int[] {0, 6, 10, 15},
+ new int[] {5, 9, 14, 20});
+ }
+
+ public void testTypes() throws Exception {
+ BaseTokenStreamTestCase.assertAnalyzesTo(a, "David has 5000 bones",
+ new String[] {"David", "has", "5000", "bones"},
+ new String[] { "<ALPHANUM>", "<ALPHANUM>", "<NUM>", "<ALPHANUM>" });
+ }
+
+ public void testUnicodeWordBreaks() throws Exception {
+ WordBreakTestUnicode_6_3_0 wordBreakTest = new WordBreakTestUnicode_6_3_0();
+ wordBreakTest.test(a);
+ }
+
+ public void testSupplementary() throws Exception {
+ BaseTokenStreamTestCase.assertAnalyzesTo(a, "\U00029b05\u8271\u935f\u41f9\u612f\u701b",
+ new String[] {"\U00029b05", "\u8271", "\u935f", "\u41f9", "\u612f", "\u701b"},
+ new String[] { "<IDEOGRAPHIC>", "<IDEOGRAPHIC>", "<IDEOGRAPHIC>", "<IDEOGRAPHIC>", "<IDEOGRAPHIC>", "<IDEOGRAPHIC>" });
+ }
+
+ public void testKorean() throws Exception {
+ BaseTokenStreamTestCase.assertAnalyzesTo(a, "\ud6c8\ubbfc\uc815\uc74c",
+ new String[] { "\ud6c8\ubbfc\uc815\uc74c" },
+ new String[] { "<HANGUL>" });
+ }
+
+ public void testJapanese() throws Exception {
+ BaseTokenStreamTestCase.assertAnalyzesTo(a, "\u4eee\u540d\u9063\u3044 \u30ab\u30bf\u30ab\u30ca",
+ new String[] { "\u4eee", "\u540d", "\u9063", "\u3044", "\u30ab\u30bf\u30ab\u30ca" },
+ new String[] { "<IDEOGRAPHIC>", "<IDEOGRAPHIC>", "<IDEOGRAPHIC>", "<HIRAGANA>", "<KATAKANA>" });
+ }
+
+ public void testCombiningMarks() throws Exception {
+ checkOneTerm(a, "\u3055\u3099", "\u3055\u3099"); // hiragana
+ checkOneTerm(a, "\u30b5\u3099", "\u30b5\u3099"); // katakana
+ checkOneTerm(a, "\u58f9\u3099", "\u58f9\u3099"); // ideographic
+ checkOneTerm(a, "\uc544\u3099", "\uc544\u3099"); // hangul
+ }
+
+ /**
+ * Multiple consecutive chars in \p{WB:MidLetter}, \p{WB:MidNumLet},
+ * and/or \p{WB:MidNum} should trigger a token split.
+ */
+ public void testMid() throws Exception {
+ // ':' is in \p{WB:MidLetter}, which should trigger a split unless there is a Letter char on both sides
+ BaseTokenStreamTestCase.assertAnalyzesTo(a, "A:B", new String[] { "A:B" });
+ BaseTokenStreamTestCase.assertAnalyzesTo(a, "A::B", new String[] { "A", "B" });
+
+ // '.' is in \p{WB:MidNumLet}, which should trigger a split unless there is a Letter or Numeric char on both sides
+ BaseTokenStreamTestCase.assertAnalyzesTo(a, "1.2", new String[] { "1.2" });
+ BaseTokenStreamTestCase.assertAnalyzesTo(a, "A.B", new String[] { "A.B" });
+ BaseTokenStreamTestCase.assertAnalyzesTo(a, "1..2", new String[] { "1", "2" });
+ BaseTokenStreamTestCase.assertAnalyzesTo(a, "A..B", new String[] { "A", "B" });
+
+ // ',' is in \p{WB:MidNum}, which should trigger a split unless there is a Numeric char on both sides
+ BaseTokenStreamTestCase.assertAnalyzesTo(a, "1,2", new String[] { "1,2" });
+ BaseTokenStreamTestCase.assertAnalyzesTo(a, "1,,2", new String[] { "1", "2" });
+
+ // Mixed consecutive \p{WB:MidLetter} and \p{WB:MidNumLet} should trigger a split
+ BaseTokenStreamTestCase.assertAnalyzesTo(a, "A.:B", new String[] { "A", "B" });
+ BaseTokenStreamTestCase.assertAnalyzesTo(a, "A:.B", new String[] { "A", "B" });
+
+ // Mixed consecutive \p{WB:MidNum} and \p{WB:MidNumLet} should trigger a split
+ BaseTokenStreamTestCase.assertAnalyzesTo(a, "1,.2", new String[] { "1", "2" });
+ BaseTokenStreamTestCase.assertAnalyzesTo(a, "1.,2", new String[] { "1", "2" });
+
+ // '_' is in \p{WB:ExtendNumLet}
+
+ BaseTokenStreamTestCase.assertAnalyzesTo(a, "A:B_A:B", new String[] { "A:B_A:B" });
+ BaseTokenStreamTestCase.assertAnalyzesTo(a, "A:B_A::B", new String[] { "A:B_A", "B" });
+
+ BaseTokenStreamTestCase.assertAnalyzesTo(a, "1.2_1.2", new String[] { "1.2_1.2" });
+ BaseTokenStreamTestCase.assertAnalyzesTo(a, "A.B_A.B", new String[] { "A.B_A.B" });
+ BaseTokenStreamTestCase.assertAnalyzesTo(a, "1.2_1..2", new String[] { "1.2_1", "2" });
+ BaseTokenStreamTestCase.assertAnalyzesTo(a, "A.B_A..B", new String[] { "A.B_A", "B" });
+
+ BaseTokenStreamTestCase.assertAnalyzesTo(a, "1,2_1,2", new String[] { "1,2_1,2" });
+ BaseTokenStreamTestCase.assertAnalyzesTo(a, "1,2_1,,2", new String[] { "1,2_1", "2" });
+
+ BaseTokenStreamTestCase.assertAnalyzesTo(a, "C_A.:B", new String[] { "C_A", "B" });
+ BaseTokenStreamTestCase.assertAnalyzesTo(a, "C_A:.B", new String[] { "C_A", "B" });
+
+ BaseTokenStreamTestCase.assertAnalyzesTo(a, "3_1,.2", new String[] { "3_1", "2" });
+ BaseTokenStreamTestCase.assertAnalyzesTo(a, "3_1.,2", new String[] { "3_1", "2" });
+ }
+
+
+
+ /** blast some random strings through the analyzer */
+ public void testRandomStrings() throws Exception {
+ Analyzer analyzer = new StandardAnalyzer();
+ checkRandomData(random(), analyzer, 1000*RANDOM_MULTIPLIER);
+ analyzer.close();
+ }
+
+ /** blast some random large strings through the analyzer */
+ public void testRandomHugeStrings() throws Exception {
+ Analyzer analyzer = new StandardAnalyzer();
+ checkRandomData(random(), analyzer, 100*RANDOM_MULTIPLIER, 8192);
+ analyzer.close();
+ }
+
+ // Adds random graph after:
+ public void testRandomHugeStringsGraphAfter() throws Exception {
+ Random random = random();
+ Analyzer analyzer = new Analyzer() {
+ @Override
+ protected TokenStreamComponents createComponents(String fieldName) {
+ Tokenizer tokenizer = new StandardTokenizer(newAttributeFactory());
+ TokenStream tokenStream = new MockGraphTokenFilter(random(), tokenizer);
+ return new TokenStreamComponents(tokenizer, tokenStream);
+ }
+ };
+ checkRandomData(random, analyzer, 100*RANDOM_MULTIPLIER, 8192);
+ analyzer.close();
+ }
+}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/SuggestStopFilter.java
----------------------------------------------------------------------
diff --git a/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/SuggestStopFilter.java b/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/SuggestStopFilter.java
index d7aa2bb..736d15d 100644
--- a/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/SuggestStopFilter.java
+++ b/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/SuggestStopFilter.java
@@ -18,14 +18,14 @@ package org.apache.lucene.search.suggest.analyzing;
import java.io.IOException;
+import org.apache.lucene.analysis.CharArraySet;
+import org.apache.lucene.analysis.StopFilter;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.core.StopFilter;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
-import org.apache.lucene.analysis.util.CharArraySet;
/** Like {@link StopFilter} except it will not remove the
* last token if that token was not followed by some token
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/SuggestStopFilterFactory.java
----------------------------------------------------------------------
diff --git a/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/SuggestStopFilterFactory.java b/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/SuggestStopFilterFactory.java
index 32baf08..3e222bc 100644
--- a/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/SuggestStopFilterFactory.java
+++ b/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/SuggestStopFilterFactory.java
@@ -16,16 +16,16 @@
*/
package org.apache.lucene.search.suggest.analyzing;
+import java.io.IOException;
+import java.util.Map;
+
+import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.WordlistLoader; // jdocs
import org.apache.lucene.analysis.core.StopAnalyzer;
-import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.analysis.util.ResourceLoader;
import org.apache.lucene.analysis.util.ResourceLoaderAware;
import org.apache.lucene.analysis.util.TokenFilterFactory;
-import org.apache.lucene.analysis.util.WordlistLoader; // jdocs
-
-import java.util.Map;
-import java.io.IOException;
/**
* Factory for {@link SuggestStopFilter}.
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggesterTest.java
----------------------------------------------------------------------
diff --git a/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggesterTest.java b/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggesterTest.java
index d0d3a41..69d3ed6 100644
--- a/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggesterTest.java
+++ b/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggesterTest.java
@@ -28,13 +28,13 @@ import java.util.Map;
import java.util.Set;
import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.analysis.MockTokenizer;
+import org.apache.lucene.analysis.StopFilter;
import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.core.StopFilter;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
-import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.suggest.Input;
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/BlendedInfixSuggesterTest.java
----------------------------------------------------------------------
diff --git a/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/BlendedInfixSuggesterTest.java b/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/BlendedInfixSuggesterTest.java
index c2b2bed..fe14e23 100644
--- a/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/BlendedInfixSuggesterTest.java
+++ b/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/BlendedInfixSuggesterTest.java
@@ -23,10 +23,10 @@ import java.util.Map;
import java.util.Set;
import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
-import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.suggest.Input;
import org.apache.lucene.search.suggest.InputArrayIterator;
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/TestFreeTextSuggester.java
----------------------------------------------------------------------
diff --git a/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/TestFreeTextSuggester.java b/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/TestFreeTextSuggester.java
index b26b5332..3e89275 100644
--- a/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/TestFreeTextSuggester.java
+++ b/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/TestFreeTextSuggester.java
@@ -32,16 +32,16 @@ import java.util.Map;
import java.util.Set;
import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.analysis.MockTokenizer;
+import org.apache.lucene.analysis.StopFilter;
import org.apache.lucene.analysis.Tokenizer;
-import org.apache.lucene.analysis.core.StopFilter;
-import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.document.Document;
-import org.apache.lucene.search.suggest.Lookup.LookupResult;
import org.apache.lucene.search.suggest.Input;
import org.apache.lucene.search.suggest.InputArrayIterator;
import org.apache.lucene.search.suggest.InputIterator;
+import org.apache.lucene.search.suggest.Lookup.LookupResult;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LineFileDocs;
import org.apache.lucene.util.LuceneTestCase;
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/TestSuggestStopFilter.java
----------------------------------------------------------------------
diff --git a/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/TestSuggestStopFilter.java b/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/TestSuggestStopFilter.java
index 44917d2..5ed84e0 100644
--- a/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/TestSuggestStopFilter.java
+++ b/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/TestSuggestStopFilter.java
@@ -19,11 +19,11 @@ package org.apache.lucene.search.suggest.analyzing;
import java.io.StringReader;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.MockTokenizer;
+import org.apache.lucene.analysis.StopFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
-import org.apache.lucene.analysis.core.StopFilter;
-import org.apache.lucene.analysis.util.CharArraySet;
public class TestSuggestStopFilter extends BaseTokenStreamTestCase {
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/TestSuggestStopFilterFactory.java
----------------------------------------------------------------------
diff --git a/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/TestSuggestStopFilterFactory.java b/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/TestSuggestStopFilterFactory.java
index 58b1892..69947e4 100644
--- a/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/TestSuggestStopFilterFactory.java
+++ b/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/TestSuggestStopFilterFactory.java
@@ -21,8 +21,8 @@ import java.util.HashMap;
import java.util.Map;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.core.StopAnalyzer;
-import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.analysis.util.ClasspathResourceLoader;
import org.apache.lucene.analysis.util.ResourceLoader;
import org.apache.lucene.util.Version;
[10/12] lucene-solr:branch_6x: LUCENE-7318: graduate StandardAnalyzer
and make it the default for IndexWriterConfig
Posted by mi...@apache.org.
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/package-info.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/package-info.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/package-info.java
deleted file mode 100644
index afc68ce..0000000
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/package-info.java
+++ /dev/null
@@ -1,63 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * Fast, general-purpose grammar-based tokenizers.
- * <p>The <code>org.apache.lucene.analysis.standard</code> package contains three
- * fast grammar-based tokenizers constructed with JFlex:</p>
- * <ul>
- * <li>{@link org.apache.lucene.analysis.standard.StandardTokenizer}:
- * as of Lucene 3.1, implements the Word Break rules from the Unicode Text
- * Segmentation algorithm, as specified in
- * <a href="http://unicode.org/reports/tr29/">Unicode Standard Annex #29</a>.
- * Unlike <code>UAX29URLEmailTokenizer</code>, URLs and email addresses are
- * <b>not</b> tokenized as single tokens, but are instead split up into
- * tokens according to the UAX#29 word break rules.
- * <br>
- * {@link org.apache.lucene.analysis.standard.StandardAnalyzer StandardAnalyzer} includes
- * {@link org.apache.lucene.analysis.standard.StandardTokenizer StandardTokenizer},
- * {@link org.apache.lucene.analysis.standard.StandardFilter StandardFilter},
- * {@link org.apache.lucene.analysis.core.LowerCaseFilter LowerCaseFilter}
- * and {@link org.apache.lucene.analysis.core.StopFilter StopFilter}.
- * When the <code>Version</code> specified in the constructor is lower than
- * 3.1, the {@link org.apache.lucene.analysis.standard.ClassicTokenizer ClassicTokenizer}
- * implementation is invoked.</li>
- * <li>{@link org.apache.lucene.analysis.standard.ClassicTokenizer ClassicTokenizer}:
- * this class was formerly (prior to Lucene 3.1) named
- * <code>StandardTokenizer</code>. (Its tokenization rules are not
- * based on the Unicode Text Segmentation algorithm.)
- * {@link org.apache.lucene.analysis.standard.ClassicAnalyzer ClassicAnalyzer} includes
- * {@link org.apache.lucene.analysis.standard.ClassicTokenizer ClassicTokenizer},
- * {@link org.apache.lucene.analysis.standard.StandardFilter StandardFilter},
- * {@link org.apache.lucene.analysis.core.LowerCaseFilter LowerCaseFilter}
- * and {@link org.apache.lucene.analysis.core.StopFilter StopFilter}.
- * </li>
- * <li>{@link org.apache.lucene.analysis.standard.UAX29URLEmailTokenizer UAX29URLEmailTokenizer}:
- * implements the Word Break rules from the Unicode Text Segmentation
- * algorithm, as specified in
- * <a href="http://unicode.org/reports/tr29/">Unicode Standard Annex #29</a>.
- * URLs and email addresses are also tokenized according to the relevant RFCs.
- * <br>
- * {@link org.apache.lucene.analysis.standard.UAX29URLEmailAnalyzer UAX29URLEmailAnalyzer} includes
- * {@link org.apache.lucene.analysis.standard.UAX29URLEmailTokenizer UAX29URLEmailTokenizer},
- * {@link org.apache.lucene.analysis.standard.StandardFilter StandardFilter},
- * {@link org.apache.lucene.analysis.core.LowerCaseFilter LowerCaseFilter}
- * and {@link org.apache.lucene.analysis.core.StopFilter StopFilter}.
- * </li>
- * </ul>
- */
-package org.apache.lucene.analysis.standard;
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/package.html
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/package.html b/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/package.html
new file mode 100644
index 0000000..055d0b2
--- /dev/null
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/package.html
@@ -0,0 +1,50 @@
+<!doctype html public "-//w3c//dtd html 4.0 transitional//en">
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<!-- not a package-info.java, because we already defined this package in spatial/ -->
+<html>
+<head>
+ <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
+</head>
+<body>
+ Fast, general-purpose grammar-based tokenizers.
+ <ul>
+ <li>{@link org.apache.lucene.analysis.standard.ClassicTokenizer ClassicTokenizer}:
+ this class was formerly (prior to Lucene 3.1) named
+ <code>StandardTokenizer</code>. (Its tokenization rules are not
+ based on the Unicode Text Segmentation algorithm.)
+ {@link org.apache.lucene.analysis.standard.ClassicAnalyzer ClassicAnalyzer} includes
+ {@link org.apache.lucene.analysis.standard.ClassicTokenizer ClassicTokenizer},
+ {@link org.apache.lucene.analysis.standard.StandardFilter StandardFilter},
+ {@link org.apache.lucene.analysis.LowerCaseFilter LowerCaseFilter}
+ and {@link org.apache.lucene.analysis.StopFilter StopFilter}.
+ </li>
+ <li>{@link org.apache.lucene.analysis.standard.UAX29URLEmailTokenizer UAX29URLEmailTokenizer}:
+ implements the Word Break rules from the Unicode Text Segmentation
+ algorithm, as specified in
+ <a href="http://unicode.org/reports/tr29/">Unicode Standard Annex #29</a>, except
+ URLs and email addresses are also tokenized according to the relevant RFCs.
+ <br>
+ {@link org.apache.lucene.analysis.standard.UAX29URLEmailAnalyzer UAX29URLEmailAnalyzer} includes
+ {@link org.apache.lucene.analysis.standard.UAX29URLEmailTokenizer UAX29URLEmailTokenizer},
+ {@link org.apache.lucene.analysis.standard.StandardFilter StandardFilter},
+ {@link org.apache.lucene.analysis.LowerCaseFilter LowerCaseFilter}
+ and {@link org.apache.lucene.analysis.StopFilter StopFilter}.
+ </li>
+ </ul>
+</body>
+</html>
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/common/src/java/org/apache/lucene/analysis/sv/SwedishAnalyzer.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/sv/SwedishAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/sv/SwedishAnalyzer.java
index fd15bbd..fd2aa2e 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/sv/SwedishAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/sv/SwedishAnalyzer.java
@@ -22,17 +22,17 @@ import java.io.Reader;
import java.nio.charset.StandardCharsets;
import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.core.LowerCaseFilter;
-import org.apache.lucene.analysis.core.StopFilter;
-import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
+import org.apache.lucene.analysis.CharArraySet;
+import org.apache.lucene.analysis.LowerCaseFilter;
+import org.apache.lucene.analysis.StopFilter;
+import org.apache.lucene.analysis.StopwordAnalyzerBase;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.WordlistLoader;
+import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
import org.apache.lucene.analysis.snowball.SnowballFilter;
import org.apache.lucene.analysis.standard.StandardFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
-import org.apache.lucene.analysis.util.CharArraySet;
-import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
-import org.apache.lucene.analysis.util.WordlistLoader;
import org.apache.lucene.util.IOUtils;
import org.tartarus.snowball.ext.SwedishStemmer;
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymFilterFactory.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymFilterFactory.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymFilterFactory.java
index 2488665..8bab9a7 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymFilterFactory.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymFilterFactory.java
@@ -30,9 +30,9 @@ import java.util.List;
import java.util.Map;
import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.LowerCaseFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
-import org.apache.lucene.analysis.core.LowerCaseFilter;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.analysis.util.ResourceLoader;
import org.apache.lucene.analysis.util.ResourceLoaderAware;
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/common/src/java/org/apache/lucene/analysis/th/ThaiAnalyzer.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/th/ThaiAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/th/ThaiAnalyzer.java
index 3f2e52a..9543c5c 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/th/ThaiAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/th/ThaiAnalyzer.java
@@ -20,13 +20,13 @@ import java.io.IOException;
import java.io.Reader;
import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.CharArraySet;
+import org.apache.lucene.analysis.LowerCaseFilter;
+import org.apache.lucene.analysis.StopFilter;
+import org.apache.lucene.analysis.StopwordAnalyzerBase;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.DecimalDigitFilter;
-import org.apache.lucene.analysis.core.LowerCaseFilter;
-import org.apache.lucene.analysis.core.StopFilter;
-import org.apache.lucene.analysis.util.CharArraySet;
-import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
import org.apache.lucene.util.Version;
/**
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/common/src/java/org/apache/lucene/analysis/tr/TurkishAnalyzer.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/tr/TurkishAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/tr/TurkishAnalyzer.java
index c9ed471..a21495f 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/tr/TurkishAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/tr/TurkishAnalyzer.java
@@ -21,15 +21,15 @@ import java.io.IOException;
import java.io.Reader;
import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.core.StopFilter;
-import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
+import org.apache.lucene.analysis.CharArraySet;
+import org.apache.lucene.analysis.StopFilter;
+import org.apache.lucene.analysis.StopwordAnalyzerBase;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
import org.apache.lucene.analysis.snowball.SnowballFilter;
import org.apache.lucene.analysis.standard.StandardFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
-import org.apache.lucene.analysis.util.CharArraySet;
-import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
import org.tartarus.snowball.ext.TurkishStemmer;
/**
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/AbstractAnalysisFactory.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/AbstractAnalysisFactory.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/AbstractAnalysisFactory.java
index 8ee809c..f8de8a7 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/AbstractAnalysisFactory.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/AbstractAnalysisFactory.java
@@ -37,7 +37,9 @@ import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;
-import org.apache.lucene.analysis.core.StopFilter;
+import org.apache.lucene.analysis.CharArraySet;
+import org.apache.lucene.analysis.StopFilter;
+import org.apache.lucene.analysis.WordlistLoader;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.Version;
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharArrayMap.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharArrayMap.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharArrayMap.java
deleted file mode 100644
index e414366..0000000
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharArrayMap.java
+++ /dev/null
@@ -1,669 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.lucene.analysis.util;
-
-
-import java.util.Arrays;
-import java.util.AbstractMap;
-import java.util.AbstractSet;
-import java.util.Iterator;
-import java.util.Map;
-import java.util.Set;
-
-import org.apache.lucene.analysis.util.CharacterUtils;
-
-/**
- * A simple class that stores key Strings as char[]'s in a
- * hash table. Note that this is not a general purpose
- * class. For example, it cannot remove items from the
- * map, nor does it resize its hash table to be smaller,
- * etc. It is designed to be quick to retrieve items
- * by char[] keys without the necessity of converting
- * to a String first.
- */
-public class CharArrayMap<V> extends AbstractMap<Object,V> {
- // private only because missing generics
- private static final CharArrayMap<?> EMPTY_MAP = new EmptyCharArrayMap<>();
-
- private final static int INIT_SIZE = 8;
- private boolean ignoreCase;
- private int count;
- char[][] keys; // package private because used in CharArraySet's non Set-conform CharArraySetIterator
- V[] values; // package private because used in CharArraySet's non Set-conform CharArraySetIterator
-
- /**
- * Create map with enough capacity to hold startSize terms
- *
- * @param startSize
- * the initial capacity
- * @param ignoreCase
- * <code>false</code> if and only if the set should be case sensitive
- * otherwise <code>true</code>.
- */
- @SuppressWarnings("unchecked")
- public CharArrayMap(int startSize, boolean ignoreCase) {
- this.ignoreCase = ignoreCase;
- int size = INIT_SIZE;
- while(startSize + (startSize>>2) > size)
- size <<= 1;
- keys = new char[size][];
- values = (V[]) new Object[size];
- }
-
- /**
- * Creates a map from the mappings in another map.
- *
- * @param c
- * a map whose mappings to be copied
- * @param ignoreCase
- * <code>false</code> if and only if the set should be case sensitive
- * otherwise <code>true</code>.
- */
- public CharArrayMap(Map<?,? extends V> c, boolean ignoreCase) {
- this(c.size(), ignoreCase);
- putAll(c);
- }
-
- /** Create set from the supplied map (used internally for readonly maps...) */
- private CharArrayMap(CharArrayMap<V> toCopy){
- this.keys = toCopy.keys;
- this.values = toCopy.values;
- this.ignoreCase = toCopy.ignoreCase;
- this.count = toCopy.count;
- }
-
- /** Clears all entries in this map. This method is supported for reusing, but not {@link Map#remove}. */
- @Override
- public void clear() {
- count = 0;
- Arrays.fill(keys, null);
- Arrays.fill(values, null);
- }
-
- /** true if the <code>len</code> chars of <code>text</code> starting at <code>off</code>
- * are in the {@link #keySet()} */
- public boolean containsKey(char[] text, int off, int len) {
- return keys[getSlot(text, off, len)] != null;
- }
-
- /** true if the <code>CharSequence</code> is in the {@link #keySet()} */
- public boolean containsKey(CharSequence cs) {
- return keys[getSlot(cs)] != null;
- }
-
- @Override
- public boolean containsKey(Object o) {
- if (o instanceof char[]) {
- final char[] text = (char[])o;
- return containsKey(text, 0, text.length);
- }
- return containsKey(o.toString());
- }
-
- /** returns the value of the mapping of <code>len</code> chars of <code>text</code>
- * starting at <code>off</code> */
- public V get(char[] text, int off, int len) {
- return values[getSlot(text, off, len)];
- }
-
- /** returns the value of the mapping of the chars inside this {@code CharSequence} */
- public V get(CharSequence cs) {
- return values[getSlot(cs)];
- }
-
- @Override
- public V get(Object o) {
- if (o instanceof char[]) {
- final char[] text = (char[])o;
- return get(text, 0, text.length);
- }
- return get(o.toString());
- }
-
- private int getSlot(char[] text, int off, int len) {
- int code = getHashCode(text, off, len);
- int pos = code & (keys.length-1);
- char[] text2 = keys[pos];
- if (text2 != null && !equals(text, off, len, text2)) {
- final int inc = ((code>>8)+code)|1;
- do {
- code += inc;
- pos = code & (keys.length-1);
- text2 = keys[pos];
- } while (text2 != null && !equals(text, off, len, text2));
- }
- return pos;
- }
-
- /** Returns true if the String is in the set */
- private int getSlot(CharSequence text) {
- int code = getHashCode(text);
- int pos = code & (keys.length-1);
- char[] text2 = keys[pos];
- if (text2 != null && !equals(text, text2)) {
- final int inc = ((code>>8)+code)|1;
- do {
- code += inc;
- pos = code & (keys.length-1);
- text2 = keys[pos];
- } while (text2 != null && !equals(text, text2));
- }
- return pos;
- }
-
- /** Add the given mapping. */
- public V put(CharSequence text, V value) {
- return put(text.toString(), value); // could be more efficient
- }
-
- @Override
- public V put(Object o, V value) {
- if (o instanceof char[]) {
- return put((char[])o, value);
- }
- return put(o.toString(), value);
- }
-
- /** Add the given mapping. */
- public V put(String text, V value) {
- return put(text.toCharArray(), value);
- }
-
- /** Add the given mapping.
- * If ignoreCase is true for this Set, the text array will be directly modified.
- * The user should never modify this text array after calling this method.
- */
- public V put(char[] text, V value) {
- if (ignoreCase) {
- CharacterUtils.toLowerCase(text, 0, text.length);
- }
- int slot = getSlot(text, 0, text.length);
- if (keys[slot] != null) {
- final V oldValue = values[slot];
- values[slot] = value;
- return oldValue;
- }
- keys[slot] = text;
- values[slot] = value;
- count++;
-
- if (count + (count>>2) > keys.length) {
- rehash();
- }
-
- return null;
- }
-
- @SuppressWarnings("unchecked")
- private void rehash() {
- assert keys.length == values.length;
- final int newSize = 2*keys.length;
- final char[][] oldkeys = keys;
- final V[] oldvalues = values;
- keys = new char[newSize][];
- values = (V[]) new Object[newSize];
-
- for(int i=0; i<oldkeys.length; i++) {
- char[] text = oldkeys[i];
- if (text != null) {
- // todo: could be faster... no need to compare strings on collision
- final int slot = getSlot(text,0,text.length);
- keys[slot] = text;
- values[slot] = oldvalues[i];
- }
- }
- }
-
- private boolean equals(char[] text1, int off, int len, char[] text2) {
- if (len != text2.length)
- return false;
- final int limit = off+len;
- if (ignoreCase) {
- for(int i=0;i<len;) {
- final int codePointAt = Character.codePointAt(text1, off+i, limit);
- if (Character.toLowerCase(codePointAt) != Character.codePointAt(text2, i, text2.length))
- return false;
- i += Character.charCount(codePointAt);
- }
- } else {
- for(int i=0;i<len;i++) {
- if (text1[off+i] != text2[i])
- return false;
- }
- }
- return true;
- }
-
- private boolean equals(CharSequence text1, char[] text2) {
- int len = text1.length();
- if (len != text2.length)
- return false;
- if (ignoreCase) {
- for(int i=0;i<len;) {
- final int codePointAt = Character.codePointAt(text1, i);
- if (Character.toLowerCase(codePointAt) != Character.codePointAt(text2, i, text2.length))
- return false;
- i += Character.charCount(codePointAt);
- }
- } else {
- for(int i=0;i<len;i++) {
- if (text1.charAt(i) != text2[i])
- return false;
- }
- }
- return true;
- }
-
- private int getHashCode(char[] text, int offset, int len) {
- if (text == null)
- throw new NullPointerException();
- int code = 0;
- final int stop = offset + len;
- if (ignoreCase) {
- for (int i=offset; i<stop;) {
- final int codePointAt = Character.codePointAt(text, i, stop);
- code = code*31 + Character.toLowerCase(codePointAt);
- i += Character.charCount(codePointAt);
- }
- } else {
- for (int i=offset; i<stop; i++) {
- code = code*31 + text[i];
- }
- }
- return code;
- }
-
- private int getHashCode(CharSequence text) {
- if (text == null)
- throw new NullPointerException();
- int code = 0;
- int len = text.length();
- if (ignoreCase) {
- for (int i=0; i<len;) {
- int codePointAt = Character.codePointAt(text, i);
- code = code*31 + Character.toLowerCase(codePointAt);
- i += Character.charCount(codePointAt);
- }
- } else {
- for (int i=0; i<len; i++) {
- code = code*31 + text.charAt(i);
- }
- }
- return code;
- }
-
- @Override
- public V remove(Object key) {
- throw new UnsupportedOperationException();
- }
-
- @Override
- public int size() {
- return count;
- }
-
- @Override
- public String toString() {
- final StringBuilder sb = new StringBuilder("{");
- for (Map.Entry<Object,V> entry : entrySet()) {
- if (sb.length()>1) sb.append(", ");
- sb.append(entry);
- }
- return sb.append('}').toString();
- }
-
- private EntrySet entrySet = null;
- private CharArraySet keySet = null;
-
- EntrySet createEntrySet() {
- return new EntrySet(true);
- }
-
- @Override
- public final EntrySet entrySet() {
- if (entrySet == null) {
- entrySet = createEntrySet();
- }
- return entrySet;
- }
-
- // helper for CharArraySet to not produce endless recursion
- final Set<Object> originalKeySet() {
- return super.keySet();
- }
-
- /** Returns an {@link CharArraySet} view on the map's keys.
- * The set will use the same {@code matchVersion} as this map. */
- @Override @SuppressWarnings({"unchecked","rawtypes"})
- public final CharArraySet keySet() {
- if (keySet == null) {
- // prevent adding of entries
- keySet = new CharArraySet((CharArrayMap) this) {
- @Override
- public boolean add(Object o) {
- throw new UnsupportedOperationException();
- }
- @Override
- public boolean add(CharSequence text) {
- throw new UnsupportedOperationException();
- }
- @Override
- public boolean add(String text) {
- throw new UnsupportedOperationException();
- }
- @Override
- public boolean add(char[] text) {
- throw new UnsupportedOperationException();
- }
- };
- }
- return keySet;
- }
-
- /** public iterator class so efficient methods are exposed to users */
- public class EntryIterator implements Iterator<Map.Entry<Object,V>> {
- private int pos=-1;
- private int lastPos;
- private final boolean allowModify;
-
- private EntryIterator(boolean allowModify) {
- this.allowModify = allowModify;
- goNext();
- }
-
- private void goNext() {
- lastPos = pos;
- pos++;
- while (pos < keys.length && keys[pos] == null) pos++;
- }
-
- @Override
- public boolean hasNext() {
- return pos < keys.length;
- }
-
- /** gets the next key... do not modify the returned char[] */
- public char[] nextKey() {
- goNext();
- return keys[lastPos];
- }
-
- /** gets the next key as a newly created String object */
- public String nextKeyString() {
- return new String(nextKey());
- }
-
- /** returns the value associated with the last key returned */
- public V currentValue() {
- return values[lastPos];
- }
-
- /** sets the value associated with the last key returned */
- public V setValue(V value) {
- if (!allowModify)
- throw new UnsupportedOperationException();
- V old = values[lastPos];
- values[lastPos] = value;
- return old;
- }
-
- /** use nextCharArray() + currentValue() for better efficiency. */
- @Override
- public Map.Entry<Object,V> next() {
- goNext();
- return new MapEntry(lastPos, allowModify);
- }
-
- @Override
- public void remove() {
- throw new UnsupportedOperationException();
- }
- }
-
- private final class MapEntry implements Map.Entry<Object,V> {
- private final int pos;
- private final boolean allowModify;
-
- private MapEntry(int pos, boolean allowModify) {
- this.pos = pos;
- this.allowModify = allowModify;
- }
-
- @Override
- public Object getKey() {
- // we must clone here, as putAll to another CharArrayMap
- // with other case sensitivity flag would corrupt the keys
- return keys[pos].clone();
- }
-
- @Override
- public V getValue() {
- return values[pos];
- }
-
- @Override
- public V setValue(V value) {
- if (!allowModify)
- throw new UnsupportedOperationException();
- final V old = values[pos];
- values[pos] = value;
- return old;
- }
-
- @Override
- public String toString() {
- return new StringBuilder().append(keys[pos]).append('=')
- .append((values[pos] == CharArrayMap.this) ? "(this Map)" : values[pos])
- .toString();
- }
- }
-
- /** public EntrySet class so efficient methods are exposed to users */
- public final class EntrySet extends AbstractSet<Map.Entry<Object,V>> {
- private final boolean allowModify;
-
- private EntrySet(boolean allowModify) {
- this.allowModify = allowModify;
- }
-
- @Override
- public EntryIterator iterator() {
- return new EntryIterator(allowModify);
- }
-
- @Override
- @SuppressWarnings("unchecked")
- public boolean contains(Object o) {
- if (!(o instanceof Map.Entry))
- return false;
- final Map.Entry<Object,V> e = (Map.Entry<Object,V>)o;
- final Object key = e.getKey();
- final Object val = e.getValue();
- final Object v = get(key);
- return v == null ? val == null : v.equals(val);
- }
-
- @Override
- public boolean remove(Object o) {
- throw new UnsupportedOperationException();
- }
-
- @Override
- public int size() {
- return count;
- }
-
- @Override
- public void clear() {
- if (!allowModify)
- throw new UnsupportedOperationException();
- CharArrayMap.this.clear();
- }
- }
-
- /**
- * Returns an unmodifiable {@link CharArrayMap}. This allows to provide
- * unmodifiable views of internal map for "read-only" use.
- *
- * @param map
- * a map for which the unmodifiable map is returned.
- * @return an new unmodifiable {@link CharArrayMap}.
- * @throws NullPointerException
- * if the given map is <code>null</code>.
- */
- public static <V> CharArrayMap<V> unmodifiableMap(CharArrayMap<V> map) {
- if (map == null)
- throw new NullPointerException("Given map is null");
- if (map == emptyMap() || map.isEmpty())
- return emptyMap();
- if (map instanceof UnmodifiableCharArrayMap)
- return map;
- return new UnmodifiableCharArrayMap<>(map);
- }
-
- /**
- * Returns a copy of the given map as a {@link CharArrayMap}. If the given map
- * is a {@link CharArrayMap} the ignoreCase property will be preserved.
- *
- * @param map
- * a map to copy
- * @return a copy of the given map as a {@link CharArrayMap}. If the given map
- * is a {@link CharArrayMap} the ignoreCase property as well as the
- * matchVersion will be of the given map will be preserved.
- */
- @SuppressWarnings("unchecked")
- public static <V> CharArrayMap<V> copy(final Map<?,? extends V> map) {
- if(map == EMPTY_MAP)
- return emptyMap();
- if(map instanceof CharArrayMap) {
- CharArrayMap<V> m = (CharArrayMap<V>) map;
- // use fast path instead of iterating all values
- // this is even on very small sets ~10 times faster than iterating
- final char[][] keys = new char[m.keys.length][];
- System.arraycopy(m.keys, 0, keys, 0, keys.length);
- final V[] values = (V[]) new Object[m.values.length];
- System.arraycopy(m.values, 0, values, 0, values.length);
- m = new CharArrayMap<>(m);
- m.keys = keys;
- m.values = values;
- return m;
- }
- return new CharArrayMap<>(map, false);
- }
-
- /** Returns an empty, unmodifiable map. */
- @SuppressWarnings("unchecked")
- public static <V> CharArrayMap<V> emptyMap() {
- return (CharArrayMap<V>) EMPTY_MAP;
- }
-
- // package private CharArraySet instanceof check in CharArraySet
- static class UnmodifiableCharArrayMap<V> extends CharArrayMap<V> {
-
- UnmodifiableCharArrayMap(CharArrayMap<V> map) {
- super(map);
- }
-
- @Override
- public void clear() {
- throw new UnsupportedOperationException();
- }
-
- @Override
- public V put(Object o, V val){
- throw new UnsupportedOperationException();
- }
-
- @Override
- public V put(char[] text, V val) {
- throw new UnsupportedOperationException();
- }
-
- @Override
- public V put(CharSequence text, V val) {
- throw new UnsupportedOperationException();
- }
-
- @Override
- public V put(String text, V val) {
- throw new UnsupportedOperationException();
- }
-
- @Override
- public V remove(Object key) {
- throw new UnsupportedOperationException();
- }
-
- @Override
- EntrySet createEntrySet() {
- return new EntrySet(false);
- }
- }
-
- /**
- * Empty {@link org.apache.lucene.analysis.util.CharArrayMap.UnmodifiableCharArrayMap} optimized for speed.
- * Contains checks will always return <code>false</code> or throw
- * NPE if necessary.
- */
- private static final class EmptyCharArrayMap<V> extends UnmodifiableCharArrayMap<V> {
- EmptyCharArrayMap() {
- super(new CharArrayMap<V>(0, false));
- }
-
- @Override
- public boolean containsKey(char[] text, int off, int len) {
- if(text == null)
- throw new NullPointerException();
- return false;
- }
-
- @Override
- public boolean containsKey(CharSequence cs) {
- if(cs == null)
- throw new NullPointerException();
- return false;
- }
-
- @Override
- public boolean containsKey(Object o) {
- if(o == null)
- throw new NullPointerException();
- return false;
- }
-
- @Override
- public V get(char[] text, int off, int len) {
- if(text == null)
- throw new NullPointerException();
- return null;
- }
-
- @Override
- public V get(CharSequence cs) {
- if(cs == null)
- throw new NullPointerException();
- return null;
- }
-
- @Override
- public V get(Object o) {
- if(o == null)
- throw new NullPointerException();
- return null;
- }
- }
-}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharArraySet.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharArraySet.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharArraySet.java
deleted file mode 100644
index 15485bc..0000000
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharArraySet.java
+++ /dev/null
@@ -1,193 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.lucene.analysis.util;
-
-
-import java.util.AbstractSet;
-import java.util.Collection;
-import java.util.Iterator;
-import java.util.Set;
-
-/**
- * A simple class that stores Strings as char[]'s in a
- * hash table. Note that this is not a general purpose
- * class. For example, it cannot remove items from the
- * set, nor does it resize its hash table to be smaller,
- * etc. It is designed to be quick to test if a char[]
- * is in the set without the necessity of converting it
- * to a String first.
- *
- * <P>
- * <em>Please note:</em> This class implements {@link java.util.Set Set} but
- * does not behave like it should in all cases. The generic type is
- * {@code Set<Object>}, because you can add any object to it,
- * that has a string representation. The add methods will use
- * {@link Object#toString} and store the result using a {@code char[]}
- * buffer. The same behavior have the {@code contains()} methods.
- * The {@link #iterator()} returns an {@code Iterator<char[]>}.
- */
-public class CharArraySet extends AbstractSet<Object> {
- public static final CharArraySet EMPTY_SET = new CharArraySet(CharArrayMap.<Object>emptyMap());
- private static final Object PLACEHOLDER = new Object();
-
- private final CharArrayMap<Object> map;
-
- /**
- * Create set with enough capacity to hold startSize terms
- *
- * @param startSize
- * the initial capacity
- * @param ignoreCase
- * <code>false</code> if and only if the set should be case sensitive
- * otherwise <code>true</code>.
- */
- public CharArraySet(int startSize, boolean ignoreCase) {
- this(new CharArrayMap<>(startSize, ignoreCase));
- }
-
- /**
- * Creates a set from a Collection of objects.
- *
- * @param c
- * a collection whose elements to be placed into the set
- * @param ignoreCase
- * <code>false</code> if and only if the set should be case sensitive
- * otherwise <code>true</code>.
- */
- public CharArraySet(Collection<?> c, boolean ignoreCase) {
- this(c.size(), ignoreCase);
- addAll(c);
- }
-
- /** Create set from the specified map (internal only), used also by {@link CharArrayMap#keySet()} */
- CharArraySet(final CharArrayMap<Object> map){
- this.map = map;
- }
-
- /** Clears all entries in this set. This method is supported for reusing, but not {@link Set#remove}. */
- @Override
- public void clear() {
- map.clear();
- }
-
- /** true if the <code>len</code> chars of <code>text</code> starting at <code>off</code>
- * are in the set */
- public boolean contains(char[] text, int off, int len) {
- return map.containsKey(text, off, len);
- }
-
- /** true if the <code>CharSequence</code> is in the set */
- public boolean contains(CharSequence cs) {
- return map.containsKey(cs);
- }
-
- @Override
- public boolean contains(Object o) {
- return map.containsKey(o);
- }
-
- @Override
- public boolean add(Object o) {
- return map.put(o, PLACEHOLDER) == null;
- }
-
- /** Add this CharSequence into the set */
- public boolean add(CharSequence text) {
- return map.put(text, PLACEHOLDER) == null;
- }
-
- /** Add this String into the set */
- public boolean add(String text) {
- return map.put(text, PLACEHOLDER) == null;
- }
-
- /** Add this char[] directly to the set.
- * If ignoreCase is true for this Set, the text array will be directly modified.
- * The user should never modify this text array after calling this method.
- */
- public boolean add(char[] text) {
- return map.put(text, PLACEHOLDER) == null;
- }
-
- @Override
- public int size() {
- return map.size();
- }
-
- /**
- * Returns an unmodifiable {@link CharArraySet}. This allows to provide
- * unmodifiable views of internal sets for "read-only" use.
- *
- * @param set
- * a set for which the unmodifiable set is returned.
- * @return an new unmodifiable {@link CharArraySet}.
- * @throws NullPointerException
- * if the given set is <code>null</code>.
- */
- public static CharArraySet unmodifiableSet(CharArraySet set) {
- if (set == null)
- throw new NullPointerException("Given set is null");
- if (set == EMPTY_SET)
- return EMPTY_SET;
- if (set.map instanceof CharArrayMap.UnmodifiableCharArrayMap)
- return set;
- return new CharArraySet(CharArrayMap.unmodifiableMap(set.map));
- }
-
- /**
- * Returns a copy of the given set as a {@link CharArraySet}. If the given set
- * is a {@link CharArraySet} the ignoreCase property will be preserved.
- *
- * @param set
- * a set to copy
- * @return a copy of the given set as a {@link CharArraySet}. If the given set
- * is a {@link CharArraySet} the ignoreCase property as well as the
- * matchVersion will be of the given set will be preserved.
- */
- public static CharArraySet copy(final Set<?> set) {
- if(set == EMPTY_SET)
- return EMPTY_SET;
- if(set instanceof CharArraySet) {
- final CharArraySet source = (CharArraySet) set;
- return new CharArraySet(CharArrayMap.copy(source.map));
- }
- return new CharArraySet(set, false);
- }
-
- /**
- * Returns an {@link Iterator} for {@code char[]} instances in this set.
- */
- @Override @SuppressWarnings("unchecked")
- public Iterator<Object> iterator() {
- // use the AbstractSet#keySet()'s iterator (to not produce endless recursion)
- return map.originalKeySet().iterator();
- }
-
- @Override
- public String toString() {
- final StringBuilder sb = new StringBuilder("[");
- for (Object item : this) {
- if (sb.length()>1) sb.append(", ");
- if (item instanceof char[]) {
- sb.append((char[]) item);
- } else {
- sb.append(item);
- }
- }
- return sb.append(']').toString();
- }
-}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharTokenizer.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharTokenizer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharTokenizer.java
index 4952f99..9100345 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharTokenizer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharTokenizer.java
@@ -22,16 +22,16 @@ import java.util.Objects;
import java.util.function.IntPredicate;
import java.util.function.IntUnaryOperator;
+import org.apache.lucene.analysis.CharacterUtils.CharacterBuffer;
+import org.apache.lucene.analysis.CharacterUtils;
+import org.apache.lucene.analysis.LowerCaseFilter;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.LetterTokenizer;
-import org.apache.lucene.analysis.core.LowerCaseFilter;
import org.apache.lucene.analysis.core.LowerCaseTokenizer;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
-import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.util.AttributeFactory;
-import org.apache.lucene.analysis.util.CharacterUtils;
-import org.apache.lucene.analysis.util.CharacterUtils.CharacterBuffer;
/**
* An abstract base class for simple, character-oriented tokenizers.
@@ -285,4 +285,4 @@ public abstract class CharTokenizer extends Tokenizer {
finalOffset = 0;
ioBuffer.reset(); // make sure to reset the IO buffer!!
}
-}
\ No newline at end of file
+}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharacterUtils.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharacterUtils.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharacterUtils.java
deleted file mode 100644
index b728523..0000000
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharacterUtils.java
+++ /dev/null
@@ -1,251 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.lucene.analysis.util;
-
-
-import java.io.IOException;
-import java.io.Reader;
-
-/**
- * Utility class to write tokenizers or token filters.
- * @lucene.internal
- */
-public final class CharacterUtils {
-
- private CharacterUtils() {} // no instantiation
-
- /**
- * Creates a new {@link CharacterBuffer} and allocates a <code>char[]</code>
- * of the given bufferSize.
- *
- * @param bufferSize
- * the internal char buffer size, must be <code>>= 2</code>
- * @return a new {@link CharacterBuffer} instance.
- */
- public static CharacterBuffer newCharacterBuffer(final int bufferSize) {
- if (bufferSize < 2) {
- throw new IllegalArgumentException("buffersize must be >= 2");
- }
- return new CharacterBuffer(new char[bufferSize], 0, 0);
- }
-
-
- /**
- * Converts each unicode codepoint to lowerCase via {@link Character#toLowerCase(int)} starting
- * at the given offset.
- * @param buffer the char buffer to lowercase
- * @param offset the offset to start at
- * @param limit the max char in the buffer to lower case
- */
- public static void toLowerCase(final char[] buffer, final int offset, final int limit) {
- assert buffer.length >= limit;
- assert offset <=0 && offset <= buffer.length;
- for (int i = offset; i < limit;) {
- i += Character.toChars(
- Character.toLowerCase(
- Character.codePointAt(buffer, i, limit)), buffer, i);
- }
- }
-
- /**
- * Converts each unicode codepoint to UpperCase via {@link Character#toUpperCase(int)} starting
- * at the given offset.
- * @param buffer the char buffer to UPPERCASE
- * @param offset the offset to start at
- * @param limit the max char in the buffer to lower case
- */
- public static void toUpperCase(final char[] buffer, final int offset, final int limit) {
- assert buffer.length >= limit;
- assert offset <=0 && offset <= buffer.length;
- for (int i = offset; i < limit;) {
- i += Character.toChars(
- Character.toUpperCase(
- Character.codePointAt(buffer, i, limit)), buffer, i);
- }
- }
-
- /** Converts a sequence of Java characters to a sequence of unicode code points.
- * @return the number of code points written to the destination buffer */
- public static int toCodePoints(char[] src, int srcOff, int srcLen, int[] dest, int destOff) {
- if (srcLen < 0) {
- throw new IllegalArgumentException("srcLen must be >= 0");
- }
- int codePointCount = 0;
- for (int i = 0; i < srcLen; ) {
- final int cp = Character.codePointAt(src, srcOff + i, srcOff + srcLen);
- final int charCount = Character.charCount(cp);
- dest[destOff + codePointCount++] = cp;
- i += charCount;
- }
- return codePointCount;
- }
-
- /** Converts a sequence of unicode code points to a sequence of Java characters.
- * @return the number of chars written to the destination buffer */
- public static int toChars(int[] src, int srcOff, int srcLen, char[] dest, int destOff) {
- if (srcLen < 0) {
- throw new IllegalArgumentException("srcLen must be >= 0");
- }
- int written = 0;
- for (int i = 0; i < srcLen; ++i) {
- written += Character.toChars(src[srcOff + i], dest, destOff + written);
- }
- return written;
- }
-
- /**
- * Fills the {@link CharacterBuffer} with characters read from the given
- * reader {@link Reader}. This method tries to read <code>numChars</code>
- * characters into the {@link CharacterBuffer}, each call to fill will start
- * filling the buffer from offset <code>0</code> up to <code>numChars</code>.
- * In case code points can span across 2 java characters, this method may
- * only fill <code>numChars - 1</code> characters in order not to split in
- * the middle of a surrogate pair, even if there are remaining characters in
- * the {@link Reader}.
- * <p>
- * This method guarantees
- * that the given {@link CharacterBuffer} will never contain a high surrogate
- * character as the last element in the buffer unless it is the last available
- * character in the reader. In other words, high and low surrogate pairs will
- * always be preserved across buffer boarders.
- * </p>
- * <p>
- * A return value of <code>false</code> means that this method call exhausted
- * the reader, but there may be some bytes which have been read, which can be
- * verified by checking whether <code>buffer.getLength() > 0</code>.
- * </p>
- *
- * @param buffer
- * the buffer to fill.
- * @param reader
- * the reader to read characters from.
- * @param numChars
- * the number of chars to read
- * @return <code>false</code> if and only if reader.read returned -1 while trying to fill the buffer
- * @throws IOException
- * if the reader throws an {@link IOException}.
- */
- public static boolean fill(CharacterBuffer buffer, Reader reader, int numChars) throws IOException {
- assert buffer.buffer.length >= 2;
- if (numChars < 2 || numChars > buffer.buffer.length) {
- throw new IllegalArgumentException("numChars must be >= 2 and <= the buffer size");
- }
- final char[] charBuffer = buffer.buffer;
- buffer.offset = 0;
- final int offset;
-
- // Install the previously saved ending high surrogate:
- if (buffer.lastTrailingHighSurrogate != 0) {
- charBuffer[0] = buffer.lastTrailingHighSurrogate;
- buffer.lastTrailingHighSurrogate = 0;
- offset = 1;
- } else {
- offset = 0;
- }
-
- final int read = readFully(reader, charBuffer, offset, numChars - offset);
-
- buffer.length = offset + read;
- final boolean result = buffer.length == numChars;
- if (buffer.length < numChars) {
- // We failed to fill the buffer. Even if the last char is a high
- // surrogate, there is nothing we can do
- return result;
- }
-
- if (Character.isHighSurrogate(charBuffer[buffer.length - 1])) {
- buffer.lastTrailingHighSurrogate = charBuffer[--buffer.length];
- }
- return result;
- }
-
- /** Convenience method which calls <code>fill(buffer, reader, buffer.buffer.length)</code>. */
- public static boolean fill(CharacterBuffer buffer, Reader reader) throws IOException {
- return fill(buffer, reader, buffer.buffer.length);
- }
-
- static int readFully(Reader reader, char[] dest, int offset, int len) throws IOException {
- int read = 0;
- while (read < len) {
- final int r = reader.read(dest, offset + read, len - read);
- if (r == -1) {
- break;
- }
- read += r;
- }
- return read;
- }
-
- /**
- * A simple IO buffer to use with
- * {@link CharacterUtils#fill(CharacterBuffer, Reader)}.
- */
- public static final class CharacterBuffer {
-
- private final char[] buffer;
- private int offset;
- private int length;
- // NOTE: not private so outer class can access without
- // $access methods:
- char lastTrailingHighSurrogate;
-
- CharacterBuffer(char[] buffer, int offset, int length) {
- this.buffer = buffer;
- this.offset = offset;
- this.length = length;
- }
-
- /**
- * Returns the internal buffer
- *
- * @return the buffer
- */
- public char[] getBuffer() {
- return buffer;
- }
-
- /**
- * Returns the data offset in the internal buffer.
- *
- * @return the offset
- */
- public int getOffset() {
- return offset;
- }
-
- /**
- * Return the length of the data in the internal buffer starting at
- * {@link #getOffset()}
- *
- * @return the length
- */
- public int getLength() {
- return length;
- }
-
- /**
- * Resets the CharacterBuffer. All internals are reset to its default
- * values.
- */
- public void reset() {
- offset = 0;
- length = 0;
- lastTrailingHighSurrogate = 0;
- }
- }
-
-}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/ElisionFilter.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/ElisionFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/ElisionFilter.java
index d7689f9..be5f04c 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/ElisionFilter.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/ElisionFilter.java
@@ -19,10 +19,10 @@ package org.apache.lucene.analysis.util;
import java.io.IOException;
+import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
-import org.apache.lucene.analysis.util.CharArraySet;
/**
* Removes elisions from a {@link TokenStream}. For example, "l'avion" (the plane) will be
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/ElisionFilterFactory.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/ElisionFilterFactory.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/ElisionFilterFactory.java
index 31c3027..fff3edc 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/ElisionFilterFactory.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/ElisionFilterFactory.java
@@ -20,6 +20,7 @@ package org.apache.lucene.analysis.util;
import java.io.IOException;
import java.util.Map;
+import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.fr.FrenchAnalyzer;
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/FilteringTokenFilter.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/FilteringTokenFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/FilteringTokenFilter.java
deleted file mode 100644
index 97d35e2..0000000
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/FilteringTokenFilter.java
+++ /dev/null
@@ -1,76 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.lucene.analysis.util;
-
-
-import java.io.IOException;
-
-import org.apache.lucene.analysis.TokenFilter;
-import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
-
-/**
- * Abstract base class for TokenFilters that may remove tokens.
- * You have to implement {@link #accept} and return a boolean if the current
- * token should be preserved. {@link #incrementToken} uses this method
- * to decide if a token should be passed to the caller.
- */
-public abstract class FilteringTokenFilter extends TokenFilter {
-
- private final PositionIncrementAttribute posIncrAtt = addAttribute(PositionIncrementAttribute.class);
- private int skippedPositions;
-
- /**
- * Create a new {@link FilteringTokenFilter}.
- * @param in the {@link TokenStream} to consume
- */
- public FilteringTokenFilter(TokenStream in) {
- super(in);
- }
-
- /** Override this method and return if the current input token should be returned by {@link #incrementToken}. */
- protected abstract boolean accept() throws IOException;
-
- @Override
- public final boolean incrementToken() throws IOException {
- skippedPositions = 0;
- while (input.incrementToken()) {
- if (accept()) {
- if (skippedPositions != 0) {
- posIncrAtt.setPositionIncrement(posIncrAtt.getPositionIncrement() + skippedPositions);
- }
- return true;
- }
- skippedPositions += posIncrAtt.getPositionIncrement();
- }
-
- // reached EOS -- return false
- return false;
- }
-
- @Override
- public void reset() throws IOException {
- super.reset();
- skippedPositions = 0;
- }
-
- @Override
- public void end() throws IOException {
- super.end();
- posIncrAtt.setPositionIncrement(posIncrAtt.getPositionIncrement() + skippedPositions);
- }
-}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/StopwordAnalyzerBase.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/StopwordAnalyzerBase.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/StopwordAnalyzerBase.java
deleted file mode 100644
index fc6c798..0000000
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/StopwordAnalyzerBase.java
+++ /dev/null
@@ -1,138 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.lucene.analysis.util;
-
-import java.io.IOException;
-import java.io.Reader;
-import java.nio.charset.StandardCharsets;
-import java.nio.file.Files;
-import java.nio.file.Path;
-
-import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.util.IOUtils;
-
-/**
- * Base class for Analyzers that need to make use of stopword sets.
- *
- */
-public abstract class StopwordAnalyzerBase extends Analyzer {
-
- /**
- * An immutable stopword set
- */
- protected final CharArraySet stopwords;
-
- /**
- * Returns the analyzer's stopword set or an empty set if the analyzer has no
- * stopwords
- *
- * @return the analyzer's stopword set or an empty set if the analyzer has no
- * stopwords
- */
- public CharArraySet getStopwordSet() {
- return stopwords;
- }
-
- /**
- * Creates a new instance initialized with the given stopword set
- *
- * @param stopwords
- * the analyzer's stopword set
- */
- protected StopwordAnalyzerBase(final CharArraySet stopwords) {
- // analyzers should use char array set for stopwords!
- this.stopwords = stopwords == null ? CharArraySet.EMPTY_SET : CharArraySet
- .unmodifiableSet(CharArraySet.copy(stopwords));
- }
-
- /**
- * Creates a new Analyzer with an empty stopword set
- */
- protected StopwordAnalyzerBase() {
- this(null);
- }
-
- /**
- * Creates a CharArraySet from a file resource associated with a class. (See
- * {@link Class#getResourceAsStream(String)}).
- *
- * @param ignoreCase
- * <code>true</code> if the set should ignore the case of the
- * stopwords, otherwise <code>false</code>
- * @param aClass
- * a class that is associated with the given stopwordResource
- * @param resource
- * name of the resource file associated with the given class
- * @param comment
- * comment string to ignore in the stopword file
- * @return a CharArraySet containing the distinct stopwords from the given
- * file
- * @throws IOException
- * if loading the stopwords throws an {@link IOException}
- */
- protected static CharArraySet loadStopwordSet(final boolean ignoreCase,
- final Class<? extends Analyzer> aClass, final String resource,
- final String comment) throws IOException {
- Reader reader = null;
- try {
- reader = IOUtils.getDecodingReader(aClass.getResourceAsStream(resource), StandardCharsets.UTF_8);
- return WordlistLoader.getWordSet(reader, comment, new CharArraySet(16, ignoreCase));
- } finally {
- IOUtils.close(reader);
- }
-
- }
-
- /**
- * Creates a CharArraySet from a path.
- *
- * @param stopwords
- * the stopwords file to load
- * @return a CharArraySet containing the distinct stopwords from the given
- * file
- * @throws IOException
- * if loading the stopwords throws an {@link IOException}
- */
- protected static CharArraySet loadStopwordSet(Path stopwords) throws IOException {
- Reader reader = null;
- try {
- reader = Files.newBufferedReader(stopwords, StandardCharsets.UTF_8);
- return WordlistLoader.getWordSet(reader);
- } finally {
- IOUtils.close(reader);
- }
- }
-
- /**
- * Creates a CharArraySet from a file.
- *
- * @param stopwords
- * the stopwords reader to load
- *
- * @return a CharArraySet containing the distinct stopwords from the given
- * reader
- * @throws IOException
- * if loading the stopwords throws an {@link IOException}
- */
- protected static CharArraySet loadStopwordSet(Reader stopwords) throws IOException {
- try {
- return WordlistLoader.getWordSet(stopwords);
- } finally {
- IOUtils.close(stopwords);
- }
- }
-}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/WordlistLoader.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/WordlistLoader.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/WordlistLoader.java
deleted file mode 100644
index 4d99965..0000000
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/WordlistLoader.java
+++ /dev/null
@@ -1,244 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.lucene.analysis.util;
-
-
-import java.io.BufferedReader;
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.Reader;
-import java.nio.charset.Charset;
-import java.util.ArrayList;
-import java.util.List;
-
-import org.apache.lucene.util.IOUtils;
-
-/**
- * Loader for text files that represent a list of stopwords.
- *
- * @see IOUtils to obtain {@link Reader} instances
- * @lucene.internal
- */
-public class WordlistLoader {
-
- private static final int INITIAL_CAPACITY = 16;
-
- /** no instance */
- private WordlistLoader() {}
-
- /**
- * Reads lines from a Reader and adds every line as an entry to a CharArraySet (omitting
- * leading and trailing whitespace). Every line of the Reader should contain only
- * one word. The words need to be in lowercase if you make use of an
- * Analyzer which uses LowerCaseFilter (like StandardAnalyzer).
- *
- * @param reader Reader containing the wordlist
- * @param result the {@link CharArraySet} to fill with the readers words
- * @return the given {@link CharArraySet} with the reader's words
- */
- public static CharArraySet getWordSet(Reader reader, CharArraySet result) throws IOException {
- BufferedReader br = null;
- try {
- br = getBufferedReader(reader);
- String word = null;
- while ((word = br.readLine()) != null) {
- result.add(word.trim());
- }
- }
- finally {
- IOUtils.close(br);
- }
- return result;
- }
-
- /**
- * Reads lines from a Reader and adds every line as an entry to a CharArraySet (omitting
- * leading and trailing whitespace). Every line of the Reader should contain only
- * one word. The words need to be in lowercase if you make use of an
- * Analyzer which uses LowerCaseFilter (like StandardAnalyzer).
- *
- * @param reader Reader containing the wordlist
- * @return A {@link CharArraySet} with the reader's words
- */
- public static CharArraySet getWordSet(Reader reader) throws IOException {
- return getWordSet(reader, new CharArraySet(INITIAL_CAPACITY, false));
- }
-
- /**
- * Reads lines from a Reader and adds every non-comment line as an entry to a CharArraySet (omitting
- * leading and trailing whitespace). Every line of the Reader should contain only
- * one word. The words need to be in lowercase if you make use of an
- * Analyzer which uses LowerCaseFilter (like StandardAnalyzer).
- *
- * @param reader Reader containing the wordlist
- * @param comment The string representing a comment.
- * @return A CharArraySet with the reader's words
- */
- public static CharArraySet getWordSet(Reader reader, String comment) throws IOException {
- return getWordSet(reader, comment, new CharArraySet(INITIAL_CAPACITY, false));
- }
-
- /**
- * Reads lines from a Reader and adds every non-comment line as an entry to a CharArraySet (omitting
- * leading and trailing whitespace). Every line of the Reader should contain only
- * one word. The words need to be in lowercase if you make use of an
- * Analyzer which uses LowerCaseFilter (like StandardAnalyzer).
- *
- * @param reader Reader containing the wordlist
- * @param comment The string representing a comment.
- * @param result the {@link CharArraySet} to fill with the readers words
- * @return the given {@link CharArraySet} with the reader's words
- */
- public static CharArraySet getWordSet(Reader reader, String comment, CharArraySet result) throws IOException {
- BufferedReader br = null;
- try {
- br = getBufferedReader(reader);
- String word = null;
- while ((word = br.readLine()) != null) {
- if (word.startsWith(comment) == false){
- result.add(word.trim());
- }
- }
- }
- finally {
- IOUtils.close(br);
- }
- return result;
- }
-
-
- /**
- * Reads stopwords from a stopword list in Snowball format.
- * <p>
- * The snowball format is the following:
- * <ul>
- * <li>Lines may contain multiple words separated by whitespace.
- * <li>The comment character is the vertical line (|).
- * <li>Lines may contain trailing comments.
- * </ul>
- *
- * @param reader Reader containing a Snowball stopword list
- * @param result the {@link CharArraySet} to fill with the readers words
- * @return the given {@link CharArraySet} with the reader's words
- */
- public static CharArraySet getSnowballWordSet(Reader reader, CharArraySet result)
- throws IOException {
- BufferedReader br = null;
- try {
- br = getBufferedReader(reader);
- String line = null;
- while ((line = br.readLine()) != null) {
- int comment = line.indexOf('|');
- if (comment >= 0) line = line.substring(0, comment);
- String words[] = line.split("\\s+");
- for (int i = 0; i < words.length; i++)
- if (words[i].length() > 0) result.add(words[i]);
- }
- } finally {
- IOUtils.close(br);
- }
- return result;
- }
-
- /**
- * Reads stopwords from a stopword list in Snowball format.
- * <p>
- * The snowball format is the following:
- * <ul>
- * <li>Lines may contain multiple words separated by whitespace.
- * <li>The comment character is the vertical line (|).
- * <li>Lines may contain trailing comments.
- * </ul>
- *
- * @param reader Reader containing a Snowball stopword list
- * @return A {@link CharArraySet} with the reader's words
- */
- public static CharArraySet getSnowballWordSet(Reader reader) throws IOException {
- return getSnowballWordSet(reader, new CharArraySet(INITIAL_CAPACITY, false));
- }
-
-
- /**
- * Reads a stem dictionary. Each line contains:
- * <pre>word<b>\t</b>stem</pre>
- * (i.e. two tab separated words)
- *
- * @return stem dictionary that overrules the stemming algorithm
- * @throws IOException If there is a low-level I/O error.
- */
- public static CharArrayMap<String> getStemDict(Reader reader, CharArrayMap<String> result) throws IOException {
- BufferedReader br = null;
- try {
- br = getBufferedReader(reader);
- String line;
- while ((line = br.readLine()) != null) {
- String[] wordstem = line.split("\t", 2);
- result.put(wordstem[0], wordstem[1]);
- }
- } finally {
- IOUtils.close(br);
- }
- return result;
- }
-
- /**
- * Accesses a resource by name and returns the (non comment) lines containing
- * data using the given character encoding.
- *
- * <p>
- * A comment line is any line that starts with the character "#"
- * </p>
- *
- * @return a list of non-blank non-comment lines with whitespace trimmed
- * @throws IOException If there is a low-level I/O error.
- */
- public static List<String> getLines(InputStream stream, Charset charset) throws IOException{
- BufferedReader input = null;
- ArrayList<String> lines;
- boolean success = false;
- try {
- input = getBufferedReader(IOUtils.getDecodingReader(stream, charset));
-
- lines = new ArrayList<>();
- for (String word=null; (word=input.readLine())!=null;) {
- // skip initial bom marker
- if (lines.isEmpty() && word.length() > 0 && word.charAt(0) == '\uFEFF')
- word = word.substring(1);
- // skip comments
- if (word.startsWith("#")) continue;
- word=word.trim();
- // skip blank lines
- if (word.length()==0) continue;
- lines.add(word);
- }
- success = true;
- return lines;
- } finally {
- if (success) {
- IOUtils.close(input);
- } else {
- IOUtils.closeWhileHandlingException(input);
- }
- }
- }
-
- private static BufferedReader getBufferedReader(Reader reader) {
- return (reader instanceof BufferedReader) ? (BufferedReader) reader
- : new BufferedReader(reader);
- }
-
-}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/common/src/java/org/apache/lucene/collation/package-info.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/collation/package-info.java b/lucene/analysis/common/src/java/org/apache/lucene/collation/package-info.java
index 19a1c7e..e56071a 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/collation/package-info.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/collation/package-info.java
@@ -36,7 +36,7 @@
* </li>
* <li>
* Effective Locale-specific normalization (case differences, diacritics, etc.).
- * ({@link org.apache.lucene.analysis.core.LowerCaseFilter} and
+ * ({@link org.apache.lucene.analysis.LowerCaseFilter} and
* {@link org.apache.lucene.analysis.miscellaneous.ASCIIFoldingFilter} provide these services
* in a generic way that doesn't take into account locale-specific needs.)
* </li>
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/common/src/test/org/apache/lucene/analysis/ar/TestArabicAnalyzer.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/ar/TestArabicAnalyzer.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/ar/TestArabicAnalyzer.java
index 9842687..2a6a1c7 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/ar/TestArabicAnalyzer.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/ar/TestArabicAnalyzer.java
@@ -20,7 +20,7 @@ package org.apache.lucene.analysis.ar;
import java.io.IOException;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
-import org.apache.lucene.analysis.util.CharArraySet;
+import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.util.Version;
/**
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/common/src/test/org/apache/lucene/analysis/ar/TestArabicStemFilter.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/ar/TestArabicStemFilter.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/ar/TestArabicStemFilter.java
index ca15485..872e7f5 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/ar/TestArabicStemFilter.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/ar/TestArabicStemFilter.java
@@ -21,11 +21,11 @@ import java.io.IOException;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.KeywordTokenizer;
import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
-import org.apache.lucene.analysis.util.CharArraySet;
/**
* Test the Arabic Normalization Filter
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/common/src/test/org/apache/lucene/analysis/bg/TestBulgarianAnalyzer.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/bg/TestBulgarianAnalyzer.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/bg/TestBulgarianAnalyzer.java
index 7f890b9..582d8e4 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/bg/TestBulgarianAnalyzer.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/bg/TestBulgarianAnalyzer.java
@@ -21,7 +21,7 @@ import java.io.IOException;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
-import org.apache.lucene.analysis.util.CharArraySet;
+import org.apache.lucene.analysis.CharArraySet;
/**
* Test the Bulgarian analyzer
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/common/src/test/org/apache/lucene/analysis/bg/TestBulgarianStemmer.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/bg/TestBulgarianStemmer.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/bg/TestBulgarianStemmer.java
index daad7bb..2538717 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/bg/TestBulgarianStemmer.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/bg/TestBulgarianStemmer.java
@@ -22,11 +22,11 @@ import java.io.StringReader;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.KeywordTokenizer;
import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
-import org.apache.lucene.analysis.util.CharArraySet;
/**
* Test the Bulgarian Stemmer
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/common/src/test/org/apache/lucene/analysis/br/TestBrazilianAnalyzer.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/br/TestBrazilianAnalyzer.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/br/TestBrazilianAnalyzer.java
index a05dd0b..550a62a 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/br/TestBrazilianAnalyzer.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/br/TestBrazilianAnalyzer.java
@@ -22,11 +22,11 @@ import java.io.StringReader;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.KeywordTokenizer;
import org.apache.lucene.analysis.core.LowerCaseTokenizer;
import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
-import org.apache.lucene.analysis.util.CharArraySet;
/**
* Test the Brazilian Stem Filter, which only modifies the term text.
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/common/src/test/org/apache/lucene/analysis/ca/TestCatalanAnalyzer.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/ca/TestCatalanAnalyzer.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/ca/TestCatalanAnalyzer.java
index fd65332..289f22b 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/ca/TestCatalanAnalyzer.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/ca/TestCatalanAnalyzer.java
@@ -21,7 +21,7 @@ import java.io.IOException;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
-import org.apache.lucene.analysis.util.CharArraySet;
+import org.apache.lucene.analysis.CharArraySet;
public class TestCatalanAnalyzer extends BaseTokenStreamTestCase {
/** This test fails with NPE when the
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/common/src/test/org/apache/lucene/analysis/cjk/TestCJKAnalyzer.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/cjk/TestCJKAnalyzer.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/cjk/TestCJKAnalyzer.java
index 72c510c..1a47e42 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/cjk/TestCJKAnalyzer.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/cjk/TestCJKAnalyzer.java
@@ -22,17 +22,17 @@ import java.io.Reader;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.MockTokenizer;
+import org.apache.lucene.analysis.StopFilter;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.charfilter.MappingCharFilter;
import org.apache.lucene.analysis.charfilter.NormalizeCharMap;
import org.apache.lucene.analysis.core.KeywordTokenizer;
-import org.apache.lucene.analysis.core.StopFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
-import org.apache.lucene.analysis.util.CharArraySet;
/**
* Most tests adopted from TestCJKTokenizer
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/common/src/test/org/apache/lucene/analysis/ckb/TestSoraniAnalyzer.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/ckb/TestSoraniAnalyzer.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/ckb/TestSoraniAnalyzer.java
index d08817c..dcb083d 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/ckb/TestSoraniAnalyzer.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/ckb/TestSoraniAnalyzer.java
@@ -21,7 +21,7 @@ import java.io.IOException;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
-import org.apache.lucene.analysis.util.CharArraySet;
+import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.util.Version;
/**
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/common/src/test/org/apache/lucene/analysis/commongrams/CommonGramsFilterTest.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/commongrams/CommonGramsFilterTest.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/commongrams/CommonGramsFilterTest.java
index 1171574..e940489 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/commongrams/CommonGramsFilterTest.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/commongrams/CommonGramsFilterTest.java
@@ -20,9 +20,9 @@ import java.io.StringReader;
import java.util.Arrays;
import org.apache.lucene.analysis.*;
+import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
-import org.apache.lucene.analysis.util.CharArraySet;
/**
* Tests CommonGrams(Query)Filter
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/common/src/test/org/apache/lucene/analysis/commongrams/TestCommonGramsFilterFactory.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/commongrams/TestCommonGramsFilterFactory.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/commongrams/TestCommonGramsFilterFactory.java
index 98c351e..5bcfb3d 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/commongrams/TestCommonGramsFilterFactory.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/commongrams/TestCommonGramsFilterFactory.java
@@ -17,18 +17,18 @@
package org.apache.lucene.analysis.commongrams;
+import java.io.StringReader;
+
+import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
-import org.apache.lucene.analysis.core.TestStopFilter;
+import org.apache.lucene.analysis.core.TestStopFilterFactory;
import org.apache.lucene.analysis.util.BaseTokenStreamFactoryTestCase;
-import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.analysis.util.ClasspathResourceLoader;
import org.apache.lucene.analysis.util.ResourceLoader;
import org.apache.lucene.util.Version;
-import java.io.StringReader;
-
/**
* Tests pretty much copied from StopFilterFactoryTest We use the test files
* used by the StopFilterFactoryTest TODO: consider creating separate test files
@@ -37,7 +37,7 @@ import java.io.StringReader;
public class TestCommonGramsFilterFactory extends BaseTokenStreamFactoryTestCase {
public void testInform() throws Exception {
- ResourceLoader loader = new ClasspathResourceLoader(TestStopFilter.class);
+ ResourceLoader loader = new ClasspathResourceLoader(TestStopFilterFactory.class);
assertTrue("loader is null and it shouldn't be", loader != null);
CommonGramsFilterFactory factory = (CommonGramsFilterFactory) tokenFilterFactory("CommonGrams", Version.LATEST, loader,
"words", "stop-1.txt",
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/common/src/test/org/apache/lucene/analysis/commongrams/TestCommonGramsQueryFilterFactory.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/commongrams/TestCommonGramsQueryFilterFactory.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/commongrams/TestCommonGramsQueryFilterFactory.java
index 776365e..23d1bd4 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/commongrams/TestCommonGramsQueryFilterFactory.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/commongrams/TestCommonGramsQueryFilterFactory.java
@@ -16,12 +16,11 @@
*/
package org.apache.lucene.analysis.commongrams;
-
+import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
-import org.apache.lucene.analysis.core.TestStopFilter;
+import org.apache.lucene.analysis.core.TestStopFilterFactory;
import org.apache.lucene.analysis.util.BaseTokenStreamFactoryTestCase;
-import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.analysis.util.ClasspathResourceLoader;
import org.apache.lucene.analysis.util.ResourceLoader;
import org.apache.lucene.util.Version;
@@ -34,7 +33,7 @@ import org.apache.lucene.util.Version;
public class TestCommonGramsQueryFilterFactory extends BaseTokenStreamFactoryTestCase {
public void testInform() throws Exception {
- ResourceLoader loader = new ClasspathResourceLoader(TestStopFilter.class);
+ ResourceLoader loader = new ClasspathResourceLoader(TestStopFilterFactory.class);
assertTrue("loader is null and it shouldn't be", loader != null);
CommonGramsQueryFilterFactory factory = (CommonGramsQueryFilterFactory) tokenFilterFactory("CommonGramsQuery", Version.LATEST, loader,
"words", "stop-1.txt",
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/common/src/test/org/apache/lucene/analysis/compound/TestCompoundWordTokenFilter.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/compound/TestCompoundWordTokenFilter.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/compound/TestCompoundWordTokenFilter.java
index 636d9ba..ed3abe4 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/compound/TestCompoundWordTokenFilter.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/compound/TestCompoundWordTokenFilter.java
@@ -24,6 +24,7 @@ import java.util.Arrays;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
@@ -32,7 +33,6 @@ import org.apache.lucene.analysis.charfilter.MappingCharFilter;
import org.apache.lucene.analysis.charfilter.NormalizeCharMap;
import org.apache.lucene.analysis.compound.hyphenation.HyphenationTree;
import org.apache.lucene.analysis.core.KeywordTokenizer;
-import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.util.Attribute;
import org.apache.lucene.util.AttributeImpl;
[05/12] lucene-solr:branch_6x: LUCENE-7318: graduate StandardAnalyzer
and make it the default for IndexWriterConfig
Posted by mi...@apache.org.
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/core/src/java/org/apache/lucene/analysis/CharArrayMap.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/analysis/CharArrayMap.java b/lucene/core/src/java/org/apache/lucene/analysis/CharArrayMap.java
new file mode 100644
index 0000000..e7e610a
--- /dev/null
+++ b/lucene/core/src/java/org/apache/lucene/analysis/CharArrayMap.java
@@ -0,0 +1,669 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.analysis;
+
+
+import java.util.Arrays;
+import java.util.AbstractMap;
+import java.util.AbstractSet;
+import java.util.Iterator;
+import java.util.Map;
+import java.util.Set;
+
+import org.apache.lucene.analysis.CharacterUtils;
+
+/**
+ * A simple class that stores key Strings as char[]'s in a
+ * hash table. Note that this is not a general purpose
+ * class. For example, it cannot remove items from the
+ * map, nor does it resize its hash table to be smaller,
+ * etc. It is designed to be quick to retrieve items
+ * by char[] keys without the necessity of converting
+ * to a String first.
+ */
+public class CharArrayMap<V> extends AbstractMap<Object,V> {
+ // private only because missing generics
+ private static final CharArrayMap<?> EMPTY_MAP = new EmptyCharArrayMap<>();
+
+ private final static int INIT_SIZE = 8;
+ private boolean ignoreCase;
+ private int count;
+ char[][] keys; // package private because used in CharArraySet's non Set-conform CharArraySetIterator
+ V[] values; // package private because used in CharArraySet's non Set-conform CharArraySetIterator
+
+ /**
+ * Create map with enough capacity to hold startSize terms
+ *
+ * @param startSize
+ * the initial capacity
+ * @param ignoreCase
+ * <code>false</code> if and only if the set should be case sensitive
+ * otherwise <code>true</code>.
+ */
+ @SuppressWarnings("unchecked")
+ public CharArrayMap(int startSize, boolean ignoreCase) {
+ this.ignoreCase = ignoreCase;
+ int size = INIT_SIZE;
+ while(startSize + (startSize>>2) > size)
+ size <<= 1;
+ keys = new char[size][];
+ values = (V[]) new Object[size];
+ }
+
+ /**
+ * Creates a map from the mappings in another map.
+ *
+ * @param c
+ * a map whose mappings to be copied
+ * @param ignoreCase
+ * <code>false</code> if and only if the set should be case sensitive
+ * otherwise <code>true</code>.
+ */
+ public CharArrayMap(Map<?,? extends V> c, boolean ignoreCase) {
+ this(c.size(), ignoreCase);
+ putAll(c);
+ }
+
+ /** Create set from the supplied map (used internally for readonly maps...) */
+ private CharArrayMap(CharArrayMap<V> toCopy){
+ this.keys = toCopy.keys;
+ this.values = toCopy.values;
+ this.ignoreCase = toCopy.ignoreCase;
+ this.count = toCopy.count;
+ }
+
+ /** Clears all entries in this map. This method is supported for reusing, but not {@link Map#remove}. */
+ @Override
+ public void clear() {
+ count = 0;
+ Arrays.fill(keys, null);
+ Arrays.fill(values, null);
+ }
+
+ /** true if the <code>len</code> chars of <code>text</code> starting at <code>off</code>
+ * are in the {@link #keySet()} */
+ public boolean containsKey(char[] text, int off, int len) {
+ return keys[getSlot(text, off, len)] != null;
+ }
+
+ /** true if the <code>CharSequence</code> is in the {@link #keySet()} */
+ public boolean containsKey(CharSequence cs) {
+ return keys[getSlot(cs)] != null;
+ }
+
+ @Override
+ public boolean containsKey(Object o) {
+ if (o instanceof char[]) {
+ final char[] text = (char[])o;
+ return containsKey(text, 0, text.length);
+ }
+ return containsKey(o.toString());
+ }
+
+ /** returns the value of the mapping of <code>len</code> chars of <code>text</code>
+ * starting at <code>off</code> */
+ public V get(char[] text, int off, int len) {
+ return values[getSlot(text, off, len)];
+ }
+
+ /** returns the value of the mapping of the chars inside this {@code CharSequence} */
+ public V get(CharSequence cs) {
+ return values[getSlot(cs)];
+ }
+
+ @Override
+ public V get(Object o) {
+ if (o instanceof char[]) {
+ final char[] text = (char[])o;
+ return get(text, 0, text.length);
+ }
+ return get(o.toString());
+ }
+
+ private int getSlot(char[] text, int off, int len) {
+ int code = getHashCode(text, off, len);
+ int pos = code & (keys.length-1);
+ char[] text2 = keys[pos];
+ if (text2 != null && !equals(text, off, len, text2)) {
+ final int inc = ((code>>8)+code)|1;
+ do {
+ code += inc;
+ pos = code & (keys.length-1);
+ text2 = keys[pos];
+ } while (text2 != null && !equals(text, off, len, text2));
+ }
+ return pos;
+ }
+
+ /** Returns true if the String is in the set */
+ private int getSlot(CharSequence text) {
+ int code = getHashCode(text);
+ int pos = code & (keys.length-1);
+ char[] text2 = keys[pos];
+ if (text2 != null && !equals(text, text2)) {
+ final int inc = ((code>>8)+code)|1;
+ do {
+ code += inc;
+ pos = code & (keys.length-1);
+ text2 = keys[pos];
+ } while (text2 != null && !equals(text, text2));
+ }
+ return pos;
+ }
+
+ /** Add the given mapping. */
+ public V put(CharSequence text, V value) {
+ return put(text.toString(), value); // could be more efficient
+ }
+
+ @Override
+ public V put(Object o, V value) {
+ if (o instanceof char[]) {
+ return put((char[])o, value);
+ }
+ return put(o.toString(), value);
+ }
+
+ /** Add the given mapping. */
+ public V put(String text, V value) {
+ return put(text.toCharArray(), value);
+ }
+
+ /** Add the given mapping.
+ * If ignoreCase is true for this Set, the text array will be directly modified.
+ * The user should never modify this text array after calling this method.
+ */
+ public V put(char[] text, V value) {
+ if (ignoreCase) {
+ CharacterUtils.toLowerCase(text, 0, text.length);
+ }
+ int slot = getSlot(text, 0, text.length);
+ if (keys[slot] != null) {
+ final V oldValue = values[slot];
+ values[slot] = value;
+ return oldValue;
+ }
+ keys[slot] = text;
+ values[slot] = value;
+ count++;
+
+ if (count + (count>>2) > keys.length) {
+ rehash();
+ }
+
+ return null;
+ }
+
+ @SuppressWarnings("unchecked")
+ private void rehash() {
+ assert keys.length == values.length;
+ final int newSize = 2*keys.length;
+ final char[][] oldkeys = keys;
+ final V[] oldvalues = values;
+ keys = new char[newSize][];
+ values = (V[]) new Object[newSize];
+
+ for(int i=0; i<oldkeys.length; i++) {
+ char[] text = oldkeys[i];
+ if (text != null) {
+ // todo: could be faster... no need to compare strings on collision
+ final int slot = getSlot(text,0,text.length);
+ keys[slot] = text;
+ values[slot] = oldvalues[i];
+ }
+ }
+ }
+
+ private boolean equals(char[] text1, int off, int len, char[] text2) {
+ if (len != text2.length)
+ return false;
+ final int limit = off+len;
+ if (ignoreCase) {
+ for(int i=0;i<len;) {
+ final int codePointAt = Character.codePointAt(text1, off+i, limit);
+ if (Character.toLowerCase(codePointAt) != Character.codePointAt(text2, i, text2.length))
+ return false;
+ i += Character.charCount(codePointAt);
+ }
+ } else {
+ for(int i=0;i<len;i++) {
+ if (text1[off+i] != text2[i])
+ return false;
+ }
+ }
+ return true;
+ }
+
+ private boolean equals(CharSequence text1, char[] text2) {
+ int len = text1.length();
+ if (len != text2.length)
+ return false;
+ if (ignoreCase) {
+ for(int i=0;i<len;) {
+ final int codePointAt = Character.codePointAt(text1, i);
+ if (Character.toLowerCase(codePointAt) != Character.codePointAt(text2, i, text2.length))
+ return false;
+ i += Character.charCount(codePointAt);
+ }
+ } else {
+ for(int i=0;i<len;i++) {
+ if (text1.charAt(i) != text2[i])
+ return false;
+ }
+ }
+ return true;
+ }
+
+ private int getHashCode(char[] text, int offset, int len) {
+ if (text == null)
+ throw new NullPointerException();
+ int code = 0;
+ final int stop = offset + len;
+ if (ignoreCase) {
+ for (int i=offset; i<stop;) {
+ final int codePointAt = Character.codePointAt(text, i, stop);
+ code = code*31 + Character.toLowerCase(codePointAt);
+ i += Character.charCount(codePointAt);
+ }
+ } else {
+ for (int i=offset; i<stop; i++) {
+ code = code*31 + text[i];
+ }
+ }
+ return code;
+ }
+
+ private int getHashCode(CharSequence text) {
+ if (text == null)
+ throw new NullPointerException();
+ int code = 0;
+ int len = text.length();
+ if (ignoreCase) {
+ for (int i=0; i<len;) {
+ int codePointAt = Character.codePointAt(text, i);
+ code = code*31 + Character.toLowerCase(codePointAt);
+ i += Character.charCount(codePointAt);
+ }
+ } else {
+ for (int i=0; i<len; i++) {
+ code = code*31 + text.charAt(i);
+ }
+ }
+ return code;
+ }
+
/** Not supported: this map can never remove individual entries (only {@code clear()}).
 * @throws UnsupportedOperationException always */
@Override
public V remove(Object key) {
  throw new UnsupportedOperationException();
}
+
/** Returns the number of mappings currently in this map. */
@Override
public int size() {
  return count;
}
+
+ @Override
+ public String toString() {
+ final StringBuilder sb = new StringBuilder("{");
+ for (Map.Entry<Object,V> entry : entrySet()) {
+ if (sb.length()>1) sb.append(", ");
+ sb.append(entry);
+ }
+ return sb.append('}').toString();
+ }
+
+ private EntrySet entrySet = null;
+ private CharArraySet keySet = null;
+
/** Factory for the entry-set view; returns a modifiable view here and is
 * overridden by {@code UnmodifiableCharArrayMap} to return a read-only one. */
EntrySet createEntrySet() {
  return new EntrySet(true);
}
+
+ @Override
+ public final EntrySet entrySet() {
+ if (entrySet == null) {
+ entrySet = createEntrySet();
+ }
+ return entrySet;
+ }
+
// helper for CharArraySet to not produce endless recursion
/** Returns the plain superclass key-set view; CharArraySet iterates this one
 * instead of {@link #keySet()}, which would recurse back into CharArraySet. */
final Set<Object> originalKeySet() {
  return super.keySet();
}
+
/** Returns a {@link CharArraySet} view on the map's keys.
 * The set will use the same {@code matchVersion} as this map.
 * All {@code add} overloads of the returned view throw, because adding a key
 * without a value would corrupt the backing map. The view is created lazily
 * and cached. */
@Override @SuppressWarnings({"unchecked","rawtypes"})
public final CharArraySet keySet() {
  if (keySet == null) {
    // prevent adding of entries
    keySet = new CharArraySet((CharArrayMap) this) {
      @Override
      public boolean add(Object o) {
        throw new UnsupportedOperationException();
      }
      @Override
      public boolean add(CharSequence text) {
        throw new UnsupportedOperationException();
      }
      @Override
      public boolean add(String text) {
        throw new UnsupportedOperationException();
      }
      @Override
      public boolean add(char[] text) {
        throw new UnsupportedOperationException();
      }
    };
  }
  return keySet;
}
+
/** Public iterator class so efficient methods are exposed to users.
 * Iterates occupied slots of the open-addressed table.
 * {@code pos} is the slot that will be returned next; {@code lastPos} is the
 * slot most recently returned (what currentValue()/setValue() operate on). */
public class EntryIterator implements Iterator<Map.Entry<Object,V>> {
  private int pos=-1;      // slot to be returned by the next call to next()/nextKey()
  private int lastPos;     // slot of the most recently returned entry
  private final boolean allowModify;

  private EntryIterator(boolean allowModify) {
    this.allowModify = allowModify;
    goNext(); // position on the first occupied slot
  }

  /** Advances pos to the next occupied slot, saving the previous pos in lastPos. */
  private void goNext() {
    lastPos = pos;
    pos++;
    while (pos < keys.length && keys[pos] == null) pos++;
  }

  @Override
  public boolean hasNext() {
    return pos < keys.length;
  }

  /** gets the next key... do not modify the returned char[] */
  public char[] nextKey() {
    goNext();
    return keys[lastPos];
  }

  /** gets the next key as a newly created String object */
  public String nextKeyString() {
    return new String(nextKey());
  }

  /** returns the value associated with the last key returned */
  public V currentValue() {
    return values[lastPos];
  }

  /** sets the value associated with the last key returned
   * @throws UnsupportedOperationException if this iterator was created over a read-only view */
  public V setValue(V value) {
    if (!allowModify)
      throw new UnsupportedOperationException();
    V old = values[lastPos];
    values[lastPos] = value;
    return old;
  }

  /** use nextCharArray() + currentValue() for better efficiency. */
  @Override
  public Map.Entry<Object,V> next() {
    goNext();
    return new MapEntry(lastPos, allowModify);
  }

  @Override
  public void remove() {
    throw new UnsupportedOperationException();
  }
}
+
+ private final class MapEntry implements Map.Entry<Object,V> {
+ private final int pos;
+ private final boolean allowModify;
+
+ private MapEntry(int pos, boolean allowModify) {
+ this.pos = pos;
+ this.allowModify = allowModify;
+ }
+
+ @Override
+ public Object getKey() {
+ // we must clone here, as putAll to another CharArrayMap
+ // with other case sensitivity flag would corrupt the keys
+ return keys[pos].clone();
+ }
+
+ @Override
+ public V getValue() {
+ return values[pos];
+ }
+
+ @Override
+ public V setValue(V value) {
+ if (!allowModify)
+ throw new UnsupportedOperationException();
+ final V old = values[pos];
+ values[pos] = value;
+ return old;
+ }
+
+ @Override
+ public String toString() {
+ return new StringBuilder().append(keys[pos]).append('=')
+ .append((values[pos] == CharArrayMap.this) ? "(this Map)" : values[pos])
+ .toString();
+ }
+ }
+
/** public EntrySet class so efficient methods are exposed to users.
 * Entry removal is unsupported; clear() is forwarded to the outer map
 * unless this is a read-only view. */
public final class EntrySet extends AbstractSet<Map.Entry<Object,V>> {
  private final boolean allowModify;

  private EntrySet(boolean allowModify) {
    this.allowModify = allowModify;
  }

  @Override
  public EntryIterator iterator() {
    return new EntryIterator(allowModify);
  }

  @Override
  @SuppressWarnings("unchecked")
  public boolean contains(Object o) {
    if (!(o instanceof Map.Entry))
      return false;
    final Map.Entry<Object,V> e = (Map.Entry<Object,V>)o;
    final Object key = e.getKey();
    final Object val = e.getValue();
    final Object v = get(key);
    // NOTE(review): when the key is absent, get() returns null, so an entry
    // carrying a null value is reported as contained even though the key is
    // not in the map — confirm this deviation from the Map contract is intended.
    return v == null ? val == null : v.equals(val);
  }

  @Override
  public boolean remove(Object o) {
    throw new UnsupportedOperationException();
  }

  @Override
  public int size() {
    return count;
  }

  @Override
  public void clear() {
    if (!allowModify)
      throw new UnsupportedOperationException();
    CharArrayMap.this.clear();
  }
}
+
+ /**
+ * Returns an unmodifiable {@link CharArrayMap}. This allows to provide
+ * unmodifiable views of internal map for "read-only" use.
+ *
+ * @param map
+ * a map for which the unmodifiable map is returned.
+ * @return an new unmodifiable {@link CharArrayMap}.
+ * @throws NullPointerException
+ * if the given map is <code>null</code>.
+ */
+ public static <V> CharArrayMap<V> unmodifiableMap(CharArrayMap<V> map) {
+ if (map == null)
+ throw new NullPointerException("Given map is null");
+ if (map == emptyMap() || map.isEmpty())
+ return emptyMap();
+ if (map instanceof UnmodifiableCharArrayMap)
+ return map;
+ return new UnmodifiableCharArrayMap<>(map);
+ }
+
/**
 * Returns a copy of the given map as a {@link CharArrayMap}. If the given map
 * is a {@link CharArrayMap} the ignoreCase property will be preserved.
 *
 * @param map
 *          a map to copy
 * @return a copy of the given map as a {@link CharArrayMap}. If the given map
 *         is a {@link CharArrayMap}, its ignoreCase property (and
 *         matchVersion) will be preserved.
 */
@SuppressWarnings("unchecked")
public static <V> CharArrayMap<V> copy(final Map<?,? extends V> map) {
  if(map == EMPTY_MAP)
    return emptyMap();
  if(map instanceof CharArrayMap) {
    CharArrayMap<V> m = (CharArrayMap<V>) map;
    // use fast path instead of iterating all values
    // this is even on very small sets ~10 times faster than iterating
    // (clone the slot arrays wholesale instead of re-hashing every key)
    final char[][] keys = new char[m.keys.length][];
    System.arraycopy(m.keys, 0, keys, 0, keys.length);
    final V[] values = (V[]) new Object[m.values.length];
    System.arraycopy(m.values, 0, values, 0, values.length);
    m = new CharArrayMap<>(m);
    m.keys = keys;
    m.values = values;
    return m;
  }
  // arbitrary Map: rebuild case-sensitively via the copy constructor
  return new CharArrayMap<>(map, false);
}
+
/** Returns an empty, unmodifiable map (a shared singleton). */
@SuppressWarnings("unchecked")
public static <V> CharArrayMap<V> emptyMap() {
  return (CharArrayMap<V>) EMPTY_MAP;
}
+
// package private CharArraySet instanceof check in CharArraySet
/** Read-only wrapper: shares the wrapped map's storage (via the copy
 * constructor) but rejects every mutating operation. */
static class UnmodifiableCharArrayMap<V> extends CharArrayMap<V> {

  UnmodifiableCharArrayMap(CharArrayMap<V> map) {
    super(map);
  }

  /** @throws UnsupportedOperationException always */
  @Override
  public void clear() {
    throw new UnsupportedOperationException();
  }

  /** @throws UnsupportedOperationException always */
  @Override
  public V put(Object o, V val){
    throw new UnsupportedOperationException();
  }

  /** @throws UnsupportedOperationException always */
  @Override
  public V put(char[] text, V val) {
    throw new UnsupportedOperationException();
  }

  /** @throws UnsupportedOperationException always */
  @Override
  public V put(CharSequence text, V val) {
    throw new UnsupportedOperationException();
  }

  /** @throws UnsupportedOperationException always */
  @Override
  public V put(String text, V val) {
    throw new UnsupportedOperationException();
  }

  /** @throws UnsupportedOperationException always */
  @Override
  public V remove(Object key) {
    throw new UnsupportedOperationException();
  }

  /** Entry-set view with modification disabled. */
  @Override
  EntrySet createEntrySet() {
    return new EntrySet(false);
  }
}
+
/**
 * Empty {@link org.apache.lucene.analysis.CharArrayMap.UnmodifiableCharArrayMap} optimized for speed.
 * Contains checks will always return <code>false</code> or throw
 * NPE if necessary; lookups return <code>null</code> without touching the table.
 */
private static final class EmptyCharArrayMap<V> extends UnmodifiableCharArrayMap<V> {
  EmptyCharArrayMap() {
    super(new CharArrayMap<V>(0, false));
  }

  /** Always false; preserves the NPE-on-null contract of the real lookup. */
  @Override
  public boolean containsKey(char[] text, int off, int len) {
    if(text == null)
      throw new NullPointerException();
    return false;
  }

  /** Always false; preserves the NPE-on-null contract of the real lookup. */
  @Override
  public boolean containsKey(CharSequence cs) {
    if(cs == null)
      throw new NullPointerException();
    return false;
  }

  /** Always false; preserves the NPE-on-null contract of the real lookup. */
  @Override
  public boolean containsKey(Object o) {
    if(o == null)
      throw new NullPointerException();
    return false;
  }

  /** Always null; preserves the NPE-on-null contract of the real lookup. */
  @Override
  public V get(char[] text, int off, int len) {
    if(text == null)
      throw new NullPointerException();
    return null;
  }

  /** Always null; preserves the NPE-on-null contract of the real lookup. */
  @Override
  public V get(CharSequence cs) {
    if(cs == null)
      throw new NullPointerException();
    return null;
  }

  /** Always null; preserves the NPE-on-null contract of the real lookup. */
  @Override
  public V get(Object o) {
    if(o == null)
      throw new NullPointerException();
    return null;
  }
}
+}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/core/src/java/org/apache/lucene/analysis/CharArraySet.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/analysis/CharArraySet.java b/lucene/core/src/java/org/apache/lucene/analysis/CharArraySet.java
new file mode 100644
index 0000000..4c8066a
--- /dev/null
+++ b/lucene/core/src/java/org/apache/lucene/analysis/CharArraySet.java
@@ -0,0 +1,196 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.analysis;
+
+
+import java.util.AbstractSet;
+import java.util.Collection;
+import java.util.Iterator;
+import java.util.Set;
+
+/**
+ * A simple class that stores Strings as char[]'s in a
+ * hash table. Note that this is not a general purpose
+ * class. For example, it cannot remove items from the
+ * set, nor does it resize its hash table to be smaller,
+ * etc. It is designed to be quick to test if a char[]
+ * is in the set without the necessity of converting it
+ * to a String first.
+ *
+ * <P>
+ * <em>Please note:</em> This class implements {@link java.util.Set Set} but
+ * does not behave like it should in all cases. The generic type is
+ * {@code Set<Object>}, because you can add any object to it,
+ * that has a string representation. The add methods will use
+ * {@link Object#toString} and store the result using a {@code char[]}
+ * buffer. The {@code contains()} methods have the same behavior.
+ * The {@link #iterator()} returns an {@code Iterator<char[]>}.
+ */
+public class CharArraySet extends AbstractSet<Object> {
+
+ /** An empty {@code CharArraySet}. */
+ public static final CharArraySet EMPTY_SET = new CharArraySet(CharArrayMap.<Object>emptyMap());
+
+ private static final Object PLACEHOLDER = new Object();
+
+ private final CharArrayMap<Object> map;
+
+ /**
+ * Create set with enough capacity to hold startSize terms
+ *
+ * @param startSize
+ * the initial capacity
+ * @param ignoreCase
+ * <code>false</code> if and only if the set should be case sensitive
+ * otherwise <code>true</code>.
+ */
+ public CharArraySet(int startSize, boolean ignoreCase) {
+ this(new CharArrayMap<>(startSize, ignoreCase));
+ }
+
+ /**
+ * Creates a set from a Collection of objects.
+ *
+ * @param c
+ * a collection whose elements to be placed into the set
+ * @param ignoreCase
+ * <code>false</code> if and only if the set should be case sensitive
+ * otherwise <code>true</code>.
+ */
+ public CharArraySet(Collection<?> c, boolean ignoreCase) {
+ this(c.size(), ignoreCase);
+ addAll(c);
+ }
+
+ /** Create set from the specified map (internal only), used also by {@link CharArrayMap#keySet()} */
+ CharArraySet(final CharArrayMap<Object> map){
+ this.map = map;
+ }
+
+ /** Clears all entries in this set. This method is supported for reusing, but not {@link Set#remove}. */
+ @Override
+ public void clear() {
+ map.clear();
+ }
+
+ /** true if the <code>len</code> chars of <code>text</code> starting at <code>off</code>
+ * are in the set */
+ public boolean contains(char[] text, int off, int len) {
+ return map.containsKey(text, off, len);
+ }
+
+ /** true if the <code>CharSequence</code> is in the set */
+ public boolean contains(CharSequence cs) {
+ return map.containsKey(cs);
+ }
+
+ @Override
+ public boolean contains(Object o) {
+ return map.containsKey(o);
+ }
+
+ @Override
+ public boolean add(Object o) {
+ return map.put(o, PLACEHOLDER) == null;
+ }
+
+ /** Add this CharSequence into the set */
+ public boolean add(CharSequence text) {
+ return map.put(text, PLACEHOLDER) == null;
+ }
+
+ /** Add this String into the set */
+ public boolean add(String text) {
+ return map.put(text, PLACEHOLDER) == null;
+ }
+
+ /** Add this char[] directly to the set.
+ * If ignoreCase is true for this Set, the text array will be directly modified.
+ * The user should never modify this text array after calling this method.
+ */
+ public boolean add(char[] text) {
+ return map.put(text, PLACEHOLDER) == null;
+ }
+
+ @Override
+ public int size() {
+ return map.size();
+ }
+
+ /**
+ * Returns an unmodifiable {@link CharArraySet}. This allows to provide
+ * unmodifiable views of internal sets for "read-only" use.
+ *
+ * @param set
+ * a set for which the unmodifiable set is returned.
+ * @return an new unmodifiable {@link CharArraySet}.
+ * @throws NullPointerException
+ * if the given set is <code>null</code>.
+ */
+ public static CharArraySet unmodifiableSet(CharArraySet set) {
+ if (set == null)
+ throw new NullPointerException("Given set is null");
+ if (set == EMPTY_SET)
+ return EMPTY_SET;
+ if (set.map instanceof CharArrayMap.UnmodifiableCharArrayMap)
+ return set;
+ return new CharArraySet(CharArrayMap.unmodifiableMap(set.map));
+ }
+
+ /**
+ * Returns a copy of the given set as a {@link CharArraySet}. If the given set
+ * is a {@link CharArraySet} the ignoreCase property will be preserved.
+ *
+ * @param set
+ * a set to copy
+ * @return a copy of the given set as a {@link CharArraySet}. If the given set
+ * is a {@link CharArraySet} the ignoreCase property as well as the
+ * matchVersion will be of the given set will be preserved.
+ */
+ public static CharArraySet copy(final Set<?> set) {
+ if(set == EMPTY_SET)
+ return EMPTY_SET;
+ if(set instanceof CharArraySet) {
+ final CharArraySet source = (CharArraySet) set;
+ return new CharArraySet(CharArrayMap.copy(source.map));
+ }
+ return new CharArraySet(set, false);
+ }
+
+ /**
+ * Returns an {@link Iterator} for {@code char[]} instances in this set.
+ */
+ @Override @SuppressWarnings("unchecked")
+ public Iterator<Object> iterator() {
+ // use the AbstractSet#keySet()'s iterator (to not produce endless recursion)
+ return map.originalKeySet().iterator();
+ }
+
+ @Override
+ public String toString() {
+ final StringBuilder sb = new StringBuilder("[");
+ for (Object item : this) {
+ if (sb.length()>1) sb.append(", ");
+ if (item instanceof char[]) {
+ sb.append((char[]) item);
+ } else {
+ sb.append(item);
+ }
+ }
+ return sb.append(']').toString();
+ }
+}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/core/src/java/org/apache/lucene/analysis/CharacterUtils.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/analysis/CharacterUtils.java b/lucene/core/src/java/org/apache/lucene/analysis/CharacterUtils.java
new file mode 100644
index 0000000..e2cc47f
--- /dev/null
+++ b/lucene/core/src/java/org/apache/lucene/analysis/CharacterUtils.java
@@ -0,0 +1,251 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.analysis;
+
+
+import java.io.IOException;
+import java.io.Reader;
+
+/**
+ * Utility class to write tokenizers or token filters.
+ * @lucene.internal
+ */
+public final class CharacterUtils {
+
+ private CharacterUtils() {} // no instantiation
+
+ /**
+ * Creates a new {@link CharacterBuffer} and allocates a <code>char[]</code>
+ * of the given bufferSize.
+ *
+ * @param bufferSize
+ * the internal char buffer size, must be <code>>= 2</code>
+ * @return a new {@link CharacterBuffer} instance.
+ */
+ public static CharacterBuffer newCharacterBuffer(final int bufferSize) {
+ if (bufferSize < 2) {
+ throw new IllegalArgumentException("buffersize must be >= 2");
+ }
+ return new CharacterBuffer(new char[bufferSize], 0, 0);
+ }
+
+
+ /**
+ * Converts each unicode codepoint to lowerCase via {@link Character#toLowerCase(int)} starting
+ * at the given offset.
+ * @param buffer the char buffer to lowercase
+ * @param offset the offset to start at
+ * @param limit the max char in the buffer to lower case
+ */
+ public static void toLowerCase(final char[] buffer, final int offset, final int limit) {
+ assert buffer.length >= limit;
+ assert offset <=0 && offset <= buffer.length;
+ for (int i = offset; i < limit;) {
+ i += Character.toChars(
+ Character.toLowerCase(
+ Character.codePointAt(buffer, i, limit)), buffer, i);
+ }
+ }
+
+ /**
+ * Converts each unicode codepoint to UpperCase via {@link Character#toUpperCase(int)} starting
+ * at the given offset.
+ * @param buffer the char buffer to UPPERCASE
+ * @param offset the offset to start at
+ * @param limit the max char in the buffer to lower case
+ */
+ public static void toUpperCase(final char[] buffer, final int offset, final int limit) {
+ assert buffer.length >= limit;
+ assert offset <=0 && offset <= buffer.length;
+ for (int i = offset; i < limit;) {
+ i += Character.toChars(
+ Character.toUpperCase(
+ Character.codePointAt(buffer, i, limit)), buffer, i);
+ }
+ }
+
+ /** Converts a sequence of Java characters to a sequence of unicode code points.
+ * @return the number of code points written to the destination buffer */
+ public static int toCodePoints(char[] src, int srcOff, int srcLen, int[] dest, int destOff) {
+ if (srcLen < 0) {
+ throw new IllegalArgumentException("srcLen must be >= 0");
+ }
+ int codePointCount = 0;
+ for (int i = 0; i < srcLen; ) {
+ final int cp = Character.codePointAt(src, srcOff + i, srcOff + srcLen);
+ final int charCount = Character.charCount(cp);
+ dest[destOff + codePointCount++] = cp;
+ i += charCount;
+ }
+ return codePointCount;
+ }
+
+ /** Converts a sequence of unicode code points to a sequence of Java characters.
+ * @return the number of chars written to the destination buffer */
+ public static int toChars(int[] src, int srcOff, int srcLen, char[] dest, int destOff) {
+ if (srcLen < 0) {
+ throw new IllegalArgumentException("srcLen must be >= 0");
+ }
+ int written = 0;
+ for (int i = 0; i < srcLen; ++i) {
+ written += Character.toChars(src[srcOff + i], dest, destOff + written);
+ }
+ return written;
+ }
+
+ /**
+ * Fills the {@link CharacterBuffer} with characters read from the given
+ * reader {@link Reader}. This method tries to read <code>numChars</code>
+ * characters into the {@link CharacterBuffer}, each call to fill will start
+ * filling the buffer from offset <code>0</code> up to <code>numChars</code>.
+ * In case code points can span across 2 java characters, this method may
+ * only fill <code>numChars - 1</code> characters in order not to split in
+ * the middle of a surrogate pair, even if there are remaining characters in
+ * the {@link Reader}.
+ * <p>
+ * This method guarantees
+ * that the given {@link CharacterBuffer} will never contain a high surrogate
+ * character as the last element in the buffer unless it is the last available
+ * character in the reader. In other words, high and low surrogate pairs will
+ * always be preserved across buffer boarders.
+ * </p>
+ * <p>
+ * A return value of <code>false</code> means that this method call exhausted
+ * the reader, but there may be some bytes which have been read, which can be
+ * verified by checking whether <code>buffer.getLength() > 0</code>.
+ * </p>
+ *
+ * @param buffer
+ * the buffer to fill.
+ * @param reader
+ * the reader to read characters from.
+ * @param numChars
+ * the number of chars to read
+ * @return <code>false</code> if and only if reader.read returned -1 while trying to fill the buffer
+ * @throws IOException
+ * if the reader throws an {@link IOException}.
+ */
+ public static boolean fill(CharacterBuffer buffer, Reader reader, int numChars) throws IOException {
+ assert buffer.buffer.length >= 2;
+ if (numChars < 2 || numChars > buffer.buffer.length) {
+ throw new IllegalArgumentException("numChars must be >= 2 and <= the buffer size");
+ }
+ final char[] charBuffer = buffer.buffer;
+ buffer.offset = 0;
+ final int offset;
+
+ // Install the previously saved ending high surrogate:
+ if (buffer.lastTrailingHighSurrogate != 0) {
+ charBuffer[0] = buffer.lastTrailingHighSurrogate;
+ buffer.lastTrailingHighSurrogate = 0;
+ offset = 1;
+ } else {
+ offset = 0;
+ }
+
+ final int read = readFully(reader, charBuffer, offset, numChars - offset);
+
+ buffer.length = offset + read;
+ final boolean result = buffer.length == numChars;
+ if (buffer.length < numChars) {
+ // We failed to fill the buffer. Even if the last char is a high
+ // surrogate, there is nothing we can do
+ return result;
+ }
+
+ if (Character.isHighSurrogate(charBuffer[buffer.length - 1])) {
+ buffer.lastTrailingHighSurrogate = charBuffer[--buffer.length];
+ }
+ return result;
+ }
+
+ /** Convenience method which calls <code>fill(buffer, reader, buffer.buffer.length)</code>. */
+ public static boolean fill(CharacterBuffer buffer, Reader reader) throws IOException {
+ return fill(buffer, reader, buffer.buffer.length);
+ }
+
+ static int readFully(Reader reader, char[] dest, int offset, int len) throws IOException {
+ int read = 0;
+ while (read < len) {
+ final int r = reader.read(dest, offset + read, len - read);
+ if (r == -1) {
+ break;
+ }
+ read += r;
+ }
+ return read;
+ }
+
+ /**
+ * A simple IO buffer to use with
+ * {@link CharacterUtils#fill(CharacterBuffer, Reader)}.
+ */
+ public static final class CharacterBuffer {
+
+ private final char[] buffer;
+ private int offset;
+ private int length;
+ // NOTE: not private so outer class can access without
+ // $access methods:
+ char lastTrailingHighSurrogate;
+
+ CharacterBuffer(char[] buffer, int offset, int length) {
+ this.buffer = buffer;
+ this.offset = offset;
+ this.length = length;
+ }
+
+ /**
+ * Returns the internal buffer
+ *
+ * @return the buffer
+ */
+ public char[] getBuffer() {
+ return buffer;
+ }
+
+ /**
+ * Returns the data offset in the internal buffer.
+ *
+ * @return the offset
+ */
+ public int getOffset() {
+ return offset;
+ }
+
+ /**
+ * Return the length of the data in the internal buffer starting at
+ * {@link #getOffset()}
+ *
+ * @return the length
+ */
+ public int getLength() {
+ return length;
+ }
+
+ /**
+ * Resets the CharacterBuffer. All internals are reset to its default
+ * values.
+ */
+ public void reset() {
+ offset = 0;
+ length = 0;
+ lastTrailingHighSurrogate = 0;
+ }
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/core/src/java/org/apache/lucene/analysis/FilteringTokenFilter.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/analysis/FilteringTokenFilter.java b/lucene/core/src/java/org/apache/lucene/analysis/FilteringTokenFilter.java
new file mode 100644
index 0000000..cecad10
--- /dev/null
+++ b/lucene/core/src/java/org/apache/lucene/analysis/FilteringTokenFilter.java
@@ -0,0 +1,76 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.analysis;
+
+
+import java.io.IOException;
+
+import org.apache.lucene.analysis.TokenFilter;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
+
+/**
+ * Abstract base class for TokenFilters that may remove tokens.
+ * You have to implement {@link #accept} and return a boolean if the current
+ * token should be preserved. {@link #incrementToken} uses this method
+ * to decide if a token should be passed to the caller.
+ */
+public abstract class FilteringTokenFilter extends TokenFilter {
+
+ private final PositionIncrementAttribute posIncrAtt = addAttribute(PositionIncrementAttribute.class);
+ private int skippedPositions;
+
+ /**
+ * Create a new {@link FilteringTokenFilter}.
+ * @param in the {@link TokenStream} to consume
+ */
+ public FilteringTokenFilter(TokenStream in) {
+ super(in);
+ }
+
+ /** Override this method and return if the current input token should be returned by {@link #incrementToken}. */
+ protected abstract boolean accept() throws IOException;
+
+ @Override
+ public final boolean incrementToken() throws IOException {
+ skippedPositions = 0;
+ while (input.incrementToken()) {
+ if (accept()) {
+ if (skippedPositions != 0) {
+ posIncrAtt.setPositionIncrement(posIncrAtt.getPositionIncrement() + skippedPositions);
+ }
+ return true;
+ }
+ skippedPositions += posIncrAtt.getPositionIncrement();
+ }
+
+ // reached EOS -- return false
+ return false;
+ }
+
+ @Override
+ public void reset() throws IOException {
+ super.reset();
+ skippedPositions = 0;
+ }
+
+ @Override
+ public void end() throws IOException {
+ super.end();
+ posIncrAtt.setPositionIncrement(posIncrAtt.getPositionIncrement() + skippedPositions);
+ }
+}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/core/src/java/org/apache/lucene/analysis/LowerCaseFilter.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/analysis/LowerCaseFilter.java b/lucene/core/src/java/org/apache/lucene/analysis/LowerCaseFilter.java
new file mode 100644
index 0000000..b86684d
--- /dev/null
+++ b/lucene/core/src/java/org/apache/lucene/analysis/LowerCaseFilter.java
@@ -0,0 +1,50 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.analysis;
+
+
+import java.io.IOException;
+
+import org.apache.lucene.analysis.TokenFilter;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+import org.apache.lucene.analysis.CharacterUtils;
+
+/**
+ * Normalizes token text to lower case.
+ */
+public final class LowerCaseFilter extends TokenFilter {
+ private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
+
+ /**
+ * Create a new LowerCaseFilter, that normalizes token text to lower case.
+ *
+ * @param in TokenStream to filter
+ */
+ public LowerCaseFilter(TokenStream in) {
+ super(in);
+ }
+
+ @Override
+ public final boolean incrementToken() throws IOException {
+ if (input.incrementToken()) {
+ CharacterUtils.toLowerCase(termAtt.buffer(), 0, termAtt.length());
+ return true;
+ } else
+ return false;
+ }
+}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/core/src/java/org/apache/lucene/analysis/StopFilter.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/analysis/StopFilter.java b/lucene/core/src/java/org/apache/lucene/analysis/StopFilter.java
new file mode 100644
index 0000000..79707bc
--- /dev/null
+++ b/lucene/core/src/java/org/apache/lucene/analysis/StopFilter.java
@@ -0,0 +1,111 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.analysis;
+
+
+import java.util.Arrays;
+import java.util.List;
+
+import org.apache.lucene.analysis.FilteringTokenFilter;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+import org.apache.lucene.analysis.CharArraySet;
+
+/**
+ * Removes stop words from a token stream.
+ */
+public final class StopFilter extends FilteringTokenFilter {
+
+ private final CharArraySet stopWords;
+ private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
+
+ /**
+ * Constructs a filter which removes words from the input TokenStream that are
+ * named in the Set.
+ *
+ * @param in
+ * Input stream
+ * @param stopWords
+ * A {@link CharArraySet} representing the stopwords.
+ * @see #makeStopSet(java.lang.String...)
+ */
+ public StopFilter(TokenStream in, CharArraySet stopWords) {
+ super(in);
+ this.stopWords = stopWords;
+ }
+
+ /**
+ * Builds a Set from an array of stop words,
+ * appropriate for passing into the StopFilter constructor.
+ * This permits this stopWords construction to be cached once when
+ * an Analyzer is constructed.
+ *
+ * @param stopWords An array of stopwords
+ * @see #makeStopSet(java.lang.String[], boolean) passing false to ignoreCase
+ */
+ public static CharArraySet makeStopSet(String... stopWords) {
+ return makeStopSet(stopWords, false);
+ }
+
+ /**
+ * Builds a Set from an array of stop words,
+ * appropriate for passing into the StopFilter constructor.
+ * This permits this stopWords construction to be cached once when
+ * an Analyzer is constructed.
+ *
+ * @param stopWords A List of Strings or char[] or any other toString()-able list representing the stopwords
+ * @return A Set ({@link CharArraySet}) containing the words
+ * @see #makeStopSet(java.lang.String[], boolean) passing false to ignoreCase
+ */
+ public static CharArraySet makeStopSet(List<?> stopWords) {
+ return makeStopSet(stopWords, false);
+ }
+
+ /**
+ * Creates a stopword set from the given stopword array.
+ *
+ * @param stopWords An array of stopwords
+ * @param ignoreCase If true, all words are lower cased first.
+ * @return a Set containing the words
+ */
+ public static CharArraySet makeStopSet(String[] stopWords, boolean ignoreCase) {
+ CharArraySet stopSet = new CharArraySet(stopWords.length, ignoreCase);
+ stopSet.addAll(Arrays.asList(stopWords));
+ return stopSet;
+ }
+
+ /**
+ * Creates a stopword set from the given stopword list.
+ * @param stopWords A List of Strings or char[] or any other toString()-able list representing the stopwords
+ * @param ignoreCase if true, all words are lower cased first
+ * @return A Set ({@link CharArraySet}) containing the words
+ */
+ public static CharArraySet makeStopSet(List<?> stopWords, boolean ignoreCase){
+ CharArraySet stopSet = new CharArraySet(stopWords.size(), ignoreCase);
+ stopSet.addAll(stopWords);
+ return stopSet;
+ }
+
+ /**
+ * Returns the next input Token whose term() is not a stop word.
+ */
+ @Override
+ protected boolean accept() {
+ return !stopWords.contains(termAtt.buffer(), 0, termAtt.length());
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/core/src/java/org/apache/lucene/analysis/StopwordAnalyzerBase.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/analysis/StopwordAnalyzerBase.java b/lucene/core/src/java/org/apache/lucene/analysis/StopwordAnalyzerBase.java
new file mode 100644
index 0000000..c35e715
--- /dev/null
+++ b/lucene/core/src/java/org/apache/lucene/analysis/StopwordAnalyzerBase.java
@@ -0,0 +1,138 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.analysis;
+
+import java.io.IOException;
+import java.io.Reader;
+import java.nio.charset.StandardCharsets;
+import java.nio.file.Files;
+import java.nio.file.Path;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.util.IOUtils;
+
+/**
+ * Base class for Analyzers that need to make use of stopword sets.
+ *
+ */
+public abstract class StopwordAnalyzerBase extends Analyzer {
+
+ /**
+ * An immutable stopword set
+ */
+ protected final CharArraySet stopwords;
+
+ /**
+ * Returns the analyzer's stopword set or an empty set if the analyzer has no
+ * stopwords
+ *
+ * @return the analyzer's stopword set or an empty set if the analyzer has no
+ * stopwords
+ */
+ public CharArraySet getStopwordSet() {
+ return stopwords;
+ }
+
+ /**
+ * Creates a new instance initialized with the given stopword set
+ *
+ * @param stopwords
+ * the analyzer's stopword set
+ */
+ protected StopwordAnalyzerBase(final CharArraySet stopwords) {
+ // analyzers should use char array set for stopwords!
+ this.stopwords = stopwords == null ? CharArraySet.EMPTY_SET : CharArraySet
+ .unmodifiableSet(CharArraySet.copy(stopwords));
+ }
+
+ /**
+ * Creates a new Analyzer with an empty stopword set
+ */
+ protected StopwordAnalyzerBase() {
+ this(null);
+ }
+
+ /**
+ * Creates a CharArraySet from a file resource associated with a class. (See
+ * {@link Class#getResourceAsStream(String)}).
+ *
+ * @param ignoreCase
+ * <code>true</code> if the set should ignore the case of the
+ * stopwords, otherwise <code>false</code>
+ * @param aClass
+ * a class that is associated with the given stopwordResource
+ * @param resource
+ * name of the resource file associated with the given class
+ * @param comment
+ * comment string to ignore in the stopword file
+ * @return a CharArraySet containing the distinct stopwords from the given
+ * file
+ * @throws IOException
+ * if loading the stopwords throws an {@link IOException}
+ */
+ protected static CharArraySet loadStopwordSet(final boolean ignoreCase,
+ final Class<? extends Analyzer> aClass, final String resource,
+ final String comment) throws IOException {
+ Reader reader = null;
+ try {
+ reader = IOUtils.getDecodingReader(aClass.getResourceAsStream(resource), StandardCharsets.UTF_8);
+ return WordlistLoader.getWordSet(reader, comment, new CharArraySet(16, ignoreCase));
+ } finally {
+ IOUtils.close(reader);
+ }
+
+ }
+
+ /**
+ * Creates a CharArraySet from a path.
+ *
+ * @param stopwords
+ * the stopwords file to load
+ * @return a CharArraySet containing the distinct stopwords from the given
+ * file
+ * @throws IOException
+ * if loading the stopwords throws an {@link IOException}
+ */
+ protected static CharArraySet loadStopwordSet(Path stopwords) throws IOException {
+ Reader reader = null;
+ try {
+ reader = Files.newBufferedReader(stopwords, StandardCharsets.UTF_8);
+ return WordlistLoader.getWordSet(reader);
+ } finally {
+ IOUtils.close(reader);
+ }
+ }
+
+ /**
+ * Creates a CharArraySet from a file.
+ *
+ * @param stopwords
+ * the stopwords reader to load
+ *
+ * @return a CharArraySet containing the distinct stopwords from the given
+ * reader
+ * @throws IOException
+ * if loading the stopwords throws an {@link IOException}
+ */
+ protected static CharArraySet loadStopwordSet(Reader stopwords) throws IOException {
+ try {
+ return WordlistLoader.getWordSet(stopwords);
+ } finally {
+ IOUtils.close(stopwords);
+ }
+ }
+}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/core/src/java/org/apache/lucene/analysis/WordlistLoader.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/analysis/WordlistLoader.java b/lucene/core/src/java/org/apache/lucene/analysis/WordlistLoader.java
new file mode 100644
index 0000000..2397e66
--- /dev/null
+++ b/lucene/core/src/java/org/apache/lucene/analysis/WordlistLoader.java
@@ -0,0 +1,244 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.analysis;
+
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.Reader;
+import java.nio.charset.Charset;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.lucene.util.IOUtils;
+
+/**
+ * Loader for text files that represent a list of stopwords.
+ *
+ * @see IOUtils to obtain {@link Reader} instances
+ * @lucene.internal
+ */
+public class WordlistLoader {
+
+ private static final int INITIAL_CAPACITY = 16;
+
+ /** no instance */
+ private WordlistLoader() {}
+
+ /**
+ * Reads lines from a Reader and adds every line as an entry to a CharArraySet (omitting
+ * leading and trailing whitespace). Every line of the Reader should contain only
+ * one word. The words need to be in lowercase if you make use of an
+ * Analyzer which uses LowerCaseFilter (like StandardAnalyzer).
+ *
+ * @param reader Reader containing the wordlist
+ * @param result the {@link CharArraySet} to fill with the readers words
+ * @return the given {@link CharArraySet} with the reader's words
+ */
+ public static CharArraySet getWordSet(Reader reader, CharArraySet result) throws IOException {
+ BufferedReader br = null;
+ try {
+ br = getBufferedReader(reader);
+ String word = null;
+ while ((word = br.readLine()) != null) {
+ result.add(word.trim());
+ }
+ }
+ finally {
+ IOUtils.close(br);
+ }
+ return result;
+ }
+
+ /**
+ * Reads lines from a Reader and adds every line as an entry to a CharArraySet (omitting
+ * leading and trailing whitespace). Every line of the Reader should contain only
+ * one word. The words need to be in lowercase if you make use of an
+ * Analyzer which uses LowerCaseFilter (like StandardAnalyzer).
+ *
+ * @param reader Reader containing the wordlist
+ * @return A {@link CharArraySet} with the reader's words
+ */
+ public static CharArraySet getWordSet(Reader reader) throws IOException {
+ return getWordSet(reader, new CharArraySet(INITIAL_CAPACITY, false));
+ }
+
+ /**
+ * Reads lines from a Reader and adds every non-comment line as an entry to a CharArraySet (omitting
+ * leading and trailing whitespace). Every line of the Reader should contain only
+ * one word. The words need to be in lowercase if you make use of an
+ * Analyzer which uses LowerCaseFilter (like StandardAnalyzer).
+ *
+ * @param reader Reader containing the wordlist
+ * @param comment The string representing a comment.
+ * @return A CharArraySet with the reader's words
+ */
+ public static CharArraySet getWordSet(Reader reader, String comment) throws IOException {
+ return getWordSet(reader, comment, new CharArraySet(INITIAL_CAPACITY, false));
+ }
+
+ /**
+ * Reads lines from a Reader and adds every non-comment line as an entry to a CharArraySet (omitting
+ * leading and trailing whitespace). Every line of the Reader should contain only
+ * one word. The words need to be in lowercase if you make use of an
+ * Analyzer which uses LowerCaseFilter (like StandardAnalyzer).
+ *
+ * @param reader Reader containing the wordlist
+ * @param comment The string representing a comment.
+ * @param result the {@link CharArraySet} to fill with the readers words
+ * @return the given {@link CharArraySet} with the reader's words
+ */
+ public static CharArraySet getWordSet(Reader reader, String comment, CharArraySet result) throws IOException {
+ BufferedReader br = null;
+ try {
+ br = getBufferedReader(reader);
+ String word = null;
+ while ((word = br.readLine()) != null) {
+ if (word.startsWith(comment) == false){
+ result.add(word.trim());
+ }
+ }
+ }
+ finally {
+ IOUtils.close(br);
+ }
+ return result;
+ }
+
+
+ /**
+ * Reads stopwords from a stopword list in Snowball format.
+ * <p>
+ * The snowball format is the following:
+ * <ul>
+ * <li>Lines may contain multiple words separated by whitespace.
+ * <li>The comment character is the vertical line (|).
+ * <li>Lines may contain trailing comments.
+ * </ul>
+ *
+ * @param reader Reader containing a Snowball stopword list
+ * @param result the {@link CharArraySet} to fill with the readers words
+ * @return the given {@link CharArraySet} with the reader's words
+ */
+ public static CharArraySet getSnowballWordSet(Reader reader, CharArraySet result)
+ throws IOException {
+ BufferedReader br = null;
+ try {
+ br = getBufferedReader(reader);
+ String line = null;
+ while ((line = br.readLine()) != null) {
+ int comment = line.indexOf('|');
+ if (comment >= 0) line = line.substring(0, comment);
+ String words[] = line.split("\\s+");
+ for (int i = 0; i < words.length; i++)
+ if (words[i].length() > 0) result.add(words[i]);
+ }
+ } finally {
+ IOUtils.close(br);
+ }
+ return result;
+ }
+
+ /**
+ * Reads stopwords from a stopword list in Snowball format.
+ * <p>
+ * The snowball format is the following:
+ * <ul>
+ * <li>Lines may contain multiple words separated by whitespace.
+ * <li>The comment character is the vertical line (|).
+ * <li>Lines may contain trailing comments.
+ * </ul>
+ *
+ * @param reader Reader containing a Snowball stopword list
+ * @return A {@link CharArraySet} with the reader's words
+ */
+ public static CharArraySet getSnowballWordSet(Reader reader) throws IOException {
+ return getSnowballWordSet(reader, new CharArraySet(INITIAL_CAPACITY, false));
+ }
+
+
+ /**
+ * Reads a stem dictionary. Each line contains:
+ * <pre>word<b>\t</b>stem</pre>
+ * (i.e. two tab separated words)
+ *
+ * @return stem dictionary that overrules the stemming algorithm
+ * @throws IOException If there is a low-level I/O error.
+ */
+ public static CharArrayMap<String> getStemDict(Reader reader, CharArrayMap<String> result) throws IOException {
+ BufferedReader br = null;
+ try {
+ br = getBufferedReader(reader);
+ String line;
+ while ((line = br.readLine()) != null) {
+ String[] wordstem = line.split("\t", 2);
+ result.put(wordstem[0], wordstem[1]);
+ }
+ } finally {
+ IOUtils.close(br);
+ }
+ return result;
+ }
+
+ /**
+ * Accesses a resource by name and returns the (non comment) lines containing
+ * data using the given character encoding.
+ *
+ * <p>
+ * A comment line is any line that starts with the character "#"
+ * </p>
+ *
+ * @return a list of non-blank non-comment lines with whitespace trimmed
+ * @throws IOException If there is a low-level I/O error.
+ */
+ public static List<String> getLines(InputStream stream, Charset charset) throws IOException{
+ BufferedReader input = null;
+ ArrayList<String> lines;
+ boolean success = false;
+ try {
+ input = getBufferedReader(IOUtils.getDecodingReader(stream, charset));
+
+ lines = new ArrayList<>();
+ for (String word=null; (word=input.readLine())!=null;) {
+ // skip initial bom marker
+ if (lines.isEmpty() && word.length() > 0 && word.charAt(0) == '\uFEFF')
+ word = word.substring(1);
+ // skip comments
+ if (word.startsWith("#")) continue;
+ word=word.trim();
+ // skip blank lines
+ if (word.length()==0) continue;
+ lines.add(word);
+ }
+ success = true;
+ return lines;
+ } finally {
+ if (success) {
+ IOUtils.close(input);
+ } else {
+ IOUtils.closeWhileHandlingException(input);
+ }
+ }
+ }
+
+ private static BufferedReader getBufferedReader(Reader reader) {
+ return (reader instanceof BufferedReader) ? (BufferedReader) reader
+ : new BufferedReader(reader);
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/core/src/java/org/apache/lucene/analysis/package-info.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/analysis/package-info.java b/lucene/core/src/java/org/apache/lucene/analysis/package-info.java
index 511f268..81858df 100644
--- a/lucene/core/src/java/org/apache/lucene/analysis/package-info.java
+++ b/lucene/core/src/java/org/apache/lucene/analysis/package-info.java
@@ -156,7 +156,7 @@
* over and over in many places, you can make a subclass of
* {@link org.apache.lucene.analysis.Analyzer}. In fact, Apache Lucene
* supplies a large family of <code>Analyzer</code> classes that deliver useful
- * analysis chains. The most common of these is the <a href="{@docRoot}/../analyzers-common/org/apache/lucene/analysis/standard/StandardAnalyzer.html">StandardAnalyzer</a>.
+ * analysis chains. The most common of these is the <a href="{@docRoot}/org/apache/lucene/analysis/standard/StandardAnalyzer.html">StandardAnalyzer</a>.
* Many applications will have a long and industrious life with nothing more
* than the <code>StandardAnalyzer</code>. The <a href="{@docRoot}/../analyzers-common/overview-summary.html">analyzers-common</a>
* library provides many pre-existing analyzers for various languages.
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/core/src/java/org/apache/lucene/analysis/standard/StandardAnalyzer.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/analysis/standard/StandardAnalyzer.java b/lucene/core/src/java/org/apache/lucene/analysis/standard/StandardAnalyzer.java
new file mode 100644
index 0000000..251017d
--- /dev/null
+++ b/lucene/core/src/java/org/apache/lucene/analysis/standard/StandardAnalyzer.java
@@ -0,0 +1,115 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.analysis.standard;
+
+
+import java.io.IOException;
+import java.io.Reader;
+import java.util.Arrays;
+import java.util.List;
+
+import org.apache.lucene.analysis.CharArraySet;
+import org.apache.lucene.analysis.LowerCaseFilter;
+import org.apache.lucene.analysis.StopFilter;
+import org.apache.lucene.analysis.StopwordAnalyzerBase;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.WordlistLoader;
+
+/**
+ * Filters {@link StandardTokenizer} with {@link StandardFilter}, {@link
+ * LowerCaseFilter} and {@link StopFilter}, using a list of
+ * English stop words.
+ */
+public final class StandardAnalyzer extends StopwordAnalyzerBase {
+
+ /** An unmodifiable set containing some common English words that are not usually useful
+ for searching.*/
+ public static final CharArraySet ENGLISH_STOP_WORDS_SET;
+
+ static {
+ final List<String> stopWords = Arrays.asList(
+ "a", "an", "and", "are", "as", "at", "be", "but", "by",
+ "for", "if", "in", "into", "is", "it",
+ "no", "not", "of", "on", "or", "such",
+ "that", "the", "their", "then", "there", "these",
+ "they", "this", "to", "was", "will", "with"
+ );
+ final CharArraySet stopSet = new CharArraySet(stopWords, false);
+ ENGLISH_STOP_WORDS_SET = CharArraySet.unmodifiableSet(stopSet);
+ }
+
+ /** Default maximum allowed token length */
+ public static final int DEFAULT_MAX_TOKEN_LENGTH = 255;
+
+ private int maxTokenLength = DEFAULT_MAX_TOKEN_LENGTH;
+
+ /** An unmodifiable set containing some common English words that are usually not
+ useful for searching. */
+ public static final CharArraySet STOP_WORDS_SET = ENGLISH_STOP_WORDS_SET;
+
+ /** Builds an analyzer with the given stop words.
+ * @param stopWords stop words */
+ public StandardAnalyzer(CharArraySet stopWords) {
+ super(stopWords);
+ }
+
+ /** Builds an analyzer with the default stop words ({@link #STOP_WORDS_SET}).
+ */
+ public StandardAnalyzer() {
+ this(STOP_WORDS_SET);
+ }
+
+ /** Builds an analyzer with the stop words from the given reader.
+ * @see WordlistLoader#getWordSet(Reader)
+ * @param stopwords Reader to read stop words from */
+ public StandardAnalyzer(Reader stopwords) throws IOException {
+ this(loadStopwordSet(stopwords));
+ }
+
+ /**
+ * Set maximum allowed token length. If a token is seen
+ * that exceeds this length then it is discarded. This
+ * setting only takes effect the next time tokenStream
+ * is called.
+ */
+ public void setMaxTokenLength(int length) {
+ maxTokenLength = length;
+ }
+
+ /** Returns the current maximum token length
+ *
+ * @see #setMaxTokenLength */
+ public int getMaxTokenLength() {
+ return maxTokenLength;
+ }
+
+ @Override
+ protected TokenStreamComponents createComponents(final String fieldName) {
+ final StandardTokenizer src = new StandardTokenizer();
+ src.setMaxTokenLength(maxTokenLength);
+ TokenStream tok = new StandardFilter(src);
+ tok = new LowerCaseFilter(tok);
+ tok = new StopFilter(tok, stopwords);
+ return new TokenStreamComponents(src, tok) {
+ @Override
+ protected void setReader(final Reader reader) {
+ src.setMaxTokenLength(StandardAnalyzer.this.maxTokenLength);
+ super.setReader(reader);
+ }
+ };
+ }
+}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/core/src/java/org/apache/lucene/analysis/standard/StandardFilter.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/analysis/standard/StandardFilter.java b/lucene/core/src/java/org/apache/lucene/analysis/standard/StandardFilter.java
new file mode 100644
index 0000000..202db37
--- /dev/null
+++ b/lucene/core/src/java/org/apache/lucene/analysis/standard/StandardFilter.java
@@ -0,0 +1,39 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.analysis.standard;
+
+
+import java.io.IOException;
+
+import org.apache.lucene.analysis.TokenFilter;
+import org.apache.lucene.analysis.TokenStream;
+
+/**
+ * Normalizes tokens extracted with {@link StandardTokenizer}.
+ */
+public class StandardFilter extends TokenFilter {
+
+ /** Sole constructor */
+ public StandardFilter(TokenStream in) {
+ super(in);
+ }
+
+ @Override
+ public final boolean incrementToken() throws IOException {
+ return input.incrementToken(); // TODO: add some niceties for the new grammar
+ }
+}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/core/src/java/org/apache/lucene/analysis/standard/StandardTokenizer.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/analysis/standard/StandardTokenizer.java b/lucene/core/src/java/org/apache/lucene/analysis/standard/StandardTokenizer.java
new file mode 100644
index 0000000..5c5169a
--- /dev/null
+++ b/lucene/core/src/java/org/apache/lucene/analysis/standard/StandardTokenizer.java
@@ -0,0 +1,213 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.analysis.standard;
+
+import java.io.IOException;
+
+import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
+import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
+import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
+import org.apache.lucene.util.AttributeFactory;
+
+/** A grammar-based tokenizer constructed with JFlex.
+ * <p>
+ * This class implements the Word Break rules from the
+ * Unicode Text Segmentation algorithm, as specified in
+ * <a href="http://unicode.org/reports/tr29/">Unicode Standard Annex #29</a>.
+ * <p>Many applications have specific tokenizer needs. If this tokenizer does
+ * not suit your application, please consider copying this source code
+ * directory to your project and maintaining your own grammar-based tokenizer.
+ */
+
+public final class StandardTokenizer extends Tokenizer {
+ /** A private instance of the JFlex-constructed scanner */
+ private StandardTokenizerImpl scanner;
+
+ // TODO: how can we remove these old types?!
+ /** Alpha/numeric token type */
+ public static final int ALPHANUM = 0;
+ /** @deprecated (3.1) */
+ @Deprecated
+ public static final int APOSTROPHE = 1;
+ /** @deprecated (3.1) */
+ @Deprecated
+ public static final int ACRONYM = 2;
+ /** @deprecated (3.1) */
+ @Deprecated
+ public static final int COMPANY = 3;
+ /** Email token type */
+ public static final int EMAIL = 4;
+ /** @deprecated (3.1) */
+ @Deprecated
+ public static final int HOST = 5;
+ /** Numeric token type */
+ public static final int NUM = 6;
+ /** @deprecated (3.1) */
+ @Deprecated
+ public static final int CJ = 7;
+
+ /** @deprecated (3.1) */
+ @Deprecated
+ public static final int ACRONYM_DEP = 8;
+
+ /** Southeast Asian token type */
+ public static final int SOUTHEAST_ASIAN = 9;
+ /** Ideographic token type */
+ public static final int IDEOGRAPHIC = 10;
+ /** Hiragana token type */
+ public static final int HIRAGANA = 11;
+ /** Katakana token type */
+ public static final int KATAKANA = 12;
+
+ /** Hangul token type */
+ public static final int HANGUL = 13;
+
+ /** String token types that correspond to token type int constants */
+ public static final String [] TOKEN_TYPES = new String [] {
+ "<ALPHANUM>",
+ "<APOSTROPHE>",
+ "<ACRONYM>",
+ "<COMPANY>",
+ "<EMAIL>",
+ "<HOST>",
+ "<NUM>",
+ "<CJ>",
+ "<ACRONYM_DEP>",
+ "<SOUTHEAST_ASIAN>",
+ "<IDEOGRAPHIC>",
+ "<HIRAGANA>",
+ "<KATAKANA>",
+ "<HANGUL>"
+ };
+
+ /** Absolute maximum sized token */
+ public static final int MAX_TOKEN_LENGTH_LIMIT = 1024 * 1024;
+
+ private int skippedPositions;
+
+ private int maxTokenLength = StandardAnalyzer.DEFAULT_MAX_TOKEN_LENGTH;
+
+ /**
+ * Set the max allowed token length. No tokens longer than this are emitted.
+ *
+ * @throws IllegalArgumentException if the given length is outside of the
+ * range [1, {@value #MAX_TOKEN_LENGTH_LIMIT}].
+ */
+ public void setMaxTokenLength(int length) {
+ if (length < 1) {
+ throw new IllegalArgumentException("maxTokenLength must be greater than zero");
+ } else if (length > MAX_TOKEN_LENGTH_LIMIT) {
+ throw new IllegalArgumentException("maxTokenLength may not exceed " + MAX_TOKEN_LENGTH_LIMIT);
+ }
+ if (length != maxTokenLength) {
+ maxTokenLength = length;
+ scanner.setBufferSize(length);
+ }
+ }
+
+ /** Returns the current maximum token length
+ *
+ * @see #setMaxTokenLength */
+ public int getMaxTokenLength() {
+ return maxTokenLength;
+ }
+
+ /**
+ * Creates a new instance of the {@link org.apache.lucene.analysis.standard.StandardTokenizer}. Attaches
+ * the <code>input</code> to the newly created JFlex scanner.
+
+ * See http://issues.apache.org/jira/browse/LUCENE-1068
+ */
+ public StandardTokenizer() {
+ init();
+ }
+
+ /**
+ * Creates a new StandardTokenizer with a given {@link org.apache.lucene.util.AttributeFactory}
+ */
+ public StandardTokenizer(AttributeFactory factory) {
+ super(factory);
+ init();
+ }
+
+ private void init() {
+ this.scanner = new StandardTokenizerImpl(input);
+ }
+
+ // this tokenizer generates three attributes:
+ // term offset, positionIncrement and type
+ private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
+ private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
+ private final PositionIncrementAttribute posIncrAtt = addAttribute(PositionIncrementAttribute.class);
+ private final TypeAttribute typeAtt = addAttribute(TypeAttribute.class);
+
+ /*
+ * (non-Javadoc)
+ *
+ * @see org.apache.lucene.analysis.TokenStream#next()
+ */
+ @Override
+ public final boolean incrementToken() throws IOException {
+ clearAttributes();
+ skippedPositions = 0;
+
+ while(true) {
+ int tokenType = scanner.getNextToken();
+
+ if (tokenType == StandardTokenizerImpl.YYEOF) {
+ return false;
+ }
+
+ if (scanner.yylength() <= maxTokenLength) {
+ posIncrAtt.setPositionIncrement(skippedPositions+1);
+ scanner.getText(termAtt);
+ final int start = scanner.yychar();
+ offsetAtt.setOffset(correctOffset(start), correctOffset(start+termAtt.length()));
+ typeAtt.setType(StandardTokenizer.TOKEN_TYPES[tokenType]);
+ return true;
+ } else
+ // When we skip a too-long term, we still increment the
+ // position increment
+ skippedPositions++;
+ }
+ }
+
+ @Override
+ public final void end() throws IOException {
+ super.end();
+ // set final offset
+ int finalOffset = correctOffset(scanner.yychar() + scanner.yylength());
+ offsetAtt.setOffset(finalOffset, finalOffset);
+ // adjust any skipped tokens
+ posIncrAtt.setPositionIncrement(posIncrAtt.getPositionIncrement()+skippedPositions);
+ }
+
+ @Override
+ public void close() throws IOException {
+ super.close();
+ scanner.yyreset(input);
+ }
+
+ @Override
+ public void reset() throws IOException {
+ super.reset();
+ scanner.yyreset(input);
+ skippedPositions = 0;
+ }
+}
[04/12] lucene-solr:branch_6x: LUCENE-7318: graduate StandardAnalyzer
and make it the default for IndexWriterConfig
Posted by mi...@apache.org.
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/core/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.java b/lucene/core/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.java
new file mode 100644
index 0000000..5d7b240
--- /dev/null
+++ b/lucene/core/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.java
@@ -0,0 +1,823 @@
+/* The following code was generated by JFlex 1.6.0 */
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.analysis.standard;
+
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+
+/**
+ * This class implements Word Break rules from the Unicode Text Segmentation
+ * algorithm, as specified in
+ * <a href="http://unicode.org/reports/tr29/">Unicode Standard Annex #29</a>.
+ * <p>
+ * Tokens produced are of the following types:
+ * <ul>
+ * <li><ALPHANUM>: A sequence of alphabetic and numeric characters</li>
+ * <li><NUM>: A number</li>
+ * <li><SOUTHEAST_ASIAN>: A sequence of characters from South and Southeast
+ * Asian languages, including Thai, Lao, Myanmar, and Khmer</li>
+ * <li><IDEOGRAPHIC>: A single CJKV ideographic character</li>
+ * <li><HIRAGANA>: A single hiragana character</li>
+ * <li><KATAKANA>: A sequence of katakana characters</li>
+ * <li><HANGUL>: A sequence of Hangul characters</li>
+ * </ul>
+ */
+@SuppressWarnings("fallthrough")
+
+public final class StandardTokenizerImpl {
+
+ /** This character denotes the end of file */
+ public static final int YYEOF = -1;
+
+ /** initial size of the lookahead buffer */
+ private int ZZ_BUFFERSIZE = 255;
+
+ /** lexical states */
+ public static final int YYINITIAL = 0;
+
+ /**
+ * ZZ_LEXSTATE[l] is the state in the DFA for the lexical state l
+ * ZZ_LEXSTATE[l+1] is the state in the DFA for the lexical state l
+ * at the beginning of a line
+ * l is of the form l = 2*k, k a non negative integer
+ */
+ private static final int ZZ_LEXSTATE[] = {
+ 0, 0
+ };
+
+ /**
+ * Translates characters to character classes
+ */
+ private static final String ZZ_CMAP_PACKED =
+ "\42\0\1\15\4\0\1\14\4\0\1\7\1\0\1\10\1\0\12\4"+
+ "\1\6\1\7\5\0\32\1\4\0\1\11\1\0\32\1\57\0\1\1"+
+ "\2\0\1\3\7\0\1\1\1\0\1\6\2\0\1\1\5\0\27\1"+
+ "\1\0\37\1\1\0\u01ca\1\4\0\14\1\5\0\1\6\10\0\5\1"+
+ "\7\0\1\1\1\0\1\1\21\0\160\3\5\1\1\0\2\1\2\0"+
+ "\4\1\1\7\7\0\1\1\1\6\3\1\1\0\1\1\1\0\24\1"+
+ "\1\0\123\1\1\0\213\1\1\0\7\3\236\1\11\0\46\1\2\0"+
+ "\1\1\7\0\47\1\1\0\1\7\7\0\55\3\1\0\1\3\1\0"+
+ "\2\3\1\0\2\3\1\0\1\3\10\0\33\16\5\0\3\16\1\1"+
+ "\1\6\13\0\5\3\7\0\2\7\2\0\13\3\1\0\1\3\3\0"+
+ "\53\1\25\3\12\4\1\0\1\4\1\7\1\0\2\1\1\3\143\1"+
+ "\1\0\1\1\10\3\1\0\6\3\2\1\2\3\1\0\4\3\2\1"+
+ "\12\4\3\1\2\0\1\1\17\0\1\3\1\1\1\3\36\1\33\3"+
+ "\2\0\131\1\13\3\1\1\16\0\12\4\41\1\11\3\2\1\2\0"+
+ "\1\7\1\0\1\1\5\0\26\1\4\3\1\1\11\3\1\1\3\3"+
+ "\1\1\5\3\22\0\31\1\3\3\104\0\1\1\1\0\13\1\67\0"+
+ "\33\3\1\0\4\3\66\1\3\3\1\1\22\3\1\1\7\3\12\1"+
+ "\2\3\2\0\12\4\1\0\7\1\1\0\7\1\1\0\3\3\1\0"+
+ "\10\1\2\0\2\1\2\0\26\1\1\0\7\1\1\0\1\1\3\0"+
+ "\4\1\2\0\1\3\1\1\7\3\2\0\2\3\2\0\3\3\1\1"+
+ "\10\0\1\3\4\0\2\1\1\0\3\1\2\3\2\0\12\4\2\1"+
+ "\17\0\3\3\1\0\6\1\4\0\2\1\2\0\26\1\1\0\7\1"+
+ "\1\0\2\1\1\0\2\1\1\0\2\1\2\0\1\3\1\0\5\3"+
+ "\4\0\2\3\2\0\3\3\3\0\1\3\7\0\4\1\1\0\1\1"+
+ "\7\0\12\4\2\3\3\1\1\3\13\0\3\3\1\0\11\1\1\0"+
+ "\3\1\1\0\26\1\1\0\7\1\1\0\2\1\1\0\5\1\2\0"+
+ "\1\3\1\1\10\3\1\0\3\3\1\0\3\3\2\0\1\1\17\0"+
+ "\2\1\2\3\2\0\12\4\21\0\3\3\1\0\10\1\2\0\2\1"+
+ "\2\0\26\1\1\0\7\1\1\0\2\1\1\0\5\1\2\0\1\3"+
+ "\1\1\7\3\2\0\2\3\2\0\3\3\10\0\2\3\4\0\2\1"+
+ "\1\0\3\1\2\3\2\0\12\4\1\0\1\1\20\0\1\3\1\1"+
+ "\1\0\6\1\3\0\3\1\1\0\4\1\3\0\2\1\1\0\1\1"+
+ "\1\0\2\1\3\0\2\1\3\0\3\1\3\0\14\1\4\0\5\3"+
+ "\3\0\3\3\1\0\4\3\2\0\1\1\6\0\1\3\16\0\12\4"+
+ "\21\0\3\3\1\0\10\1\1\0\3\1\1\0\27\1\1\0\12\1"+
+ "\1\0\5\1\3\0\1\1\7\3\1\0\3\3\1\0\4\3\7\0"+
+ "\2\3\1\0\2\1\6\0\2\1\2\3\2\0\12\4\22\0\2\3"+
+ "\1\0\10\1\1\0\3\1\1\0\27\1\1\0\12\1\1\0\5\1"+
+ "\2\0\1\3\1\1\7\3\1\0\3\3\1\0\4\3\7\0\2\3"+
+ "\7\0\1\1\1\0\2\1\2\3\2\0\12\4\1\0\2\1\17\0"+
+ "\2\3\1\0\10\1\1\0\3\1\1\0\51\1\2\0\1\1\7\3"+
+ "\1\0\3\3\1\0\4\3\1\1\10\0\1\3\10\0\2\1\2\3"+
+ "\2\0\12\4\12\0\6\1\2\0\2\3\1\0\22\1\3\0\30\1"+
+ "\1\0\11\1\1\0\1\1\2\0\7\1\3\0\1\3\4\0\6\3"+
+ "\1\0\1\3\1\0\10\3\22\0\2\3\15\0\60\20\1\21\2\20"+
+ "\7\21\5\0\7\20\10\21\1\0\12\4\47\0\2\20\1\0\1\20"+
+ "\2\0\2\20\1\0\1\20\2\0\1\20\6\0\4\20\1\0\7\20"+
+ "\1\0\3\20\1\0\1\20\1\0\1\20\2\0\2\20\1\0\4\20"+
+ "\1\21\2\20\6\21\1\0\2\21\1\20\2\0\5\20\1\0\1\20"+
+ "\1\0\6\21\2\0\12\4\2\0\4\20\40\0\1\1\27\0\2\3"+
+ "\6\0\12\4\13\0\1\3\1\0\1\3\1\0\1\3\4\0\2\3"+
+ "\10\1\1\0\44\1\4\0\24\3\1\0\2\3\5\1\13\3\1\0"+
+ "\44\3\11\0\1\3\71\0\53\20\24\21\1\20\12\4\6\0\6\20"+
+ "\4\21\4\20\3\21\1\20\3\21\2\20\7\21\3\20\4\21\15\20"+
+ "\14\21\1\20\1\21\12\4\4\21\2\20\46\1\1\0\1\1\5\0"+
+ "\1\1\2\0\53\1\1\0\4\1\u0100\2\111\1\1\0\4\1\2\0"+
+ "\7\1\1\0\1\1\1\0\4\1\2\0\51\1\1\0\4\1\2\0"+
+ "\41\1\1\0\4\1\2\0\7\1\1\0\1\1\1\0\4\1\2\0"+
+ "\17\1\1\0\71\1\1\0\4\1\2\0\103\1\2\0\3\3\40\0"+
+ "\20\1\20\0\125\1\14\0\u026c\1\2\0\21\1\1\0\32\1\5\0"+
+ "\113\1\3\0\3\1\17\0\15\1\1\0\4\1\3\3\13\0\22\1"+
+ "\3\3\13\0\22\1\2\3\14\0\15\1\1\0\3\1\1\0\2\3"+
+ "\14\0\64\20\40\21\3\0\1\20\4\0\1\20\1\21\2\0\12\4"+
+ "\41\0\4\3\1\0\12\4\6\0\130\1\10\0\51\1\1\3\1\1"+
+ "\5\0\106\1\12\0\35\1\3\0\14\3\4\0\14\3\12\0\12\4"+
+ "\36\20\2\0\5\20\13\0\54\20\4\0\21\21\7\20\2\21\6\0"+
+ "\12\4\1\20\3\0\2\20\40\0\27\1\5\3\4\0\65\20\12\21"+
+ "\1\0\35\21\2\0\1\3\12\4\6\0\12\4\6\0\16\20\122\0"+
+ "\5\3\57\1\21\3\7\1\4\0\12\4\21\0\11\3\14\0\3\3"+
+ "\36\1\15\3\2\1\12\4\54\1\16\3\14\0\44\1\24\3\10\0"+
+ "\12\4\3\0\3\1\12\4\44\1\122\0\3\3\1\0\25\3\4\1"+
+ "\1\3\4\1\3\3\2\1\11\0\300\1\47\3\25\0\4\3\u0116\1"+
+ "\2\0\6\1\2\0\46\1\2\0\6\1\2\0\10\1\1\0\1\1"+
+ "\1\0\1\1\1\0\1\1\1\0\37\1\2\0\65\1\1\0\7\1"+
+ "\1\0\1\1\3\0\3\1\1\0\7\1\3\0\4\1\2\0\6\1"+
+ "\4\0\15\1\5\0\3\1\1\0\7\1\17\0\4\3\10\0\2\10"+
+ "\12\0\1\10\2\0\1\6\2\0\5\3\20\0\2\11\3\0\1\7"+
+ "\17\0\1\11\13\0\5\3\1\0\12\3\1\0\1\1\15\0\1\1"+
+ "\20\0\15\1\63\0\41\3\21\0\1\1\4\0\1\1\2\0\12\1"+
+ "\1\0\1\1\3\0\5\1\6\0\1\1\1\0\1\1\1\0\1\1"+
+ "\1\0\4\1\1\0\13\1\2\0\4\1\5\0\5\1\4\0\1\1"+
+ "\21\0\51\1\u032d\0\64\1\u0716\0\57\1\1\0\57\1\1\0\205\1"+
+ "\6\0\4\1\3\3\2\1\14\0\46\1\1\0\1\1\5\0\1\1"+
+ "\2\0\70\1\7\0\1\1\17\0\1\3\27\1\11\0\7\1\1\0"+
+ "\7\1\1\0\7\1\1\0\7\1\1\0\7\1\1\0\7\1\1\0"+
+ "\7\1\1\0\7\1\1\0\40\3\57\0\1\1\120\0\32\12\1\0"+
+ "\131\12\14\0\326\12\57\0\1\1\1\0\1\12\31\0\11\12\6\3"+
+ "\1\0\5\5\2\0\3\12\1\1\1\1\4\0\126\13\2\0\2\3"+
+ "\2\5\3\13\133\5\1\0\4\5\5\0\51\1\3\0\136\2\21\0"+
+ "\33\1\65\0\20\5\320\0\57\5\1\0\130\5\250\0\u19b6\12\112\0"+
+ "\u51cd\12\63\0\u048d\1\103\0\56\1\2\0\u010d\1\3\0\20\1\12\4"+
+ "\2\1\24\0\57\1\4\3\1\0\12\3\1\0\31\1\7\0\1\3"+
+ "\120\1\2\3\45\0\11\1\2\0\147\1\2\0\4\1\1\0\4\1"+
+ "\14\0\13\1\115\0\12\1\1\3\3\1\1\3\4\1\1\3\27\1"+
+ "\5\3\30\0\64\1\14\0\2\3\62\1\21\3\13\0\12\4\6\0"+
+ "\22\3\6\1\3\0\1\1\4\0\12\4\34\1\10\3\2\0\27\1"+
+ "\15\3\14\0\35\2\3\0\4\3\57\1\16\3\16\0\1\1\12\4"+
+ "\46\0\51\1\16\3\11\0\3\1\1\3\10\1\2\3\2\0\12\4"+
+ "\6\0\33\20\1\21\4\0\60\20\1\21\1\20\3\21\2\20\2\21"+
+ "\5\20\2\21\1\20\1\21\1\20\30\0\5\20\13\1\5\3\2\0"+
+ "\3\1\2\3\12\0\6\1\2\0\6\1\2\0\6\1\11\0\7\1"+
+ "\1\0\7\1\221\0\43\1\10\3\1\0\2\3\2\0\12\4\6\0"+
+ "\u2ba4\2\14\0\27\2\4\0\61\2\u2104\0\u016e\12\2\0\152\12\46\0"+
+ "\7\1\14\0\5\1\5\0\1\16\1\3\12\16\1\0\15\16\1\0"+
+ "\5\16\1\0\1\16\1\0\2\16\1\0\2\16\1\0\12\16\142\1"+
+ "\41\0\u016b\1\22\0\100\1\2\0\66\1\50\0\14\1\4\0\20\3"+
+ "\1\7\2\0\1\6\1\7\13\0\7\3\14\0\2\11\30\0\3\11"+
+ "\1\7\1\0\1\10\1\0\1\7\1\6\32\0\5\1\1\0\207\1"+
+ "\2\0\1\3\7\0\1\10\4\0\1\7\1\0\1\10\1\0\12\4"+
+ "\1\6\1\7\5\0\32\1\4\0\1\11\1\0\32\1\13\0\70\5"+
+ "\2\3\37\2\3\0\6\2\2\0\6\2\2\0\6\2\2\0\3\2"+
+ "\34\0\3\3\4\0\14\1\1\0\32\1\1\0\23\1\1\0\2\1"+
+ "\1\0\17\1\2\0\16\1\42\0\173\1\105\0\65\1\210\0\1\3"+
+ "\202\0\35\1\3\0\61\1\57\0\37\1\21\0\33\1\65\0\36\1"+
+ "\2\0\44\1\4\0\10\1\1\0\5\1\52\0\236\1\2\0\12\4"+
+ "\u0356\0\6\1\2\0\1\1\1\0\54\1\1\0\2\1\3\0\1\1"+
+ "\2\0\27\1\252\0\26\1\12\0\32\1\106\0\70\1\6\0\2\1"+
+ "\100\0\1\1\3\3\1\0\2\3\5\0\4\3\4\1\1\0\3\1"+
+ "\1\0\33\1\4\0\3\3\4\0\1\3\40\0\35\1\203\0\66\1"+
+ "\12\0\26\1\12\0\23\1\215\0\111\1\u03b7\0\3\3\65\1\17\3"+
+ "\37\0\12\4\20\0\3\3\55\1\13\3\2\0\1\3\22\0\31\1"+
+ "\7\0\12\4\6\0\3\3\44\1\16\3\1\0\12\4\100\0\3\3"+
+ "\60\1\16\3\4\1\13\0\12\4\u04a6\0\53\1\15\3\10\0\12\4"+
+ "\u0936\0\u036f\1\221\0\143\1\u0b9d\0\u042f\1\u33d1\0\u0239\1\u04c7\0\105\1"+
+ "\13\0\1\1\56\3\20\0\4\3\15\1\u4060\0\1\5\1\13\u2163\0"+
+ "\5\3\3\0\26\3\2\0\7\3\36\0\4\3\224\0\3\3\u01bb\0"+
+ "\125\1\1\0\107\1\1\0\2\1\2\0\1\1\2\0\2\1\2\0"+
+ "\4\1\1\0\14\1\1\0\1\1\1\0\7\1\1\0\101\1\1\0"+
+ "\4\1\2\0\10\1\1\0\7\1\1\0\34\1\1\0\4\1\1\0"+
+ "\5\1\1\0\1\1\3\0\7\1\1\0\u0154\1\2\0\31\1\1\0"+
+ "\31\1\1\0\37\1\1\0\31\1\1\0\37\1\1\0\31\1\1\0"+
+ "\37\1\1\0\31\1\1\0\37\1\1\0\31\1\1\0\10\1\2\0"+
+ "\62\4\u1600\0\4\1\1\0\33\1\1\0\2\1\1\0\1\1\2\0"+
+ "\1\1\1\0\12\1\1\0\4\1\1\0\1\1\1\0\1\1\6\0"+
+ "\1\1\4\0\1\1\1\0\1\1\1\0\1\1\1\0\3\1\1\0"+
+ "\2\1\1\0\1\1\2\0\1\1\1\0\1\1\1\0\1\1\1\0"+
+ "\1\1\1\0\1\1\1\0\2\1\1\0\1\1\2\0\4\1\1\0"+
+ "\7\1\1\0\4\1\1\0\4\1\1\0\1\1\1\0\12\1\1\0"+
+ "\21\1\5\0\3\1\1\0\5\1\1\0\21\1\u032a\0\32\17\1\13"+
+ "\u0dff\0\ua6d7\12\51\0\u1035\12\13\0\336\12\u3fe2\0\u021e\12\uffff\0\uffff\0\uffff\0\uffff\0\uffff\0\uffff\0\uffff\0\uffff\0\uffff\0\uffff\0\uffff\0\u05ee\0"+
+ "\1\3\36\0\140\3\200\0\360\3\uffff\0\uffff\0\ufe12\0";
+
+ /**
+ * Translates characters to character classes
+ */
+ private static final char [] ZZ_CMAP = zzUnpackCMap(ZZ_CMAP_PACKED);
+
+ /**
+ * Translates DFA states to action switch labels.
+ */
+ private static final int [] ZZ_ACTION = zzUnpackAction();
+
+ private static final String ZZ_ACTION_PACKED_0 =
+ "\1\0\1\1\1\2\1\3\1\4\1\5\1\1\1\6"+
+ "\1\7\1\2\1\1\1\10\1\2\1\0\1\2\1\0"+
+ "\1\4\1\0\2\2\2\0\1\1\1\0";
+
+ private static int [] zzUnpackAction() {
+ int [] result = new int[24];
+ int offset = 0;
+ offset = zzUnpackAction(ZZ_ACTION_PACKED_0, offset, result);
+ return result;
+ }
+
+ private static int zzUnpackAction(String packed, int offset, int [] result) {
+ int i = 0; /* index in packed string */
+ int j = offset; /* index in unpacked array */
+ int l = packed.length();
+ while (i < l) {
+ int count = packed.charAt(i++);
+ int value = packed.charAt(i++);
+ do result[j++] = value; while (--count > 0);
+ }
+ return j;
+ }
+
+
+ /**
+ * Translates a state to a row index in the transition table
+ */
+ private static final int [] ZZ_ROWMAP = zzUnpackRowMap();
+
+ private static final String ZZ_ROWMAP_PACKED_0 =
+ "\0\0\0\22\0\44\0\66\0\110\0\132\0\154\0\176"+
+ "\0\220\0\242\0\264\0\306\0\330\0\352\0\374\0\u010e"+
+ "\0\u0120\0\154\0\u0132\0\u0144\0\u0156\0\264\0\u0168\0\u017a";
+
+ private static int [] zzUnpackRowMap() {
+ int [] result = new int[24];
+ int offset = 0;
+ offset = zzUnpackRowMap(ZZ_ROWMAP_PACKED_0, offset, result);
+ return result;
+ }
+
+ private static int zzUnpackRowMap(String packed, int offset, int [] result) {
+ int i = 0; /* index in packed string */
+ int j = offset; /* index in unpacked array */
+ int l = packed.length();
+ while (i < l) {
+ int high = packed.charAt(i++) << 16;
+ result[j++] = high | packed.charAt(i++);
+ }
+ return j;
+ }
+
+ /**
+ * The transition table of the DFA
+ */
+ private static final int [] ZZ_TRANS = zzUnpackTrans();
+
+ private static final String ZZ_TRANS_PACKED_0 =
+ "\1\2\1\3\1\4\1\2\1\5\1\6\3\2\1\7"+
+ "\1\10\1\11\2\2\1\12\1\13\2\14\23\0\3\3"+
+ "\1\15\1\0\1\16\1\0\1\16\1\17\2\0\1\16"+
+ "\1\0\1\12\2\0\1\3\1\0\1\3\2\4\1\15"+
+ "\1\0\1\16\1\0\1\16\1\17\2\0\1\16\1\0"+
+ "\1\12\2\0\1\4\1\0\2\3\2\5\2\0\2\20"+
+ "\1\21\2\0\1\20\1\0\1\12\2\0\1\5\3\0"+
+ "\1\6\1\0\1\6\3\0\1\17\7\0\1\6\1\0"+
+ "\2\3\1\22\1\5\1\23\3\0\1\22\4\0\1\12"+
+ "\2\0\1\22\3\0\1\10\15\0\1\10\3\0\1\11"+
+ "\15\0\1\11\1\0\2\3\1\12\1\15\1\0\1\16"+
+ "\1\0\1\16\1\17\2\0\1\24\1\25\1\12\2\0"+
+ "\1\12\3\0\1\26\13\0\1\27\1\0\1\26\3\0"+
+ "\1\14\14\0\2\14\1\0\2\3\2\15\2\0\2\30"+
+ "\1\17\2\0\1\30\1\0\1\12\2\0\1\15\1\0"+
+ "\2\3\1\16\12\0\1\3\2\0\1\16\1\0\2\3"+
+ "\1\17\1\15\1\23\3\0\1\17\4\0\1\12\2\0"+
+ "\1\17\3\0\1\20\1\5\14\0\1\20\1\0\2\3"+
+ "\1\21\1\5\1\23\3\0\1\21\4\0\1\12\2\0"+
+ "\1\21\3\0\1\23\1\0\1\23\3\0\1\17\7\0"+
+ "\1\23\1\0\2\3\1\24\1\15\4\0\1\17\4\0"+
+ "\1\12\2\0\1\24\3\0\1\25\12\0\1\24\2\0"+
+ "\1\25\3\0\1\27\13\0\1\27\1\0\1\27\3\0"+
+ "\1\30\1\15\14\0\1\30";
+
+ private static int [] zzUnpackTrans() {
+ int [] result = new int[396];
+ int offset = 0;
+ offset = zzUnpackTrans(ZZ_TRANS_PACKED_0, offset, result);
+ return result;
+ }
+
+ private static int zzUnpackTrans(String packed, int offset, int [] result) {
+ int i = 0; /* index in packed string */
+ int j = offset; /* index in unpacked array */
+ int l = packed.length();
+ while (i < l) {
+ int count = packed.charAt(i++);
+ int value = packed.charAt(i++);
+ value--;
+ do result[j++] = value; while (--count > 0);
+ }
+ return j;
+ }
+
+
+ /* error codes */
+ private static final int ZZ_UNKNOWN_ERROR = 0;
+ private static final int ZZ_NO_MATCH = 1;
+ private static final int ZZ_PUSHBACK_2BIG = 2;
+
+ /* error messages for the codes above */
+ private static final String ZZ_ERROR_MSG[] = {
+ "Unkown internal scanner error",
+ "Error: could not match input",
+ "Error: pushback value was too large"
+ };
+
+ /**
+ * ZZ_ATTRIBUTE[aState] contains the attributes of state <code>aState</code>
+ */
+ private static final int [] ZZ_ATTRIBUTE = zzUnpackAttribute();
+
+ private static final String ZZ_ATTRIBUTE_PACKED_0 =
+ "\1\0\1\11\13\1\1\0\1\1\1\0\1\1\1\0"+
+ "\2\1\2\0\1\1\1\0";
+
+ private static int [] zzUnpackAttribute() {
+ int [] result = new int[24];
+ int offset = 0;
+ offset = zzUnpackAttribute(ZZ_ATTRIBUTE_PACKED_0, offset, result);
+ return result;
+ }
+
+ private static int zzUnpackAttribute(String packed, int offset, int [] result) {
+ int i = 0; /* index in packed string */
+ int j = offset; /* index in unpacked array */
+ int l = packed.length();
+ while (i < l) {
+ int count = packed.charAt(i++);
+ int value = packed.charAt(i++);
+ do result[j++] = value; while (--count > 0);
+ }
+ return j;
+ }
+
+ /** the input device */
+ private java.io.Reader zzReader;
+
+ /** the current state of the DFA */
+ private int zzState;
+
+ /** the current lexical state */
+ private int zzLexicalState = YYINITIAL;
+
+ /** this buffer contains the current text to be matched and is
+ the source of the yytext() string */
+ private char zzBuffer[] = new char[ZZ_BUFFERSIZE];
+
+ /** the textposition at the last accepting state */
+ private int zzMarkedPos;
+
+ /** the current text position in the buffer */
+ private int zzCurrentPos;
+
+ /** startRead marks the beginning of the yytext() string in the buffer */
+ private int zzStartRead;
+
+ /** endRead marks the last character in the buffer, that has been read
+ from input */
+ private int zzEndRead;
+
+ /** number of newlines encountered up to the start of the matched text */
+ private int yyline;
+
+ /** the number of characters up to the start of the matched text */
+ private int yychar;
+
+ /**
+ * the number of characters from the last newline up to the start of the
+ * matched text
+ */
+ private int yycolumn;
+
+ /**
+ * zzAtBOL == true <=> the scanner is currently at the beginning of a line
+ */
+ private boolean zzAtBOL = true;
+
+ /** zzAtEOF == true <=> the scanner is at the EOF */
+ private boolean zzAtEOF;
+
+ /** denotes if the user-EOF-code has already been executed */
+ private boolean zzEOFDone;
+
+ /**
+ * The number of occupied positions in zzBuffer beyond zzEndRead.
+ * When a lead/high surrogate has been read from the input stream
+ * into the final zzBuffer position, this will have a value of 1;
+ * otherwise, it will have a value of 0.
+ */
+ private int zzFinalHighSurrogate = 0;
+
+ /* user code: */
+ /** Alphanumeric sequences */
+ public static final int WORD_TYPE = StandardTokenizer.ALPHANUM;
+
+ /** Numbers */
+ public static final int NUMERIC_TYPE = StandardTokenizer.NUM;
+
+ /**
+ * Chars in class \p{Line_Break = Complex_Context} are from South East Asian
+ * scripts (Thai, Lao, Myanmar, Khmer, etc.). Sequences of these are kept
+ * together as a single token rather than broken up, because the logic
+ * required to break them at word boundaries is too complex for UAX#29.
+ * <p>
+ * See Unicode Line Breaking Algorithm: http://www.unicode.org/reports/tr14/#SA
+ */
+ public static final int SOUTH_EAST_ASIAN_TYPE = StandardTokenizer.SOUTHEAST_ASIAN;
+
+ /** Ideographic token type */
+ public static final int IDEOGRAPHIC_TYPE = StandardTokenizer.IDEOGRAPHIC;
+
+ /** Hiragana token type */
+ public static final int HIRAGANA_TYPE = StandardTokenizer.HIRAGANA;
+
+ /** Katakana token type */
+ public static final int KATAKANA_TYPE = StandardTokenizer.KATAKANA;
+
+ /** Hangul token type */
+ public static final int HANGUL_TYPE = StandardTokenizer.HANGUL;
+
+ /** Character count processed so far */
+ public final int yychar()
+ {
+ return yychar;
+ }
+
+ /**
+ * Fills CharTermAttribute with the current token text.
+ */
+ public final void getText(CharTermAttribute t) {
+ t.copyBuffer(zzBuffer, zzStartRead, zzMarkedPos-zzStartRead);
+ }
+
+ /**
+ * Sets the scanner buffer size in chars
+ */
+ public final void setBufferSize(int numChars) {
+ ZZ_BUFFERSIZE = numChars;
+ char[] newZzBuffer = new char[ZZ_BUFFERSIZE];
+ System.arraycopy(zzBuffer, 0, newZzBuffer, 0, Math.min(zzBuffer.length, ZZ_BUFFERSIZE));
+ zzBuffer = newZzBuffer;
+ }
+
+
+ /**
+ * Creates a new scanner
+ *
+ * @param in the java.io.Reader to read input from.
+ */
+ public StandardTokenizerImpl(java.io.Reader in) {
+ this.zzReader = in;
+ }
+
+
+ /**
+ * Unpacks the compressed character translation table.
+ *
+ * @param packed the packed character translation table
+ * @return the unpacked character translation table
+ */
+ private static char [] zzUnpackCMap(String packed) {
+ char [] map = new char[0x110000];
+ int i = 0; /* index in packed string */
+ int j = 0; /* index in unpacked array */
+ while (i < 2836) {
+ int count = packed.charAt(i++);
+ char value = packed.charAt(i++);
+ do map[j++] = value; while (--count > 0);
+ }
+ return map;
+ }
+
+
+ /**
+ * Refills the input buffer.
+ *
+ * @return <code>false</code>, iff there was new input.
+ *
+ * @exception java.io.IOException if any I/O-Error occurs
+ */
+ private boolean zzRefill() throws java.io.IOException {
+
+ /* first: make room (if you can) */
+ if (zzStartRead > 0) {
+ zzEndRead += zzFinalHighSurrogate;
+ zzFinalHighSurrogate = 0;
+ System.arraycopy(zzBuffer, zzStartRead,
+ zzBuffer, 0,
+ zzEndRead-zzStartRead);
+
+ /* translate stored positions */
+ zzEndRead-= zzStartRead;
+ zzCurrentPos-= zzStartRead;
+ zzMarkedPos-= zzStartRead;
+ zzStartRead = 0;
+ }
+
+
+ /* fill the buffer with new input */
+ int requested = zzBuffer.length - zzEndRead - zzFinalHighSurrogate;
+ int totalRead = 0;
+ while (totalRead < requested) {
+ int numRead = zzReader.read(zzBuffer, zzEndRead + totalRead, requested - totalRead);
+ if (numRead == -1) {
+ break;
+ }
+ totalRead += numRead;
+ }
+
+ if (totalRead > 0) {
+ zzEndRead += totalRead;
+ if (totalRead == requested) { /* possibly more input available */
+ if (Character.isHighSurrogate(zzBuffer[zzEndRead - 1])) {
+ --zzEndRead;
+ zzFinalHighSurrogate = 1;
+ if (totalRead == 1) { return true; }
+ }
+ }
+ return false;
+ }
+
+ // totalRead = 0: End of stream
+ return true;
+ }
+
+
+ /**
+ * Closes the input stream.
+ */
+ public final void yyclose() throws java.io.IOException {
+ zzAtEOF = true; /* indicate end of file */
+ zzEndRead = zzStartRead; /* invalidate buffer */
+
+ if (zzReader != null)
+ zzReader.close();
+ }
+
+
+ /**
+ * Resets the scanner to read from a new input stream.
+ * Does not close the old reader.
+ *
+ * All internal variables are reset, the old input stream
+ * <b>cannot</b> be reused (internal buffer is discarded and lost).
+ * Lexical state is set to <tt>ZZ_INITIAL</tt>.
+ *
+ * Internal scan buffer is resized down to its initial length, if it has grown.
+ *
+ * @param reader the new input stream
+ */
+ public final void yyreset(java.io.Reader reader) {
+ zzReader = reader;
+ zzAtBOL = true;
+ zzAtEOF = false;
+ zzEOFDone = false;
+ zzEndRead = zzStartRead = 0;
+ zzCurrentPos = zzMarkedPos = 0;
+ zzFinalHighSurrogate = 0;
+ yyline = yychar = yycolumn = 0;
+ zzLexicalState = YYINITIAL;
+ if (zzBuffer.length > ZZ_BUFFERSIZE)
+ zzBuffer = new char[ZZ_BUFFERSIZE];
+ }
+
+
+ /**
+ * Returns the current lexical state.
+ */
+ public final int yystate() {
+ return zzLexicalState;
+ }
+
+
+ /**
+ * Enters a new lexical state
+ *
+ * @param newState the new lexical state
+ */
+ public final void yybegin(int newState) {
+ zzLexicalState = newState;
+ }
+
+
+ /**
+ * Returns the text matched by the current regular expression.
+ */
+ public final String yytext() {
+ return new String( zzBuffer, zzStartRead, zzMarkedPos-zzStartRead );
+ }
+
+
+ /**
+ * Returns the character at position <tt>pos</tt> from the
+ * matched text.
+ *
+ * It is equivalent to yytext().charAt(pos), but faster
+ *
+ * @param pos the position of the character to fetch.
+ * A value from 0 to yylength()-1.
+ *
+ * @return the character at position pos
+ */
+ public final char yycharat(int pos) {
+ return zzBuffer[zzStartRead+pos];
+ }
+
+
+ /**
+ * Returns the length of the matched text region.
+ */
+ public final int yylength() {
+ return zzMarkedPos-zzStartRead;
+ }
+
+
+ /**
+ * Reports an error that occurred while scanning.
+ *
+ * In a well-formed scanner (no or only correct usage of
+ * yypushback(int) and a match-all fallback rule) this method
+ * will only be called with things that "Can't Possibly Happen".
+ * If this method is called, something is seriously wrong
+ * (e.g. a JFlex bug producing a faulty scanner etc.).
+ *
+ * Usual syntax/scanner level error handling should be done
+ * in error fallback rules.
+ *
+ * @param errorCode the code of the error message to display
+ */
+ private void zzScanError(int errorCode) {
+ String message;
+ try {
+ message = ZZ_ERROR_MSG[errorCode];
+ }
+ catch (ArrayIndexOutOfBoundsException e) {
+ message = ZZ_ERROR_MSG[ZZ_UNKNOWN_ERROR];
+ }
+
+ throw new Error(message);
+ }
+
+
+ /**
+ * Pushes the specified amount of characters back into the input stream.
+ *
+ * They will be read again by the next call of the scanning method
+ *
+ * @param number the number of characters to be read again.
+ * This number must not be greater than yylength()!
+ */
+ public void yypushback(int number) {
+ if ( number > yylength() )
+ zzScanError(ZZ_PUSHBACK_2BIG);
+
+ zzMarkedPos -= number;
+ }
+
+
+ /**
+ * Resumes scanning until the next regular expression is matched,
+ * the end of input is encountered or an I/O-Error occurs.
+ *
+ * @return the next token
+ * @exception java.io.IOException if any I/O-Error occurs
+ */
+ public int getNextToken() throws java.io.IOException {
+ int zzInput;
+ int zzAction;
+
+ // cached fields:
+ int zzCurrentPosL;
+ int zzMarkedPosL;
+ int zzEndReadL = zzEndRead;
+ char [] zzBufferL = zzBuffer;
+ char [] zzCMapL = ZZ_CMAP;
+
+ int [] zzTransL = ZZ_TRANS;
+ int [] zzRowMapL = ZZ_ROWMAP;
+ int [] zzAttrL = ZZ_ATTRIBUTE;
+
+ while (true) {
+ zzMarkedPosL = zzMarkedPos;
+
+ yychar+= zzMarkedPosL-zzStartRead;
+
+ zzAction = -1;
+
+ zzCurrentPosL = zzCurrentPos = zzStartRead = zzMarkedPosL;
+
+ zzState = ZZ_LEXSTATE[zzLexicalState];
+
+ // set up zzAction for empty match case:
+ int zzAttributes = zzAttrL[zzState];
+ if ( (zzAttributes & 1) == 1 ) {
+ zzAction = zzState;
+ }
+
+
+ zzForAction: {
+ while (true) {
+
+ if (zzCurrentPosL < zzEndReadL) {
+ zzInput = Character.codePointAt(zzBufferL, zzCurrentPosL, zzEndReadL);
+ zzCurrentPosL += Character.charCount(zzInput);
+ }
+ else if (zzAtEOF) {
+ zzInput = YYEOF;
+ break zzForAction;
+ }
+ else {
+ // store back cached positions
+ zzCurrentPos = zzCurrentPosL;
+ zzMarkedPos = zzMarkedPosL;
+ boolean eof = zzRefill();
+ // get translated positions and possibly new buffer
+ zzCurrentPosL = zzCurrentPos;
+ zzMarkedPosL = zzMarkedPos;
+ zzBufferL = zzBuffer;
+ zzEndReadL = zzEndRead;
+ if (eof) {
+ zzInput = YYEOF;
+ break zzForAction;
+ }
+ else {
+ zzInput = Character.codePointAt(zzBufferL, zzCurrentPosL, zzEndReadL);
+ zzCurrentPosL += Character.charCount(zzInput);
+ }
+ }
+ int zzNext = zzTransL[ zzRowMapL[zzState] + zzCMapL[zzInput] ];
+ if (zzNext == -1) break zzForAction;
+ zzState = zzNext;
+
+ zzAttributes = zzAttrL[zzState];
+ if ( (zzAttributes & 1) == 1 ) {
+ zzAction = zzState;
+ zzMarkedPosL = zzCurrentPosL;
+ if ( (zzAttributes & 8) == 8 ) break zzForAction;
+ }
+
+ }
+ }
+
+ // store back cached position
+ zzMarkedPos = zzMarkedPosL;
+
+ switch (zzAction < 0 ? zzAction : ZZ_ACTION[zzAction]) {
+ case 1:
+ { /* Break so we don't hit fall-through warning: */ break; /* Not numeric, word, ideographic, hiragana, or SE Asian -- ignore it. */
+ }
+ case 9: break;
+ case 2:
+ { return WORD_TYPE;
+ }
+ case 10: break;
+ case 3:
+ { return HANGUL_TYPE;
+ }
+ case 11: break;
+ case 4:
+ { return NUMERIC_TYPE;
+ }
+ case 12: break;
+ case 5:
+ { return KATAKANA_TYPE;
+ }
+ case 13: break;
+ case 6:
+ { return IDEOGRAPHIC_TYPE;
+ }
+ case 14: break;
+ case 7:
+ { return HIRAGANA_TYPE;
+ }
+ case 15: break;
+ case 8:
+ { return SOUTH_EAST_ASIAN_TYPE;
+ }
+ case 16: break;
+ default:
+ if (zzInput == YYEOF && zzStartRead == zzCurrentPos) {
+ zzAtEOF = true;
+ {
+ return YYEOF;
+ }
+ }
+ else {
+ zzScanError(ZZ_NO_MATCH);
+ }
+ }
+ }
+ }
+
+
+}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/core/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.jflex
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.jflex b/lucene/core/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.jflex
new file mode 100644
index 0000000..24c401d
--- /dev/null
+++ b/lucene/core/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.jflex
@@ -0,0 +1,206 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.analysis.standard;
+
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+
+/**
+ * This class implements Word Break rules from the Unicode Text Segmentation
+ * algorithm, as specified in
+ * <a href="http://unicode.org/reports/tr29/">Unicode Standard Annex #29</a>.
+ * <p>
+ * Tokens produced are of the following types:
+ * <ul>
+ * <li><ALPHANUM>: A sequence of alphabetic and numeric characters</li>
+ * <li><NUM>: A number</li>
+ * <li><SOUTHEAST_ASIAN>: A sequence of characters from South and Southeast
+ * Asian languages, including Thai, Lao, Myanmar, and Khmer</li>
+ * <li><IDEOGRAPHIC>: A single CJKV ideographic character</li>
+ * <li><HIRAGANA>: A single hiragana character</li>
+ * <li><KATAKANA>: A sequence of katakana characters</li>
+ * <li><HANGUL>: A sequence of Hangul characters</li>
+ * </ul>
+ */
+@SuppressWarnings("fallthrough")
+%%
+
+%unicode 6.3
+%integer
+%final
+%public
+%class StandardTokenizerImpl
+%function getNextToken
+%char
+%buffer 255
+
+// UAX#29 WB4. X (Extend | Format)* --> X
+//
+HangulEx = [\p{Script:Hangul}&&[\p{WB:ALetter}\p{WB:Hebrew_Letter}]] [\p{WB:Format}\p{WB:Extend}]*
+HebrewOrALetterEx = [\p{WB:HebrewLetter}\p{WB:ALetter}] [\p{WB:Format}\p{WB:Extend}]*
+NumericEx = [\p{WB:Numeric}[\p{Blk:HalfAndFullForms}&&\p{Nd}]] [\p{WB:Format}\p{WB:Extend}]*
+KatakanaEx = \p{WB:Katakana} [\p{WB:Format}\p{WB:Extend}]*
+MidLetterEx = [\p{WB:MidLetter}\p{WB:MidNumLet}\p{WB:SingleQuote}] [\p{WB:Format}\p{WB:Extend}]*
+MidNumericEx = [\p{WB:MidNum}\p{WB:MidNumLet}\p{WB:SingleQuote}] [\p{WB:Format}\p{WB:Extend}]*
+ExtendNumLetEx = \p{WB:ExtendNumLet} [\p{WB:Format}\p{WB:Extend}]*
+HanEx = \p{Script:Han} [\p{WB:Format}\p{WB:Extend}]*
+HiraganaEx = \p{Script:Hiragana} [\p{WB:Format}\p{WB:Extend}]*
+SingleQuoteEx = \p{WB:Single_Quote} [\p{WB:Format}\p{WB:Extend}]*
+DoubleQuoteEx = \p{WB:Double_Quote} [\p{WB:Format}\p{WB:Extend}]*
+HebrewLetterEx = \p{WB:Hebrew_Letter} [\p{WB:Format}\p{WB:Extend}]*
+RegionalIndicatorEx = \p{WB:RegionalIndicator} [\p{WB:Format}\p{WB:Extend}]*
+ComplexContextEx = \p{LB:Complex_Context} [\p{WB:Format}\p{WB:Extend}]*
+
+%{
+ /** Alphanumeric sequences */
+ public static final int WORD_TYPE = StandardTokenizer.ALPHANUM;
+
+ /** Numbers */
+ public static final int NUMERIC_TYPE = StandardTokenizer.NUM;
+
+ /**
+ * Chars in class \p{Line_Break = Complex_Context} are from South East Asian
+ * scripts (Thai, Lao, Myanmar, Khmer, etc.). Sequences of these are kept
+ * together as a single token rather than broken up, because the logic
+ * required to break them at word boundaries is too complex for UAX#29.
+ * <p>
+ * See Unicode Line Breaking Algorithm: http://www.unicode.org/reports/tr14/#SA
+ */
+ public static final int SOUTH_EAST_ASIAN_TYPE = StandardTokenizer.SOUTHEAST_ASIAN;
+
+ /** Ideographic token type */
+ public static final int IDEOGRAPHIC_TYPE = StandardTokenizer.IDEOGRAPHIC;
+
+ /** Hiragana token type */
+ public static final int HIRAGANA_TYPE = StandardTokenizer.HIRAGANA;
+
+ /** Katakana token type */
+ public static final int KATAKANA_TYPE = StandardTokenizer.KATAKANA;
+
+ /** Hangul token type */
+ public static final int HANGUL_TYPE = StandardTokenizer.HANGUL;
+
+ /** Character count processed so far */
+ public final int yychar()
+ {
+ return yychar;
+ }
+
+ /**
+ * Fills CharTermAttribute with the current token text.
+ */
+ public final void getText(CharTermAttribute t) {
+ t.copyBuffer(zzBuffer, zzStartRead, zzMarkedPos-zzStartRead);
+ }
+
+ /**
+ * Sets the scanner buffer size in chars
+ */
+ public final void setBufferSize(int numChars) {
+ ZZ_BUFFERSIZE = numChars;
+ char[] newZzBuffer = new char[ZZ_BUFFERSIZE];
+ System.arraycopy(zzBuffer, 0, newZzBuffer, 0, Math.min(zzBuffer.length, ZZ_BUFFERSIZE));
+ zzBuffer = newZzBuffer;
+ }
+%}
+
+%%
+
+// UAX#29 WB1. sot ÷
+// WB2. ÷ eot
+//
+<<EOF>> { return YYEOF; }
+
+// UAX#29 WB8. Numeric × Numeric
+// WB11. Numeric (MidNum | MidNumLet | Single_Quote) × Numeric
+// WB12. Numeric × (MidNum | MidNumLet | Single_Quote) Numeric
+// WB13a. (ALetter | Hebrew_Letter | Numeric | Katakana | ExtendNumLet) × ExtendNumLet
+// WB13b. ExtendNumLet × (ALetter | Hebrew_Letter | Numeric | Katakana)
+//
+{ExtendNumLetEx}* {NumericEx} ( ( {ExtendNumLetEx}* | {MidNumericEx} ) {NumericEx} )* {ExtendNumLetEx}*
+ { return NUMERIC_TYPE; }
+
+// subset of the below for typing purposes only!
+{HangulEx}+
+ { return HANGUL_TYPE; }
+
+{KatakanaEx}+
+ { return KATAKANA_TYPE; }
+
+// UAX#29 WB5. (ALetter | Hebrew_Letter) × (ALetter | Hebrew_Letter)
+// WB6. (ALetter | Hebrew_Letter) × (MidLetter | MidNumLet | Single_Quote) (ALetter | Hebrew_Letter)
+// WB7. (ALetter | Hebrew_Letter) (MidLetter | MidNumLet | Single_Quote) × (ALetter | Hebrew_Letter)
+// WB7a. Hebrew_Letter × Single_Quote
+// WB7b. Hebrew_Letter × Double_Quote Hebrew_Letter
+// WB7c. Hebrew_Letter Double_Quote × Hebrew_Letter
+// WB9. (ALetter | Hebrew_Letter) × Numeric
+// WB10. Numeric × (ALetter | Hebrew_Letter)
+// WB13. Katakana × Katakana
+// WB13a. (ALetter | Hebrew_Letter | Numeric | Katakana | ExtendNumLet) × ExtendNumLet
+// WB13b. ExtendNumLet × (ALetter | Hebrew_Letter | Numeric | Katakana)
+//
+{ExtendNumLetEx}* ( {KatakanaEx} ( {ExtendNumLetEx}* {KatakanaEx} )*
+ | ( {HebrewLetterEx} ( {SingleQuoteEx} | {DoubleQuoteEx} {HebrewLetterEx} )
+ | {NumericEx} ( ( {ExtendNumLetEx}* | {MidNumericEx} ) {NumericEx} )*
+ | {HebrewOrALetterEx} ( ( {ExtendNumLetEx}* | {MidLetterEx} ) {HebrewOrALetterEx} )*
+ )+
+ )
+({ExtendNumLetEx}+ ( {KatakanaEx} ( {ExtendNumLetEx}* {KatakanaEx} )*
+ | ( {HebrewLetterEx} ( {SingleQuoteEx} | {DoubleQuoteEx} {HebrewLetterEx} )
+ | {NumericEx} ( ( {ExtendNumLetEx}* | {MidNumericEx} ) {NumericEx} )*
+ | {HebrewOrALetterEx} ( ( {ExtendNumLetEx}* | {MidLetterEx} ) {HebrewOrALetterEx} )*
+ )+
+ )
+)*
+{ExtendNumLetEx}*
+ { return WORD_TYPE; }
+
+
+// From UAX #29:
+//
+// [C]haracters with the Line_Break property values of Contingent_Break (CB),
+// Complex_Context (SA/South East Asian), and XX (Unknown) are assigned word
+// boundary property values based on criteria outside of the scope of this
+// annex. That means that satisfactory treatment of languages like Chinese
+// or Thai requires special handling.
+//
+// In Unicode 6.3, only one character has the \p{Line_Break = Contingent_Break}
+// property: U+FFFC ( \ufffc ) OBJECT REPLACEMENT CHARACTER.
+//
+// In the ICU implementation of UAX#29, \p{Line_Break = Complex_Context}
+// character sequences (from South East Asian scripts like Thai, Myanmar, Khmer,
+// Lao, etc.) are kept together. This grammar does the same below.
+//
+// See also the Unicode Line Breaking Algorithm:
+//
+// http://www.unicode.org/reports/tr14/#SA
+//
+{ComplexContextEx}+ { return SOUTH_EAST_ASIAN_TYPE; }
+
+// UAX#29 WB14. Any ÷ Any
+//
+{HanEx} { return IDEOGRAPHIC_TYPE; }
+{HiraganaEx} { return HIRAGANA_TYPE; }
+
+
+// UAX#29 WB3. CR × LF
+// WB3a. (Newline | CR | LF) ÷
+// WB3b. ÷ (Newline | CR | LF)
+// WB13c. Regional_Indicator × Regional_Indicator
+// WB14. Any ÷ Any
+//
+{RegionalIndicatorEx} {RegionalIndicatorEx}+ | [^]
+ { /* Break so we don't hit fall-through warning: */ break; /* Not numeric, word, ideographic, hiragana, or SE Asian -- ignore it. */ }
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/core/src/java/org/apache/lucene/analysis/standard/package-info.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/analysis/standard/package-info.java b/lucene/core/src/java/org/apache/lucene/analysis/standard/package-info.java
new file mode 100644
index 0000000..39ce8f9
--- /dev/null
+++ b/lucene/core/src/java/org/apache/lucene/analysis/standard/package-info.java
@@ -0,0 +1,33 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Fast, general-purpose grammar-based tokenizer {@link org.apache.lucene.analysis.standard.StandardTokenizer}
+ * implements the Word Break rules from the Unicode Text Segmentation algorithm, as specified in
+ * <a href="http://unicode.org/reports/tr29/">Unicode Standard Annex #29</a>.
+ * Unlike <code>UAX29URLEmailTokenizer</code> from the analysis module, URLs and email addresses are
+ * <b>not</b> tokenized as single tokens, but are instead split up into
+ * tokens according to the UAX#29 word break rules.
+ * <br>
+ * {@link org.apache.lucene.analysis.standard.StandardAnalyzer StandardAnalyzer} includes
+ * {@link org.apache.lucene.analysis.standard.StandardTokenizer StandardTokenizer},
+ * {@link org.apache.lucene.analysis.standard.StandardFilter StandardFilter},
+ * {@link org.apache.lucene.analysis.LowerCaseFilter LowerCaseFilter}
+ * and {@link org.apache.lucene.analysis.StopFilter StopFilter}.
+ */
+
+package org.apache.lucene.analysis.standard;
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/core/src/java/org/apache/lucene/index/IndexWriterConfig.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/index/IndexWriterConfig.java b/lucene/core/src/java/org/apache/lucene/index/IndexWriterConfig.java
index 50d2482..368259a 100644
--- a/lucene/core/src/java/org/apache/lucene/index/IndexWriterConfig.java
+++ b/lucene/core/src/java/org/apache/lucene/index/IndexWriterConfig.java
@@ -21,6 +21,7 @@ import java.io.PrintStream;
import java.util.EnumSet;
import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.index.IndexWriter.IndexReaderWarmer;
import org.apache.lucene.search.Sort;
@@ -121,7 +122,21 @@ public final class IndexWriterConfig extends LiveIndexWriterConfig {
}
/**
- * Creates a new config that with the default {@link
+ * Creates a new config, using {@link StandardAnalyzer} as the
+ * analyzer. By default, {@link TieredMergePolicy} is used
+ * for merging;
+ * Note that {@link TieredMergePolicy} is free to select
+ * non-contiguous merges, which means docIDs may not
+ * remain monotonic over time. If this is a problem you
+ * should switch to {@link LogByteSizeMergePolicy} or
+ * {@link LogDocMergePolicy}.
+ */
+ public IndexWriterConfig() {
+ this(new StandardAnalyzer());
+ }
+
+ /**
+ * Creates a new config with the provided {@link
* Analyzer}. By default, {@link TieredMergePolicy} is used
* for merging;
* Note that {@link TieredMergePolicy} is free to select
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/core/src/java/org/apache/lucene/util/packed/Direct16.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/util/packed/Direct16.java b/lucene/core/src/java/org/apache/lucene/util/packed/Direct16.java
index 3fda7c3..82281a9e 100644
--- a/lucene/core/src/java/org/apache/lucene/util/packed/Direct16.java
+++ b/lucene/core/src/java/org/apache/lucene/util/packed/Direct16.java
@@ -62,7 +62,7 @@ final class Direct16 extends PackedInts.MutableImpl {
public long ramBytesUsed() {
return RamUsageEstimator.alignObjectSize(
RamUsageEstimator.NUM_BYTES_OBJECT_HEADER
- + 2 * Integer.BYTES // valueCount,bitsPerValue
+ + 2 * RamUsageEstimator.NUM_BYTES_INT // valueCount,bitsPerValue
+ RamUsageEstimator.NUM_BYTES_OBJECT_REF) // values ref
+ RamUsageEstimator.sizeOf(values);
}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/core/src/java/org/apache/lucene/util/packed/Direct32.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/util/packed/Direct32.java b/lucene/core/src/java/org/apache/lucene/util/packed/Direct32.java
index aec9eaf..502aa3f 100644
--- a/lucene/core/src/java/org/apache/lucene/util/packed/Direct32.java
+++ b/lucene/core/src/java/org/apache/lucene/util/packed/Direct32.java
@@ -62,7 +62,7 @@ final class Direct32 extends PackedInts.MutableImpl {
public long ramBytesUsed() {
return RamUsageEstimator.alignObjectSize(
RamUsageEstimator.NUM_BYTES_OBJECT_HEADER
- + 2 * Integer.BYTES // valueCount,bitsPerValue
+ + 2 * RamUsageEstimator.NUM_BYTES_INT // valueCount,bitsPerValue
+ RamUsageEstimator.NUM_BYTES_OBJECT_REF) // values ref
+ RamUsageEstimator.sizeOf(values);
}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/core/src/java/org/apache/lucene/util/packed/Direct64.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/util/packed/Direct64.java b/lucene/core/src/java/org/apache/lucene/util/packed/Direct64.java
index b8e06b6..106f641 100644
--- a/lucene/core/src/java/org/apache/lucene/util/packed/Direct64.java
+++ b/lucene/core/src/java/org/apache/lucene/util/packed/Direct64.java
@@ -57,7 +57,7 @@ final class Direct64 extends PackedInts.MutableImpl {
public long ramBytesUsed() {
return RamUsageEstimator.alignObjectSize(
RamUsageEstimator.NUM_BYTES_OBJECT_HEADER
- + 2 * Integer.BYTES // valueCount,bitsPerValue
+ + 2 * RamUsageEstimator.NUM_BYTES_INT // valueCount,bitsPerValue
+ RamUsageEstimator.NUM_BYTES_OBJECT_REF) // values ref
+ RamUsageEstimator.sizeOf(values);
}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/core/src/java/org/apache/lucene/util/packed/Direct8.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/util/packed/Direct8.java b/lucene/core/src/java/org/apache/lucene/util/packed/Direct8.java
index 81fc5a9..27986c0 100644
--- a/lucene/core/src/java/org/apache/lucene/util/packed/Direct8.java
+++ b/lucene/core/src/java/org/apache/lucene/util/packed/Direct8.java
@@ -60,7 +60,7 @@ final class Direct8 extends PackedInts.MutableImpl {
public long ramBytesUsed() {
return RamUsageEstimator.alignObjectSize(
RamUsageEstimator.NUM_BYTES_OBJECT_HEADER
- + 2 * Integer.BYTES // valueCount,bitsPerValue
+ + 2 * RamUsageEstimator.NUM_BYTES_INT // valueCount,bitsPerValue
+ RamUsageEstimator.NUM_BYTES_OBJECT_REF) // values ref
+ RamUsageEstimator.sizeOf(values);
}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/core/src/java/org/apache/lucene/util/packed/Packed16ThreeBlocks.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/util/packed/Packed16ThreeBlocks.java b/lucene/core/src/java/org/apache/lucene/util/packed/Packed16ThreeBlocks.java
index 02f4e41..8e8e94d 100644
--- a/lucene/core/src/java/org/apache/lucene/util/packed/Packed16ThreeBlocks.java
+++ b/lucene/core/src/java/org/apache/lucene/util/packed/Packed16ThreeBlocks.java
@@ -112,7 +112,7 @@ final class Packed16ThreeBlocks extends PackedInts.MutableImpl {
public long ramBytesUsed() {
return RamUsageEstimator.alignObjectSize(
RamUsageEstimator.NUM_BYTES_OBJECT_HEADER
- + 2 * Integer.BYTES // valueCount,bitsPerValue
+ + 2 * RamUsageEstimator.NUM_BYTES_INT // valueCount,bitsPerValue
+ RamUsageEstimator.NUM_BYTES_OBJECT_REF) // blocks ref
+ RamUsageEstimator.sizeOf(blocks);
}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/core/src/java/org/apache/lucene/util/packed/Packed64SingleBlock.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/util/packed/Packed64SingleBlock.java b/lucene/core/src/java/org/apache/lucene/util/packed/Packed64SingleBlock.java
index 85e7ea8..a7262b3 100644
--- a/lucene/core/src/java/org/apache/lucene/util/packed/Packed64SingleBlock.java
+++ b/lucene/core/src/java/org/apache/lucene/util/packed/Packed64SingleBlock.java
@@ -61,7 +61,7 @@ abstract class Packed64SingleBlock extends PackedInts.MutableImpl {
public long ramBytesUsed() {
return RamUsageEstimator.alignObjectSize(
RamUsageEstimator.NUM_BYTES_OBJECT_HEADER
- + 2 * Integer.BYTES // valueCount,bitsPerValue
+ + 2 * RamUsageEstimator.NUM_BYTES_INT // valueCount,bitsPerValue
+ RamUsageEstimator.NUM_BYTES_OBJECT_REF) // blocks ref
+ RamUsageEstimator.sizeOf(blocks);
}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/core/src/java/org/apache/lucene/util/packed/Packed8ThreeBlocks.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/util/packed/Packed8ThreeBlocks.java b/lucene/core/src/java/org/apache/lucene/util/packed/Packed8ThreeBlocks.java
index 3ec6df0..5a85735 100644
--- a/lucene/core/src/java/org/apache/lucene/util/packed/Packed8ThreeBlocks.java
+++ b/lucene/core/src/java/org/apache/lucene/util/packed/Packed8ThreeBlocks.java
@@ -110,7 +110,7 @@ final class Packed8ThreeBlocks extends PackedInts.MutableImpl {
public long ramBytesUsed() {
return RamUsageEstimator.alignObjectSize(
RamUsageEstimator.NUM_BYTES_OBJECT_HEADER
- + 2 * Integer.BYTES // valueCount,bitsPerValue
+ + 2 * RamUsageEstimator.NUM_BYTES_INT // valueCount,bitsPerValue
+ RamUsageEstimator.NUM_BYTES_OBJECT_REF) // blocks ref
+ RamUsageEstimator.sizeOf(blocks);
}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/core/src/java/overview.html
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/overview.html b/lucene/core/src/java/overview.html
index 9086cf9..b7112ac 100644
--- a/lucene/core/src/java/overview.html
+++ b/lucene/core/src/java/overview.html
@@ -78,7 +78,7 @@ to the output of a {@link org.apache.lucene.analysis.Tokenizer Tokenizer}.
Tokenizers and TokenFilters are strung together and applied with an {@link org.apache.lucene.analysis.Analyzer Analyzer}.
<a href="../analyzers-common/overview-summary.html">analyzers-common</a> provides a number of Analyzer implementations, including
<a href="../analyzers-common/org/apache/lucene/analysis/core/StopAnalyzer.html">StopAnalyzer</a>
-and the grammar-based <a href="../analyzers-common/org/apache/lucene/analysis/standard/StandardAnalyzer.html">StandardAnalyzer</a>.</li>
+and the grammar-based <a href="org/apache/lucene/analysis/standard/StandardAnalyzer.html">StandardAnalyzer</a>.</li>
<li>
<b>{@link org.apache.lucene.codecs}</b>
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/core/src/test/org/apache/lucene/analysis/TestCharArrayMap.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/test/org/apache/lucene/analysis/TestCharArrayMap.java b/lucene/core/src/test/org/apache/lucene/analysis/TestCharArrayMap.java
new file mode 100644
index 0000000..2d63b66
--- /dev/null
+++ b/lucene/core/src/test/org/apache/lucene/analysis/TestCharArrayMap.java
@@ -0,0 +1,244 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.analysis;
+
+import java.util.*;
+
+import org.apache.lucene.analysis.CharArrayMap;
+import org.apache.lucene.analysis.CharArraySet;
+import org.apache.lucene.util.LuceneTestCase;
+
+public class TestCharArrayMap extends LuceneTestCase {
+ public void doRandom(int iter, boolean ignoreCase) {
+ CharArrayMap<Integer> map = new CharArrayMap<>(1, ignoreCase);
+ HashMap<String,Integer> hmap = new HashMap<>();
+
+ char[] key;
+ for (int i=0; i<iter; i++) {
+ int len = random().nextInt(5);
+ key = new char[len];
+ for (int j=0; j<key.length; j++) {
+ key[j] = (char)random().nextInt(127);
+ }
+ String keyStr = new String(key);
+ String hmapKey = ignoreCase ? keyStr.toLowerCase(Locale.ROOT) : keyStr;
+
+ int val = random().nextInt();
+
+ Object o1 = map.put(key, val);
+ Object o2 = hmap.put(hmapKey,val);
+ assertEquals(o1,o2);
+
+ // add it again with the string method
+ assertEquals(val, map.put(keyStr,val).intValue());
+
+ assertEquals(val, map.get(key,0,key.length).intValue());
+ assertEquals(val, map.get(key).intValue());
+ assertEquals(val, map.get(keyStr).intValue());
+
+ assertEquals(hmap.size(), map.size());
+ }
+ }
+
+ public void testCharArrayMap() {
+ int num = 5 * RANDOM_MULTIPLIER;
+ for (int i = 0; i < num; i++) { // pump this up for more random testing
+ doRandom(1000,false);
+ doRandom(1000,true);
+ }
+ }
+
+ public void testMethods() {
+ CharArrayMap<Integer> cm = new CharArrayMap<>(2, false);
+ HashMap<String,Integer> hm = new HashMap<>();
+ hm.put("foo",1);
+ hm.put("bar",2);
+ cm.putAll(hm);
+ assertEquals(hm.size(), cm.size());
+ hm.put("baz", 3);
+ cm.putAll(hm);
+ assertEquals(hm.size(), cm.size());
+
+ CharArraySet cs = cm.keySet();
+ int n=0;
+ for (Object o : cs) {
+ assertTrue(cm.containsKey(o));
+ char[] co = (char[]) o;
+ assertTrue(cm.containsKey(co, 0, co.length));
+ n++;
+ }
+ assertEquals(hm.size(), n);
+ assertEquals(hm.size(), cs.size());
+ assertEquals(cm.size(), cs.size());
+ cs.clear();
+ assertEquals(0, cs.size());
+ assertEquals(0, cm.size());
+ // keySet() should not allow adding new keys
+ expectThrows(UnsupportedOperationException.class, () -> {
+ cs.add("test");
+ });
+
+ cm.putAll(hm);
+ assertEquals(hm.size(), cs.size());
+ assertEquals(cm.size(), cs.size());
+
+ Iterator<Map.Entry<Object,Integer>> iter1 = cm.entrySet().iterator();
+ n=0;
+ while (iter1.hasNext()) {
+ Map.Entry<Object,Integer> entry = iter1.next();
+ Object key = entry.getKey();
+ Integer val = entry.getValue();
+ assertEquals(cm.get(key), val);
+ entry.setValue(val*100);
+ assertEquals(val*100, (int)cm.get(key));
+ n++;
+ }
+ assertEquals(hm.size(), n);
+ cm.clear();
+ cm.putAll(hm);
+ assertEquals(cm.size(), n);
+
+ CharArrayMap<Integer>.EntryIterator iter2 = cm.entrySet().iterator();
+ n=0;
+ while (iter2.hasNext()) {
+ char[] keyc = iter2.nextKey();
+ Integer val = iter2.currentValue();
+ assertEquals(hm.get(new String(keyc)), val);
+ iter2.setValue(val*100);
+ assertEquals(val*100, (int)cm.get(keyc));
+ n++;
+ }
+ assertEquals(hm.size(), n);
+
+ cm.entrySet().clear();
+ assertEquals(0, cm.size());
+ assertEquals(0, cm.entrySet().size());
+ assertTrue(cm.isEmpty());
+ }
+
+ // TODO: break this up into simpler test methods vs. "telling a story"
+ public void testModifyOnUnmodifiable(){
+ CharArrayMap<Integer> map = new CharArrayMap<>(2, false);
+ map.put("foo",1);
+ map.put("bar",2);
+ final int size = map.size();
+ assertEquals(2, size);
+ assertTrue(map.containsKey("foo"));
+ assertEquals(1, map.get("foo").intValue());
+ assertTrue(map.containsKey("bar"));
+ assertEquals(2, map.get("bar").intValue());
+
+ map = CharArrayMap.unmodifiableMap(map);
+ assertEquals("Map size changed due to unmodifiableMap call" , size, map.size());
+ String NOT_IN_MAP = "SirGallahad";
+ assertFalse("Test String already exists in map", map.containsKey(NOT_IN_MAP));
+ assertNull("Test String already exists in map", map.get(NOT_IN_MAP));
+
+ try{
+ map.put(NOT_IN_MAP.toCharArray(), 3);
+ fail("Modified unmodifiable map");
+ }catch (UnsupportedOperationException e) {
+ // expected
+ assertFalse("Test String has been added to unmodifiable map", map.containsKey(NOT_IN_MAP));
+ assertNull("Test String has been added to unmodifiable map", map.get(NOT_IN_MAP));
+ assertEquals("Size of unmodifiable map has changed", size, map.size());
+ }
+
+ try{
+ map.put(NOT_IN_MAP, 3);
+ fail("Modified unmodifiable map");
+ }catch (UnsupportedOperationException e) {
+ // expected
+ assertFalse("Test String has been added to unmodifiable map", map.containsKey(NOT_IN_MAP));
+ assertNull("Test String has been added to unmodifiable map", map.get(NOT_IN_MAP));
+ assertEquals("Size of unmodifiable map has changed", size, map.size());
+ }
+
+ try{
+ map.put(new StringBuilder(NOT_IN_MAP), 3);
+ fail("Modified unmodifiable map");
+ }catch (UnsupportedOperationException e) {
+ // expected
+ assertFalse("Test String has been added to unmodifiable map", map.containsKey(NOT_IN_MAP));
+ assertNull("Test String has been added to unmodifiable map", map.get(NOT_IN_MAP));
+ assertEquals("Size of unmodifiable map has changed", size, map.size());
+ }
+
+ try{
+ map.clear();
+ fail("Modified unmodifiable map");
+ }catch (UnsupportedOperationException e) {
+ // expected
+ assertEquals("Size of unmodifiable map has changed", size, map.size());
+ }
+
+ try{
+ map.entrySet().clear();
+ fail("Modified unmodifiable map");
+ }catch (UnsupportedOperationException e) {
+ // expected
+ assertEquals("Size of unmodifiable map has changed", size, map.size());
+ }
+
+ try{
+ map.keySet().clear();
+ fail("Modified unmodifiable map");
+ }catch (UnsupportedOperationException e) {
+ // expected
+ assertEquals("Size of unmodifiable map has changed", size, map.size());
+ }
+
+ try{
+ map.put((Object) NOT_IN_MAP, 3);
+ fail("Modified unmodifiable map");
+ }catch (UnsupportedOperationException e) {
+ // expected
+ assertFalse("Test String has been added to unmodifiable map", map.containsKey(NOT_IN_MAP));
+ assertNull("Test String has been added to unmodifiable map", map.get(NOT_IN_MAP));
+ assertEquals("Size of unmodifiable map has changed", size, map.size());
+ }
+
+ try{
+ map.putAll(Collections.singletonMap(NOT_IN_MAP, 3));
+ fail("Modified unmodifiable map");
+ }catch (UnsupportedOperationException e) {
+ // expected
+ assertFalse("Test String has been added to unmodifiable map", map.containsKey(NOT_IN_MAP));
+ assertNull("Test String has been added to unmodifiable map", map.get(NOT_IN_MAP));
+ assertEquals("Size of unmodifiable map has changed", size, map.size());
+ }
+
+ assertTrue(map.containsKey("foo"));
+ assertEquals(1, map.get("foo").intValue());
+ assertTrue(map.containsKey("bar"));
+ assertEquals(2, map.get("bar").intValue());
+ }
+
+ public void testToString() {
+ CharArrayMap<Integer> cm = new CharArrayMap<>(Collections.singletonMap("test",1), false);
+ assertEquals("[test]",cm.keySet().toString());
+ assertEquals("[1]",cm.values().toString());
+ assertEquals("[test=1]",cm.entrySet().toString());
+ assertEquals("{test=1}",cm.toString());
+ cm.put("test2", 2);
+ assertTrue(cm.keySet().toString().contains(", "));
+ assertTrue(cm.values().toString().contains(", "));
+ assertTrue(cm.entrySet().toString().contains(", "));
+ assertTrue(cm.toString().contains(", "));
+ }
+}
+
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/core/src/test/org/apache/lucene/analysis/TestCharArraySet.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/test/org/apache/lucene/analysis/TestCharArraySet.java b/lucene/core/src/test/org/apache/lucene/analysis/TestCharArraySet.java
new file mode 100644
index 0000000..465f512
--- /dev/null
+++ b/lucene/core/src/test/org/apache/lucene/analysis/TestCharArraySet.java
@@ -0,0 +1,430 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.analysis;
+
+
+import java.util.*;
+
+import org.apache.lucene.analysis.CharArraySet;
+import org.apache.lucene.util.LuceneTestCase;
+
+
+public class TestCharArraySet extends LuceneTestCase {
+
+ static final String[] TEST_STOP_WORDS = {
+ "a", "an", "and", "are", "as", "at", "be", "but", "by",
+ "for", "if", "in", "into", "is", "it",
+ "no", "not", "of", "on", "or", "such",
+ "that", "the", "their", "then", "there", "these",
+ "they", "this", "to", "was", "will", "with"
+ };
+
+
+ public void testRehash() throws Exception {
+ CharArraySet cas = new CharArraySet(0, true);
+ for(int i=0;i<TEST_STOP_WORDS.length;i++)
+ cas.add(TEST_STOP_WORDS[i]);
+ assertEquals(TEST_STOP_WORDS.length, cas.size());
+ for(int i=0;i<TEST_STOP_WORDS.length;i++)
+ assertTrue(cas.contains(TEST_STOP_WORDS[i]));
+ }
+
+ public void testNonZeroOffset() {
+ String[] words={"Hello","World","this","is","a","test"};
+ char[] findme="xthisy".toCharArray();
+ CharArraySet set= new CharArraySet(10, true);
+ set.addAll(Arrays.asList(words));
+ assertTrue(set.contains(findme, 1, 4));
+ assertTrue(set.contains(new String(findme,1,4)));
+
+ // test unmodifiable
+ set = CharArraySet.unmodifiableSet(set);
+ assertTrue(set.contains(findme, 1, 4));
+ assertTrue(set.contains(new String(findme,1,4)));
+ }
+
+ public void testObjectContains() {
+ CharArraySet set = new CharArraySet(10, true);
+ Integer val = Integer.valueOf(1);
+ set.add(val);
+ assertTrue(set.contains(val));
+ assertTrue(set.contains(new Integer(1))); // another integer
+ assertTrue(set.contains("1"));
+ assertTrue(set.contains(new char[]{'1'}));
+ // test unmodifiable
+ set = CharArraySet.unmodifiableSet(set);
+ assertTrue(set.contains(val));
+ assertTrue(set.contains(new Integer(1))); // another integer
+ assertTrue(set.contains("1"));
+ assertTrue(set.contains(new char[]{'1'}));
+ }
+
+ public void testClear(){
+ CharArraySet set=new CharArraySet(10,true);
+ set.addAll(Arrays.asList(TEST_STOP_WORDS));
+ assertEquals("Not all words added", TEST_STOP_WORDS.length, set.size());
+ set.clear();
+ assertEquals("not empty", 0, set.size());
+ for(int i=0;i<TEST_STOP_WORDS.length;i++)
+ assertFalse(set.contains(TEST_STOP_WORDS[i]));
+ set.addAll(Arrays.asList(TEST_STOP_WORDS));
+ assertEquals("Not all words added", TEST_STOP_WORDS.length, set.size());
+ for(int i=0;i<TEST_STOP_WORDS.length;i++)
+ assertTrue(set.contains(TEST_STOP_WORDS[i]));
+ }
+
+ // TODO: break this up into simpler test methods, vs "telling a story"
+ public void testModifyOnUnmodifiable(){
+ CharArraySet set=new CharArraySet(10, true);
+ set.addAll(Arrays.asList(TEST_STOP_WORDS));
+ final int size = set.size();
+ set = CharArraySet.unmodifiableSet(set);
+ assertEquals("Set size changed due to unmodifiableSet call" , size, set.size());
+ String NOT_IN_SET = "SirGallahad";
+ assertFalse("Test String already exists in set", set.contains(NOT_IN_SET));
+
+ try{
+ set.add(NOT_IN_SET.toCharArray());
+ fail("Modified unmodifiable set");
+ }catch (UnsupportedOperationException e) {
+ // expected
+ assertFalse("Test String has been added to unmodifiable set", set.contains(NOT_IN_SET));
+ assertEquals("Size of unmodifiable set has changed", size, set.size());
+ }
+
+ try{
+ set.add(NOT_IN_SET);
+ fail("Modified unmodifiable set");
+ }catch (UnsupportedOperationException e) {
+ // expected
+ assertFalse("Test String has been added to unmodifiable set", set.contains(NOT_IN_SET));
+ assertEquals("Size of unmodifiable set has changed", size, set.size());
+ }
+
+ try{
+ set.add(new StringBuilder(NOT_IN_SET));
+ fail("Modified unmodifiable set");
+ }catch (UnsupportedOperationException e) {
+ // expected
+ assertFalse("Test String has been added to unmodifiable set", set.contains(NOT_IN_SET));
+ assertEquals("Size of unmodifiable set has changed", size, set.size());
+ }
+
+ try{
+ set.clear();
+ fail("Modified unmodifiable set");
+ }catch (UnsupportedOperationException e) {
+ // expected
+ assertFalse("Changed unmodifiable set", set.contains(NOT_IN_SET));
+ assertEquals("Size of unmodifiable set has changed", size, set.size());
+ }
+ try{
+ set.add((Object) NOT_IN_SET);
+ fail("Modified unmodifiable set");
+ }catch (UnsupportedOperationException e) {
+ // expected
+ assertFalse("Test String has been added to unmodifiable set", set.contains(NOT_IN_SET));
+ assertEquals("Size of unmodifiable set has changed", size, set.size());
+ }
+
+ // This test was changed in 3.1, as a contains() call on the given Collection using the "correct" iterator's
+ // current key (now a char[]) on a Set<String> would not hit any element of the CAS and therefore never call
+ // remove() on the iterator
+ try{
+ set.removeAll(new CharArraySet(Arrays.asList(TEST_STOP_WORDS), true));
+ fail("Modified unmodifiable set");
+ }catch (UnsupportedOperationException e) {
+ // expected
+ assertEquals("Size of unmodifiable set has changed", size, set.size());
+ }
+
+ try{
+ set.retainAll(new CharArraySet(Arrays.asList(NOT_IN_SET), true));
+ fail("Modified unmodifiable set");
+ }catch (UnsupportedOperationException e) {
+ // expected
+ assertEquals("Size of unmodifiable set has changed", size, set.size());
+ }
+
+ try{
+ set.addAll(Arrays.asList(NOT_IN_SET));
+ fail("Modified unmodifiable set");
+ }catch (UnsupportedOperationException e) {
+ // expected
+ assertFalse("Test String has been added to unmodifiable set", set.contains(NOT_IN_SET));
+ }
+
+ for (int i = 0; i < TEST_STOP_WORDS.length; i++) {
+ assertTrue(set.contains(TEST_STOP_WORDS[i]));
+ }
+ }
+
+ public void testUnmodifiableSet(){
+ CharArraySet set = new CharArraySet(10,true);
+ set.addAll(Arrays.asList(TEST_STOP_WORDS));
+ set.add(Integer.valueOf(1));
+ final int size = set.size();
+ set = CharArraySet.unmodifiableSet(set);
+ assertEquals("Set size changed due to unmodifiableSet call" , size, set.size());
+ for (String stopword : TEST_STOP_WORDS) {
+ assertTrue(set.contains(stopword));
+ }
+ assertTrue(set.contains(Integer.valueOf(1)));
+ assertTrue(set.contains("1"));
+ assertTrue(set.contains(new char[]{'1'}));
+
+ expectThrows(NullPointerException.class, () -> {
+ CharArraySet.unmodifiableSet(null);
+ });
+ }
+
+ public void testSupplementaryChars() {
+ String missing = "Term %s is missing in the set";
+ String falsePos = "Term %s is in the set but shouldn't";
+ // for reference see
+ // http://unicode.org/cldr/utility/list-unicodeset.jsp?a=[[%3ACase_Sensitive%3DTrue%3A]%26[^[\u0000-\uFFFF]]]&esc=on
+ String[] upperArr = new String[] {"Abc\ud801\udc1c",
+ "\ud801\udc1c\ud801\udc1cCDE", "A\ud801\udc1cB"};
+ String[] lowerArr = new String[] {"abc\ud801\udc44",
+ "\ud801\udc44\ud801\udc44cde", "a\ud801\udc44b"};
+ CharArraySet set = new CharArraySet(Arrays.asList(TEST_STOP_WORDS), true);
+ for (String upper : upperArr) {
+ set.add(upper);
+ }
+ for (int i = 0; i < upperArr.length; i++) {
+ assertTrue(String.format(Locale.ROOT, missing, upperArr[i]), set.contains(upperArr[i]));
+ assertTrue(String.format(Locale.ROOT, missing, lowerArr[i]), set.contains(lowerArr[i]));
+ }
+ set = new CharArraySet(Arrays.asList(TEST_STOP_WORDS), false);
+ for (String upper : upperArr) {
+ set.add(upper);
+ }
+ for (int i = 0; i < upperArr.length; i++) {
+ assertTrue(String.format(Locale.ROOT, missing, upperArr[i]), set.contains(upperArr[i]));
+ assertFalse(String.format(Locale.ROOT, falsePos, lowerArr[i]), set.contains(lowerArr[i]));
+ }
+ }
+
+ public void testSingleHighSurrogate() {
+ String missing = "Term %s is missing in the set";
+ String falsePos = "Term %s is in the set but shouldn't";
+ String[] upperArr = new String[] { "ABC\uD800", "ABC\uD800EfG",
+ "\uD800EfG", "\uD800\ud801\udc1cB" };
+
+ String[] lowerArr = new String[] { "abc\uD800", "abc\uD800efg",
+ "\uD800efg", "\uD800\ud801\udc44b" };
+ CharArraySet set = new CharArraySet(Arrays
+ .asList(TEST_STOP_WORDS), true);
+ for (String upper : upperArr) {
+ set.add(upper);
+ }
+ for (int i = 0; i < upperArr.length; i++) {
+ assertTrue(String.format(Locale.ROOT, missing, upperArr[i]), set.contains(upperArr[i]));
+ assertTrue(String.format(Locale.ROOT, missing, lowerArr[i]), set.contains(lowerArr[i]));
+ }
+ set = new CharArraySet(Arrays.asList(TEST_STOP_WORDS),
+ false);
+ for (String upper : upperArr) {
+ set.add(upper);
+ }
+ for (int i = 0; i < upperArr.length; i++) {
+ assertTrue(String.format(Locale.ROOT, missing, upperArr[i]), set.contains(upperArr[i]));
+ assertFalse(String.format(Locale.ROOT, falsePos, upperArr[i]), set
+ .contains(lowerArr[i]));
+ }
+ }
+
+ @SuppressWarnings("deprecated")
+ public void testCopyCharArraySetBWCompat() {
+ CharArraySet setIngoreCase = new CharArraySet(10, true);
+ CharArraySet setCaseSensitive = new CharArraySet(10, false);
+
+ List<String> stopwords = Arrays.asList(TEST_STOP_WORDS);
+ List<String> stopwordsUpper = new ArrayList<>();
+ for (String string : stopwords) {
+ stopwordsUpper.add(string.toUpperCase(Locale.ROOT));
+ }
+ setIngoreCase.addAll(Arrays.asList(TEST_STOP_WORDS));
+ setIngoreCase.add(Integer.valueOf(1));
+ setCaseSensitive.addAll(Arrays.asList(TEST_STOP_WORDS));
+ setCaseSensitive.add(Integer.valueOf(1));
+
+ CharArraySet copy = CharArraySet.copy(setIngoreCase);
+ CharArraySet copyCaseSens = CharArraySet.copy(setCaseSensitive);
+
+ assertEquals(setIngoreCase.size(), copy.size());
+ assertEquals(setCaseSensitive.size(), copy.size());
+
+ assertTrue(copy.containsAll(stopwords));
+ assertTrue(copy.containsAll(stopwordsUpper));
+ assertTrue(copyCaseSens.containsAll(stopwords));
+ for (String string : stopwordsUpper) {
+ assertFalse(copyCaseSens.contains(string));
+ }
+ // test adding terms to the copy
+ List<String> newWords = new ArrayList<>();
+ for (String string : stopwords) {
+ newWords.add(string+"_1");
+ }
+ copy.addAll(newWords);
+
+ assertTrue(copy.containsAll(stopwords));
+ assertTrue(copy.containsAll(stopwordsUpper));
+ assertTrue(copy.containsAll(newWords));
+ // new added terms are not in the source set
+ for (String string : newWords) {
+ assertFalse(setIngoreCase.contains(string));
+ assertFalse(setCaseSensitive.contains(string));
+
+ }
+ }
+
+ /**
+ * Test the static #copy() function with a CharArraySet as a source
+ */
+ public void testCopyCharArraySet() {
+ CharArraySet setIngoreCase = new CharArraySet(10, true);
+ CharArraySet setCaseSensitive = new CharArraySet(10, false);
+
+ List<String> stopwords = Arrays.asList(TEST_STOP_WORDS);
+ List<String> stopwordsUpper = new ArrayList<>();
+ for (String string : stopwords) {
+ stopwordsUpper.add(string.toUpperCase(Locale.ROOT));
+ }
+ setIngoreCase.addAll(Arrays.asList(TEST_STOP_WORDS));
+ setIngoreCase.add(Integer.valueOf(1));
+ setCaseSensitive.addAll(Arrays.asList(TEST_STOP_WORDS));
+ setCaseSensitive.add(Integer.valueOf(1));
+
+ CharArraySet copy = CharArraySet.copy(setIngoreCase);
+ CharArraySet copyCaseSens = CharArraySet.copy(setCaseSensitive);
+
+ assertEquals(setIngoreCase.size(), copy.size());
+ assertEquals(setCaseSensitive.size(), copy.size());
+
+ assertTrue(copy.containsAll(stopwords));
+ assertTrue(copy.containsAll(stopwordsUpper));
+ assertTrue(copyCaseSens.containsAll(stopwords));
+ for (String string : stopwordsUpper) {
+ assertFalse(copyCaseSens.contains(string));
+ }
+ // test adding terms to the copy
+ List<String> newWords = new ArrayList<>();
+ for (String string : stopwords) {
+ newWords.add(string+"_1");
+ }
+ copy.addAll(newWords);
+
+ assertTrue(copy.containsAll(stopwords));
+ assertTrue(copy.containsAll(stopwordsUpper));
+ assertTrue(copy.containsAll(newWords));
+ // new added terms are not in the source set
+ for (String string : newWords) {
+ assertFalse(setIngoreCase.contains(string));
+ assertFalse(setCaseSensitive.contains(string));
+
+ }
+ }
+
+ /**
+ * Test the static #copy() function with a JDK {@link Set} as a source
+ */
+ public void testCopyJDKSet() {
+ Set<String> set = new HashSet<>();
+
+ List<String> stopwords = Arrays.asList(TEST_STOP_WORDS);
+ List<String> stopwordsUpper = new ArrayList<>();
+ for (String string : stopwords) {
+ stopwordsUpper.add(string.toUpperCase(Locale.ROOT));
+ }
+ set.addAll(Arrays.asList(TEST_STOP_WORDS));
+
+ CharArraySet copy = CharArraySet.copy(set);
+
+ assertEquals(set.size(), copy.size());
+ assertEquals(set.size(), copy.size());
+
+ assertTrue(copy.containsAll(stopwords));
+ for (String string : stopwordsUpper) {
+ assertFalse(copy.contains(string));
+ }
+
+ List<String> newWords = new ArrayList<>();
+ for (String string : stopwords) {
+ newWords.add(string+"_1");
+ }
+ copy.addAll(newWords);
+
+ assertTrue(copy.containsAll(stopwords));
+ assertTrue(copy.containsAll(newWords));
+ // new added terms are not in the source set
+ for (String string : newWords) {
+ assertFalse(set.contains(string));
+ }
+ }
+
+ /**
+ * Tests a special case of {@link CharArraySet#copy(Set)} where the
+ * set to copy is the {@link CharArraySet#EMPTY_SET}
+ */
+ public void testCopyEmptySet() {
+ assertSame(CharArraySet.EMPTY_SET,
+ CharArraySet.copy(CharArraySet.EMPTY_SET));
+ }
+
+ /**
+ * Smoketests the static empty set
+ */
+ public void testEmptySet() {
+ assertEquals(0, CharArraySet.EMPTY_SET.size());
+
+ assertTrue(CharArraySet.EMPTY_SET.isEmpty());
+ for (String stopword : TEST_STOP_WORDS) {
+ assertFalse(CharArraySet.EMPTY_SET.contains(stopword));
+ }
+ assertFalse(CharArraySet.EMPTY_SET.contains("foo"));
+ assertFalse(CharArraySet.EMPTY_SET.contains((Object) "foo"));
+ assertFalse(CharArraySet.EMPTY_SET.contains("foo".toCharArray()));
+ assertFalse(CharArraySet.EMPTY_SET.contains("foo".toCharArray(),0,3));
+ }
+
+ /**
+ * Test for NPE
+ */
+ public void testContainsWithNull() {
+ CharArraySet set = new CharArraySet(1, true);
+
+ expectThrows(NullPointerException.class, () -> {
+ set.contains((char[]) null, 0, 10);
+ });
+
+ expectThrows(NullPointerException.class, () -> {
+ set.contains((CharSequence) null);
+ });
+
+ expectThrows(NullPointerException.class, () -> {
+ set.contains((Object) null);
+ });
+ }
+
+ public void testToString() {
+ CharArraySet set = CharArraySet.copy(Collections.singleton("test"));
+ assertEquals("[test]", set.toString());
+ set.add("test2");
+ assertTrue(set.toString().contains(", "));
+ }
+}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/core/src/test/org/apache/lucene/analysis/TestCharacterUtils.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/test/org/apache/lucene/analysis/TestCharacterUtils.java b/lucene/core/src/test/org/apache/lucene/analysis/TestCharacterUtils.java
new file mode 100644
index 0000000..53b3f56
--- /dev/null
+++ b/lucene/core/src/test/org/apache/lucene/analysis/TestCharacterUtils.java
@@ -0,0 +1,107 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.analysis;
+
+
+import java.io.IOException;
+import java.io.Reader;
+import java.io.StringReader;
+import java.util.Arrays;
+
+import org.apache.lucene.analysis.CharacterUtils.CharacterBuffer;
+import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.util.TestUtil;
+import org.junit.Test;
+
+/**
+ * TestCase for the {@link CharacterUtils} class.
+ */
+public class TestCharacterUtils extends LuceneTestCase {
+
+ public void testConversions() {
+ final char[] orig = TestUtil.randomUnicodeString(random(), 100).toCharArray();
+ final int[] buf = new int[orig.length];
+ final char[] restored = new char[buf.length];
+ final int o1 = TestUtil.nextInt(random(), 0, Math.min(5, orig.length));
+ final int o2 = TestUtil.nextInt(random(), 0, o1);
+ final int o3 = TestUtil.nextInt(random(), 0, o1);
+ final int codePointCount = CharacterUtils.toCodePoints(orig, o1, orig.length - o1, buf, o2);
+ final int charCount = CharacterUtils.toChars(buf, o2, codePointCount, restored, o3);
+ assertEquals(orig.length - o1, charCount);
+ assertArrayEquals(Arrays.copyOfRange(orig, o1, o1 + charCount), Arrays.copyOfRange(restored, o3, o3 + charCount));
+ }
+
+ @Test
+ public void testNewCharacterBuffer() {
+ CharacterBuffer newCharacterBuffer = CharacterUtils.newCharacterBuffer(1024);
+ assertEquals(1024, newCharacterBuffer.getBuffer().length);
+ assertEquals(0, newCharacterBuffer.getOffset());
+ assertEquals(0, newCharacterBuffer.getLength());
+
+ newCharacterBuffer = CharacterUtils.newCharacterBuffer(2);
+ assertEquals(2, newCharacterBuffer.getBuffer().length);
+ assertEquals(0, newCharacterBuffer.getOffset());
+ assertEquals(0, newCharacterBuffer.getLength());
+
+ // length must be >= 2
+ expectThrows(IllegalArgumentException.class, () -> {
+ CharacterUtils.newCharacterBuffer(1);
+ });
+ }
+
+ @Test
+ public void testFillNoHighSurrogate() throws IOException {
+ Reader reader = new StringReader("helloworld");
+ CharacterBuffer buffer = CharacterUtils.newCharacterBuffer(6);
+ assertTrue(CharacterUtils.fill(buffer,reader));
+ assertEquals(0, buffer.getOffset());
+ assertEquals(6, buffer.getLength());
+ assertEquals("hellow", new String(buffer.getBuffer()));
+ assertFalse(CharacterUtils.fill(buffer,reader));
+ assertEquals(4, buffer.getLength());
+ assertEquals(0, buffer.getOffset());
+
+ assertEquals("orld", new String(buffer.getBuffer(), buffer.getOffset(),
+ buffer.getLength()));
+ assertFalse(CharacterUtils.fill(buffer,reader));
+ }
+
+ @Test
+ public void testFill() throws IOException {
+ String input = "1234\ud801\udc1c789123\ud801\ud801\udc1c\ud801";
+ Reader reader = new StringReader(input);
+ CharacterBuffer buffer = CharacterUtils.newCharacterBuffer(5);
+ assertTrue(CharacterUtils.fill(buffer, reader));
+ assertEquals(4, buffer.getLength());
+ assertEquals("1234", new String(buffer.getBuffer(), buffer.getOffset(),
+ buffer.getLength()));
+ assertTrue(CharacterUtils.fill(buffer, reader));
+ assertEquals(5, buffer.getLength());
+ assertEquals("\ud801\udc1c789", new String(buffer.getBuffer()));
+ assertTrue(CharacterUtils.fill(buffer, reader));
+ assertEquals(4, buffer.getLength());
+ assertEquals("123\ud801", new String(buffer.getBuffer(),
+ buffer.getOffset(), buffer.getLength()));
+ assertFalse(CharacterUtils.fill(buffer, reader));
+ assertEquals(3, buffer.getLength());
+ assertEquals("\ud801\udc1c\ud801", new String(buffer.getBuffer(), buffer
+ .getOffset(), buffer.getLength()));
+ assertFalse(CharacterUtils.fill(buffer, reader));
+ assertEquals(0, buffer.getLength());
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/core/src/test/org/apache/lucene/analysis/TestStopFilter.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/test/org/apache/lucene/analysis/TestStopFilter.java b/lucene/core/src/test/org/apache/lucene/analysis/TestStopFilter.java
new file mode 100644
index 0000000..c224682
--- /dev/null
+++ b/lucene/core/src/test/org/apache/lucene/analysis/TestStopFilter.java
@@ -0,0 +1,176 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.analysis;
+
+import java.io.IOException;
+import java.io.StringReader;
+import java.util.ArrayList;
+
+import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.MockTokenizer;
+import org.apache.lucene.analysis.TokenFilter;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
+import org.apache.lucene.analysis.CharArraySet;
+import org.apache.lucene.util.English;
+
+public class TestStopFilter extends BaseTokenStreamTestCase {
+
+ // other StopFilter functionality is already tested by TestStopAnalyzer
+
+ public void testExactCase() throws IOException {
+ StringReader reader = new StringReader("Now is The Time");
+ CharArraySet stopWords = new CharArraySet(asSet("is", "the", "Time"), false);
+ final MockTokenizer in = new MockTokenizer(MockTokenizer.WHITESPACE, false);
+ in.setReader(reader);
+ TokenStream stream = new StopFilter(in, stopWords);
+ assertTokenStreamContents(stream, new String[] { "Now", "The" });
+ }
+
+ public void testStopFilt() throws IOException {
+ StringReader reader = new StringReader("Now is The Time");
+ String[] stopWords = new String[] { "is", "the", "Time" };
+ CharArraySet stopSet = StopFilter.makeStopSet(stopWords);
+ final MockTokenizer in = new MockTokenizer(MockTokenizer.WHITESPACE, false);
+ in.setReader(reader);
+ TokenStream stream = new StopFilter(in, stopSet);
+ assertTokenStreamContents(stream, new String[] { "Now", "The" });
+ }
+
+ /**
+ * Test Position increments applied by StopFilter with and without enabling this option.
+ */
+ public void testStopPositons() throws IOException {
+ StringBuilder sb = new StringBuilder();
+ ArrayList<String> a = new ArrayList<>();
+ for (int i=0; i<20; i++) {
+ String w = English.intToEnglish(i).trim();
+ sb.append(w).append(" ");
+ if (i%3 != 0) a.add(w);
+ }
+ log(sb.toString());
+ String stopWords[] = a.toArray(new String[0]);
+ for (int i=0; i<a.size(); i++) log("Stop: "+stopWords[i]);
+ CharArraySet stopSet = StopFilter.makeStopSet(stopWords);
+ // with increments
+ StringReader reader = new StringReader(sb.toString());
+ final MockTokenizer in = new MockTokenizer(MockTokenizer.WHITESPACE, false);
+ in.setReader(reader);
+ StopFilter stpf = new StopFilter(in, stopSet);
+ doTestStopPositons(stpf);
+ // with increments, concatenating two stop filters
+ ArrayList<String> a0 = new ArrayList<>();
+ ArrayList<String> a1 = new ArrayList<>();
+ for (int i=0; i<a.size(); i++) {
+ if (i%2==0) {
+ a0.add(a.get(i));
+ } else {
+ a1.add(a.get(i));
+ }
+ }
+ String stopWords0[] = a0.toArray(new String[0]);
+ for (int i=0; i<a0.size(); i++) log("Stop0: "+stopWords0[i]);
+ String stopWords1[] = a1.toArray(new String[0]);
+ for (int i=0; i<a1.size(); i++) log("Stop1: "+stopWords1[i]);
+ CharArraySet stopSet0 = StopFilter.makeStopSet(stopWords0);
+ CharArraySet stopSet1 = StopFilter.makeStopSet(stopWords1);
+ reader = new StringReader(sb.toString());
+ final MockTokenizer in1 = new MockTokenizer(MockTokenizer.WHITESPACE, false);
+ in1.setReader(reader);
+ StopFilter stpf0 = new StopFilter(in1, stopSet0); // first part of the set
+ StopFilter stpf01 = new StopFilter(stpf0, stopSet1); // two stop filters concatenated!
+ doTestStopPositons(stpf01);
+ }
+
+ // LUCENE-3849: make sure after .end() we see the "ending" posInc
+ public void testEndStopword() throws Exception {
+ CharArraySet stopSet = StopFilter.makeStopSet("of");
+ final MockTokenizer in = new MockTokenizer(MockTokenizer.WHITESPACE, false);
+ in.setReader(new StringReader("test of"));
+ StopFilter stpf = new StopFilter(in, stopSet);
+ assertTokenStreamContents(stpf, new String[] { "test" },
+ new int[] {0},
+ new int[] {4},
+ null,
+ new int[] {1},
+ null,
+ 7,
+ 1,
+ null,
+ true);
+ }
+
+ private void doTestStopPositons(StopFilter stpf) throws IOException {
+ CharTermAttribute termAtt = stpf.getAttribute(CharTermAttribute.class);
+ PositionIncrementAttribute posIncrAtt = stpf.getAttribute(PositionIncrementAttribute.class);
+ stpf.reset();
+ for (int i=0; i<20; i+=3) {
+ assertTrue(stpf.incrementToken());
+ log("Token "+i+": "+stpf);
+ String w = English.intToEnglish(i).trim();
+ assertEquals("expecting token "+i+" to be "+w,w,termAtt.toString());
+ assertEquals("all but first token must have position increment of 3",i==0?1:3,posIncrAtt.getPositionIncrement());
+ }
+ assertFalse(stpf.incrementToken());
+ stpf.end();
+ stpf.close();
+ }
+
+ // print debug info depending on VERBOSE
+ private static void log(String s) {
+ if (VERBOSE) {
+ System.out.println(s);
+ }
+ }
+
+ // stupid filter that inserts synonym of 'hte' for 'the'
+ private class MockSynonymFilter extends TokenFilter {
+ State bufferedState;
+ CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
+ PositionIncrementAttribute posIncAtt = addAttribute(PositionIncrementAttribute.class);
+
+ MockSynonymFilter(TokenStream input) {
+ super(input);
+ }
+
+ @Override
+ public boolean incrementToken() throws IOException {
+ if (bufferedState != null) {
+ restoreState(bufferedState);
+ posIncAtt.setPositionIncrement(0);
+ termAtt.setEmpty().append("hte");
+ bufferedState = null;
+ return true;
+ } else if (input.incrementToken()) {
+ if (termAtt.toString().equals("the")) {
+ bufferedState = captureState();
+ }
+ return true;
+ } else {
+ return false;
+ }
+ }
+
+ @Override
+ public void reset() throws IOException {
+ super.reset();
+ bufferedState = null;
+ }
+ }
+
+}
[06/12] lucene-solr:branch_6x: LUCENE-7318: graduate StandardAnalyzer
and make it the default for IndexWriterConfig
Posted by mi...@apache.org.
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/common/src/test/org/apache/lucene/analysis/standard/generateJavaUnicodeWordBreakTest.pl
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/standard/generateJavaUnicodeWordBreakTest.pl b/lucene/analysis/common/src/test/org/apache/lucene/analysis/standard/generateJavaUnicodeWordBreakTest.pl
deleted file mode 100644
index ec37924..0000000
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/standard/generateJavaUnicodeWordBreakTest.pl
+++ /dev/null
@@ -1,232 +0,0 @@
-#!/usr/bin/perl
-
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements. See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-use warnings;
-use strict;
-use File::Spec;
-use Getopt::Long;
-use LWP::UserAgent;
-
-my ($volume, $directory, $script_name) = File::Spec->splitpath($0);
-
-my $version = '';
-unless (GetOptions("version=s" => \$version) && $version =~ /\d+\.\d+\.\d+/) {
- print STDERR "Usage: $script_name -v <version>\n";
- print STDERR "\tversion must be of the form X.Y.Z, e.g. 5.2.0\n"
- if ($version);
- exit 1;
-}
-my $url_prefix = "http://www.unicode.org/Public/${version}/ucd";
-my $scripts_url = "${url_prefix}/Scripts.txt";
-my $line_break_url = "${url_prefix}/LineBreak.txt";
-my $word_break_url = "${url_prefix}/auxiliary/WordBreakProperty.txt";
-my $word_break_test_url = "${url_prefix}/auxiliary/WordBreakTest.txt";
-my $underscore_version = $version;
-$underscore_version =~ s/\./_/g;
-my $class_name = "WordBreakTestUnicode_${underscore_version}";
-my $output_filename = "${class_name}.java";
-my $header =<<"__HEADER__";
-package org.apache.lucene.analysis.core;
-
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.BaseTokenStreamTestCase;
-import org.junit.Ignore;
-
-/**
- * This class was automatically generated by ${script_name}
- * from: ${url_prefix}/auxiliary/WordBreakTest.txt
- *
- * WordBreakTest.txt indicates the points in the provided character sequences
- * at which conforming implementations must and must not break words. This
- * class tests for expected token extraction from each of the test sequences
- * in WordBreakTest.txt, where the expected tokens are those character
- * sequences bounded by word breaks and containing at least one character
- * from one of the following character sets:
- *
- * \\p{Script = Han} (From $scripts_url)
- * \\p{Script = Hiragana}
- * \\p{LineBreak = Complex_Context} (From $line_break_url)
- * \\p{WordBreak = ALetter} (From $word_break_url)
- * \\p{WordBreak = Hebrew_Letter}
- * \\p{WordBreak = Katakana}
- * \\p{WordBreak = Numeric} (Excludes full-width Arabic digits)
- * [\\uFF10-\\uFF19] (Full-width Arabic digits)
- */
-\@Ignore
-public class ${class_name} extends BaseTokenStreamTestCase {
-
- public void test(Analyzer analyzer) throws Exception {
-__HEADER__
-
-my $codepoints = [];
-map { $codepoints->[$_] = 1 } (0xFF10..0xFF19);
-# Complex_Context is an alias for 'SA', which is used in LineBreak.txt
-# Using lowercase versions of property value names to allow for case-
-# insensitive comparison with the names in the Unicode data files.
-parse_Unicode_data_file($line_break_url, $codepoints, {'sa' => 1});
-parse_Unicode_data_file($scripts_url, $codepoints,
- {'han' => 1, 'hiragana' => 1});
-parse_Unicode_data_file($word_break_url, $codepoints,
- {'aletter' => 1, 'hebrew_letter' => 1, 'katakana' => 1, 'numeric' => 1});
-my @tests = split /\r?\n/, get_URL_content($word_break_test_url);
-
-my $output_path = File::Spec->catpath($volume, $directory, $output_filename);
-open OUT, ">$output_path"
- || die "Error opening '$output_path' for writing: $!";
-
-print STDERR "Writing '$output_path'...";
-
-print OUT $header;
-
-for my $line (@tests) {
- next if ($line =~ /^\s*(?:|\#.*)$/); # Skip blank or comment-only lines
- # Example line: ÷ 0001 × 0300 ÷ # ÷ [0.2] <START OF HEADING> (Other) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
- my ($sequence) = $line =~ /^(.*?)\s*\#/;
- $line =~ s/\t/ /g; # Convert tabs to two spaces (no tabs allowed in Lucene source)
- print OUT " // $line\n";
- $sequence =~ s/\s*÷\s*$//; # Trim trailing break character
- my $test_string = $sequence;
- $test_string =~ s/\s*÷\s*/\\u/g;
- $test_string =~ s/\s*×\s*/\\u/g;
- $test_string =~ s/\\u([0-9A-F]{5,})/join('', map { "\\u$_" } above_BMP_char_to_surrogates($1))/ge;
- $test_string =~ s/\\u000A/\\n/g;
- $test_string =~ s/\\u000D/\\r/g;
- $test_string =~ s/\\u0022/\\\"/g;
- $sequence =~ s/^\s*÷\s*//; # Trim leading break character
- my @tokens = ();
- for my $candidate (split /\s*÷\s*/, $sequence) {
- my @chars = ();
- my $has_wanted_char = 0;
- while ($candidate =~ /([0-9A-F]+)/gi) {
- my $hexchar = $1;
- if (4 == length($hexchar)) {
- push @chars, $hexchar;
- } else {
- push @chars, above_BMP_char_to_surrogates($hexchar);
- }
- unless ($has_wanted_char) {
- $has_wanted_char = 1 if (defined($codepoints->[hex($hexchar)]));
- }
- }
- if ($has_wanted_char) {
- push @tokens, '"'.join('', map { "\\u$_" } @chars).'"';
- }
- }
- print OUT " assertAnalyzesTo(analyzer, \"${test_string}\",\n";
- print OUT " new String[] { ";
- print OUT join(", ", @tokens), " });\n\n";
-}
-
-print OUT " }\n}\n";
-close OUT;
-print STDERR "done.\n";
-
-
-# sub above_BMP_char_to_surrogates
-#
-# Converts hex references to chars above the BMP (i.e., greater than 0xFFFF)
-# to the corresponding UTF-16 surrogate pair
-#
-# Assumption: input string is a sequence more than four hex digits
-#
-sub above_BMP_char_to_surrogates {
- my $ch = hex(shift);
- my $high_surrogate = 0xD800 + (($ch - 0x10000) >> 10);
- my $low_surrogate = 0xDC00 + ($ch & 0x3FF);
- return map { sprintf("%04X", $_) } ($high_surrogate, $low_surrogate);
-}
-
-
-# sub parse_Unicode_data_file
-#
-# Downloads and parses the specified Unicode data file, parses it, and
-# extracts code points assigned any of the given property values, defining
-# the corresponding array position in the passed-in target array.
-#
-# Takes in the following parameters:
-#
-# - URL of the Unicode data file to download and parse
-# - Reference to target array
-# - Reference to hash of property values to get code points for
-#
-sub parse_Unicode_data_file {
- my $url = shift;
- my $target = shift;
- my $wanted_property_values = shift;
- my $content = get_URL_content($url);
- print STDERR "Parsing '$url'...";
- my @lines = split /\r?\n/, $content;
- for (@lines) {
- s/\s*#.*//; # Strip trailing comments
- s/\s+$//; # Strip trailing space
- next unless (/\S/); # Skip empty lines
- my ($start, $end, $property_value);
- if (/^([0-9A-F]{4,5})\s*;\s*(.+)/i) {
- # 00AA ; LATIN
- $start = $end = hex $1;
- $property_value = lc $2; # Property value names are case-insensitive
- } elsif (/^([0-9A-F]{4,5})..([0-9A-F]{4,5})\s*;\s*(.+)/i) {
- # 0AE6..0AEF ; Gujarati
- $start = hex $1;
- $end = hex $2;
- $property_value = lc $3; # Property value names are case-insensitive
- } else {
- next;
- }
- if (defined($wanted_property_values->{$property_value})) {
- for my $code_point ($start..$end) {
- $target->[$code_point] = 1;
- }
- }
- }
- print STDERR "done.\n";
-}
-
-# sub get_URL_content
-#
-# Retrieves and returns the content of the given URL.
-#
-sub get_URL_content {
- my $url = shift;
- print STDERR "Retrieving '$url'...";
- my $user_agent = LWP::UserAgent->new;
- my $request = HTTP::Request->new(GET => $url);
- my $response = $user_agent->request($request);
- unless ($response->is_success) {
- print STDERR "Failed to download '$url':\n\t",$response->status_line,"\n";
- exit 1;
- }
- print STDERR "done.\n";
- return $response->content;
-}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/common/src/test/org/apache/lucene/analysis/sv/TestSwedishAnalyzer.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/sv/TestSwedishAnalyzer.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/sv/TestSwedishAnalyzer.java
index b9d586e..b7f45cb 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/sv/TestSwedishAnalyzer.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/sv/TestSwedishAnalyzer.java
@@ -21,7 +21,7 @@ import java.io.IOException;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
-import org.apache.lucene.analysis.util.CharArraySet;
+import org.apache.lucene.analysis.CharArraySet;
public class TestSwedishAnalyzer extends BaseTokenStreamTestCase {
/** This test fails with NPE when the
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/common/src/test/org/apache/lucene/analysis/sv/TestSwedishLightStemFilter.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/sv/TestSwedishLightStemFilter.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/sv/TestSwedishLightStemFilter.java
index ebe1034..fd7aefd 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/sv/TestSwedishLightStemFilter.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/sv/TestSwedishLightStemFilter.java
@@ -21,12 +21,12 @@ import java.io.IOException;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.KeywordTokenizer;
import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
-import org.apache.lucene.analysis.util.CharArraySet;
import static org.apache.lucene.analysis.VocabularyAssert.*;
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/common/src/test/org/apache/lucene/analysis/th/TestThaiAnalyzer.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/th/TestThaiAnalyzer.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/th/TestThaiAnalyzer.java
index c4b9276..580e269 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/th/TestThaiAnalyzer.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/th/TestThaiAnalyzer.java
@@ -19,10 +19,10 @@ package org.apache.lucene.analysis.th;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.core.StopAnalyzer;
import org.apache.lucene.analysis.tokenattributes.FlagsAttribute;
-import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.util.Version;
/**
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/common/src/test/org/apache/lucene/analysis/tr/TestTurkishAnalyzer.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/tr/TestTurkishAnalyzer.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/tr/TestTurkishAnalyzer.java
index 9972702..9cfc6fc 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/tr/TestTurkishAnalyzer.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/tr/TestTurkishAnalyzer.java
@@ -21,7 +21,7 @@ import java.io.IOException;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
-import org.apache.lucene.analysis.util.CharArraySet;
+import org.apache.lucene.analysis.CharArraySet;
public class TestTurkishAnalyzer extends BaseTokenStreamTestCase {
/** This test fails with NPE when the
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestCharArrayMap.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestCharArrayMap.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestCharArrayMap.java
deleted file mode 100644
index 66b0dce..0000000
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestCharArrayMap.java
+++ /dev/null
@@ -1,244 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.lucene.analysis.util;
-
-import java.util.*;
-
-import org.apache.lucene.analysis.util.CharArrayMap;
-import org.apache.lucene.analysis.util.CharArraySet;
-import org.apache.lucene.util.LuceneTestCase;
-
-public class TestCharArrayMap extends LuceneTestCase {
- public void doRandom(int iter, boolean ignoreCase) {
- CharArrayMap<Integer> map = new CharArrayMap<>(1, ignoreCase);
- HashMap<String,Integer> hmap = new HashMap<>();
-
- char[] key;
- for (int i=0; i<iter; i++) {
- int len = random().nextInt(5);
- key = new char[len];
- for (int j=0; j<key.length; j++) {
- key[j] = (char)random().nextInt(127);
- }
- String keyStr = new String(key);
- String hmapKey = ignoreCase ? keyStr.toLowerCase(Locale.ROOT) : keyStr;
-
- int val = random().nextInt();
-
- Object o1 = map.put(key, val);
- Object o2 = hmap.put(hmapKey,val);
- assertEquals(o1,o2);
-
- // add it again with the string method
- assertEquals(val, map.put(keyStr,val).intValue());
-
- assertEquals(val, map.get(key,0,key.length).intValue());
- assertEquals(val, map.get(key).intValue());
- assertEquals(val, map.get(keyStr).intValue());
-
- assertEquals(hmap.size(), map.size());
- }
- }
-
- public void testCharArrayMap() {
- int num = 5 * RANDOM_MULTIPLIER;
- for (int i = 0; i < num; i++) { // pump this up for more random testing
- doRandom(1000,false);
- doRandom(1000,true);
- }
- }
-
- public void testMethods() {
- CharArrayMap<Integer> cm = new CharArrayMap<>(2, false);
- HashMap<String,Integer> hm = new HashMap<>();
- hm.put("foo",1);
- hm.put("bar",2);
- cm.putAll(hm);
- assertEquals(hm.size(), cm.size());
- hm.put("baz", 3);
- cm.putAll(hm);
- assertEquals(hm.size(), cm.size());
-
- CharArraySet cs = cm.keySet();
- int n=0;
- for (Object o : cs) {
- assertTrue(cm.containsKey(o));
- char[] co = (char[]) o;
- assertTrue(cm.containsKey(co, 0, co.length));
- n++;
- }
- assertEquals(hm.size(), n);
- assertEquals(hm.size(), cs.size());
- assertEquals(cm.size(), cs.size());
- cs.clear();
- assertEquals(0, cs.size());
- assertEquals(0, cm.size());
- // keySet() should not allow adding new keys
- expectThrows(UnsupportedOperationException.class, () -> {
- cs.add("test");
- });
-
- cm.putAll(hm);
- assertEquals(hm.size(), cs.size());
- assertEquals(cm.size(), cs.size());
-
- Iterator<Map.Entry<Object,Integer>> iter1 = cm.entrySet().iterator();
- n=0;
- while (iter1.hasNext()) {
- Map.Entry<Object,Integer> entry = iter1.next();
- Object key = entry.getKey();
- Integer val = entry.getValue();
- assertEquals(cm.get(key), val);
- entry.setValue(val*100);
- assertEquals(val*100, (int)cm.get(key));
- n++;
- }
- assertEquals(hm.size(), n);
- cm.clear();
- cm.putAll(hm);
- assertEquals(cm.size(), n);
-
- CharArrayMap<Integer>.EntryIterator iter2 = cm.entrySet().iterator();
- n=0;
- while (iter2.hasNext()) {
- char[] keyc = iter2.nextKey();
- Integer val = iter2.currentValue();
- assertEquals(hm.get(new String(keyc)), val);
- iter2.setValue(val*100);
- assertEquals(val*100, (int)cm.get(keyc));
- n++;
- }
- assertEquals(hm.size(), n);
-
- cm.entrySet().clear();
- assertEquals(0, cm.size());
- assertEquals(0, cm.entrySet().size());
- assertTrue(cm.isEmpty());
- }
-
- // TODO: break this up into simpler test methods vs. "telling a story"
- public void testModifyOnUnmodifiable(){
- CharArrayMap<Integer> map = new CharArrayMap<>(2, false);
- map.put("foo",1);
- map.put("bar",2);
- final int size = map.size();
- assertEquals(2, size);
- assertTrue(map.containsKey("foo"));
- assertEquals(1, map.get("foo").intValue());
- assertTrue(map.containsKey("bar"));
- assertEquals(2, map.get("bar").intValue());
-
- map = CharArrayMap.unmodifiableMap(map);
- assertEquals("Map size changed due to unmodifiableMap call" , size, map.size());
- String NOT_IN_MAP = "SirGallahad";
- assertFalse("Test String already exists in map", map.containsKey(NOT_IN_MAP));
- assertNull("Test String already exists in map", map.get(NOT_IN_MAP));
-
- try{
- map.put(NOT_IN_MAP.toCharArray(), 3);
- fail("Modified unmodifiable map");
- }catch (UnsupportedOperationException e) {
- // expected
- assertFalse("Test String has been added to unmodifiable map", map.containsKey(NOT_IN_MAP));
- assertNull("Test String has been added to unmodifiable map", map.get(NOT_IN_MAP));
- assertEquals("Size of unmodifiable map has changed", size, map.size());
- }
-
- try{
- map.put(NOT_IN_MAP, 3);
- fail("Modified unmodifiable map");
- }catch (UnsupportedOperationException e) {
- // expected
- assertFalse("Test String has been added to unmodifiable map", map.containsKey(NOT_IN_MAP));
- assertNull("Test String has been added to unmodifiable map", map.get(NOT_IN_MAP));
- assertEquals("Size of unmodifiable map has changed", size, map.size());
- }
-
- try{
- map.put(new StringBuilder(NOT_IN_MAP), 3);
- fail("Modified unmodifiable map");
- }catch (UnsupportedOperationException e) {
- // expected
- assertFalse("Test String has been added to unmodifiable map", map.containsKey(NOT_IN_MAP));
- assertNull("Test String has been added to unmodifiable map", map.get(NOT_IN_MAP));
- assertEquals("Size of unmodifiable map has changed", size, map.size());
- }
-
- try{
- map.clear();
- fail("Modified unmodifiable map");
- }catch (UnsupportedOperationException e) {
- // expected
- assertEquals("Size of unmodifiable map has changed", size, map.size());
- }
-
- try{
- map.entrySet().clear();
- fail("Modified unmodifiable map");
- }catch (UnsupportedOperationException e) {
- // expected
- assertEquals("Size of unmodifiable map has changed", size, map.size());
- }
-
- try{
- map.keySet().clear();
- fail("Modified unmodifiable map");
- }catch (UnsupportedOperationException e) {
- // expected
- assertEquals("Size of unmodifiable map has changed", size, map.size());
- }
-
- try{
- map.put((Object) NOT_IN_MAP, 3);
- fail("Modified unmodifiable map");
- }catch (UnsupportedOperationException e) {
- // expected
- assertFalse("Test String has been added to unmodifiable map", map.containsKey(NOT_IN_MAP));
- assertNull("Test String has been added to unmodifiable map", map.get(NOT_IN_MAP));
- assertEquals("Size of unmodifiable map has changed", size, map.size());
- }
-
- try{
- map.putAll(Collections.singletonMap(NOT_IN_MAP, 3));
- fail("Modified unmodifiable map");
- }catch (UnsupportedOperationException e) {
- // expected
- assertFalse("Test String has been added to unmodifiable map", map.containsKey(NOT_IN_MAP));
- assertNull("Test String has been added to unmodifiable map", map.get(NOT_IN_MAP));
- assertEquals("Size of unmodifiable map has changed", size, map.size());
- }
-
- assertTrue(map.containsKey("foo"));
- assertEquals(1, map.get("foo").intValue());
- assertTrue(map.containsKey("bar"));
- assertEquals(2, map.get("bar").intValue());
- }
-
- public void testToString() {
- CharArrayMap<Integer> cm = new CharArrayMap<>(Collections.singletonMap("test",1), false);
- assertEquals("[test]",cm.keySet().toString());
- assertEquals("[1]",cm.values().toString());
- assertEquals("[test=1]",cm.entrySet().toString());
- assertEquals("{test=1}",cm.toString());
- cm.put("test2", 2);
- assertTrue(cm.keySet().toString().contains(", "));
- assertTrue(cm.values().toString().contains(", "));
- assertTrue(cm.entrySet().toString().contains(", "));
- assertTrue(cm.toString().contains(", "));
- }
-}
-
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestCharArraySet.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestCharArraySet.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestCharArraySet.java
deleted file mode 100644
index 1fcee65..0000000
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestCharArraySet.java
+++ /dev/null
@@ -1,429 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.lucene.analysis.util;
-
-
-import java.util.*;
-
-import org.apache.lucene.util.LuceneTestCase;
-
-
-public class TestCharArraySet extends LuceneTestCase {
-
- static final String[] TEST_STOP_WORDS = {
- "a", "an", "and", "are", "as", "at", "be", "but", "by",
- "for", "if", "in", "into", "is", "it",
- "no", "not", "of", "on", "or", "such",
- "that", "the", "their", "then", "there", "these",
- "they", "this", "to", "was", "will", "with"
- };
-
-
- public void testRehash() throws Exception {
- CharArraySet cas = new CharArraySet(0, true);
- for(int i=0;i<TEST_STOP_WORDS.length;i++)
- cas.add(TEST_STOP_WORDS[i]);
- assertEquals(TEST_STOP_WORDS.length, cas.size());
- for(int i=0;i<TEST_STOP_WORDS.length;i++)
- assertTrue(cas.contains(TEST_STOP_WORDS[i]));
- }
-
- public void testNonZeroOffset() {
- String[] words={"Hello","World","this","is","a","test"};
- char[] findme="xthisy".toCharArray();
- CharArraySet set= new CharArraySet(10, true);
- set.addAll(Arrays.asList(words));
- assertTrue(set.contains(findme, 1, 4));
- assertTrue(set.contains(new String(findme,1,4)));
-
- // test unmodifiable
- set = CharArraySet.unmodifiableSet(set);
- assertTrue(set.contains(findme, 1, 4));
- assertTrue(set.contains(new String(findme,1,4)));
- }
-
- public void testObjectContains() {
- CharArraySet set = new CharArraySet(10, true);
- Integer val = Integer.valueOf(1);
- set.add(val);
- assertTrue(set.contains(val));
- assertTrue(set.contains(new Integer(1))); // another integer
- assertTrue(set.contains("1"));
- assertTrue(set.contains(new char[]{'1'}));
- // test unmodifiable
- set = CharArraySet.unmodifiableSet(set);
- assertTrue(set.contains(val));
- assertTrue(set.contains(new Integer(1))); // another integer
- assertTrue(set.contains("1"));
- assertTrue(set.contains(new char[]{'1'}));
- }
-
- public void testClear(){
- CharArraySet set=new CharArraySet(10,true);
- set.addAll(Arrays.asList(TEST_STOP_WORDS));
- assertEquals("Not all words added", TEST_STOP_WORDS.length, set.size());
- set.clear();
- assertEquals("not empty", 0, set.size());
- for(int i=0;i<TEST_STOP_WORDS.length;i++)
- assertFalse(set.contains(TEST_STOP_WORDS[i]));
- set.addAll(Arrays.asList(TEST_STOP_WORDS));
- assertEquals("Not all words added", TEST_STOP_WORDS.length, set.size());
- for(int i=0;i<TEST_STOP_WORDS.length;i++)
- assertTrue(set.contains(TEST_STOP_WORDS[i]));
- }
-
- // TODO: break this up into simpler test methods, vs "telling a story"
- public void testModifyOnUnmodifiable(){
- CharArraySet set=new CharArraySet(10, true);
- set.addAll(Arrays.asList(TEST_STOP_WORDS));
- final int size = set.size();
- set = CharArraySet.unmodifiableSet(set);
- assertEquals("Set size changed due to unmodifiableSet call" , size, set.size());
- String NOT_IN_SET = "SirGallahad";
- assertFalse("Test String already exists in set", set.contains(NOT_IN_SET));
-
- try{
- set.add(NOT_IN_SET.toCharArray());
- fail("Modified unmodifiable set");
- }catch (UnsupportedOperationException e) {
- // expected
- assertFalse("Test String has been added to unmodifiable set", set.contains(NOT_IN_SET));
- assertEquals("Size of unmodifiable set has changed", size, set.size());
- }
-
- try{
- set.add(NOT_IN_SET);
- fail("Modified unmodifiable set");
- }catch (UnsupportedOperationException e) {
- // expected
- assertFalse("Test String has been added to unmodifiable set", set.contains(NOT_IN_SET));
- assertEquals("Size of unmodifiable set has changed", size, set.size());
- }
-
- try{
- set.add(new StringBuilder(NOT_IN_SET));
- fail("Modified unmodifiable set");
- }catch (UnsupportedOperationException e) {
- // expected
- assertFalse("Test String has been added to unmodifiable set", set.contains(NOT_IN_SET));
- assertEquals("Size of unmodifiable set has changed", size, set.size());
- }
-
- try{
- set.clear();
- fail("Modified unmodifiable set");
- }catch (UnsupportedOperationException e) {
- // expected
- assertFalse("Changed unmodifiable set", set.contains(NOT_IN_SET));
- assertEquals("Size of unmodifiable set has changed", size, set.size());
- }
- try{
- set.add((Object) NOT_IN_SET);
- fail("Modified unmodifiable set");
- }catch (UnsupportedOperationException e) {
- // expected
- assertFalse("Test String has been added to unmodifiable set", set.contains(NOT_IN_SET));
- assertEquals("Size of unmodifiable set has changed", size, set.size());
- }
-
- // This test was changed in 3.1, as a contains() call on the given Collection using the "correct" iterator's
- // current key (now a char[]) on a Set<String> would not hit any element of the CAS and therefor never call
- // remove() on the iterator
- try{
- set.removeAll(new CharArraySet(Arrays.asList(TEST_STOP_WORDS), true));
- fail("Modified unmodifiable set");
- }catch (UnsupportedOperationException e) {
- // expected
- assertEquals("Size of unmodifiable set has changed", size, set.size());
- }
-
- try{
- set.retainAll(new CharArraySet(Arrays.asList(NOT_IN_SET), true));
- fail("Modified unmodifiable set");
- }catch (UnsupportedOperationException e) {
- // expected
- assertEquals("Size of unmodifiable set has changed", size, set.size());
- }
-
- try{
- set.addAll(Arrays.asList(NOT_IN_SET));
- fail("Modified unmodifiable set");
- }catch (UnsupportedOperationException e) {
- // expected
- assertFalse("Test String has been added to unmodifiable set", set.contains(NOT_IN_SET));
- }
-
- for (int i = 0; i < TEST_STOP_WORDS.length; i++) {
- assertTrue(set.contains(TEST_STOP_WORDS[i]));
- }
- }
-
- public void testUnmodifiableSet(){
- CharArraySet set = new CharArraySet(10,true);
- set.addAll(Arrays.asList(TEST_STOP_WORDS));
- set.add(Integer.valueOf(1));
- final int size = set.size();
- set = CharArraySet.unmodifiableSet(set);
- assertEquals("Set size changed due to unmodifiableSet call" , size, set.size());
- for (String stopword : TEST_STOP_WORDS) {
- assertTrue(set.contains(stopword));
- }
- assertTrue(set.contains(Integer.valueOf(1)));
- assertTrue(set.contains("1"));
- assertTrue(set.contains(new char[]{'1'}));
-
- expectThrows(NullPointerException.class, () -> {
- CharArraySet.unmodifiableSet(null);
- });
- }
-
- public void testSupplementaryChars() {
- String missing = "Term %s is missing in the set";
- String falsePos = "Term %s is in the set but shouldn't";
- // for reference see
- // http://unicode.org/cldr/utility/list-unicodeset.jsp?a=[[%3ACase_Sensitive%3DTrue%3A]%26[^[\u0000-\uFFFF]]]&esc=on
- String[] upperArr = new String[] {"Abc\ud801\udc1c",
- "\ud801\udc1c\ud801\udc1cCDE", "A\ud801\udc1cB"};
- String[] lowerArr = new String[] {"abc\ud801\udc44",
- "\ud801\udc44\ud801\udc44cde", "a\ud801\udc44b"};
- CharArraySet set = new CharArraySet(Arrays.asList(TEST_STOP_WORDS), true);
- for (String upper : upperArr) {
- set.add(upper);
- }
- for (int i = 0; i < upperArr.length; i++) {
- assertTrue(String.format(Locale.ROOT, missing, upperArr[i]), set.contains(upperArr[i]));
- assertTrue(String.format(Locale.ROOT, missing, lowerArr[i]), set.contains(lowerArr[i]));
- }
- set = new CharArraySet(Arrays.asList(TEST_STOP_WORDS), false);
- for (String upper : upperArr) {
- set.add(upper);
- }
- for (int i = 0; i < upperArr.length; i++) {
- assertTrue(String.format(Locale.ROOT, missing, upperArr[i]), set.contains(upperArr[i]));
- assertFalse(String.format(Locale.ROOT, falsePos, lowerArr[i]), set.contains(lowerArr[i]));
- }
- }
-
- public void testSingleHighSurrogate() {
- String missing = "Term %s is missing in the set";
- String falsePos = "Term %s is in the set but shouldn't";
- String[] upperArr = new String[] { "ABC\uD800", "ABC\uD800EfG",
- "\uD800EfG", "\uD800\ud801\udc1cB" };
-
- String[] lowerArr = new String[] { "abc\uD800", "abc\uD800efg",
- "\uD800efg", "\uD800\ud801\udc44b" };
- CharArraySet set = new CharArraySet(Arrays
- .asList(TEST_STOP_WORDS), true);
- for (String upper : upperArr) {
- set.add(upper);
- }
- for (int i = 0; i < upperArr.length; i++) {
- assertTrue(String.format(Locale.ROOT, missing, upperArr[i]), set.contains(upperArr[i]));
- assertTrue(String.format(Locale.ROOT, missing, lowerArr[i]), set.contains(lowerArr[i]));
- }
- set = new CharArraySet(Arrays.asList(TEST_STOP_WORDS),
- false);
- for (String upper : upperArr) {
- set.add(upper);
- }
- for (int i = 0; i < upperArr.length; i++) {
- assertTrue(String.format(Locale.ROOT, missing, upperArr[i]), set.contains(upperArr[i]));
- assertFalse(String.format(Locale.ROOT, falsePos, upperArr[i]), set
- .contains(lowerArr[i]));
- }
- }
-
- @SuppressWarnings("deprecated")
- public void testCopyCharArraySetBWCompat() {
- CharArraySet setIngoreCase = new CharArraySet(10, true);
- CharArraySet setCaseSensitive = new CharArraySet(10, false);
-
- List<String> stopwords = Arrays.asList(TEST_STOP_WORDS);
- List<String> stopwordsUpper = new ArrayList<>();
- for (String string : stopwords) {
- stopwordsUpper.add(string.toUpperCase(Locale.ROOT));
- }
- setIngoreCase.addAll(Arrays.asList(TEST_STOP_WORDS));
- setIngoreCase.add(Integer.valueOf(1));
- setCaseSensitive.addAll(Arrays.asList(TEST_STOP_WORDS));
- setCaseSensitive.add(Integer.valueOf(1));
-
- CharArraySet copy = CharArraySet.copy(setIngoreCase);
- CharArraySet copyCaseSens = CharArraySet.copy(setCaseSensitive);
-
- assertEquals(setIngoreCase.size(), copy.size());
- assertEquals(setCaseSensitive.size(), copy.size());
-
- assertTrue(copy.containsAll(stopwords));
- assertTrue(copy.containsAll(stopwordsUpper));
- assertTrue(copyCaseSens.containsAll(stopwords));
- for (String string : stopwordsUpper) {
- assertFalse(copyCaseSens.contains(string));
- }
- // test adding terms to the copy
- List<String> newWords = new ArrayList<>();
- for (String string : stopwords) {
- newWords.add(string+"_1");
- }
- copy.addAll(newWords);
-
- assertTrue(copy.containsAll(stopwords));
- assertTrue(copy.containsAll(stopwordsUpper));
- assertTrue(copy.containsAll(newWords));
- // new added terms are not in the source set
- for (String string : newWords) {
- assertFalse(setIngoreCase.contains(string));
- assertFalse(setCaseSensitive.contains(string));
-
- }
- }
-
- /**
- * Test the static #copy() function with a CharArraySet as a source
- */
- public void testCopyCharArraySet() {
- CharArraySet setIngoreCase = new CharArraySet(10, true);
- CharArraySet setCaseSensitive = new CharArraySet(10, false);
-
- List<String> stopwords = Arrays.asList(TEST_STOP_WORDS);
- List<String> stopwordsUpper = new ArrayList<>();
- for (String string : stopwords) {
- stopwordsUpper.add(string.toUpperCase(Locale.ROOT));
- }
- setIngoreCase.addAll(Arrays.asList(TEST_STOP_WORDS));
- setIngoreCase.add(Integer.valueOf(1));
- setCaseSensitive.addAll(Arrays.asList(TEST_STOP_WORDS));
- setCaseSensitive.add(Integer.valueOf(1));
-
- CharArraySet copy = CharArraySet.copy(setIngoreCase);
- CharArraySet copyCaseSens = CharArraySet.copy(setCaseSensitive);
-
- assertEquals(setIngoreCase.size(), copy.size());
- assertEquals(setCaseSensitive.size(), copy.size());
-
- assertTrue(copy.containsAll(stopwords));
- assertTrue(copy.containsAll(stopwordsUpper));
- assertTrue(copyCaseSens.containsAll(stopwords));
- for (String string : stopwordsUpper) {
- assertFalse(copyCaseSens.contains(string));
- }
- // test adding terms to the copy
- List<String> newWords = new ArrayList<>();
- for (String string : stopwords) {
- newWords.add(string+"_1");
- }
- copy.addAll(newWords);
-
- assertTrue(copy.containsAll(stopwords));
- assertTrue(copy.containsAll(stopwordsUpper));
- assertTrue(copy.containsAll(newWords));
- // new added terms are not in the source set
- for (String string : newWords) {
- assertFalse(setIngoreCase.contains(string));
- assertFalse(setCaseSensitive.contains(string));
-
- }
- }
-
- /**
- * Test the static #copy() function with a JDK {@link Set} as a source
- */
- public void testCopyJDKSet() {
- Set<String> set = new HashSet<>();
-
- List<String> stopwords = Arrays.asList(TEST_STOP_WORDS);
- List<String> stopwordsUpper = new ArrayList<>();
- for (String string : stopwords) {
- stopwordsUpper.add(string.toUpperCase(Locale.ROOT));
- }
- set.addAll(Arrays.asList(TEST_STOP_WORDS));
-
- CharArraySet copy = CharArraySet.copy(set);
-
- assertEquals(set.size(), copy.size());
- assertEquals(set.size(), copy.size());
-
- assertTrue(copy.containsAll(stopwords));
- for (String string : stopwordsUpper) {
- assertFalse(copy.contains(string));
- }
-
- List<String> newWords = new ArrayList<>();
- for (String string : stopwords) {
- newWords.add(string+"_1");
- }
- copy.addAll(newWords);
-
- assertTrue(copy.containsAll(stopwords));
- assertTrue(copy.containsAll(newWords));
- // new added terms are not in the source set
- for (String string : newWords) {
- assertFalse(set.contains(string));
- }
- }
-
- /**
- * Tests a special case of {@link CharArraySet#copy(Set)} where the
- * set to copy is the {@link CharArraySet#EMPTY_SET}
- */
- public void testCopyEmptySet() {
- assertSame(CharArraySet.EMPTY_SET,
- CharArraySet.copy(CharArraySet.EMPTY_SET));
- }
-
- /**
- * Smoketests the static empty set
- */
- public void testEmptySet() {
- assertEquals(0, CharArraySet.EMPTY_SET.size());
-
- assertTrue(CharArraySet.EMPTY_SET.isEmpty());
- for (String stopword : TEST_STOP_WORDS) {
- assertFalse(CharArraySet.EMPTY_SET.contains(stopword));
- }
- assertFalse(CharArraySet.EMPTY_SET.contains("foo"));
- assertFalse(CharArraySet.EMPTY_SET.contains((Object) "foo"));
- assertFalse(CharArraySet.EMPTY_SET.contains("foo".toCharArray()));
- assertFalse(CharArraySet.EMPTY_SET.contains("foo".toCharArray(),0,3));
- }
-
- /**
- * Test for NPE
- */
- public void testContainsWithNull() {
- CharArraySet set = new CharArraySet(1, true);
-
- expectThrows(NullPointerException.class, () -> {
- set.contains((char[]) null, 0, 10);
- });
-
- expectThrows(NullPointerException.class, () -> {
- set.contains((CharSequence) null);
- });
-
- expectThrows(NullPointerException.class, () -> {
- set.contains((Object) null);
- });
- }
-
- public void testToString() {
- CharArraySet set = CharArraySet.copy(Collections.singleton("test"));
- assertEquals("[test]", set.toString());
- set.add("test2");
- assertTrue(set.toString().contains(", "));
- }
-}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestCharacterUtils.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestCharacterUtils.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestCharacterUtils.java
deleted file mode 100644
index 04e96ea..0000000
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestCharacterUtils.java
+++ /dev/null
@@ -1,107 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.lucene.analysis.util;
-
-
-import java.io.IOException;
-import java.io.Reader;
-import java.io.StringReader;
-import java.util.Arrays;
-
-import org.apache.lucene.analysis.util.CharacterUtils.CharacterBuffer;
-import org.apache.lucene.util.LuceneTestCase;
-import org.apache.lucene.util.TestUtil;
-import org.junit.Test;
-
-/**
- * TestCase for the {@link CharacterUtils} class.
- */
-public class TestCharacterUtils extends LuceneTestCase {
-
- public void testConversions() {
- final char[] orig = TestUtil.randomUnicodeString(random(), 100).toCharArray();
- final int[] buf = new int[orig.length];
- final char[] restored = new char[buf.length];
- final int o1 = TestUtil.nextInt(random(), 0, Math.min(5, orig.length));
- final int o2 = TestUtil.nextInt(random(), 0, o1);
- final int o3 = TestUtil.nextInt(random(), 0, o1);
- final int codePointCount = CharacterUtils.toCodePoints(orig, o1, orig.length - o1, buf, o2);
- final int charCount = CharacterUtils.toChars(buf, o2, codePointCount, restored, o3);
- assertEquals(orig.length - o1, charCount);
- assertArrayEquals(Arrays.copyOfRange(orig, o1, o1 + charCount), Arrays.copyOfRange(restored, o3, o3 + charCount));
- }
-
- @Test
- public void testNewCharacterBuffer() {
- CharacterBuffer newCharacterBuffer = CharacterUtils.newCharacterBuffer(1024);
- assertEquals(1024, newCharacterBuffer.getBuffer().length);
- assertEquals(0, newCharacterBuffer.getOffset());
- assertEquals(0, newCharacterBuffer.getLength());
-
- newCharacterBuffer = CharacterUtils.newCharacterBuffer(2);
- assertEquals(2, newCharacterBuffer.getBuffer().length);
- assertEquals(0, newCharacterBuffer.getOffset());
- assertEquals(0, newCharacterBuffer.getLength());
-
- // length must be >= 2
- expectThrows(IllegalArgumentException.class, () -> {
- CharacterUtils.newCharacterBuffer(1);
- });
- }
-
- @Test
- public void testFillNoHighSurrogate() throws IOException {
- Reader reader = new StringReader("helloworld");
- CharacterBuffer buffer = CharacterUtils.newCharacterBuffer(6);
- assertTrue(CharacterUtils.fill(buffer,reader));
- assertEquals(0, buffer.getOffset());
- assertEquals(6, buffer.getLength());
- assertEquals("hellow", new String(buffer.getBuffer()));
- assertFalse(CharacterUtils.fill(buffer,reader));
- assertEquals(4, buffer.getLength());
- assertEquals(0, buffer.getOffset());
-
- assertEquals("orld", new String(buffer.getBuffer(), buffer.getOffset(),
- buffer.getLength()));
- assertFalse(CharacterUtils.fill(buffer,reader));
- }
-
- @Test
- public void testFill() throws IOException {
- String input = "1234\ud801\udc1c789123\ud801\ud801\udc1c\ud801";
- Reader reader = new StringReader(input);
- CharacterBuffer buffer = CharacterUtils.newCharacterBuffer(5);
- assertTrue(CharacterUtils.fill(buffer, reader));
- assertEquals(4, buffer.getLength());
- assertEquals("1234", new String(buffer.getBuffer(), buffer.getOffset(),
- buffer.getLength()));
- assertTrue(CharacterUtils.fill(buffer, reader));
- assertEquals(5, buffer.getLength());
- assertEquals("\ud801\udc1c789", new String(buffer.getBuffer()));
- assertTrue(CharacterUtils.fill(buffer, reader));
- assertEquals(4, buffer.getLength());
- assertEquals("123\ud801", new String(buffer.getBuffer(),
- buffer.getOffset(), buffer.getLength()));
- assertFalse(CharacterUtils.fill(buffer, reader));
- assertEquals(3, buffer.getLength());
- assertEquals("\ud801\udc1c\ud801", new String(buffer.getBuffer(), buffer
- .getOffset(), buffer.getLength()));
- assertFalse(CharacterUtils.fill(buffer, reader));
- assertEquals(0, buffer.getLength());
- }
-
-}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestElision.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestElision.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestElision.java
index 5e1d3c1..f8c1198 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestElision.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestElision.java
@@ -24,13 +24,13 @@ import java.util.List;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.KeywordTokenizer;
import org.apache.lucene.analysis.fr.FrenchAnalyzer;
import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
-import org.apache.lucene.analysis.util.CharArraySet;
/**
*
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestFilesystemResourceLoader.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestFilesystemResourceLoader.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestFilesystemResourceLoader.java
index be90611..eaa6174 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestFilesystemResourceLoader.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestFilesystemResourceLoader.java
@@ -24,6 +24,8 @@ import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
+import org.apache.lucene.analysis.CharArraySet;
+import org.apache.lucene.analysis.WordlistLoader;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.LuceneTestCase;
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestWordlistLoader.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestWordlistLoader.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestWordlistLoader.java
deleted file mode 100644
index b1dd1b5..0000000
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestWordlistLoader.java
+++ /dev/null
@@ -1,79 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.lucene.analysis.util;
-
-
-import java.io.BufferedReader;
-import java.io.IOException;
-import java.io.StringReader;
-
-import org.apache.lucene.util.LuceneTestCase;
-
-import org.apache.lucene.analysis.util.WordlistLoader;
-
-public class TestWordlistLoader extends LuceneTestCase {
-
- public void testWordlistLoading() throws IOException {
- String s = "ONE\n two \nthree";
- CharArraySet wordSet1 = WordlistLoader.getWordSet(new StringReader(s));
- checkSet(wordSet1);
- CharArraySet wordSet2 = WordlistLoader.getWordSet(new BufferedReader(new StringReader(s)));
- checkSet(wordSet2);
- }
-
- public void testComments() throws Exception {
- String s = "ONE\n two \nthree\n#comment";
- CharArraySet wordSet1 = WordlistLoader.getWordSet(new StringReader(s), "#");
- checkSet(wordSet1);
- assertFalse(wordSet1.contains("#comment"));
- assertFalse(wordSet1.contains("comment"));
- }
-
-
- private void checkSet(CharArraySet wordset) {
- assertEquals(3, wordset.size());
- assertTrue(wordset.contains("ONE")); // case is not modified
- assertTrue(wordset.contains("two")); // surrounding whitespace is removed
- assertTrue(wordset.contains("three"));
- assertFalse(wordset.contains("four"));
- }
-
- /**
- * Test stopwords in snowball format
- */
- public void testSnowballListLoading() throws IOException {
- String s =
- "|comment\n" + // commented line
- " |comment\n" + // commented line with leading whitespace
- "\n" + // blank line
- " \t\n" + // line with only whitespace
- " |comment | comment\n" + // commented line with comment
- "ONE\n" + // stopword, in uppercase
- " two \n" + // stopword with leading/trailing space
- " three four five \n" + // multiple stopwords
- "six seven | comment\n"; //multiple stopwords + comment
- CharArraySet wordset = WordlistLoader.getSnowballWordSet(new StringReader(s));
- assertEquals(7, wordset.size());
- assertTrue(wordset.contains("ONE"));
- assertTrue(wordset.contains("two"));
- assertTrue(wordset.contains("three"));
- assertTrue(wordset.contains("four"));
- assertTrue(wordset.contains("five"));
- assertTrue(wordset.contains("six"));
- assertTrue(wordset.contains("seven"));
- }
-}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/icu/src/java/overview.html
----------------------------------------------------------------------
diff --git a/lucene/analysis/icu/src/java/overview.html b/lucene/analysis/icu/src/java/overview.html
index abb2e2a..bdace97 100644
--- a/lucene/analysis/icu/src/java/overview.html
+++ b/lucene/analysis/icu/src/java/overview.html
@@ -103,7 +103,7 @@ algorithm.
</li>
<li>
Effective Locale-specific normalization (case differences, diacritics, etc.).
- ({@link org.apache.lucene.analysis.core.LowerCaseFilter} and
+ ({@link org.apache.lucene.analysis.LowerCaseFilter} and
{@link org.apache.lucene.analysis.miscellaneous.ASCIIFoldingFilter} provide these services
in a generic way that doesn't take into account locale-specific needs.)
</li>
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/icu/src/test/org/apache/lucene/analysis/icu/segmentation/TestWithCJKBigramFilter.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/icu/src/test/org/apache/lucene/analysis/icu/segmentation/TestWithCJKBigramFilter.java b/lucene/analysis/icu/src/test/org/apache/lucene/analysis/icu/segmentation/TestWithCJKBigramFilter.java
index f2fd50a..17ea967 100644
--- a/lucene/analysis/icu/src/test/org/apache/lucene/analysis/icu/segmentation/TestWithCJKBigramFilter.java
+++ b/lucene/analysis/icu/src/test/org/apache/lucene/analysis/icu/segmentation/TestWithCJKBigramFilter.java
@@ -21,12 +21,12 @@ import java.io.IOException;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.CharArraySet;
+import org.apache.lucene.analysis.StopFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.cjk.CJKBigramFilter;
-import org.apache.lucene.analysis.core.StopFilter;
import org.apache.lucene.analysis.icu.ICUNormalizer2Filter;
-import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.util.IOUtils;
/**
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseAnalyzer.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseAnalyzer.java b/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseAnalyzer.java
index bff30f1..46d40b1 100644
--- a/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseAnalyzer.java
+++ b/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseAnalyzer.java
@@ -21,15 +21,15 @@ import java.io.IOException;
import java.util.HashSet;
import java.util.Set;
+import org.apache.lucene.analysis.CharArraySet;
+import org.apache.lucene.analysis.LowerCaseFilter;
+import org.apache.lucene.analysis.StopFilter;
+import org.apache.lucene.analysis.StopwordAnalyzerBase;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.cjk.CJKWidthFilter;
-import org.apache.lucene.analysis.core.LowerCaseFilter;
-import org.apache.lucene.analysis.core.StopFilter;
import org.apache.lucene.analysis.ja.JapaneseTokenizer.Mode;
import org.apache.lucene.analysis.ja.dict.UserDictionary;
-import org.apache.lucene.analysis.util.CharArraySet;
-import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
/**
* Analyzer for Japanese that uses morphological analysis.
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseNumberFilter.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseNumberFilter.java b/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseNumberFilter.java
index b8d0a78..a1af95e 100644
--- a/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseNumberFilter.java
+++ b/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseNumberFilter.java
@@ -44,7 +44,7 @@ import org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute;
* input tokens tokens \uff13 and \uff12\u5343 and give outputs 3 and 2000 instead of 3200, which is
* likely not the intended result. If you want to remove punctuation characters from your
* index that are not part of normalized numbers, add a
- * {@link org.apache.lucene.analysis.core.StopFilter} with the punctuation you wish to
+ * {@link org.apache.lucene.analysis.StopFilter} with the punctuation you wish to
* remove after {@link JapaneseNumberFilter} in your analyzer chain.
* <p>
* Below are some examples of normalizations this filter supports. The input is untokenized
@@ -615,4 +615,4 @@ public class JapaneseNumberFilter extends TokenFilter {
return position;
}
}
-}
\ No newline at end of file
+}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapanesePartOfSpeechStopFilter.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapanesePartOfSpeechStopFilter.java b/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapanesePartOfSpeechStopFilter.java
index 0ee9ccf..342295d 100644
--- a/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapanesePartOfSpeechStopFilter.java
+++ b/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapanesePartOfSpeechStopFilter.java
@@ -19,9 +19,9 @@ package org.apache.lucene.analysis.ja;
import java.util.Set;
-import org.apache.lucene.analysis.ja.tokenattributes.PartOfSpeechAttribute;
-import org.apache.lucene.analysis.util.FilteringTokenFilter;
+import org.apache.lucene.analysis.FilteringTokenFilter;
import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.ja.tokenattributes.PartOfSpeechAttribute;
/**
* Removes tokens that match a set of part-of-speech tags.
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapanesePartOfSpeechStopFilterFactory.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapanesePartOfSpeechStopFilterFactory.java b/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapanesePartOfSpeechStopFilterFactory.java
index 8b5483c..a59de44 100644
--- a/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapanesePartOfSpeechStopFilterFactory.java
+++ b/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapanesePartOfSpeechStopFilterFactory.java
@@ -22,8 +22,8 @@ import java.util.HashSet;
import java.util.Map;
import java.util.Set;
+import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.analysis.util.ResourceLoader;
import org.apache.lucene.analysis.util.ResourceLoaderAware;
import org.apache.lucene.analysis.util.TokenFilterFactory;
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseBaseFormFilter.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseBaseFormFilter.java b/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseBaseFormFilter.java
index b9ebd36..ab6c473 100644
--- a/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseBaseFormFilter.java
+++ b/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseBaseFormFilter.java
@@ -21,11 +21,11 @@ import java.io.IOException;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.KeywordTokenizer;
import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
-import org.apache.lucene.analysis.util.CharArraySet;
public class TestJapaneseBaseFormFilter extends BaseTokenStreamTestCase {
private Analyzer analyzer;
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseKatakanaStemFilter.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseKatakanaStemFilter.java b/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseKatakanaStemFilter.java
index bd14be3..bc57f56 100644
--- a/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseKatakanaStemFilter.java
+++ b/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseKatakanaStemFilter.java
@@ -17,16 +17,16 @@
package org.apache.lucene.analysis.ja;
+import java.io.IOException;
+
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.KeywordTokenizer;
import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
-import org.apache.lucene.analysis.util.CharArraySet;
-
-import java.io.IOException;
/**
* Tests for {@link JapaneseKatakanaStemFilter}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseNumberFilter.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseNumberFilter.java b/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseNumberFilter.java
index 27cef33..b8a987a 100644
--- a/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseNumberFilter.java
+++ b/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseNumberFilter.java
@@ -27,11 +27,11 @@ import java.nio.file.Paths;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
-import org.apache.lucene.analysis.util.CharArraySet;
import org.junit.Ignore;
import org.junit.Test;
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/morfologik/MorfologikFilter.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/morfologik/MorfologikFilter.java b/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/morfologik/MorfologikFilter.java
index 3429d86..b35523e 100644
--- a/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/morfologik/MorfologikFilter.java
+++ b/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/morfologik/MorfologikFilter.java
@@ -23,12 +23,6 @@ import java.util.Collections;
import java.util.List;
import java.util.regex.Pattern;
-import morfologik.stemming.Dictionary;
-import morfologik.stemming.DictionaryLookup;
-import morfologik.stemming.IStemmer;
-import morfologik.stemming.WordData;
-import morfologik.stemming.polish.PolishStemmer;
-
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
@@ -36,6 +30,12 @@ import org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.util.CharsRefBuilder;
+import morfologik.stemming.Dictionary;
+import morfologik.stemming.DictionaryLookup;
+import morfologik.stemming.IStemmer;
+import morfologik.stemming.WordData;
+import morfologik.stemming.polish.PolishStemmer;
+
/**
* {@link TokenFilter} using Morfologik library to transform input tokens into lemma and
* morphosyntactic (POS) tokens. Applies to Polish only.
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/morfologik/src/test/org/apache/lucene/analysis/morfologik/TestMorfologikAnalyzer.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/morfologik/src/test/org/apache/lucene/analysis/morfologik/TestMorfologikAnalyzer.java b/lucene/analysis/morfologik/src/test/org/apache/lucene/analysis/morfologik/TestMorfologikAnalyzer.java
index d8967c7..c4294e3 100644
--- a/lucene/analysis/morfologik/src/test/org/apache/lucene/analysis/morfologik/TestMorfologikAnalyzer.java
+++ b/lucene/analysis/morfologik/src/test/org/apache/lucene/analysis/morfologik/TestMorfologikAnalyzer.java
@@ -22,13 +22,13 @@ import java.util.TreeSet;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
import org.apache.lucene.analysis.standard.StandardFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
-import org.apache.lucene.analysis.util.CharArraySet;
/**
* TODO: The tests below rely on the order of returned lemmas, which is probably not good.
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/SmartChineseAnalyzer.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/SmartChineseAnalyzer.java b/lucene/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/SmartChineseAnalyzer.java
index bd1fc7b..5f0347b 100644
--- a/lucene/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/SmartChineseAnalyzer.java
+++ b/lucene/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/SmartChineseAnalyzer.java
@@ -21,12 +21,12 @@ import java.nio.charset.StandardCharsets;
import java.util.Set;
import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.CharArraySet;
+import org.apache.lucene.analysis.StopFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
-import org.apache.lucene.analysis.core.StopFilter;
+import org.apache.lucene.analysis.WordlistLoader;
import org.apache.lucene.analysis.en.PorterStemFilter;
-import org.apache.lucene.analysis.util.CharArraySet;
-import org.apache.lucene.analysis.util.WordlistLoader;
import org.apache.lucene.util.IOUtils;
/**
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/stempel/src/java/org/apache/lucene/analysis/pl/PolishAnalyzer.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/stempel/src/java/org/apache/lucene/analysis/pl/PolishAnalyzer.java b/lucene/analysis/stempel/src/java/org/apache/lucene/analysis/pl/PolishAnalyzer.java
index 999ce86..6ed4fda 100644
--- a/lucene/analysis/stempel/src/java/org/apache/lucene/analysis/pl/PolishAnalyzer.java
+++ b/lucene/analysis/stempel/src/java/org/apache/lucene/analysis/pl/PolishAnalyzer.java
@@ -22,18 +22,18 @@ import java.io.Reader;
import java.nio.charset.StandardCharsets;
import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.core.LowerCaseFilter;
-import org.apache.lucene.analysis.core.StopFilter;
-import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
+import org.apache.lucene.analysis.CharArraySet;
+import org.apache.lucene.analysis.LowerCaseFilter;
+import org.apache.lucene.analysis.StopFilter;
+import org.apache.lucene.analysis.StopwordAnalyzerBase;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.WordlistLoader;
+import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
import org.apache.lucene.analysis.standard.StandardFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
-import org.apache.lucene.analysis.stempel.StempelStemmer;
import org.apache.lucene.analysis.stempel.StempelFilter;
-import org.apache.lucene.analysis.util.CharArraySet;
-import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
-import org.apache.lucene.analysis.util.WordlistLoader;
+import org.apache.lucene.analysis.stempel.StempelStemmer;
import org.apache.lucene.util.IOUtils;
import org.egothor.stemmer.Trie;
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/stempel/src/test/org/apache/lucene/analysis/pl/TestPolishAnalyzer.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/stempel/src/test/org/apache/lucene/analysis/pl/TestPolishAnalyzer.java b/lucene/analysis/stempel/src/test/org/apache/lucene/analysis/pl/TestPolishAnalyzer.java
index b0ef008..c37cedb 100644
--- a/lucene/analysis/stempel/src/test/org/apache/lucene/analysis/pl/TestPolishAnalyzer.java
+++ b/lucene/analysis/stempel/src/test/org/apache/lucene/analysis/pl/TestPolishAnalyzer.java
@@ -21,7 +21,7 @@ import java.io.IOException;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
-import org.apache.lucene.analysis.util.CharArraySet;
+import org.apache.lucene.analysis.CharArraySet;
public class TestPolishAnalyzer extends BaseTokenStreamTestCase {
/** This test fails with NPE when the
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/common-build.xml
----------------------------------------------------------------------
diff --git a/lucene/common-build.xml b/lucene/common-build.xml
index b4074ac..94b7910 100644
--- a/lucene/common-build.xml
+++ b/lucene/common-build.xml
@@ -2585,4 +2585,34 @@ The following arguments can be provided to ant to alter its behaviour and target
</sequential>
</macrodef>
+ <macrodef name="run-jflex">
+ <attribute name="dir"/>
+ <attribute name="name"/>
+ <sequential>
+ <jflex file="@{dir}/@{name}.jflex" outdir="@{dir}" nobak="on" inputstreamctor="false"/>
+ </sequential>
+ </macrodef>
+
+ <macrodef name="run-jflex-and-disable-buffer-expansion">
+ <attribute name="dir"/>
+ <attribute name="name"/>
+ <sequential>
+ <jflex file="@{dir}/@{name}.jflex" outdir="@{dir}" nobak="on" inputstreamctor="false"/>
+ <!-- LUCENE-5897: Disallow scanner buffer expansion -->
+ <replaceregexp file="@{dir}/@{name}.java"
+ match="[ \t]*/\* is the buffer big enough\? \*/\s+if \(zzCurrentPos >= zzBuffer\.length.*?\}[ \t]*\r?\n"
+ replace="" flags="s" />
+ <replaceregexp file="@{dir}/@{name}.java"
+ match="private static final int ZZ_BUFFERSIZE ="
+ replace="private int ZZ_BUFFERSIZE ="/>
+ <replaceregexp file="@{dir}/@{name}.java"
+ match="int requested = zzBuffer.length - zzEndRead;"
+ replace="int requested = zzBuffer.length - zzEndRead - zzFinalHighSurrogate;"/>
+ <replaceregexp file="@{dir}/@{name}.java"
+ match="(zzFinalHighSurrogate = 1;)(\r?\n)"
+ replace="\1\2 if (totalRead == 1) { return true; }\2"/>
+ </sequential>
+ </macrodef>
+
+
</project>
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/core/build.xml
----------------------------------------------------------------------
diff --git a/lucene/core/build.xml b/lucene/core/build.xml
index 90da238..4e62e1c 100644
--- a/lucene/core/build.xml
+++ b/lucene/core/build.xml
@@ -133,7 +133,7 @@
<delete file="${build.dir}/moman.zip"/>
</target>
- <target name="regenerate" depends="createLevAutomata,createPackedIntSources"/>
+ <target name="regenerate" depends="createLevAutomata,createPackedIntSources,jflex"/>
<macrodef name="startLockStressTestClient">
<attribute name="clientId"/>
@@ -223,4 +223,20 @@
<target name="test" depends="common.test, test-lock-factory"/>
+ <target name="clean-jflex">
+ <delete>
+ <fileset dir="src/java/org/apache/lucene/analysis/standard" includes="**/*.java">
+ <containsregexp expression="generated.*by.*JFlex"/>
+ </fileset>
+ </delete>
+ </target>
+
+ <target name="jflex" depends="-install-jflex,clean-jflex,-jflex-StandardAnalyzer"/>
+
+ <target name="-jflex-StandardAnalyzer" depends="init,-install-jflex">
+ <run-jflex-and-disable-buffer-expansion
+ dir="src/java/org/apache/lucene/analysis/standard" name="StandardTokenizerImpl"/>
+ </target>
+
+
</project>
[07/12] lucene-solr:branch_6x: LUCENE-7318: graduate StandardAnalyzer
and make it the default for IndexWriterConfig
Posted by mi...@apache.org.
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/common/src/test/org/apache/lucene/analysis/standard/WordBreakTestUnicode_6_3_0.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/standard/WordBreakTestUnicode_6_3_0.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/standard/WordBreakTestUnicode_6_3_0.java
deleted file mode 100644
index 4a3731e..0000000
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/standard/WordBreakTestUnicode_6_3_0.java
+++ /dev/null
@@ -1,5537 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.lucene.analysis.standard;
-
-
-import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.BaseTokenStreamTestCase;
-import org.junit.Ignore;
-
-/**
- * This class was automatically generated by generateJavaUnicodeWordBreakTest.pl
- * from: http://www.unicode.org/Public/6.3.0/ucd/auxiliary/WordBreakTest.txt
- *
- * WordBreakTest.txt indicates the points in the provided character sequences
- * at which conforming implementations must and must not break words. This
- * class tests for expected token extraction from each of the test sequences
- * in WordBreakTest.txt, where the expected tokens are those character
- * sequences bounded by word breaks and containing at least one character
- * from one of the following character sets:
- *
- * \p{Script = Han} (From http://www.unicode.org/Public/6.3.0/ucd/Scripts.txt)
- * \p{Script = Hiragana}
- * \p{LineBreak = Complex_Context} (From http://www.unicode.org/Public/6.3.0/ucd/LineBreak.txt)
- * \p{WordBreak = ALetter} (From http://www.unicode.org/Public/6.3.0/ucd/auxiliary/WordBreakProperty.txt)
- * \p{WordBreak = Hebrew_Letter}
- * \p{WordBreak = Katakana}
- * \p{WordBreak = Numeric} (Excludes full-width Arabic digits)
- * [\uFF10-\uFF19] (Full-width Arabic digits)
- */
-@Ignore
-public class WordBreakTestUnicode_6_3_0 extends BaseTokenStreamTestCase {
-
- public void test(Analyzer analyzer) throws Exception {
- // � 0001 � 0001 � # � [0.2] <START OF HEADING> (Other) � [999.0] <START OF HEADING> (Other) � [0.3]
- assertAnalyzesTo(analyzer, "\u0001\u0001",
- new String[] { });
-
- // � 0001 � 0308 � 0001 � # � [0.2] <START OF HEADING> (Other) � [4.0] COMBINING DIAERESIS (Extend_FE) � [999.0] <START OF HEADING> (Other) � [0.3]
- assertAnalyzesTo(analyzer, "\u0001\u0308\u0001",
- new String[] { });
-
- // � 0001 � 000D � # � [0.2] <START OF HEADING> (Other) � [3.2] <CARRIAGE RETURN (CR)> (CR) � [0.3]
- assertAnalyzesTo(analyzer, "\u0001\r",
- new String[] { });
-
- // � 0001 � 0308 � 000D � # � [0.2] <START OF HEADING> (Other) � [4.0] COMBINING DIAERESIS (Extend_FE) � [3.2] <CARRIAGE RETURN (CR)> (CR) � [0.3]
- assertAnalyzesTo(analyzer, "\u0001\u0308\r",
- new String[] { });
-
- // � 0001 � 000A � # � [0.2] <START OF HEADING> (Other) � [3.2] <LINE FEED (LF)> (LF) � [0.3]
- assertAnalyzesTo(analyzer, "\u0001\n",
- new String[] { });
-
- // � 0001 � 0308 � 000A � # � [0.2] <START OF HEADING> (Other) � [4.0] COMBINING DIAERESIS (Extend_FE) � [3.2] <LINE FEED (LF)> (LF) � [0.3]
- assertAnalyzesTo(analyzer, "\u0001\u0308\n",
- new String[] { });
-
- // � 0001 � 000B � # � [0.2] <START OF HEADING> (Other) � [3.2] <LINE TABULATION> (Newline) � [0.3]
- assertAnalyzesTo(analyzer, "\u0001\u000B",
- new String[] { });
-
- // � 0001 � 0308 � 000B � # � [0.2] <START OF HEADING> (Other) � [4.0] COMBINING DIAERESIS (Extend_FE) � [3.2] <LINE TABULATION> (Newline) � [0.3]
- assertAnalyzesTo(analyzer, "\u0001\u0308\u000B",
- new String[] { });
-
- // � 0001 � 3031 � # � [0.2] <START OF HEADING> (Other) � [999.0] VERTICAL KANA REPEAT MARK (Katakana) � [0.3]
- assertAnalyzesTo(analyzer, "\u0001\u3031",
- new String[] { "\u3031" });
-
- // � 0001 � 0308 � 3031 � # � [0.2] <START OF HEADING> (Other) � [4.0] COMBINING DIAERESIS (Extend_FE) � [999.0] VERTICAL KANA REPEAT MARK (Katakana) � [0.3]
- assertAnalyzesTo(analyzer, "\u0001\u0308\u3031",
- new String[] { "\u3031" });
-
- // � 0001 � 0041 � # � [0.2] <START OF HEADING> (Other) � [999.0] LATIN CAPITAL LETTER A (ALetter) � [0.3]
- assertAnalyzesTo(analyzer, "\u0001\u0041",
- new String[] { "\u0041" });
-
- // � 0001 � 0308 � 0041 � # � [0.2] <START OF HEADING> (Other) � [4.0] COMBINING DIAERESIS (Extend_FE) � [999.0] LATIN CAPITAL LETTER A (ALetter) � [0.3]
- assertAnalyzesTo(analyzer, "\u0001\u0308\u0041",
- new String[] { "\u0041" });
-
- // � 0001 � 003A � # � [0.2] <START OF HEADING> (Other) � [999.0] COLON (MidLetter) � [0.3]
- assertAnalyzesTo(analyzer, "\u0001\u003A",
- new String[] { });
-
- // � 0001 � 0308 � 003A � # � [0.2] <START OF HEADING> (Other) � [4.0] COMBINING DIAERESIS (Extend_FE) � [999.0] COLON (MidLetter) � [0.3]
- assertAnalyzesTo(analyzer, "\u0001\u0308\u003A",
- new String[] { });
-
- // � 0001 � 002C � # � [0.2] <START OF HEADING> (Other) � [999.0] COMMA (MidNum) � [0.3]
- assertAnalyzesTo(analyzer, "\u0001\u002C",
- new String[] { });
-
- // � 0001 � 0308 � 002C � # � [0.2] <START OF HEADING> (Other) � [4.0] COMBINING DIAERESIS (Extend_FE) � [999.0] COMMA (MidNum) � [0.3]
- assertAnalyzesTo(analyzer, "\u0001\u0308\u002C",
- new String[] { });
-
- // � 0001 � 002E � # � [0.2] <START OF HEADING> (Other) � [999.0] FULL STOP (MidNumLet) � [0.3]
- assertAnalyzesTo(analyzer, "\u0001\u002E",
- new String[] { });
-
- // � 0001 � 0308 � 002E � # � [0.2] <START OF HEADING> (Other) � [4.0] COMBINING DIAERESIS (Extend_FE) � [999.0] FULL STOP (MidNumLet) � [0.3]
- assertAnalyzesTo(analyzer, "\u0001\u0308\u002E",
- new String[] { });
-
- // � 0001 � 0030 � # � [0.2] <START OF HEADING> (Other) � [999.0] DIGIT ZERO (Numeric) � [0.3]
- assertAnalyzesTo(analyzer, "\u0001\u0030",
- new String[] { "\u0030" });
-
- // � 0001 � 0308 � 0030 � # � [0.2] <START OF HEADING> (Other) � [4.0] COMBINING DIAERESIS (Extend_FE) � [999.0] DIGIT ZERO (Numeric) � [0.3]
- assertAnalyzesTo(analyzer, "\u0001\u0308\u0030",
- new String[] { "\u0030" });
-
- // � 0001 � 005F � # � [0.2] <START OF HEADING> (Other) � [999.0] LOW LINE (ExtendNumLet) � [0.3]
- assertAnalyzesTo(analyzer, "\u0001\u005F",
- new String[] { });
-
- // � 0001 � 0308 � 005F � # � [0.2] <START OF HEADING> (Other) � [4.0] COMBINING DIAERESIS (Extend_FE) � [999.0] LOW LINE (ExtendNumLet) � [0.3]
- assertAnalyzesTo(analyzer, "\u0001\u0308\u005F",
- new String[] { });
-
- // � 0001 � 1F1E6 � # � [0.2] <START OF HEADING> (Other) � [999.0] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) � [0.3]
- assertAnalyzesTo(analyzer, "\u0001\uD83C\uDDE6",
- new String[] { });
-
- // � 0001 � 0308 � 1F1E6 � # � [0.2] <START OF HEADING> (Other) � [4.0] COMBINING DIAERESIS (Extend_FE) � [999.0] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) � [0.3]
- assertAnalyzesTo(analyzer, "\u0001\u0308\uD83C\uDDE6",
- new String[] { });
-
- // � 0001 � 05D0 � # � [0.2] <START OF HEADING> (Other) � [999.0] HEBREW LETTER ALEF (Hebrew_Letter) � [0.3]
- assertAnalyzesTo(analyzer, "\u0001\u05D0",
- new String[] { "\u05D0" });
-
- // � 0001 � 0308 � 05D0 � # � [0.2] <START OF HEADING> (Other) � [4.0] COMBINING DIAERESIS (Extend_FE) � [999.0] HEBREW LETTER ALEF (Hebrew_Letter) � [0.3]
- assertAnalyzesTo(analyzer, "\u0001\u0308\u05D0",
- new String[] { "\u05D0" });
-
- // � 0001 � 0022 � # � [0.2] <START OF HEADING> (Other) � [999.0] QUOTATION MARK (Double_Quote) � [0.3]
- assertAnalyzesTo(analyzer, "\u0001\"",
- new String[] { });
-
- // � 0001 � 0308 � 0022 � # � [0.2] <START OF HEADING> (Other) � [4.0] COMBINING DIAERESIS (Extend_FE) � [999.0] QUOTATION MARK (Double_Quote) � [0.3]
- assertAnalyzesTo(analyzer, "\u0001\u0308\"",
- new String[] { });
-
- // � 0001 � 0027 � # � [0.2] <START OF HEADING> (Other) � [999.0] APOSTROPHE (Single_Quote) � [0.3]
- assertAnalyzesTo(analyzer, "\u0001\u0027",
- new String[] { });
-
- // � 0001 � 0308 � 0027 � # � [0.2] <START OF HEADING> (Other) � [4.0] COMBINING DIAERESIS (Extend_FE) � [999.0] APOSTROPHE (Single_Quote) � [0.3]
- assertAnalyzesTo(analyzer, "\u0001\u0308\u0027",
- new String[] { });
-
- // � 0001 � 00AD � # � [0.2] <START OF HEADING> (Other) � [4.0] SOFT HYPHEN (Format_FE) � [0.3]
- assertAnalyzesTo(analyzer, "\u0001\u00AD",
- new String[] { });
-
- // � 0001 � 0308 � 00AD � # � [0.2] <START OF HEADING> (Other) � [4.0] COMBINING DIAERESIS (Extend_FE) � [4.0] SOFT HYPHEN (Format_FE) � [0.3]
- assertAnalyzesTo(analyzer, "\u0001\u0308\u00AD",
- new String[] { });
-
- // � 0001 � 0300 � # � [0.2] <START OF HEADING> (Other) � [4.0] COMBINING GRAVE ACCENT (Extend_FE) � [0.3]
- assertAnalyzesTo(analyzer, "\u0001\u0300",
- new String[] { });
-
- // � 0001 � 0308 � 0300 � # � [0.2] <START OF HEADING> (Other) � [4.0] COMBINING DIAERESIS (Extend_FE) � [4.0] COMBINING GRAVE ACCENT (Extend_FE) � [0.3]
- assertAnalyzesTo(analyzer, "\u0001\u0308\u0300",
- new String[] { });
-
- // � 0001 � 0061 � 2060 � # � [0.2] <START OF HEADING> (Other) � [999.0] LATIN SMALL LETTER A (ALetter) � [4.0] WORD JOINER (Format_FE) � [0.3]
- assertAnalyzesTo(analyzer, "\u0001\u0061\u2060",
- new String[] { "\u0061\u2060" });
-
- // � 0001 � 0308 � 0061 � 2060 � # � [0.2] <START OF HEADING> (Other) � [4.0] COMBINING DIAERESIS (Extend_FE) � [999.0] LATIN SMALL LETTER A (ALetter) � [4.0] WORD JOINER (Format_FE) � [0.3]
- assertAnalyzesTo(analyzer, "\u0001\u0308\u0061\u2060",
- new String[] { "\u0061\u2060" });
-
- // � 0001 � 0061 � 003A � # � [0.2] <START OF HEADING> (Other) � [999.0] LATIN SMALL LETTER A (ALetter) � [999.0] COLON (MidLetter) � [0.3]
- assertAnalyzesTo(analyzer, "\u0001\u0061\u003A",
- new String[] { "\u0061" });
-
- // � 0001 � 0308 � 0061 � 003A � # � [0.2] <START OF HEADING> (Other) � [4.0] COMBINING DIAERESIS (Extend_FE) � [999.0] LATIN SMALL LETTER A (ALetter) � [999.0] COLON (MidLetter) � [0.3]
- assertAnalyzesTo(analyzer, "\u0001\u0308\u0061\u003A",
- new String[] { "\u0061" });
-
- // � 0001 � 0061 � 0027 � # � [0.2] <START OF HEADING> (Other) � [999.0] LATIN SMALL LETTER A (ALetter) � [999.0] APOSTROPHE (Single_Quote) � [0.3]
- assertAnalyzesTo(analyzer, "\u0001\u0061\u0027",
- new String[] { "\u0061" });
-
- // � 0001 � 0308 � 0061 � 0027 � # � [0.2] <START OF HEADING> (Other) � [4.0] COMBINING DIAERESIS (Extend_FE) � [999.0] LATIN SMALL LETTER A (ALetter) � [999.0] APOSTROPHE (Single_Quote) � [0.3]
- assertAnalyzesTo(analyzer, "\u0001\u0308\u0061\u0027",
- new String[] { "\u0061" });
-
- // � 0001 � 0061 � 0027 � 2060 � # � [0.2] <START OF HEADING> (Other) � [999.0] LATIN SMALL LETTER A (ALetter) � [999.0] APOSTROPHE (Single_Quote) � [4.0] WORD JOINER (Format_FE) � [0.3]
- assertAnalyzesTo(analyzer, "\u0001\u0061\u0027\u2060",
- new String[] { "\u0061" });
-
- // � 0001 � 0308 � 0061 � 0027 � 2060 � # � [0.2] <START OF HEADING> (Other) � [4.0] COMBINING DIAERESIS (Extend_FE) � [999.0] LATIN SMALL LETTER A (ALetter) � [999.0] APOSTROPHE (Single_Quote) � [4.0] WORD JOINER (Format_FE) � [0.3]
- assertAnalyzesTo(analyzer, "\u0001\u0308\u0061\u0027\u2060",
- new String[] { "\u0061" });
-
- // � 0001 � 0061 � 002C � # � [0.2] <START OF HEADING> (Other) � [999.0] LATIN SMALL LETTER A (ALetter) � [999.0] COMMA (MidNum) � [0.3]
- assertAnalyzesTo(analyzer, "\u0001\u0061\u002C",
- new String[] { "\u0061" });
-
- // � 0001 � 0308 � 0061 � 002C � # � [0.2] <START OF HEADING> (Other) � [4.0] COMBINING DIAERESIS (Extend_FE) � [999.0] LATIN SMALL LETTER A (ALetter) � [999.0] COMMA (MidNum) � [0.3]
- assertAnalyzesTo(analyzer, "\u0001\u0308\u0061\u002C",
- new String[] { "\u0061" });
-
- // � 0001 � 0031 � 003A � # � [0.2] <START OF HEADING> (Other) � [999.0] DIGIT ONE (Numeric) � [999.0] COLON (MidLetter) � [0.3]
- assertAnalyzesTo(analyzer, "\u0001\u0031\u003A",
- new String[] { "\u0031" });
-
- // � 0001 � 0308 � 0031 � 003A � # � [0.2] <START OF HEADING> (Other) � [4.0] COMBINING DIAERESIS (Extend_FE) � [999.0] DIGIT ONE (Numeric) � [999.0] COLON (MidLetter) � [0.3]
- assertAnalyzesTo(analyzer, "\u0001\u0308\u0031\u003A",
- new String[] { "\u0031" });
-
- // � 0001 � 0031 � 0027 � # � [0.2] <START OF HEADING> (Other) � [999.0] DIGIT ONE (Numeric) � [999.0] APOSTROPHE (Single_Quote) � [0.3]
- assertAnalyzesTo(analyzer, "\u0001\u0031\u0027",
- new String[] { "\u0031" });
-
- // � 0001 � 0308 � 0031 � 0027 � # � [0.2] <START OF HEADING> (Other) � [4.0] COMBINING DIAERESIS (Extend_FE) � [999.0] DIGIT ONE (Numeric) � [999.0] APOSTROPHE (Single_Quote) � [0.3]
- assertAnalyzesTo(analyzer, "\u0001\u0308\u0031\u0027",
- new String[] { "\u0031" });
-
- // � 0001 � 0031 � 002C � # � [0.2] <START OF HEADING> (Other) � [999.0] DIGIT ONE (Numeric) � [999.0] COMMA (MidNum) � [0.3]
- assertAnalyzesTo(analyzer, "\u0001\u0031\u002C",
- new String[] { "\u0031" });
-
- // � 0001 � 0308 � 0031 � 002C � # � [0.2] <START OF HEADING> (Other) � [4.0] COMBINING DIAERESIS (Extend_FE) � [999.0] DIGIT ONE (Numeric) � [999.0] COMMA (MidNum) � [0.3]
- assertAnalyzesTo(analyzer, "\u0001\u0308\u0031\u002C",
- new String[] { "\u0031" });
-
- // � 0001 � 0031 � 002E � 2060 � # � [0.2] <START OF HEADING> (Other) � [999.0] DIGIT ONE (Numeric) � [999.0] FULL STOP (MidNumLet) � [4.0] WORD JOINER (Format_FE) � [0.3]
- assertAnalyzesTo(analyzer, "\u0001\u0031\u002E\u2060",
- new String[] { "\u0031" });
-
- // � 0001 � 0308 � 0031 � 002E � 2060 � # � [0.2] <START OF HEADING> (Other) � [4.0] COMBINING DIAERESIS (Extend_FE) � [999.0] DIGIT ONE (Numeric) � [999.0] FULL STOP (MidNumLet) � [4.0] WORD JOINER (Format_FE) � [0.3]
- assertAnalyzesTo(analyzer, "\u0001\u0308\u0031\u002E\u2060",
- new String[] { "\u0031" });
-
- // � 000D � 0001 � # � [0.2] <CARRIAGE RETURN (CR)> (CR) � [3.1] <START OF HEADING> (Other) � [0.3]
- assertAnalyzesTo(analyzer, "\r\u0001",
- new String[] { });
-
- // � 000D � 0308 � 0001 � # � [0.2] <CARRIAGE RETURN (CR)> (CR) � [3.1] COMBINING DIAERESIS (Extend_FE) � [999.0] <START OF HEADING> (Other) � [0.3]
- assertAnalyzesTo(analyzer, "\r\u0308\u0001",
- new String[] { });
-
- // � 000D � 000D � # � [0.2] <CARRIAGE RETURN (CR)> (CR) � [3.1] <CARRIAGE RETURN (CR)> (CR) � [0.3]
- assertAnalyzesTo(analyzer, "\r\r",
- new String[] { });
-
- // � 000D � 0308 � 000D � # � [0.2] <CARRIAGE RETURN (CR)> (CR) � [3.1] COMBINING DIAERESIS (Extend_FE) � [3.2] <CARRIAGE RETURN (CR)> (CR) � [0.3]
- assertAnalyzesTo(analyzer, "\r\u0308\r",
- new String[] { });
-
- // � 000D � 000A � # � [0.2] <CARRIAGE RETURN (CR)> (CR) � [3.0] <LINE FEED (LF)> (LF) � [0.3]
- assertAnalyzesTo(analyzer, "\r\n",
- new String[] { });
-
- // � 000D � 0308 � 000A � # � [0.2] <CARRIAGE RETURN (CR)> (CR) � [3.1] COMBINING DIAERESIS (Extend_FE) � [3.2] <LINE FEED (LF)> (LF) � [0.3]
- assertAnalyzesTo(analyzer, "\r\u0308\n",
- new String[] { });
-
- // � 000D � 000B � # � [0.2] <CARRIAGE RETURN (CR)> (CR) � [3.1] <LINE TABULATION> (Newline) � [0.3]
- assertAnalyzesTo(analyzer, "\r\u000B",
- new String[] { });
-
- // � 000D � 0308 � 000B � # � [0.2] <CARRIAGE RETURN (CR)> (CR) � [3.1] COMBINING DIAERESIS (Extend_FE) � [3.2] <LINE TABULATION> (Newline) � [0.3]
- assertAnalyzesTo(analyzer, "\r\u0308\u000B",
- new String[] { });
-
- // � 000D � 3031 � # � [0.2] <CARRIAGE RETURN (CR)> (CR) � [3.1] VERTICAL KANA REPEAT MARK (Katakana) � [0.3]
- assertAnalyzesTo(analyzer, "\r\u3031",
- new String[] { "\u3031" });
-
- // � 000D � 0308 � 3031 � # � [0.2] <CARRIAGE RETURN (CR)> (CR) � [3.1] COMBINING DIAERESIS (Extend_FE) � [999.0] VERTICAL KANA REPEAT MARK (Katakana) � [0.3]
- assertAnalyzesTo(analyzer, "\r\u0308\u3031",
- new String[] { "\u3031" });
-
- // � 000D � 0041 � # � [0.2] <CARRIAGE RETURN (CR)> (CR) � [3.1] LATIN CAPITAL LETTER A (ALetter) � [0.3]
- assertAnalyzesTo(analyzer, "\r\u0041",
- new String[] { "\u0041" });
-
- // � 000D � 0308 � 0041 � # � [0.2] <CARRIAGE RETURN (CR)> (CR) � [3.1] COMBINING DIAERESIS (Extend_FE) � [999.0] LATIN CAPITAL LETTER A (ALetter) � [0.3]
- assertAnalyzesTo(analyzer, "\r\u0308\u0041",
- new String[] { "\u0041" });
-
- // � 000D � 003A � # � [0.2] <CARRIAGE RETURN (CR)> (CR) � [3.1] COLON (MidLetter) � [0.3]
- assertAnalyzesTo(analyzer, "\r\u003A",
- new String[] { });
-
- // � 000D � 0308 � 003A � # � [0.2] <CARRIAGE RETURN (CR)> (CR) � [3.1] COMBINING DIAERESIS (Extend_FE) � [999.0] COLON (MidLetter) � [0.3]
- assertAnalyzesTo(analyzer, "\r\u0308\u003A",
- new String[] { });
-
- // � 000D � 002C � # � [0.2] <CARRIAGE RETURN (CR)> (CR) � [3.1] COMMA (MidNum) � [0.3]
- assertAnalyzesTo(analyzer, "\r\u002C",
- new String[] { });
-
- // � 000D � 0308 � 002C � # � [0.2] <CARRIAGE RETURN (CR)> (CR) � [3.1] COMBINING DIAERESIS (Extend_FE) � [999.0] COMMA (MidNum) � [0.3]
- assertAnalyzesTo(analyzer, "\r\u0308\u002C",
- new String[] { });
-
- // � 000D � 002E � # � [0.2] <CARRIAGE RETURN (CR)> (CR) � [3.1] FULL STOP (MidNumLet) � [0.3]
- assertAnalyzesTo(analyzer, "\r\u002E",
- new String[] { });
-
- // � 000D � 0308 � 002E � # � [0.2] <CARRIAGE RETURN (CR)> (CR) � [3.1] COMBINING DIAERESIS (Extend_FE) � [999.0] FULL STOP (MidNumLet) � [0.3]
- assertAnalyzesTo(analyzer, "\r\u0308\u002E",
- new String[] { });
-
- // � 000D � 0030 � # � [0.2] <CARRIAGE RETURN (CR)> (CR) � [3.1] DIGIT ZERO (Numeric) � [0.3]
- assertAnalyzesTo(analyzer, "\r\u0030",
- new String[] { "\u0030" });
-
- // � 000D � 0308 � 0030 � # � [0.2] <CARRIAGE RETURN (CR)> (CR) � [3.1] COMBINING DIAERESIS (Extend_FE) � [999.0] DIGIT ZERO (Numeric) � [0.3]
- assertAnalyzesTo(analyzer, "\r\u0308\u0030",
- new String[] { "\u0030" });
-
- // � 000D � 005F � # � [0.2] <CARRIAGE RETURN (CR)> (CR) � [3.1] LOW LINE (ExtendNumLet) � [0.3]
- assertAnalyzesTo(analyzer, "\r\u005F",
- new String[] { });
-
- // � 000D � 0308 � 005F � # � [0.2] <CARRIAGE RETURN (CR)> (CR) � [3.1] COMBINING DIAERESIS (Extend_FE) � [999.0] LOW LINE (ExtendNumLet) � [0.3]
- assertAnalyzesTo(analyzer, "\r\u0308\u005F",
- new String[] { });
-
- // � 000D � 1F1E6 � # � [0.2] <CARRIAGE RETURN (CR)> (CR) � [3.1] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) � [0.3]
- assertAnalyzesTo(analyzer, "\r\uD83C\uDDE6",
- new String[] { });
-
- // � 000D � 0308 � 1F1E6 � # � [0.2] <CARRIAGE RETURN (CR)> (CR) � [3.1] COMBINING DIAERESIS (Extend_FE) � [999.0] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) � [0.3]
- assertAnalyzesTo(analyzer, "\r\u0308\uD83C\uDDE6",
- new String[] { });
-
- // � 000D � 05D0 � # � [0.2] <CARRIAGE RETURN (CR)> (CR) � [3.1] HEBREW LETTER ALEF (Hebrew_Letter) � [0.3]
- assertAnalyzesTo(analyzer, "\r\u05D0",
- new String[] { "\u05D0" });
-
- // � 000D � 0308 � 05D0 � # � [0.2] <CARRIAGE RETURN (CR)> (CR) � [3.1] COMBINING DIAERESIS (Extend_FE) � [999.0] HEBREW LETTER ALEF (Hebrew_Letter) � [0.3]
- assertAnalyzesTo(analyzer, "\r\u0308\u05D0",
- new String[] { "\u05D0" });
-
- // � 000D � 0022 � # � [0.2] <CARRIAGE RETURN (CR)> (CR) � [3.1] QUOTATION MARK (Double_Quote) � [0.3]
- assertAnalyzesTo(analyzer, "\r\"",
- new String[] { });
-
- // � 000D � 0308 � 0022 � # � [0.2] <CARRIAGE RETURN (CR)> (CR) � [3.1] COMBINING DIAERESIS (Extend_FE) � [999.0] QUOTATION MARK (Double_Quote) � [0.3]
- assertAnalyzesTo(analyzer, "\r\u0308\"",
- new String[] { });
-
- // � 000D � 0027 � # � [0.2] <CARRIAGE RETURN (CR)> (CR) � [3.1] APOSTROPHE (Single_Quote) � [0.3]
- assertAnalyzesTo(analyzer, "\r\u0027",
- new String[] { });
-
- // � 000D � 0308 � 0027 � # � [0.2] <CARRIAGE RETURN (CR)> (CR) � [3.1] COMBINING DIAERESIS (Extend_FE) � [999.0] APOSTROPHE (Single_Quote) � [0.3]
- assertAnalyzesTo(analyzer, "\r\u0308\u0027",
- new String[] { });
-
- // � 000D � 00AD � # � [0.2] <CARRIAGE RETURN (CR)> (CR) � [3.1] SOFT HYPHEN (Format_FE) � [0.3]
- assertAnalyzesTo(analyzer, "\r\u00AD",
- new String[] { });
-
- // � 000D � 0308 � 00AD � # � [0.2] <CARRIAGE RETURN (CR)> (CR) � [3.1] COMBINING DIAERESIS (Extend_FE) � [4.0] SOFT HYPHEN (Format_FE) � [0.3]
- assertAnalyzesTo(analyzer, "\r\u0308\u00AD",
- new String[] { });
-
- // � 000D � 0300 � # � [0.2] <CARRIAGE RETURN (CR)> (CR) � [3.1] COMBINING GRAVE ACCENT (Extend_FE) � [0.3]
- assertAnalyzesTo(analyzer, "\r\u0300",
- new String[] { });
-
- // � 000D � 0308 � 0300 � # � [0.2] <CARRIAGE RETURN (CR)> (CR) � [3.1] COMBINING DIAERESIS (Extend_FE) � [4.0] COMBINING GRAVE ACCENT (Extend_FE) � [0.3]
- assertAnalyzesTo(analyzer, "\r\u0308\u0300",
- new String[] { });
-
- // � 000D � 0061 � 2060 � # � [0.2] <CARRIAGE RETURN (CR)> (CR) � [3.1] LATIN SMALL LETTER A (ALetter) � [4.0] WORD JOINER (Format_FE) � [0.3]
- assertAnalyzesTo(analyzer, "\r\u0061\u2060",
- new String[] { "\u0061\u2060" });
-
- // � 000D � 0308 � 0061 � 2060 � # � [0.2] <CARRIAGE RETURN (CR)> (CR) � [3.1] COMBINING DIAERESIS (Extend_FE) � [999.0] LATIN SMALL LETTER A (ALetter) � [4.0] WORD JOINER (Format_FE) � [0.3]
- assertAnalyzesTo(analyzer, "\r\u0308\u0061\u2060",
- new String[] { "\u0061\u2060" });
-
- // � 000D � 0061 � 003A � # � [0.2] <CARRIAGE RETURN (CR)> (CR) � [3.1] LATIN SMALL LETTER A (ALetter) � [999.0] COLON (MidLetter) � [0.3]
- assertAnalyzesTo(analyzer, "\r\u0061\u003A",
- new String[] { "\u0061" });
-
- // � 000D � 0308 � 0061 � 003A � # � [0.2] <CARRIAGE RETURN (CR)> (CR) � [3.1] COMBINING DIAERESIS (Extend_FE) � [999.0] LATIN SMALL LETTER A (ALetter) � [999.0] COLON (MidLetter) � [0.3]
- assertAnalyzesTo(analyzer, "\r\u0308\u0061\u003A",
- new String[] { "\u0061" });
-
- // � 000D � 0061 � 0027 � # � [0.2] <CARRIAGE RETURN (CR)> (CR) � [3.1] LATIN SMALL LETTER A (ALetter) � [999.0] APOSTROPHE (Single_Quote) � [0.3]
- assertAnalyzesTo(analyzer, "\r\u0061\u0027",
- new String[] { "\u0061" });
-
- // � 000D � 0308 � 0061 � 0027 � # � [0.2] <CARRIAGE RETURN (CR)> (CR) � [3.1] COMBINING DIAERESIS (Extend_FE) � [999.0] LATIN SMALL LETTER A (ALetter) � [999.0] APOSTROPHE (Single_Quote) � [0.3]
- assertAnalyzesTo(analyzer, "\r\u0308\u0061\u0027",
- new String[] { "\u0061" });
-
- // � 000D � 0061 � 0027 � 2060 � # � [0.2] <CARRIAGE RETURN (CR)> (CR) � [3.1] LATIN SMALL LETTER A (ALetter) � [999.0] APOSTROPHE (Single_Quote) � [4.0] WORD JOINER (Format_FE) � [0.3]
- assertAnalyzesTo(analyzer, "\r\u0061\u0027\u2060",
- new String[] { "\u0061" });
-
- // � 000D � 0308 � 0061 � 0027 � 2060 � # � [0.2] <CARRIAGE RETURN (CR)> (CR) � [3.1] COMBINING DIAERESIS (Extend_FE) � [999.0] LATIN SMALL LETTER A (ALetter) � [999.0] APOSTROPHE (Single_Quote) � [4.0] WORD JOINER (Format_FE) � [0.3]
- assertAnalyzesTo(analyzer, "\r\u0308\u0061\u0027\u2060",
- new String[] { "\u0061" });
-
- // � 000D � 0061 � 002C � # � [0.2] <CARRIAGE RETURN (CR)> (CR) � [3.1] LATIN SMALL LETTER A (ALetter) � [999.0] COMMA (MidNum) � [0.3]
- assertAnalyzesTo(analyzer, "\r\u0061\u002C",
- new String[] { "\u0061" });
-
- // � 000D � 0308 � 0061 � 002C � # � [0.2] <CARRIAGE RETURN (CR)> (CR) � [3.1] COMBINING DIAERESIS (Extend_FE) � [999.0] LATIN SMALL LETTER A (ALetter) � [999.0] COMMA (MidNum) � [0.3]
- assertAnalyzesTo(analyzer, "\r\u0308\u0061\u002C",
- new String[] { "\u0061" });
-
- // � 000D � 0031 � 003A � # � [0.2] <CARRIAGE RETURN (CR)> (CR) � [3.1] DIGIT ONE (Numeric) � [999.0] COLON (MidLetter) � [0.3]
- assertAnalyzesTo(analyzer, "\r\u0031\u003A",
- new String[] { "\u0031" });
-
- // � 000D � 0308 � 0031 � 003A � # � [0.2] <CARRIAGE RETURN (CR)> (CR) � [3.1] COMBINING DIAERESIS (Extend_FE) � [999.0] DIGIT ONE (Numeric) � [999.0] COLON (MidLetter) � [0.3]
- assertAnalyzesTo(analyzer, "\r\u0308\u0031\u003A",
- new String[] { "\u0031" });
-
- // � 000D � 0031 � 0027 � # � [0.2] <CARRIAGE RETURN (CR)> (CR) � [3.1] DIGIT ONE (Numeric) � [999.0] APOSTROPHE (Single_Quote) � [0.3]
- assertAnalyzesTo(analyzer, "\r\u0031\u0027",
- new String[] { "\u0031" });
-
- // � 000D � 0308 � 0031 � 0027 � # � [0.2] <CARRIAGE RETURN (CR)> (CR) � [3.1] COMBINING DIAERESIS (Extend_FE) � [999.0] DIGIT ONE (Numeric) � [999.0] APOSTROPHE (Single_Quote) � [0.3]
- assertAnalyzesTo(analyzer, "\r\u0308\u0031\u0027",
- new String[] { "\u0031" });
-
- // � 000D � 0031 � 002C � # � [0.2] <CARRIAGE RETURN (CR)> (CR) � [3.1] DIGIT ONE (Numeric) � [999.0] COMMA (MidNum) � [0.3]
- assertAnalyzesTo(analyzer, "\r\u0031\u002C",
- new String[] { "\u0031" });
-
- // � 000D � 0308 � 0031 � 002C � # � [0.2] <CARRIAGE RETURN (CR)> (CR) � [3.1] COMBINING DIAERESIS (Extend_FE) � [999.0] DIGIT ONE (Numeric) � [999.0] COMMA (MidNum) � [0.3]
- assertAnalyzesTo(analyzer, "\r\u0308\u0031\u002C",
- new String[] { "\u0031" });
-
- // � 000D � 0031 � 002E � 2060 � # � [0.2] <CARRIAGE RETURN (CR)> (CR) � [3.1] DIGIT ONE (Numeric) � [999.0] FULL STOP (MidNumLet) � [4.0] WORD JOINER (Format_FE) � [0.3]
- assertAnalyzesTo(analyzer, "\r\u0031\u002E\u2060",
- new String[] { "\u0031" });
-
- // � 000D � 0308 � 0031 � 002E � 2060 � # � [0.2] <CARRIAGE RETURN (CR)> (CR) � [3.1] COMBINING DIAERESIS (Extend_FE) � [999.0] DIGIT ONE (Numeric) � [999.0] FULL STOP (MidNumLet) � [4.0] WORD JOINER (Format_FE) � [0.3]
- assertAnalyzesTo(analyzer, "\r\u0308\u0031\u002E\u2060",
- new String[] { "\u0031" });
-
- // � 000A � 0001 � # � [0.2] <LINE FEED (LF)> (LF) � [3.1] <START OF HEADING> (Other) � [0.3]
- assertAnalyzesTo(analyzer, "\n\u0001",
- new String[] { });
-
- // � 000A � 0308 � 0001 � # � [0.2] <LINE FEED (LF)> (LF) � [3.1] COMBINING DIAERESIS (Extend_FE) � [999.0] <START OF HEADING> (Other) � [0.3]
- assertAnalyzesTo(analyzer, "\n\u0308\u0001",
- new String[] { });
-
- // � 000A � 000D � # � [0.2] <LINE FEED (LF)> (LF) � [3.1] <CARRIAGE RETURN (CR)> (CR) � [0.3]
- assertAnalyzesTo(analyzer, "\n\r",
- new String[] { });
-
- // � 000A � 0308 � 000D � # � [0.2] <LINE FEED (LF)> (LF) � [3.1] COMBINING DIAERESIS (Extend_FE) � [3.2] <CARRIAGE RETURN (CR)> (CR) � [0.3]
- assertAnalyzesTo(analyzer, "\n\u0308\r",
- new String[] { });
-
- // � 000A � 000A � # � [0.2] <LINE FEED (LF)> (LF) � [3.1] <LINE FEED (LF)> (LF) � [0.3]
- assertAnalyzesTo(analyzer, "\n\n",
- new String[] { });
-
- // � 000A � 0308 � 000A � # � [0.2] <LINE FEED (LF)> (LF) � [3.1] COMBINING DIAERESIS (Extend_FE) � [3.2] <LINE FEED (LF)> (LF) � [0.3]
- assertAnalyzesTo(analyzer, "\n\u0308\n",
- new String[] { });
-
- // � 000A � 000B � # � [0.2] <LINE FEED (LF)> (LF) � [3.1] <LINE TABULATION> (Newline) � [0.3]
- assertAnalyzesTo(analyzer, "\n\u000B",
- new String[] { });
-
- // � 000A � 0308 � 000B � # � [0.2] <LINE FEED (LF)> (LF) � [3.1] COMBINING DIAERESIS (Extend_FE) � [3.2] <LINE TABULATION> (Newline) � [0.3]
- assertAnalyzesTo(analyzer, "\n\u0308\u000B",
- new String[] { });
-
- // � 000A � 3031 � # � [0.2] <LINE FEED (LF)> (LF) � [3.1] VERTICAL KANA REPEAT MARK (Katakana) � [0.3]
- assertAnalyzesTo(analyzer, "\n\u3031",
- new String[] { "\u3031" });
-
- // � 000A � 0308 � 3031 � # � [0.2] <LINE FEED (LF)> (LF) � [3.1] COMBINING DIAERESIS (Extend_FE) � [999.0] VERTICAL KANA REPEAT MARK (Katakana) � [0.3]
- assertAnalyzesTo(analyzer, "\n\u0308\u3031",
- new String[] { "\u3031" });
-
- // � 000A � 0041 � # � [0.2] <LINE FEED (LF)> (LF) � [3.1] LATIN CAPITAL LETTER A (ALetter) � [0.3]
- assertAnalyzesTo(analyzer, "\n\u0041",
- new String[] { "\u0041" });
-
- // � 000A � 0308 � 0041 � # � [0.2] <LINE FEED (LF)> (LF) � [3.1] COMBINING DIAERESIS (Extend_FE) � [999.0] LATIN CAPITAL LETTER A (ALetter) � [0.3]
- assertAnalyzesTo(analyzer, "\n\u0308\u0041",
- new String[] { "\u0041" });
-
- // � 000A � 003A � # � [0.2] <LINE FEED (LF)> (LF) � [3.1] COLON (MidLetter) � [0.3]
- assertAnalyzesTo(analyzer, "\n\u003A",
- new String[] { });
-
- // � 000A � 0308 � 003A � # � [0.2] <LINE FEED (LF)> (LF) � [3.1] COMBINING DIAERESIS (Extend_FE) � [999.0] COLON (MidLetter) � [0.3]
- assertAnalyzesTo(analyzer, "\n\u0308\u003A",
- new String[] { });
-
- // � 000A � 002C � # � [0.2] <LINE FEED (LF)> (LF) � [3.1] COMMA (MidNum) � [0.3]
- assertAnalyzesTo(analyzer, "\n\u002C",
- new String[] { });
-
- // � 000A � 0308 � 002C � # � [0.2] <LINE FEED (LF)> (LF) � [3.1] COMBINING DIAERESIS (Extend_FE) � [999.0] COMMA (MidNum) � [0.3]
- assertAnalyzesTo(analyzer, "\n\u0308\u002C",
- new String[] { });
-
- // � 000A � 002E � # � [0.2] <LINE FEED (LF)> (LF) � [3.1] FULL STOP (MidNumLet) � [0.3]
- assertAnalyzesTo(analyzer, "\n\u002E",
- new String[] { });
-
- // � 000A � 0308 � 002E � # � [0.2] <LINE FEED (LF)> (LF) � [3.1] COMBINING DIAERESIS (Extend_FE) � [999.0] FULL STOP (MidNumLet) � [0.3]
- assertAnalyzesTo(analyzer, "\n\u0308\u002E",
- new String[] { });
-
- // � 000A � 0030 � # � [0.2] <LINE FEED (LF)> (LF) � [3.1] DIGIT ZERO (Numeric) � [0.3]
- assertAnalyzesTo(analyzer, "\n\u0030",
- new String[] { "\u0030" });
-
- // � 000A � 0308 � 0030 � # � [0.2] <LINE FEED (LF)> (LF) � [3.1] COMBINING DIAERESIS (Extend_FE) � [999.0] DIGIT ZERO (Numeric) � [0.3]
- assertAnalyzesTo(analyzer, "\n\u0308\u0030",
- new String[] { "\u0030" });
-
- // � 000A � 005F � # � [0.2] <LINE FEED (LF)> (LF) � [3.1] LOW LINE (ExtendNumLet) � [0.3]
- assertAnalyzesTo(analyzer, "\n\u005F",
- new String[] { });
-
- // � 000A � 0308 � 005F � # � [0.2] <LINE FEED (LF)> (LF) � [3.1] COMBINING DIAERESIS (Extend_FE) � [999.0] LOW LINE (ExtendNumLet) � [0.3]
- assertAnalyzesTo(analyzer, "\n\u0308\u005F",
- new String[] { });
-
- // � 000A � 1F1E6 � # � [0.2] <LINE FEED (LF)> (LF) � [3.1] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) � [0.3]
- assertAnalyzesTo(analyzer, "\n\uD83C\uDDE6",
- new String[] { });
-
- // � 000A � 0308 � 1F1E6 � # � [0.2] <LINE FEED (LF)> (LF) � [3.1] COMBINING DIAERESIS (Extend_FE) � [999.0] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) � [0.3]
- assertAnalyzesTo(analyzer, "\n\u0308\uD83C\uDDE6",
- new String[] { });
-
- // � 000A � 05D0 � # � [0.2] <LINE FEED (LF)> (LF) � [3.1] HEBREW LETTER ALEF (Hebrew_Letter) � [0.3]
- assertAnalyzesTo(analyzer, "\n\u05D0",
- new String[] { "\u05D0" });
-
- // � 000A � 0308 � 05D0 � # � [0.2] <LINE FEED (LF)> (LF) � [3.1] COMBINING DIAERESIS (Extend_FE) � [999.0] HEBREW LETTER ALEF (Hebrew_Letter) � [0.3]
- assertAnalyzesTo(analyzer, "\n\u0308\u05D0",
- new String[] { "\u05D0" });
-
- // � 000A � 0022 � # � [0.2] <LINE FEED (LF)> (LF) � [3.1] QUOTATION MARK (Double_Quote) � [0.3]
- assertAnalyzesTo(analyzer, "\n\"",
- new String[] { });
-
- // � 000A � 0308 � 0022 � # � [0.2] <LINE FEED (LF)> (LF) � [3.1] COMBINING DIAERESIS (Extend_FE) � [999.0] QUOTATION MARK (Double_Quote) � [0.3]
- assertAnalyzesTo(analyzer, "\n\u0308\"",
- new String[] { });
-
- // � 000A � 0027 � # � [0.2] <LINE FEED (LF)> (LF) � [3.1] APOSTROPHE (Single_Quote) � [0.3]
- assertAnalyzesTo(analyzer, "\n\u0027",
- new String[] { });
-
- // � 000A � 0308 � 0027 � # � [0.2] <LINE FEED (LF)> (LF) � [3.1] COMBINING DIAERESIS (Extend_FE) � [999.0] APOSTROPHE (Single_Quote) � [0.3]
- assertAnalyzesTo(analyzer, "\n\u0308\u0027",
- new String[] { });
-
- // � 000A � 00AD � # � [0.2] <LINE FEED (LF)> (LF) � [3.1] SOFT HYPHEN (Format_FE) � [0.3]
- assertAnalyzesTo(analyzer, "\n\u00AD",
- new String[] { });
-
- // � 000A � 0308 � 00AD � # � [0.2] <LINE FEED (LF)> (LF) � [3.1] COMBINING DIAERESIS (Extend_FE) � [4.0] SOFT HYPHEN (Format_FE) � [0.3]
- assertAnalyzesTo(analyzer, "\n\u0308\u00AD",
- new String[] { });
-
- // � 000A � 0300 � # � [0.2] <LINE FEED (LF)> (LF) � [3.1] COMBINING GRAVE ACCENT (Extend_FE) � [0.3]
- assertAnalyzesTo(analyzer, "\n\u0300",
- new String[] { });
-
- // � 000A � 0308 � 0300 � # � [0.2] <LINE FEED (LF)> (LF) � [3.1] COMBINING DIAERESIS (Extend_FE) � [4.0] COMBINING GRAVE ACCENT (Extend_FE) � [0.3]
- assertAnalyzesTo(analyzer, "\n\u0308\u0300",
- new String[] { });
-
- // � 000A � 0061 � 2060 � # � [0.2] <LINE FEED (LF)> (LF) � [3.1] LATIN SMALL LETTER A (ALetter) � [4.0] WORD JOINER (Format_FE) � [0.3]
- assertAnalyzesTo(analyzer, "\n\u0061\u2060",
- new String[] { "\u0061\u2060" });
-
- // � 000A � 0308 � 0061 � 2060 � # � [0.2] <LINE FEED (LF)> (LF) � [3.1] COMBINING DIAERESIS (Extend_FE) � [999.0] LATIN SMALL LETTER A (ALetter) � [4.0] WORD JOINER (Format_FE) � [0.3]
- assertAnalyzesTo(analyzer, "\n\u0308\u0061\u2060",
- new String[] { "\u0061\u2060" });
-
- // � 000A � 0061 � 003A � # � [0.2] <LINE FEED (LF)> (LF) � [3.1] LATIN SMALL LETTER A (ALetter) � [999.0] COLON (MidLetter) � [0.3]
- assertAnalyzesTo(analyzer, "\n\u0061\u003A",
- new String[] { "\u0061" });
-
- // � 000A � 0308 � 0061 � 003A � # � [0.2] <LINE FEED (LF)> (LF) � [3.1] COMBINING DIAERESIS (Extend_FE) � [999.0] LATIN SMALL LETTER A (ALetter) � [999.0] COLON (MidLetter) � [0.3]
- assertAnalyzesTo(analyzer, "\n\u0308\u0061\u003A",
- new String[] { "\u0061" });
-
- // � 000A � 0061 � 0027 � # � [0.2] <LINE FEED (LF)> (LF) � [3.1] LATIN SMALL LETTER A (ALetter) � [999.0] APOSTROPHE (Single_Quote) � [0.3]
- assertAnalyzesTo(analyzer, "\n\u0061\u0027",
- new String[] { "\u0061" });
-
- // � 000A � 0308 � 0061 � 0027 � # � [0.2] <LINE FEED (LF)> (LF) � [3.1] COMBINING DIAERESIS (Extend_FE) � [999.0] LATIN SMALL LETTER A (ALetter) � [999.0] APOSTROPHE (Single_Quote) � [0.3]
- assertAnalyzesTo(analyzer, "\n\u0308\u0061\u0027",
- new String[] { "\u0061" });
-
- // � 000A � 0061 � 0027 � 2060 � # � [0.2] <LINE FEED (LF)> (LF) � [3.1] LATIN SMALL LETTER A (ALetter) � [999.0] APOSTROPHE (Single_Quote) � [4.0] WORD JOINER (Format_FE) � [0.3]
- assertAnalyzesTo(analyzer, "\n\u0061\u0027\u2060",
- new String[] { "\u0061" });
-
- // � 000A � 0308 � 0061 � 0027 � 2060 � # � [0.2] <LINE FEED (LF)> (LF) � [3.1] COMBINING DIAERESIS (Extend_FE) � [999.0] LATIN SMALL LETTER A (ALetter) � [999.0] APOSTROPHE (Single_Quote) � [4.0] WORD JOINER (Format_FE) � [0.3]
- assertAnalyzesTo(analyzer, "\n\u0308\u0061\u0027\u2060",
- new String[] { "\u0061" });
-
- // � 000A � 0061 � 002C � # � [0.2] <LINE FEED (LF)> (LF) � [3.1] LATIN SMALL LETTER A (ALetter) � [999.0] COMMA (MidNum) � [0.3]
- assertAnalyzesTo(analyzer, "\n\u0061\u002C",
- new String[] { "\u0061" });
-
- // � 000A � 0308 � 0061 � 002C � # � [0.2] <LINE FEED (LF)> (LF) � [3.1] COMBINING DIAERESIS (Extend_FE) � [999.0] LATIN SMALL LETTER A (ALetter) � [999.0] COMMA (MidNum) � [0.3]
- assertAnalyzesTo(analyzer, "\n\u0308\u0061\u002C",
- new String[] { "\u0061" });
-
- // � 000A � 0031 � 003A � # � [0.2] <LINE FEED (LF)> (LF) � [3.1] DIGIT ONE (Numeric) � [999.0] COLON (MidLetter) � [0.3]
- assertAnalyzesTo(analyzer, "\n\u0031\u003A",
- new String[] { "\u0031" });
-
- // � 000A � 0308 � 0031 � 003A � # � [0.2] <LINE FEED (LF)> (LF) � [3.1] COMBINING DIAERESIS (Extend_FE) � [999.0] DIGIT ONE (Numeric) � [999.0] COLON (MidLetter) � [0.3]
- assertAnalyzesTo(analyzer, "\n\u0308\u0031\u003A",
- new String[] { "\u0031" });
-
- // � 000A � 0031 � 0027 � # � [0.2] <LINE FEED (LF)> (LF) � [3.1] DIGIT ONE (Numeric) � [999.0] APOSTROPHE (Single_Quote) � [0.3]
- assertAnalyzesTo(analyzer, "\n\u0031\u0027",
- new String[] { "\u0031" });
-
- // � 000A � 0308 � 0031 � 0027 � # � [0.2] <LINE FEED (LF)> (LF) � [3.1] COMBINING DIAERESIS (Extend_FE) � [999.0] DIGIT ONE (Numeric) � [999.0] APOSTROPHE (Single_Quote) � [0.3]
- assertAnalyzesTo(analyzer, "\n\u0308\u0031\u0027",
- new String[] { "\u0031" });
-
- // � 000A � 0031 � 002C � # � [0.2] <LINE FEED (LF)> (LF) � [3.1] DIGIT ONE (Numeric) � [999.0] COMMA (MidNum) � [0.3]
- assertAnalyzesTo(analyzer, "\n\u0031\u002C",
- new String[] { "\u0031" });
-
- // � 000A � 0308 � 0031 � 002C � # � [0.2] <LINE FEED (LF)> (LF) � [3.1] COMBINING DIAERESIS (Extend_FE) � [999.0] DIGIT ONE (Numeric) � [999.0] COMMA (MidNum) � [0.3]
- assertAnalyzesTo(analyzer, "\n\u0308\u0031\u002C",
- new String[] { "\u0031" });
-
- // � 000A � 0031 � 002E � 2060 � # � [0.2] <LINE FEED (LF)> (LF) � [3.1] DIGIT ONE (Numeric) � [999.0] FULL STOP (MidNumLet) � [4.0] WORD JOINER (Format_FE) � [0.3]
- assertAnalyzesTo(analyzer, "\n\u0031\u002E\u2060",
- new String[] { "\u0031" });
-
- // � 000A � 0308 � 0031 � 002E � 2060 � # � [0.2] <LINE FEED (LF)> (LF) � [3.1] COMBINING DIAERESIS (Extend_FE) � [999.0] DIGIT ONE (Numeric) � [999.0] FULL STOP (MidNumLet) � [4.0] WORD JOINER (Format_FE) � [0.3]
- assertAnalyzesTo(analyzer, "\n\u0308\u0031\u002E\u2060",
- new String[] { "\u0031" });
-
- // � 000B � 0001 � # � [0.2] <LINE TABULATION> (Newline) � [3.1] <START OF HEADING> (Other) � [0.3]
- assertAnalyzesTo(analyzer, "\u000B\u0001",
- new String[] { });
-
- // � 000B � 0308 � 0001 � # � [0.2] <LINE TABULATION> (Newline) � [3.1] COMBINING DIAERESIS (Extend_FE) � [999.0] <START OF HEADING> (Other) � [0.3]
- assertAnalyzesTo(analyzer, "\u000B\u0308\u0001",
- new String[] { });
-
- // � 000B � 000D � # � [0.2] <LINE TABULATION> (Newline) � [3.1] <CARRIAGE RETURN (CR)> (CR) � [0.3]
- assertAnalyzesTo(analyzer, "\u000B\r",
- new String[] { });
-
- // � 000B � 0308 � 000D � # � [0.2] <LINE TABULATION> (Newline) � [3.1] COMBINING DIAERESIS (Extend_FE) � [3.2] <CARRIAGE RETURN (CR)> (CR) � [0.3]
- assertAnalyzesTo(analyzer, "\u000B\u0308\r",
- new String[] { });
-
- // � 000B � 000A � # � [0.2] <LINE TABULATION> (Newline) � [3.1] <LINE FEED (LF)> (LF) � [0.3]
- assertAnalyzesTo(analyzer, "\u000B\n",
- new String[] { });
-
- // � 000B � 0308 � 000A � # � [0.2] <LINE TABULATION> (Newline) � [3.1] COMBINING DIAERESIS (Extend_FE) � [3.2] <LINE FEED (LF)> (LF) � [0.3]
- assertAnalyzesTo(analyzer, "\u000B\u0308\n",
- new String[] { });
-
- // � 000B � 000B � # � [0.2] <LINE TABULATION> (Newline) � [3.1] <LINE TABULATION> (Newline) � [0.3]
- assertAnalyzesTo(analyzer, "\u000B\u000B",
- new String[] { });
-
- // � 000B � 0308 � 000B � # � [0.2] <LINE TABULATION> (Newline) � [3.1] COMBINING DIAERESIS (Extend_FE) � [3.2] <LINE TABULATION> (Newline) � [0.3]
- assertAnalyzesTo(analyzer, "\u000B\u0308\u000B",
- new String[] { });
-
- // � 000B � 3031 � # � [0.2] <LINE TABULATION> (Newline) � [3.1] VERTICAL KANA REPEAT MARK (Katakana) � [0.3]
- assertAnalyzesTo(analyzer, "\u000B\u3031",
- new String[] { "\u3031" });
-
- // � 000B � 0308 � 3031 � # � [0.2] <LINE TABULATION> (Newline) � [3.1] COMBINING DIAERESIS (Extend_FE) � [999.0] VERTICAL KANA REPEAT MARK (Katakana) � [0.3]
- assertAnalyzesTo(analyzer, "\u000B\u0308\u3031",
- new String[] { "\u3031" });
-
- // � 000B � 0041 � # � [0.2] <LINE TABULATION> (Newline) � [3.1] LATIN CAPITAL LETTER A (ALetter) � [0.3]
- assertAnalyzesTo(analyzer, "\u000B\u0041",
- new String[] { "\u0041" });
-
- // � 000B � 0308 � 0041 � # � [0.2] <LINE TABULATION> (Newline) � [3.1] COMBINING DIAERESIS (Extend_FE) � [999.0] LATIN CAPITAL LETTER A (ALetter) � [0.3]
- assertAnalyzesTo(analyzer, "\u000B\u0308\u0041",
- new String[] { "\u0041" });
-
- // � 000B � 003A � # � [0.2] <LINE TABULATION> (Newline) � [3.1] COLON (MidLetter) � [0.3]
- assertAnalyzesTo(analyzer, "\u000B\u003A",
- new String[] { });
-
- // � 000B � 0308 � 003A � # � [0.2] <LINE TABULATION> (Newline) � [3.1] COMBINING DIAERESIS (Extend_FE) � [999.0] COLON (MidLetter) � [0.3]
- assertAnalyzesTo(analyzer, "\u000B\u0308\u003A",
- new String[] { });
-
- // � 000B � 002C � # � [0.2] <LINE TABULATION> (Newline) � [3.1] COMMA (MidNum) � [0.3]
- assertAnalyzesTo(analyzer, "\u000B\u002C",
- new String[] { });
-
- // � 000B � 0308 � 002C � # � [0.2] <LINE TABULATION> (Newline) � [3.1] COMBINING DIAERESIS (Extend_FE) � [999.0] COMMA (MidNum) � [0.3]
- assertAnalyzesTo(analyzer, "\u000B\u0308\u002C",
- new String[] { });
-
- // � 000B � 002E � # � [0.2] <LINE TABULATION> (Newline) � [3.1] FULL STOP (MidNumLet) � [0.3]
- assertAnalyzesTo(analyzer, "\u000B\u002E",
- new String[] { });
-
- // � 000B � 0308 � 002E � # � [0.2] <LINE TABULATION> (Newline) � [3.1] COMBINING DIAERESIS (Extend_FE) � [999.0] FULL STOP (MidNumLet) � [0.3]
- assertAnalyzesTo(analyzer, "\u000B\u0308\u002E",
- new String[] { });
-
- // � 000B � 0030 � # � [0.2] <LINE TABULATION> (Newline) � [3.1] DIGIT ZERO (Numeric) � [0.3]
- assertAnalyzesTo(analyzer, "\u000B\u0030",
- new String[] { "\u0030" });
-
- // � 000B � 0308 � 0030 � # � [0.2] <LINE TABULATION> (Newline) � [3.1] COMBINING DIAERESIS (Extend_FE) � [999.0] DIGIT ZERO (Numeric) � [0.3]
- assertAnalyzesTo(analyzer, "\u000B\u0308\u0030",
- new String[] { "\u0030" });
-
- // � 000B � 005F � # � [0.2] <LINE TABULATION> (Newline) � [3.1] LOW LINE (ExtendNumLet) � [0.3]
- assertAnalyzesTo(analyzer, "\u000B\u005F",
- new String[] { });
-
- // � 000B � 0308 � 005F � # � [0.2] <LINE TABULATION> (Newline) � [3.1] COMBINING DIAERESIS (Extend_FE) � [999.0] LOW LINE (ExtendNumLet) � [0.3]
- assertAnalyzesTo(analyzer, "\u000B\u0308\u005F",
- new String[] { });
-
- // � 000B � 1F1E6 � # � [0.2] <LINE TABULATION> (Newline) � [3.1] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) � [0.3]
- assertAnalyzesTo(analyzer, "\u000B\uD83C\uDDE6",
- new String[] { });
-
- // � 000B � 0308 � 1F1E6 � # � [0.2] <LINE TABULATION> (Newline) � [3.1] COMBINING DIAERESIS (Extend_FE) � [999.0] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) � [0.3]
- assertAnalyzesTo(analyzer, "\u000B\u0308\uD83C\uDDE6",
- new String[] { });
-
- // � 000B � 05D0 � # � [0.2] <LINE TABULATION> (Newline) � [3.1] HEBREW LETTER ALEF (Hebrew_Letter) � [0.3]
- assertAnalyzesTo(analyzer, "\u000B\u05D0",
- new String[] { "\u05D0" });
-
- // � 000B � 0308 � 05D0 � # � [0.2] <LINE TABULATION> (Newline) � [3.1] COMBINING DIAERESIS (Extend_FE) � [999.0] HEBREW LETTER ALEF (Hebrew_Letter) � [0.3]
- assertAnalyzesTo(analyzer, "\u000B\u0308\u05D0",
- new String[] { "\u05D0" });
-
- // � 000B � 0022 � # � [0.2] <LINE TABULATION> (Newline) � [3.1] QUOTATION MARK (Double_Quote) � [0.3]
- assertAnalyzesTo(analyzer, "\u000B\"",
- new String[] { });
-
- // � 000B � 0308 � 0022 � # � [0.2] <LINE TABULATION> (Newline) � [3.1] COMBINING DIAERESIS (Extend_FE) � [999.0] QUOTATION MARK (Double_Quote) � [0.3]
- assertAnalyzesTo(analyzer, "\u000B\u0308\"",
- new String[] { });
-
- // � 000B � 0027 � # � [0.2] <LINE TABULATION> (Newline) � [3.1] APOSTROPHE (Single_Quote) � [0.3]
- assertAnalyzesTo(analyzer, "\u000B\u0027",
- new String[] { });
-
- // � 000B � 0308 � 0027 � # � [0.2] <LINE TABULATION> (Newline) � [3.1] COMBINING DIAERESIS (Extend_FE) � [999.0] APOSTROPHE (Single_Quote) � [0.3]
- assertAnalyzesTo(analyzer, "\u000B\u0308\u0027",
- new String[] { });
-
- // � 000B � 00AD � # � [0.2] <LINE TABULATION> (Newline) � [3.1] SOFT HYPHEN (Format_FE) � [0.3]
- assertAnalyzesTo(analyzer, "\u000B\u00AD",
- new String[] { });
-
- // � 000B � 0308 � 00AD � # � [0.2] <LINE TABULATION> (Newline) � [3.1] COMBINING DIAERESIS (Extend_FE) � [4.0] SOFT HYPHEN (Format_FE) � [0.3]
- assertAnalyzesTo(analyzer, "\u000B\u0308\u00AD",
- new String[] { });
-
- // � 000B � 0300 � # � [0.2] <LINE TABULATION> (Newline) � [3.1] COMBINING GRAVE ACCENT (Extend_FE) � [0.3]
- assertAnalyzesTo(analyzer, "\u000B\u0300",
- new String[] { });
-
- // � 000B � 0308 � 0300 � # � [0.2] <LINE TABULATION> (Newline) � [3.1] COMBINING DIAERESIS (Extend_FE) � [4.0] COMBINING GRAVE ACCENT (Extend_FE) � [0.3]
- assertAnalyzesTo(analyzer, "\u000B\u0308\u0300",
- new String[] { });
-
- // � 000B � 0061 � 2060 � # � [0.2] <LINE TABULATION> (Newline) � [3.1] LATIN SMALL LETTER A (ALetter) � [4.0] WORD JOINER (Format_FE) � [0.3]
- assertAnalyzesTo(analyzer, "\u000B\u0061\u2060",
- new String[] { "\u0061\u2060" });
-
- // � 000B � 0308 � 0061 � 2060 � # � [0.2] <LINE TABULATION> (Newline) � [3.1] COMBINING DIAERESIS (Extend_FE) � [999.0] LATIN SMALL LETTER A (ALetter) � [4.0] WORD JOINER (Format_FE) � [0.3]
- assertAnalyzesTo(analyzer, "\u000B\u0308\u0061\u2060",
- new String[] { "\u0061\u2060" });
-
- // � 000B � 0061 � 003A � # � [0.2] <LINE TABULATION> (Newline) � [3.1] LATIN SMALL LETTER A (ALetter) � [999.0] COLON (MidLetter) � [0.3]
- assertAnalyzesTo(analyzer, "\u000B\u0061\u003A",
- new String[] { "\u0061" });
-
- // � 000B � 0308 � 0061 � 003A � # � [0.2] <LINE TABULATION> (Newline) � [3.1] COMBINING DIAERESIS (Extend_FE) � [999.0] LATIN SMALL LETTER A (ALetter) � [999.0] COLON (MidLetter) � [0.3]
- assertAnalyzesTo(analyzer, "\u000B\u0308\u0061\u003A",
- new String[] { "\u0061" });
-
- // � 000B � 0061 � 0027 � # � [0.2] <LINE TABULATION> (Newline) � [3.1] LATIN SMALL LETTER A (ALetter) � [999.0] APOSTROPHE (Single_Quote) � [0.3]
- assertAnalyzesTo(analyzer, "\u000B\u0061\u0027",
- new String[] { "\u0061" });
-
- // � 000B � 0308 � 0061 � 0027 � # � [0.2] <LINE TABULATION> (Newline) � [3.1] COMBINING DIAERESIS (Extend_FE) � [999.0] LATIN SMALL LETTER A (ALetter) � [999.0] APOSTROPHE (Single_Quote) � [0.3]
- assertAnalyzesTo(analyzer, "\u000B\u0308\u0061\u0027",
- new String[] { "\u0061" });
-
- // � 000B � 0061 � 0027 � 2060 � # � [0.2] <LINE TABULATION> (Newline) � [3.1] LATIN SMALL LETTER A (ALetter) � [999.0] APOSTROPHE (Single_Quote) � [4.0] WORD JOINER (Format_FE) � [0.3]
- assertAnalyzesTo(analyzer, "\u000B\u0061\u0027\u2060",
- new String[] { "\u0061" });
-
- // � 000B � 0308 � 0061 � 0027 � 2060 � # � [0.2] <LINE TABULATION> (Newline) � [3.1] COMBINING DIAERESIS (Extend_FE) � [999.0] LATIN SMALL LETTER A (ALetter) � [999.0] APOSTROPHE (Single_Quote) � [4.0] WORD JOINER (Format_FE) � [0.3]
- assertAnalyzesTo(analyzer, "\u000B\u0308\u0061\u0027\u2060",
- new String[] { "\u0061" });
-
- // � 000B � 0061 � 002C � # � [0.2] <LINE TABULATION> (Newline) � [3.1] LATIN SMALL LETTER A (ALetter) � [999.0] COMMA (MidNum) � [0.3]
- assertAnalyzesTo(analyzer, "\u000B\u0061\u002C",
- new String[] { "\u0061" });
-
- // � 000B � 0308 � 0061 � 002C � # � [0.2] <LINE TABULATION> (Newline) � [3.1] COMBINING DIAERESIS (Extend_FE) � [999.0] LATIN SMALL LETTER A (ALetter) � [999.0] COMMA (MidNum) � [0.3]
- assertAnalyzesTo(analyzer, "\u000B\u0308\u0061\u002C",
- new String[] { "\u0061" });
-
- // � 000B � 0031 � 003A � # � [0.2] <LINE TABULATION> (Newline) � [3.1] DIGIT ONE (Numeric) � [999.0] COLON (MidLetter) � [0.3]
- assertAnalyzesTo(analyzer, "\u000B\u0031\u003A",
- new String[] { "\u0031" });
-
- // � 000B � 0308 � 0031 � 003A � # � [0.2] <LINE TABULATION> (Newline) � [3.1] COMBINING DIAERESIS (Extend_FE) � [999.0] DIGIT ONE (Numeric) � [999.0] COLON (MidLetter) � [0.3]
- assertAnalyzesTo(analyzer, "\u000B\u0308\u0031\u003A",
- new String[] { "\u0031" });
-
- // � 000B � 0031 � 0027 � # � [0.2] <LINE TABULATION> (Newline) � [3.1] DIGIT ONE (Numeric) � [999.0] APOSTROPHE (Single_Quote) � [0.3]
- assertAnalyzesTo(analyzer, "\u000B\u0031\u0027",
- new String[] { "\u0031" });
-
- // � 000B � 0308 � 0031 � 0027 � # � [0.2] <LINE TABULATION> (Newline) � [3.1] COMBINING DIAERESIS (Extend_FE) � [999.0] DIGIT ONE (Numeric) � [999.0] APOSTROPHE (Single_Quote) � [0.3]
- assertAnalyzesTo(analyzer, "\u000B\u0308\u0031\u0027",
- new String[] { "\u0031" });
-
- // � 000B � 0031 � 002C � # � [0.2] <LINE TABULATION> (Newline) � [3.1] DIGIT ONE (Numeric) � [999.0] COMMA (MidNum) � [0.3]
- assertAnalyzesTo(analyzer, "\u000B\u0031\u002C",
- new String[] { "\u0031" });
-
- // � 000B � 0308 � 0031 � 002C � # � [0.2] <LINE TABULATION> (Newline) � [3.1] COMBINING DIAERESIS (Extend_FE) � [999.0] DIGIT ONE (Numeric) � [999.0] COMMA (MidNum) � [0.3]
- assertAnalyzesTo(analyzer, "\u000B\u0308\u0031\u002C",
- new String[] { "\u0031" });
-
- // � 000B � 0031 � 002E � 2060 � # � [0.2] <LINE TABULATION> (Newline) � [3.1] DIGIT ONE (Numeric) � [999.0] FULL STOP (MidNumLet) � [4.0] WORD JOINER (Format_FE) � [0.3]
- assertAnalyzesTo(analyzer, "\u000B\u0031\u002E\u2060",
- new String[] { "\u0031" });
-
- // � 000B � 0308 � 0031 � 002E � 2060 � # � [0.2] <LINE TABULATION> (Newline) � [3.1] COMBINING DIAERESIS (Extend_FE) � [999.0] DIGIT ONE (Numeric) � [999.0] FULL STOP (MidNumLet) � [4.0] WORD JOINER (Format_FE) � [0.3]
- assertAnalyzesTo(analyzer, "\u000B\u0308\u0031\u002E\u2060",
- new String[] { "\u0031" });
-
- // � 3031 � 0001 � # � [0.2] VERTICAL KANA REPEAT MARK (Katakana) � [999.0] <START OF HEADING> (Other) � [0.3]
- assertAnalyzesTo(analyzer, "\u3031\u0001",
- new String[] { "\u3031" });
-
- // � 3031 � 0308 � 0001 � # � [0.2] VERTICAL KANA REPEAT MARK (Katakana) � [4.0] COMBINING DIAERESIS (Extend_FE) � [999.0] <START OF HEADING> (Other) � [0.3]
- assertAnalyzesTo(analyzer, "\u3031\u0308\u0001",
- new String[] { "\u3031\u0308" });
-
- // � 3031 � 000D � # � [0.2] VERTICAL KANA REPEAT MARK (Katakana) � [3.2] <CARRIAGE RETURN (CR)> (CR) � [0.3]
- assertAnalyzesTo(analyzer, "\u3031\r",
- new String[] { "\u3031" });
-
- // � 3031 � 0308 � 000D � # � [0.2] VERTICAL KANA REPEAT MARK (Katakana) � [4.0] COMBINING DIAERESIS (Extend_FE) � [3.2] <CARRIAGE RETURN (CR)> (CR) � [0.3]
- assertAnalyzesTo(analyzer, "\u3031\u0308\r",
- new String[] { "\u3031\u0308" });
-
- // � 3031 � 000A � # � [0.2] VERTICAL KANA REPEAT MARK (Katakana) � [3.2] <LINE FEED (LF)> (LF) � [0.3]
- assertAnalyzesTo(analyzer, "\u3031\n",
- new String[] { "\u3031" });
-
- // � 3031 � 0308 � 000A � # � [0.2] VERTICAL KANA REPEAT MARK (Katakana) � [4.0] COMBINING DIAERESIS (Extend_FE) � [3.2] <LINE FEED (LF)> (LF) � [0.3]
- assertAnalyzesTo(analyzer, "\u3031\u0308\n",
- new String[] { "\u3031\u0308" });
-
- // � 3031 � 000B � # � [0.2] VERTICAL KANA REPEAT MARK (Katakana) � [3.2] <LINE TABULATION> (Newline) � [0.3]
- assertAnalyzesTo(analyzer, "\u3031\u000B",
- new String[] { "\u3031" });
-
- // � 3031 � 0308 � 000B � # � [0.2] VERTICAL KANA REPEAT MARK (Katakana) � [4.0] COMBINING DIAERESIS (Extend_FE) � [3.2] <LINE TABULATION> (Newline) � [0.3]
- assertAnalyzesTo(analyzer, "\u3031\u0308\u000B",
- new String[] { "\u3031\u0308" });
-
- // ÷ 3031 × 3031 ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [13.0] VERTICAL KANA REPEAT MARK (Katakana) ÷ [0.3]
- assertAnalyzesTo(analyzer, "\u3031\u3031",
- new String[] { "\u3031\u3031" });
-
- // � 3031 � 0308 � 3031 � # � [0.2] VERTICAL KANA REPEAT MARK (Katakana) � [4.0] COMBINING DIAERESIS (Extend_FE) � [13.0] VERTICAL KANA REPEAT MARK (Katakana) � [0.3]
- assertAnalyzesTo(analyzer, "\u3031\u0308\u3031",
- new String[] { "\u3031\u0308\u3031" });
-
- // � 3031 � 0041 � # � [0.2] VERTICAL KANA REPEAT MARK (Katakana) � [999.0] LATIN CAPITAL LETTER A (ALetter) � [0.3]
- assertAnalyzesTo(analyzer, "\u3031\u0041",
- new String[] { "\u3031", "\u0041" });
-
- // � 3031 � 0308 � 0041 � # � [0.2] VERTICAL KANA REPEAT MARK (Katakana) � [4.0] COMBINING DIAERESIS (Extend_FE) � [999.0] LATIN CAPITAL LETTER A (ALetter) � [0.3]
- assertAnalyzesTo(analyzer, "\u3031\u0308\u0041",
- new String[] { "\u3031\u0308", "\u0041" });
-
- // � 3031 � 003A � # � [0.2] VERTICAL KANA REPEAT MARK (Katakana) � [999.0] COLON (MidLetter) � [0.3]
- assertAnalyzesTo(analyzer, "\u3031\u003A",
- new String[] { "\u3031" });
-
- // � 3031 � 0308 � 003A � # � [0.2] VERTICAL KANA REPEAT MARK (Katakana) � [4.0] COMBINING DIAERESIS (Extend_FE) � [999.0] COLON (MidLetter) � [0.3]
- assertAnalyzesTo(analyzer, "\u3031\u0308\u003A",
- new String[] { "\u3031\u0308" });
-
- // � 3031 � 002C � # � [0.2] VERTICAL KANA REPEAT MARK (Katakana) � [999.0] COMMA (MidNum) � [0.3]
- assertAnalyzesTo(analyzer, "\u3031\u002C",
- new String[] { "\u3031" });
-
- // � 3031 � 0308 � 002C � # � [0.2] VERTICAL KANA REPEAT MARK (Katakana) � [4.0] COMBINING DIAERESIS (Extend_FE) � [999.0] COMMA (MidNum) � [0.3]
- assertAnalyzesTo(analyzer, "\u3031\u0308\u002C",
- new String[] { "\u3031\u0308" });
-
- // � 3031 � 002E � # � [0.2] VERTICAL KANA REPEAT MARK (Katakana) � [999.0] FULL STOP (MidNumLet) � [0.3]
- assertAnalyzesTo(analyzer, "\u3031\u002E",
- new String[] { "\u3031" });
-
- // � 3031 � 0308 � 002E � # � [0.2] VERTICAL KANA REPEAT MARK (Katakana) � [4.0] COMBINING DIAERESIS (Extend_FE) � [999.0] FULL STOP (MidNumLet) � [0.3]
- assertAnalyzesTo(analyzer, "\u3031\u0308\u002E",
- new String[] { "\u3031\u0308" });
-
- // � 3031 � 0030 � # � [0.2] VERTICAL KANA REPEAT MARK (Katakana) � [999.0] DIGIT ZERO (Numeric) � [0.3]
- assertAnalyzesTo(analyzer, "\u3031\u0030",
- new String[] { "\u3031", "\u0030" });
-
- // � 3031 � 0308 � 0030 � # � [0.2] VERTICAL KANA REPEAT MARK (Katakana) � [4.0] COMBINING DIAERESIS (Extend_FE) � [999.0] DIGIT ZERO (Numeric) � [0.3]
- assertAnalyzesTo(analyzer, "\u3031\u0308\u0030",
- new String[] { "\u3031\u0308", "\u0030" });
-
- // ÷ 3031 × 005F ÷ # ÷ [0.2] VERTICAL KANA REPEAT MARK (Katakana) × [13.1] LOW LINE (ExtendNumLet) ÷ [0.3]
- assertAnalyzesTo(analyzer, "\u3031\u005F",
- new String[] { "\u3031\u005F" });
-
- // � 3031 � 0308 � 005F � # � [0.2] VERTICAL KANA REPEAT MARK (Katakana) � [4.0] COMBINING DIAERESIS (Extend_FE) � [13.1] LOW LINE (ExtendNumLet) � [0.3]
- assertAnalyzesTo(analyzer, "\u3031\u0308\u005F",
- new String[] { "\u3031\u0308\u005F" });
-
- // � 3031 � 1F1E6 � # � [0.2] VERTICAL KANA REPEAT MARK (Katakana) � [999.0] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) � [0.3]
- assertAnalyzesTo(analyzer, "\u3031\uD83C\uDDE6",
- new String[] { "\u3031" });
-
- // � 3031 � 0308 � 1F1E6 � # � [0.2] VERTICAL KANA REPEAT MARK (Katakana) � [4.0] COMBINING DIAERESIS (Extend_FE) � [999.0] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) � [0.3]
- assertAnalyzesTo(analyzer, "\u3031\u0308\uD83C\uDDE6",
- new String[] { "\u3031\u0308" });
-
- // � 3031 � 05D0 � # � [0.2] VERTICAL KANA REPEAT MARK (Katakana) � [999.0] HEBREW LETTER ALEF (Hebrew_Letter) � [0.3]
- assertAnalyzesTo(analyzer, "\u3031\u05D0",
- new String[] { "\u3031", "\u05D0" });
-
- // � 3031 � 0308 � 05D0 � # � [0.2] VERTICAL KANA REPEAT MARK (Katakana) � [4.0] COMBINING DIAERESIS (Extend_FE) � [999.0] HEBREW LETTER ALEF (Hebrew_Letter) � [0.3]
- assertAnalyzesTo(analyzer, "\u3031\u0308\u05D0",
- new String[] { "\u3031\u0308", "\u05D0" });
-
- // � 3031 � 0022 � # � [0.2] VERTICAL KANA REPEAT MARK (Katakana) � [999.0] QUOTATION MARK (Double_Quote) � [0.3]
- assertAnalyzesTo(analyzer, "\u3031\"",
- new String[] { "\u3031" });
-
- // � 3031 � 0308 � 0022 � # � [0.2] VERTICAL KANA REPEAT MARK (Katakana) � [4.0] COMBINING DIAERESIS (Extend_FE) � [999.0] QUOTATION MARK (Double_Quote) � [0.3]
- assertAnalyzesTo(analyzer, "\u3031\u0308\"",
- new String[] { "\u3031\u0308" });
-
- // � 3031 � 0027 � # � [0.2] VERTICAL KANA REPEAT MARK (Katakana) � [999.0] APOSTROPHE (Single_Quote) � [0.3]
- assertAnalyzesTo(analyzer, "\u3031\u0027",
- new String[] { "\u3031" });
-
- // � 3031 � 0308 � 0027 � # � [0.2] VERTICAL KANA REPEAT MARK (Katakana) � [4.0] COMBINING DIAERESIS (Extend_FE) � [999.0] APOSTROPHE (Single_Quote) � [0.3]
- assertAnalyzesTo(analyzer, "\u3031\u0308\u0027",
- new String[] { "\u3031\u0308" });
-
- // � 3031 � 00AD � # � [0.2] VERTICAL KANA REPEAT MARK (Katakana) � [4.0] SOFT HYPHEN (Format_FE) � [0.3]
- assertAnalyzesTo(analyzer, "\u3031\u00AD",
- new String[] { "\u3031\u00AD" });
-
- // � 3031 � 0308 � 00AD � # � [0.2] VERTICAL KANA REPEAT MARK (Katakana) � [4.0] COMBINING DIAERESIS (Extend_FE) � [4.0] SOFT HYPHEN (Format_FE) � [0.3]
- assertAnalyzesTo(analyzer, "\u3031\u0308\u00AD",
- new String[] { "\u3031\u0308\u00AD" });
-
- // � 3031 � 0300 � # � [0.2] VERTICAL KANA REPEAT MARK (Katakana) � [4.0] COMBINING GRAVE ACCENT (Extend_FE) � [0.3]
- assertAnalyzesTo(analyzer, "\u3031\u0300",
- new String[] { "\u3031\u0300" });
-
- // � 3031 � 0308 � 0300 � # � [0.2] VERTICAL KANA REPEAT MARK (Katakana) � [4.0] COMBINING DIAERESIS (Extend_FE) � [4.0] COMBINING GRAVE ACCENT (Extend_FE) � [0.3]
- assertAnalyzesTo(analyzer, "\u3031\u0308\u0300",
- new String[] { "\u3031\u0308\u0300" });
-
- // � 3031 � 0061 � 2060 � # � [0.2] VERTICAL KANA REPEAT MARK (Katakana) � [999.0] LATIN SMALL LETTER A (ALetter) � [4.0] WORD JOINER (Format_FE) � [0.3]
- assertAnalyzesTo(analyzer, "\u3031\u0061\u2060",
- new String[] { "\u3031", "\u0061\u2060" });
-
- // � 3031 � 0308 � 0061 � 2060 � # � [0.2] VERTICAL KANA REPEAT MARK (Katakana) � [4.0] COMBINING DIAERESIS (Extend_FE) � [999.0] LATIN SMALL LETTER A (ALetter) � [4.0] WORD JOINER (Format_FE) � [0.3]
- assertAnalyzesTo(analyzer, "\u3031\u0308\u0061\u2060",
- new String[] { "\u3031\u0308", "\u0061\u2060" });
-
- // � 3031 � 0061 � 003A � # � [0.2] VERTICAL KANA REPEAT MARK (Katakana) � [999.0] LATIN SMALL LETTER A (ALetter) � [999.0] COLON (MidLetter) � [0.3]
- assertAnalyzesTo(analyzer, "\u3031\u0061\u003A",
- new String[] { "\u3031", "\u0061" });
-
- // � 3031 � 0308 � 0061 � 003A � # � [0.2] VERTICAL KANA REPEAT MARK (Katakana) � [4.0] COMBINING DIAERESIS (Extend_FE) � [999.0] LATIN SMALL LETTER A (ALetter) � [999.0] COLON (MidLetter) � [0.3]
- assertAnalyzesTo(analyzer, "\u3031\u0308\u0061\u003A",
- new String[] { "\u3031\u0308", "\u0061" });
-
- // � 3031 � 0061 � 0027 � # � [0.2] VERTICAL KANA REPEAT MARK (Katakana) � [999.0] LATIN SMALL LETTER A (ALetter) � [999.0] APOSTROPHE (Single_Quote) � [0.3]
- assertAnalyzesTo(analyzer, "\u3031\u0061\u0027",
- new String[] { "\u3031", "\u0061" });
-
- // � 3031 � 0308 � 0061 � 0027 � # � [0.2] VERTICAL KANA REPEAT MARK (Katakana) � [4.0] COMBINING DIAERESIS (Extend_FE) � [999.0] LATIN SMALL LETTER A (ALetter) � [999.0] APOSTROPHE (Single_Quote) � [0.3]
- assertAnalyzesTo(analyzer, "\u3031\u0308\u0061\u0027",
- new String[] { "\u3031\u0308", "\u0061" });
-
- // � 3031 � 0061 � 0027 � 2060 � # � [0.2] VERTICAL KANA REPEAT MARK (Katakana) � [999.0] LATIN SMALL LETTER A (ALetter) � [999.0] APOSTROPHE (Single_Quote) � [4.0] WORD JOINER (Format_FE) � [0.3]
- assertAnalyzesTo(analyzer, "\u3031\u0061\u0027\u2060",
- new String[] { "\u3031", "\u0061" });
-
- // � 3031 � 0308 � 0061 � 0027 � 2060 � # � [0.2] VERTICAL KANA REPEAT MARK (Katakana) � [4.0] COMBINING DIAERESIS (Extend_FE) � [999.0] LATIN SMALL LETTER A (ALetter) � [999.0] APOSTROPHE (Single_Quote) � [4.0] WORD JOINER (Format_FE) � [0.3]
- assertAnalyzesTo(analyzer, "\u3031\u0308\u0061\u0027\u2060",
- new String[] { "\u3031\u0308", "\u0061" });
-
- // � 3031 � 0061 � 002C � # � [0.2] VERTICAL KANA REPEAT MARK (Katakana) � [999.0] LATIN SMALL LETTER A (ALetter) � [999.0] COMMA (MidNum) � [0.3]
- assertAnalyzesTo(analyzer, "\u3031\u0061\u002C",
- new String[] { "\u3031", "\u0061" });
-
- // � 3031 � 0308 � 0061 � 002C � # � [0.2] VERTICAL KANA REPEAT MARK (Katakana) � [4.0] COMBINING DIAERESIS (Extend_FE) � [999.0] LATIN SMALL LETTER A (ALetter) � [999.0] COMMA (MidNum) � [0.3]
- assertAnalyzesTo(analyzer, "\u3031\u0308\u0061\u002C",
- new String[] { "\u3031\u0308", "\u0061" });
-
- // � 3031 � 0031 � 003A � # � [0.2] VERTICAL KANA REPEAT MARK (Katakana) � [999.0] DIGIT ONE (Numeric) � [999.0] COLON (MidLetter) � [0.3]
- assertAnalyzesTo(analyzer, "\u3031\u0031\u003A",
- new String[] { "\u3031", "\u0031" });
-
- // � 3031 � 0308 � 0031 � 003A � # � [0.2] VERTICAL KANA REPEAT MARK (Katakana) � [4.0] COMBINING DIAERESIS (Extend_FE) � [999.0] DIGIT ONE (Numeric) � [999.0] COLON (MidLetter) � [0.3]
- assertAnalyzesTo(analyzer, "\u3031\u0308\u0031\u003A",
- new String[] { "\u3031\u0308", "\u0031" });
-
- // � 3031 � 0031 � 0027 � # � [0.2] VERTICAL KANA REPEAT MARK (Katakana) � [999.0] DIGIT ONE (Numeric) � [999.0] APOSTROPHE (Single_Quote) � [0.3]
- assertAnalyzesTo(analyzer, "\u3031\u0031\u0027",
- new String[] { "\u3031", "\u0031" });
-
- // � 3031 � 0308 � 0031 � 0027 � # � [0.2] VERTICAL KANA REPEAT MARK (Katakana) � [4.0] COMBINING DIAERESIS (Extend_FE) � [999.0] DIGIT ONE (Numeric) � [999.0] APOSTROPHE (Single_Quote) � [0.3]
- assertAnalyzesTo(analyzer, "\u3031\u0308\u0031\u0027",
- new String[] { "\u3031\u0308", "\u0031" });
-
- // � 3031 � 0031 � 002C � # � [0.2] VERTICAL KANA REPEAT MARK (Katakana) � [999.0] DIGIT ONE (Numeric) � [999.0] COMMA (MidNum) � [0.3]
- assertAnalyzesTo(analyzer, "\u3031\u0031\u002C",
- new String[] { "\u3031", "\u0031" });
-
- // � 3031 � 0308 � 0031 � 002C � # � [0.2] VERTICAL KANA REPEAT MARK (Katakana) � [4.0] COMBINING DIAERESIS (Extend_FE) � [999.0] DIGIT ONE (Numeric) � [999.0] COMMA (MidNum) � [0.3]
- assertAnalyzesTo(analyzer, "\u3031\u0308\u0031\u002C",
- new String[] { "\u3031\u0308", "\u0031" });
-
- // � 3031 � 0031 � 002E � 2060 � # � [0.2] VERTICAL KANA REPEAT MARK (Katakana) � [999.0] DIGIT ONE (Numeric) � [999.0] FULL STOP (MidNumLet) � [4.0] WORD JOINER (Format_FE) � [0.3]
- assertAnalyzesTo(analyzer, "\u3031\u0031\u002E\u2060",
- new String[] { "\u3031", "\u0031" });
-
- // � 3031 � 0308 � 0031 � 002E � 2060 � # � [0.2] VERTICAL KANA REPEAT MARK (Katakana) � [4.0] COMBINING DIAERESIS (Extend_FE) � [999.0] DIGIT ONE (Numeric) � [999.0] FULL STOP (MidNumLet) � [4.0] WORD JOINER (Format_FE) � [0.3]
- assertAnalyzesTo(analyzer, "\u3031\u0308\u0031\u002E\u2060",
- new String[] { "\u3031\u0308", "\u0031" });
-
- // � 0041 � 0001 � # � [0.2] LATIN CAPITAL LETTER A (ALetter) � [999.0] <START OF HEADING> (Other) � [0.3]
- assertAnalyzesTo(analyzer, "\u0041\u0001",
- new String[] { "\u0041" });
-
- // � 0041 � 0308 � 0001 � # � [0.2] LATIN CAPITAL LETTER A (ALetter) � [4.0] COMBINING DIAERESIS (Extend_FE) � [999.0] <START OF HEADING> (Other) � [0.3]
- assertAnalyzesTo(analyzer, "\u0041\u0308\u0001",
- new String[] { "\u0041\u0308" });
-
- // � 0041 � 000D � # � [0.2] LATIN CAPITAL LETTER A (ALetter) � [3.2] <CARRIAGE RETURN (CR)> (CR) � [0.3]
- assertAnalyzesTo(analyzer, "\u0041\r",
- new String[] { "\u0041" });
-
- // � 0041 � 0308 � 000D � # � [0.2] LATIN CAPITAL LETTER A (ALetter) � [4.0] COMBINING DIAERESIS (Extend_FE) � [3.2] <CARRIAGE RETURN (CR)> (CR) � [0.3]
- assertAnalyzesTo(analyzer, "\u0041\u0308\r",
- new String[] { "\u0041\u0308" });
-
- // � 0041 � 000A � # � [0.2] LATIN CAPITAL LETTER A (ALetter) � [3.2] <LINE FEED (LF)> (LF) � [0.3]
- assertAnalyzesTo(analyzer, "\u0041\n",
- new String[] { "\u0041" });
-
- // � 0041 � 0308 � 000A � # � [0.2] LATIN CAPITAL LETTER A (ALetter) � [4.0] COMBINING DIAERESIS (Extend_FE) � [3.2] <LINE FEED (LF)> (LF) � [0.3]
- assertAnalyzesTo(analyzer, "\u0041\u0308\n",
- new String[] { "\u0041\u0308" });
-
- // � 0041 � 000B � # � [0.2] LATIN CAPITAL LETTER A (ALetter) � [3.2] <LINE TABULATION> (Newline) � [0.3]
- assertAnalyzesTo(analyzer, "\u0041\u000B",
- new String[] { "\u0041" });
-
- // � 0041 � 0308 � 000B � # � [0.2] LATIN CAPITAL LETTER A (ALetter) � [4.0] COMBINING DIAERESIS (Extend_FE) � [3.2] <LINE TABULATION> (Newline) � [0.3]
- assertAnalyzesTo(analyzer, "\u0041\u0308\u000B",
- new String[] { "\u0041\u0308" });
-
- // � 0041 � 3031 � # � [0.2] LATIN CAPITAL LETTER A (ALetter) � [999.0] VERTICAL KANA REPEAT MARK (Katakana) � [0.3]
- assertAnalyzesTo(analyzer, "\u0041\u3031",
- new String[] { "\u0041", "\u3031" });
-
- // � 0041 � 0308 � 3031 � # � [0.2] LATIN CAPITAL LETTER A (ALetter) � [4.0] COMBINING DIAERESIS (Extend_FE) � [999.0] VERTICAL KANA REPEAT MARK (Katakana) � [0.3]
- assertAnalyzesTo(analyzer, "\u0041\u0308\u3031",
- new String[] { "\u0041\u0308", "\u3031" });
-
- // ÷ 0041 × 0041 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [5.0] LATIN CAPITAL LETTER A (ALetter) ÷ [0.3]
- assertAnalyzesTo(analyzer, "\u0041\u0041",
- new String[] { "\u0041\u0041" });
-
- // � 0041 � 0308 � 0041 � # � [0.2] LATIN CAPITAL LETTER A (ALetter) � [4.0] COMBINING DIAERESIS (Extend_FE) � [5.0] LATIN CAPITAL LETTER A (ALetter) � [0.3]
- assertAnalyzesTo(analyzer, "\u0041\u0308\u0041",
- new String[] { "\u0041\u0308\u0041" });
-
- // � 0041 � 003A � # � [0.2] LATIN CAPITAL LETTER A (ALetter) � [999.0] COLON (MidLetter) � [0.3]
- assertAnalyzesTo(analyzer, "\u0041\u003A",
- new String[] { "\u0041" });
-
- // � 0041 � 0308 � 003A � # � [0.2] LATIN CAPITAL LETTER A (ALetter) � [4.0] COMBINING DIAERESIS (Extend_FE) � [999.0] COLON (MidLetter) � [0.3]
- assertAnalyzesTo(analyzer, "\u0041\u0308\u003A",
- new String[] { "\u0041\u0308" });
-
- // � 0041 � 002C � # � [0.2] LATIN CAPITAL LETTER A (ALetter) � [999.0] COMMA (MidNum) � [0.3]
- assertAnalyzesTo(analyzer, "\u0041\u002C",
- new String[] { "\u0041" });
-
- // � 0041 � 0308 � 002C � # � [0.2] LATIN CAPITAL LETTER A (ALetter) � [4.0] COMBINING DIAERESIS (Extend_FE) � [999.0] COMMA (MidNum) � [0.3]
- assertAnalyzesTo(analyzer, "\u0041\u0308\u002C",
- new String[] { "\u0041\u0308" });
-
- // � 0041 � 002E � # � [0.2] LATIN CAPITAL LETTER A (ALetter) � [999.0] FULL STOP (MidNumLet) � [0.3]
- assertAnalyzesTo(analyzer, "\u0041\u002E",
- new String[] { "\u0041" });
-
- // � 0041 � 0308 � 002E � # � [0.2] LATIN CAPITAL LETTER A (ALetter) � [4.0] COMBINING DIAERESIS (Extend_FE) � [999.0] FULL STOP (MidNumLet) � [0.3]
- assertAnalyzesTo(analyzer, "\u0041\u0308\u002E",
- new String[] { "\u0041\u0308" });
-
- // ÷ 0041 × 0030 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [9.0] DIGIT ZERO (Numeric) ÷ [0.3]
- assertAnalyzesTo(analyzer, "\u0041\u0030",
- new String[] { "\u0041\u0030" });
-
- // � 0041 � 0308 � 0030 � # � [0.2] LATIN CAPITAL LETTER A (ALetter) � [4.0] COMBINING DIAERESIS (Extend_FE) � [9.0] DIGIT ZERO (Numeric) � [0.3]
- assertAnalyzesTo(analyzer, "\u0041\u0308\u0030",
- new String[] { "\u0041\u0308\u0030" });
-
- // � 0041 � 005F � # � [0.2] LATIN CAPITAL LETTER A (ALetter) � [13.1] LOW LINE (ExtendNumLet) � [0.3]
- assertAnalyzesTo(analyzer, "\u0041\u005F",
- new String[] { "\u0041\u005F" });
-
- // � 0041 � 0308 � 005F � # � [0.2] LATIN CAPITAL LETTER A (ALetter) � [4.0] COMBINING DIAERESIS (Extend_FE) � [13.1] LOW LINE (ExtendNumLet) � [0.3]
- assertAnalyzesTo(analyzer, "\u0041\u0308\u005F",
- new String[] { "\u0041\u0308\u005F" });
-
- // � 0041 � 1F1E6 � # � [0.2] LATIN CAPITAL LETTER A (ALetter) � [999.0] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) � [0.3]
- assertAnalyzesTo(analyzer, "\u0041\uD83C\uDDE6",
- new String[] { "\u0041" });
-
- // � 0041 � 0308 � 1F1E6 � # � [0.2] LATIN CAPITAL LETTER A (ALetter) � [4.0] COMBINING DIAERESIS (Extend_FE) � [999.0] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) � [0.3]
- assertAnalyzesTo(analyzer, "\u0041\u0308\uD83C\uDDE6",
- new String[] { "\u0041\u0308" });
-
- // ÷ 0041 × 05D0 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [5.0] HEBREW LETTER ALEF (Hebrew_Letter) ÷ [0.3]
- assertAnalyzesTo(analyzer, "\u0041\u05D0",
- new String[] { "\u0041\u05D0" });
-
- // � 0041 � 0308 � 05D0 � # � [0.2] LATIN CAPITAL LETTER A (ALetter) � [4.0] COMBINING DIAERESIS (Extend_FE) � [5.0] HEBREW LETTER ALEF (Hebrew_Letter) � [0.3]
- assertAnalyzesTo(analyzer, "\u0041\u0308\u05D0",
- new String[] { "\u0041\u0308\u05D0" });
-
- // � 0041 � 0022 � # � [0.2] LATIN CAPITAL LETTER A (ALetter) � [999.0] QUOTATION MARK (Double_Quote) � [0.3]
- assertAnalyzesTo(analyzer, "\u0041\"",
- new String[] { "\u0041" });
-
- // � 0041 � 0308 � 0022 � # � [0.2] LATIN CAPITAL LETTER A (ALetter) � [4.0] COMBINING DIAERESIS (Extend_FE) � [999.0] QUOTATION MARK (Double_Quote) � [0.3]
- assertAnalyzesTo(analyzer, "\u0041\u0308\"",
- new String[] { "\u0041\u0308" });
-
- // � 0041 � 0027 � # � [0.2] LATIN CAPITAL LETTER A (ALetter) � [999.0] APOSTROPHE (Single_Quote) � [0.3]
- assertAnalyzesTo(analyzer, "\u0041\u0027",
- new String[] { "\u0041" });
-
- // � 0041 � 0308 � 0027 � # � [0.2] LATIN CAPITAL LETTER A (ALetter) � [4.0] COMBINING DIAERESIS (Extend_FE) � [999.0] APOSTROPHE (Single_Quote) � [0.3]
- assertAnalyzesTo(analyzer, "\u0041\u0308\u0027",
- new String[] { "\u0041\u0308" });
-
- // � 0041 � 00AD � # � [0.2] LATIN CAPITAL LETTER A (ALetter) � [4.0] SOFT HYPHEN (Format_FE) � [0.3]
- assertAnalyzesTo(analyzer, "\u0041\u00AD",
- new String[] { "\u0041\u00AD" });
-
- // � 0041 � 0308 � 00AD � # � [0.2] LATIN CAPITAL LETTER A (ALetter) � [4.0] COMBINING DIAERESIS (Extend_FE) � [4.0] SOFT HYPHEN (Format_FE) � [0.3]
- assertAnalyzesTo(analyzer, "\u0041\u0308\u00AD",
- new String[] { "\u0041\u0308\u00AD" });
-
- // � 0041 � 0300 � # � [0.2] LATIN CAPITAL LETTER A (ALetter) � [4.0] COMBINING GRAVE ACCENT (Extend_FE) � [0.3]
- assertAnalyzesTo(analyzer, "\u0041\u0300",
- new String[] { "\u0041\u0300" });
-
- // � 0041 � 0308 � 0300 � # � [0.2] LATIN CAPITAL LETTER A (ALetter) � [4.0] COMBINING DIAERESIS (Extend_FE) � [4.0] COMBINING GRAVE ACCENT (Extend_FE) � [0.3]
- assertAnalyzesTo(analyzer, "\u0041\u0308\u0300",
- new String[] { "\u0041\u0308\u0300" });
-
- // ÷ 0041 × 0061 × 2060 ÷ # ÷ [0.2] LATIN CAPITAL LETTER A (ALetter) × [5.0] LATIN SMALL LETTER A (ALetter) × [4.0] WORD JOINER (Format_FE) ÷ [0.3]
- assertAnalyzesTo(analyzer, "\u0041\u0061\u2060",
- new String[] { "\u0041\u0061\u2060" });
-
- // � 0041 � 0308 � 0061 � 2060 � # � [0.2] LATIN CAPITAL LETTER A (ALetter) � [4.0] COMBINING DIAERESIS (Extend_FE) � [5.0] LATIN SMALL LETTER A (ALetter) � [4.0] WORD JOINER (Format_FE) � [0.3]
- assertAnalyzesTo(analyzer, "\u0041\u0308\u0061\u2060",
- new String[] { "\u0041\u0308\u0061\u2060" });
-
- // � 0041 � 0061 � 003A � # � [0.2] LATIN CAPITAL LETTER A (ALetter) � [5.0] LATIN SMALL LETTER A (ALetter) � [999.0] COLON (MidLetter) � [0.3]
- assertAnalyzesTo(analyzer, "\u0041\u0061\u003A",
- new String[] { "\u0041\u0061" });
-
- // � 0041 � 0308 � 0061 � 003A � # � [0.2] LATIN CAPITAL LETTER A (ALetter) � [4.0] COMBINING DIAERESIS (Extend_FE) � [5.0] LATIN SMALL LETTER A (ALetter) � [999.0] COLON (MidLetter) � [0.3]
- assertAnalyzesTo(analyzer, "\u0041\u0308\u0061\u003A",
- new String[] { "\u0041\u0308\u0061" });
-
- // � 0041 � 0061 � 0027 � # � [0.2] LATIN CAPITAL LETTER A (ALetter) � [5.0] LATIN SMALL LETTER A (ALetter) � [999.0] APOSTROPHE (Single_Quote) � [0.3]
- assertAnalyzesTo(analyzer, "\u0041\u0061\u0027",
- new String[] { "\u0041\u0061" });
-
- // � 0041 � 0308 � 0061 � 0027 � # � [0.2] LATIN CAPITAL LETTER A (ALetter) � [4.0] COMBINING DIAERESIS (Extend_FE) � [5.0] LATIN SMALL LETTER A (ALetter) � [999.0] APOSTROPHE (Single_Quote) � [0.3]
- assertAnalyzesTo(analyzer, "\u0041\u0308\u0061\u0027",
- new String[] { "\u0041\u0308\u0061" });
-
- // � 0041 � 0061 � 0027 � 2060 � # � [0.2] LATIN CAPITAL LETTER A (ALetter) � [5.0] LATIN SMALL LETTER A (ALetter) � [999.0] APOSTROPHE (Single_Quote) � [4.0] WORD JOINER (Format_FE) � [0.3]
- assertAnalyzesTo(analyzer, "\u0041\u0061\u0027\u2060",
- new String[] { "\u0041\u0061" });
-
- // � 0041 � 0308 � 0061 � 0027 � 2060 � # � [0.2] LATIN CAPITAL LETTER A (ALetter) � [4.0] COMBINING DIAERESIS (Extend_FE) � [5.0] LATIN SMALL LETTER A (ALetter) � [999.0] APOSTROPHE (Single_Quote) � [4.0] WORD JOINER (Format_FE) � [0.3]
- assertAnalyzesTo(analyzer, "\u0041\u0308\u0061\u0027\u2060",
- new String[] { "\u0041\u0308\u0061" });
-
- // � 0041 � 0061 � 002C � # � [0.2] LATIN CAPITAL LETTER A (ALetter) � [5.0] LATIN SMALL LETTER A (ALetter) � [999.0] COMMA (MidNum) � [0.3]
- assertAnalyzesTo(analyzer, "\u0041\u0061\u002C",
- new String[] { "\u0041\u0061" });
-
- // � 0041 � 0308 � 0061 � 002C � # � [0.2] LATIN CAPITAL LETTER A (ALetter) � [4.0] COMBINING DIAERESIS (Extend_FE) � [5.0] LATIN SMALL LETTER A (ALetter) � [999.0] COMMA (MidNum) � [0.3]
- assertAnalyzesTo(analyzer, "\u0041\u0308\u0061\u002C",
- new String[] { "\u0041\u0308\u0061" });
-
- // � 0041 � 0031 � 003A � # � [0.2] LATIN CAPITAL LETTER A (ALetter) � [9.0] DIGIT ONE (Numeric) � [999.0] COLON (MidLetter) � [0.3]
- assertAnalyzesTo(analyzer, "\u0041\u0031\u003A",
- new String[] { "\u0041\u0031" });
-
- // � 0041 � 0308 � 0031 � 003A � # � [0.2] LATIN CAPITAL LETTER A (ALetter) � [4.0] COMBINING DIAERESIS (Extend_FE) � [9.0] DIGIT ONE (Numeric) � [999.0] COLON (MidLetter) � [0.3]
- assertAnalyzesTo(analyzer, "\u0041\u0308\u0031\u003A",
- new String[] { "\u0041\u0308\u0031" });
-
- // � 0041 � 0031 � 0027 � # � [0.2] LATIN CAPITAL LETTER A (ALetter) � [9.0] DIGIT ONE (Numeric) � [999.0] APOSTROPHE (Single_Quote) � [0.3]
- assertAnalyzesTo(analyzer, "\u0041\u0031\u0027",
- new String[] { "\u0041\u0031" });
-
- // � 0041 � 0308 � 0031 � 0027 � # � [0.2] LATIN CAPITAL LETTER A (ALetter) � [4.0] COMBINING DIAERESIS (Extend_FE) � [9.0] DIGIT ONE (Numeric) � [999.0] APOSTROPHE (Single_Quote) � [0.3]
- assertAnalyzesTo(analyzer, "\u0041\u0308\u0031\u0027",
- new String[] { "\u0041\u0308\u0031" });
-
- // � 0041 � 0031 � 002C � # � [0.2] LATIN CAPITAL LETTER A (ALetter) � [9.0] DIGIT ONE (Numeric) � [999.0] COMMA (MidNum) � [0.3]
- assertAnalyzesTo(analyzer, "\u0041\u0031\u002C",
- new String[] { "\u0041\u0031" });
-
- // � 0041 � 0308 � 0031 � 002C � # � [0.2] LATIN CAPITAL LETTER A (ALetter) � [4.0] COMBINING DIAERESIS (Extend_FE) � [9.0] DIGIT ONE (Numeric) � [999.0] COMMA (MidNum) � [0.3]
- assertAnalyzesTo(analyzer, "\u0041\u0308\u0031\u002C",
- new String[] { "\u0041\u0308\u0031" });
-
- // � 0041 � 0031 � 002E � 2060 � # � [0.2] LATIN CAPITAL LETTER A (ALetter) � [9.0] DIGIT ONE (Numeric) � [999.0] FULL STOP (MidNumLet) � [4.0] WORD JOINER (Format_FE) � [0.3]
- assertAnalyzesTo(analyzer, "\u0041\u0031\u002E\u2060",
- new String[] { "\u0041\u0031" });
-
- // � 0041 � 0308 � 0031 � 002E � 2060 � # � [0.2] LATIN CAPITAL LETTER A (ALetter) � [4.0] COMBINING DIAERESIS (Extend_FE) � [9.0] DIGIT ONE (Numeric) � [999.0] FULL STOP (MidNumLet) � [4.0] WORD JOINER (Format_FE) � [0.3]
- assertAnalyzesTo(analyzer, "\u0041\u0308\u0031\u002E\u2060",
- new String[] { "\u0041\u0308\u0031" });
-
- // ÷ 003A ÷ 0001 ÷ # ÷ [0.2] COLON (MidLetter) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3]
- assertAnalyzesTo(analyzer, "\u003A\u0001",
- new String[] { });
-
- // ÷ 003A × 0308 ÷ 0001 ÷ # ÷ [0.2] COLON (MidLetter) × [4.0] COMBINING DIAERESIS (Extend_FE) ÷ [999.0] <START OF HEADING> (Other) ÷ [0.3]
- assertAnalyzesTo(analyzer, "\u003A\u0308\u0001",
- new String[] { });
-
- // � 003A � 000D � # � [0.2] COLON (MidLetter) � [3.2] <CARRIAGE RETURN (CR)> (CR) � [0.3]
- assertAnalyzesTo(analyzer, "\u003A\r",
- new String[] { });
-
- // � 003A � 0308 � 000D � # � [0.2] COLON (MidLetter) � [4.0] COMBINING DIAERESIS (Extend_FE) � [3.2] <CARRIAGE RETURN (CR)> (CR) � [0.3]
- assertAnalyzesTo(analyzer, "\u003A\u0308\r",
- new String[] { });
-
- // � 003A � 000A � # � [0.2] COLON (MidLetter) � [3.2] <LINE FEED (LF)> (LF) � [0.3]
- assertAnalyzesTo(analyzer, "\u003A\n",
- new String[] { });
-
- // � 003A � 0308 � 000A � # � [0.2] COLON (MidLetter) � [4.0] COMBINING DIAERESIS (Extend_FE) � [3.2] <LINE FEED (LF)> (LF) � [0.3]
- assertAnalyzesTo(analyzer, "\u003A\u0308\n",
- new String[] { });
-
- // � 003A � 000B � # � [0.2] COLON (MidLetter) � [3.2] <LINE TABULATION> (Newline) � [0.3]
- assertAnalyzesTo(analyzer, "\u003A\u000B",
- new String[] { });
-
- // � 003A � 0308 � 000B � # � [0.2] COLON (MidLetter) � [4.0] COMBINING DIAERESIS (Extend_FE) � [3.2] <LINE TABULATION> (Newline) � [0.3]
- assertAnalyzesTo(analyzer, "\u003A\u0308\u000B",
- new String[] { });
-
- // � 003A � 3031 � # � [0.2] COLON (MidLetter) � [999.0] VERTICAL KANA REPEAT MARK (Katakana) � [0.3]
- assertAnalyzesTo(analyzer, "\u003A\u3031",
- new String[] { "\u3031" });
-
- // � 003A � 0308 � 3031 � # � [0.2] COLON (MidLetter) � [4.0] COMBINING DIAERESIS (Extend_FE) � [999.0] VERTICAL KANA REPEAT MARK (Katakana) � [0.3]
- assertAnalyzesTo(analyzer, "\u003A\u0308\u3031",
- new String[] { "\u3031" });
-
- // � 003A � 0041 � # � [0.2] COLON (MidLetter) � [999.0] LATIN CAPITAL LETTER A (ALetter) � [0.3]
- assertAnalyzesTo(analyzer, "\u003A\u0041",
- new String[] { "\u0041" });
-
- // � 003A � 0308 � 0041 � # � [0.2] COLON (MidLetter) � [4.0] COMBINING DIAERESIS (Extend_FE) � [999.0] LATIN CAPITAL LETTER A (ALetter) � [0.3]
- assertAnalyzesTo(analyzer, "\u003A\u0308\u0041",
- new String[] { "\u0041" });
-
- // � 003A � 003A � # � [0.2] COLON (MidLetter) � [999.0] COLON (MidLetter) � [0.3]
- assertAnalyzesTo(analyzer, "\u003A\u003A",
- new String[] { });
-
- // � 003A � 0308 � 003A � # � [0.2] COLON (MidLetter) � [4.0] COMBINING DIAERESIS (Extend_FE) � [999.0] COLON (MidLetter) � [0.3]
- assertAnalyzesTo(analyzer, "\u003A\u0308\u003A",
- new String[] { });
-
- // � 003A � 002C � # � [0.2] COLON (MidLetter) � [999.0] COMMA (MidNum) � [0.3]
- assertAnalyzesTo(analyzer, "\u003A\u002C",
- new String[] { });
-
- // � 003A � 0308 � 002C � # � [0.2] COLON (MidLetter) � [4.0] COMBINING DIAERESIS (Extend_FE) � [999.0] COMMA (MidNum) � [0.3]
- assertAnalyzesTo(analyzer, "\u003A\u0308\u002C",
- new String[] { });
-
- // � 003A � 002E � # � [0.2] COLON (MidLetter) � [999.0] FULL STOP (MidNumLet) � [0.3]
- assertAnalyzesTo(analyzer, "\u003A\u002E",
- new String[] { });
-
- // � 003A � 0308 � 002E � # � [0.2] COLON (MidLetter) � [4.0] COMBINING DIAERESIS (Extend_FE) � [999.0] FULL STOP (MidNumLet) � [0.3]
- assertAnalyzesTo(analyzer, "\u003A\u0308\u002E",
- new String[] { });
-
- // � 003A � 0030 � # � [0.2] COLON (MidLetter) � [999.0] DIGIT ZERO (Numeric) � [0.3]
- assertAnalyzesTo(analyzer, "\u003A\u0030",
- new String[] { "\u0030" });
-
- // � 003A � 0308 � 0030 � # � [0.2] COLON (MidLetter) � [4.0] COMBINING DIAERESIS (Extend_FE) � [999.0] DIGIT ZERO (Numeric) � [0.3]
- assertAnalyzesTo(analyzer, "\u003A\u0308\u0030",
- new String[] { "\u0030" });
-
- // � 003A � 005F � # � [0.2] COLON (MidLetter) � [999.0] LOW LINE (ExtendNumLet) � [0.3]
- assertAnalyzesTo(analyzer, "\u003A\u005F",
- new String[] { });
-
- // � 003A � 0308 � 005F � # � [0.2] COLON (MidLetter) � [4.0] COMBINING DIAERESIS (Extend_FE) � [999.0] LOW LINE (ExtendNumLet) � [0.3]
- assertAnalyzesTo(analyzer, "\u003A\u0308\u005F",
- new String[] { });
-
- // � 003A � 1F1E6 � # � [0.2] COLON (MidLetter) � [999.0] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) � [0.3]
- assertAnalyzesTo(analyzer, "\u003A\uD83C\uDDE6",
- new String[] { });
-
- // � 003A � 0308 � 1F1E6 � # � [0.2] COLON (MidLetter) � [4.0] COMBINING DIAERESIS (Extend_FE) � [999.0] REGIONAL INDICATOR SYMBOL LETTER A (Regional_Indicator) � [0.3]
- assertAnalyzesTo(analyzer, "\u003A\u0308\uD83C\uDDE6",
- new String[] { });
-
- // � 003A � 05D0 � # � [0.2] COLON (MidLetter) � [999.0] HEBREW LETTER ALEF (Hebrew_Letter) � [0.3]
- assertAnalyzesTo(analyzer, "\u003A\u05D0",
- new String[] { "\u05D0" });
-
- // � 003A � 0308 � 05D0 � # � [0.2] COLON (MidLetter) � [4.0] COMBINING DIAERESIS (Extend_FE) � [999.0] HEBREW LETTER ALEF (Hebrew_Letter) � [0.3]
- assertAnalyzesTo(analyzer, "\u003A\u0308\u05D0",
- new String[] { "\u05D0" });
-
- // � 003A � 0022 � # � [0.2] COLON (MidLetter) � [999.0] QUOTATION MARK (Double_Quote) � [0.3]
- assertAnalyzesTo(analyzer, "\u003A\"",
- new String[] { });
-
- // � 003A � 0308 � 0022 � # � [0.2] COLON (MidLetter) � [4.0] COMBINING DIAERESIS (Extend_FE) � [999.0] QUOTATION MARK (Double_Quote) � [0.3]
- assertAnalyzesTo(analyzer, "\u003A\u0308\"",
- new String[] { });
-
- // � 003A � 0027 � # � [0.2] COLON (MidLetter) � [999.0] APOSTROPHE (Single_Quote) � [0.3]
- assertAnalyzesTo(analyzer, "\u003A\u0027",
- new String[] { });
-
- // � 003A � 0308 � 0027 � # � [0.2] COLON (MidLetter) � [4.0] COMBINING DIAERESIS (Extend_FE) � [999.0] APOSTROPHE (Single_Quote) � [0.3]
- assertAnalyzesTo(analyzer, "\u003A\u0308\u0027",
- new String[] { });
-
- // � 003A � 00AD � # � [0.2] COLON (MidLetter) � [4.0] SOFT HYPHEN (Format_FE) � [0.3]
- assertAnalyzesTo(analyzer, "\u003A\u00AD",
- new String[] { });
-
- // � 003A � 0308 � 00AD � # � [0.2] COLON (MidLetter) � [4.0] COMBINING DIAERESIS (Extend_FE) � [4.0] SOFT HYPHEN (Format_FE) � [0.3]
- assertAnalyzesTo(analyzer, "\u003A\u0308\u00AD",
- new String[] { });
-
- // � 003A � 0300 � # � [0.2] COLON (MidLetter) � [4.0] COMBINING GRAVE ACCENT (Extend_FE) � [0.3]
- assertAnalyzesTo(analyzer, "\u003A\u0300",
- new String[] { });
-
- // � 003A � 0308 � 0300 � # � [0.2] COLON (MidLetter) � [4.0] COMBINING DIAERESIS (Extend_FE) � [4.0] COMBINING GRAVE ACCENT (Extend_FE) � [0.3]
- assertAnalyzesTo(analyzer, "\u003A\u0308\u0300",
- new
<TRUNCATED>
[12/12] lucene-solr:branch_6x: LUCENE-7318: graduate StandardAnalyzer
and make it the default for IndexWriterConfig
Posted by mi...@apache.org.
LUCENE-7318: graduate StandardAnalyzer and make it the default for IndexWriterConfig
Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/ba922148
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/ba922148
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/ba922148
Branch: refs/heads/branch_6x
Commit: ba922148307248893bf70d02b28efdec9882f348
Parents: 45d2d2e
Author: Mike McCandless <mi...@apache.org>
Authored: Tue Jun 14 16:38:04 2016 -0400
Committer: Mike McCandless <mi...@apache.org>
Committed: Tue Jun 14 18:41:37 2016 -0400
----------------------------------------------------------------------
lucene/CHANGES.txt | 4 +
lucene/analysis/common/build.xml | 39 +-
.../lucene/analysis/ar/ArabicAnalyzer.java | 12 +-
.../lucene/analysis/bg/BulgarianAnalyzer.java | 10 +-
.../lucene/analysis/br/BrazilianAnalyzer.java | 10 +-
.../lucene/analysis/ca/CatalanAnalyzer.java | 10 +-
.../charfilter/HTMLStripCharFilter.java | 4 +-
.../apache/lucene/analysis/cjk/CJKAnalyzer.java | 8 +-
.../lucene/analysis/ckb/SoraniAnalyzer.java | 14 +-
.../analysis/commongrams/CommonGramsFilter.java | 4 +-
.../commongrams/CommonGramsFilterFactory.java | 3 +-
.../compound/CompoundWordTokenFilterBase.java | 8 +-
.../DictionaryCompoundWordTokenFilter.java | 2 +-
...ictionaryCompoundWordTokenFilterFactory.java | 8 +-
.../HyphenationCompoundWordTokenFilter.java | 7 +-
...phenationCompoundWordTokenFilterFactory.java | 11 +-
.../lucene/analysis/core/LowerCaseFilter.java | 50 -
.../analysis/core/LowerCaseFilterFactory.java | 2 +-
.../lucene/analysis/core/SimpleAnalyzer.java | 1 +
.../lucene/analysis/core/StopAnalyzer.java | 25 +-
.../apache/lucene/analysis/core/StopFilter.java | 111 -
.../lucene/analysis/core/StopFilterFactory.java | 11 +-
.../lucene/analysis/core/TypeTokenFilter.java | 2 +-
.../lucene/analysis/core/UpperCaseFilter.java | 3 +-
.../lucene/analysis/cz/CzechAnalyzer.java | 18 +-
.../lucene/analysis/da/DanishAnalyzer.java | 12 +-
.../lucene/analysis/de/GermanAnalyzer.java | 12 +-
.../lucene/analysis/el/GreekAnalyzer.java | 6 +-
.../apache/lucene/analysis/el/GreekStemmer.java | 2 +-
.../lucene/analysis/en/EnglishAnalyzer.java | 8 +-
.../org/apache/lucene/analysis/en/KStemmer.java | 2 +-
.../lucene/analysis/es/SpanishAnalyzer.java | 12 +-
.../lucene/analysis/eu/BasqueAnalyzer.java | 10 +-
.../lucene/analysis/fa/PersianAnalyzer.java | 8 +-
.../lucene/analysis/fi/FinnishAnalyzer.java | 12 +-
.../lucene/analysis/fr/FrenchAnalyzer.java | 24 +-
.../lucene/analysis/ga/IrishAnalyzer.java | 8 +-
.../lucene/analysis/gl/GalicianAnalyzer.java | 12 +-
.../lucene/analysis/hi/HindiAnalyzer.java | 12 +-
.../lucene/analysis/hu/HungarianAnalyzer.java | 12 +-
.../lucene/analysis/hunspell/Stemmer.java | 2 +-
.../lucene/analysis/hy/ArmenianAnalyzer.java | 10 +-
.../lucene/analysis/id/IndonesianAnalyzer.java | 10 +-
.../lucene/analysis/it/ItalianAnalyzer.java | 12 +-
.../lucene/analysis/lt/LithuanianAnalyzer.java | 10 +-
.../lucene/analysis/lv/LatvianAnalyzer.java | 12 +-
.../miscellaneous/CapitalizationFilter.java | 2 +-
.../CapitalizationFilterFactory.java | 8 +-
.../miscellaneous/CodepointCountFilter.java | 2 +-
.../miscellaneous/DateRecognizerFilter.java | 2 +-
.../miscellaneous/FingerprintFilter.java | 2 +-
.../analysis/miscellaneous/KeepWordFilter.java | 4 +-
.../miscellaneous/KeepWordFilterFactory.java | 8 +-
.../KeywordMarkerFilterFactory.java | 2 +-
.../analysis/miscellaneous/LengthFilter.java | 2 +-
.../RemoveDuplicatesTokenFilter.java | 2 +-
.../miscellaneous/SetKeywordMarkerFilter.java | 2 +-
.../miscellaneous/WordDelimiterFilter.java | 10 +-
.../WordDelimiterFilterFactory.java | 16 +-
.../lucene/analysis/ngram/NGramTokenizer.java | 2 +-
.../lucene/analysis/nl/DutchAnalyzer.java | 22 +-
.../lucene/analysis/no/NorwegianAnalyzer.java | 12 +-
.../lucene/analysis/pt/PortugueseAnalyzer.java | 12 +-
.../lucene/analysis/pt/RSLPStemmerBase.java | 2 +-
.../query/QueryAutoStopWordAnalyzer.java | 4 +-
.../lucene/analysis/ro/RomanianAnalyzer.java | 10 +-
.../lucene/analysis/ru/RussianAnalyzer.java | 16 +-
.../analysis/snowball/SnowballFilter.java | 4 +-
.../snowball/SnowballPorterFilterFactory.java | 6 +-
.../analysis/standard/ClassicAnalyzer.java | 10 +-
.../analysis/standard/StandardAnalyzer.java | 98 -
.../analysis/standard/StandardFilter.java | 38 -
.../analysis/standard/StandardTokenizer.java | 201 -
.../standard/StandardTokenizerImpl.java | 818 ---
.../standard/StandardTokenizerImpl.jflex | 201 -
.../standard/UAX29URLEmailAnalyzer.java | 14 +-
.../lucene/analysis/standard/package-info.java | 63 -
.../lucene/analysis/standard/package.html | 50 +
.../lucene/analysis/sv/SwedishAnalyzer.java | 12 +-
.../analysis/synonym/SynonymFilterFactory.java | 2 +-
.../apache/lucene/analysis/th/ThaiAnalyzer.java | 8 +-
.../lucene/analysis/tr/TurkishAnalyzer.java | 8 +-
.../analysis/util/AbstractAnalysisFactory.java | 4 +-
.../lucene/analysis/util/CharArrayMap.java | 669 ---
.../lucene/analysis/util/CharArraySet.java | 193 -
.../lucene/analysis/util/CharTokenizer.java | 10 +-
.../lucene/analysis/util/CharacterUtils.java | 251 -
.../lucene/analysis/util/ElisionFilter.java | 2 +-
.../analysis/util/ElisionFilterFactory.java | 1 +
.../analysis/util/FilteringTokenFilter.java | 76 -
.../analysis/util/StopwordAnalyzerBase.java | 138 -
.../lucene/analysis/util/WordlistLoader.java | 244 -
.../apache/lucene/collation/package-info.java | 2 +-
.../lucene/analysis/ar/TestArabicAnalyzer.java | 2 +-
.../analysis/ar/TestArabicStemFilter.java | 2 +-
.../analysis/bg/TestBulgarianAnalyzer.java | 2 +-
.../analysis/bg/TestBulgarianStemmer.java | 2 +-
.../analysis/br/TestBrazilianAnalyzer.java | 2 +-
.../lucene/analysis/ca/TestCatalanAnalyzer.java | 2 +-
.../lucene/analysis/cjk/TestCJKAnalyzer.java | 4 +-
.../lucene/analysis/ckb/TestSoraniAnalyzer.java | 2 +-
.../commongrams/CommonGramsFilterTest.java | 2 +-
.../TestCommonGramsFilterFactory.java | 10 +-
.../TestCommonGramsQueryFilterFactory.java | 7 +-
.../compound/TestCompoundWordTokenFilter.java | 2 +-
.../lucene/analysis/core/TestAnalyzers.java | 1 +
.../analysis/core/TestBugInSomething.java | 2 +-
.../lucene/analysis/core/TestRandomChains.java | 6 +-
.../lucene/analysis/core/TestStopAnalyzer.java | 14 +-
.../lucene/analysis/core/TestStopFilter.java | 176 -
.../analysis/core/TestStopFilterFactory.java | 2 +-
.../lucene/analysis/cz/TestCzechAnalyzer.java | 4 +-
.../lucene/analysis/cz/TestCzechStemmer.java | 2 +-
.../lucene/analysis/da/TestDanishAnalyzer.java | 2 +-
.../lucene/analysis/de/TestGermanAnalyzer.java | 2 +-
.../analysis/de/TestGermanLightStemFilter.java | 2 +-
.../de/TestGermanMinimalStemFilter.java | 2 +-
.../analysis/de/TestGermanStemFilter.java | 4 +-
.../lucene/analysis/en/TestEnglishAnalyzer.java | 2 +-
.../analysis/en/TestPorterStemFilter.java | 8 +-
.../lucene/analysis/es/TestSpanishAnalyzer.java | 2 +-
.../lucene/analysis/eu/TestBasqueAnalyzer.java | 2 +-
.../lucene/analysis/fa/TestPersianAnalyzer.java | 2 +-
.../lucene/analysis/fi/TestFinnishAnalyzer.java | 2 +-
.../analysis/fi/TestFinnishLightStemFilter.java | 2 +-
.../lucene/analysis/fr/TestFrenchAnalyzer.java | 2 +-
.../analysis/fr/TestFrenchLightStemFilter.java | 2 +-
.../fr/TestFrenchMinimalStemFilter.java | 2 +-
.../lucene/analysis/ga/TestIrishAnalyzer.java | 2 +-
.../analysis/gl/TestGalicianAnalyzer.java | 2 +-
.../gl/TestGalicianMinimalStemFilter.java | 2 +-
.../lucene/analysis/hi/TestHindiAnalyzer.java | 2 +-
.../analysis/hu/TestHungarianAnalyzer.java | 2 +-
.../hu/TestHungarianLightStemFilter.java | 2 +-
.../hunspell/TestHunspellStemFilter.java | 2 +-
.../analysis/hy/TestArmenianAnalyzer.java | 2 +-
.../analysis/id/TestIndonesianAnalyzer.java | 2 +-
.../lucene/analysis/it/TestItalianAnalyzer.java | 2 +-
.../analysis/lt/TestLithuanianAnalyzer.java | 2 +-
.../lucene/analysis/lv/TestLatvianAnalyzer.java | 2 +-
.../miscellaneous/TestCapitalizationFilter.java | 2 +-
.../miscellaneous/TestKeepFilterFactory.java | 4 +-
.../miscellaneous/TestKeepWordFilter.java | 2 +-
.../miscellaneous/TestKeywordMarkerFilter.java | 4 +-
.../TestStemmerOverrideFilter.java | 2 +-
.../miscellaneous/TestWordDelimiterFilter.java | 12 +-
.../lucene/analysis/nl/TestDutchAnalyzer.java | 6 +-
.../analysis/no/TestNorwegianAnalyzer.java | 2 +-
.../no/TestNorwegianLightStemFilter.java | 2 +-
.../no/TestNorwegianMinimalStemFilter.java | 2 +-
.../analysis/pt/TestPortugueseAnalyzer.java | 2 +-
.../pt/TestPortugueseLightStemFilter.java | 2 +-
.../pt/TestPortugueseMinimalStemFilter.java | 2 +-
.../analysis/pt/TestPortugueseStemFilter.java | 6 +-
.../analysis/ro/TestRomanianAnalyzer.java | 2 +-
.../lucene/analysis/ru/TestRussianAnalyzer.java | 4 +-
.../analysis/ru/TestRussianLightStemFilter.java | 2 +-
.../shingle/ShingleAnalyzerWrapperTest.java | 4 +-
.../analysis/sinks/TestTeeSinkTokenFilter.java | 4 +-
.../analysis/standard/TestStandardAnalyzer.java | 390 --
.../standard/WordBreakTestUnicode_6_3_0.java | 5537 ------------------
.../generateJavaUnicodeWordBreakTest.pl | 232 -
.../lucene/analysis/sv/TestSwedishAnalyzer.java | 2 +-
.../analysis/sv/TestSwedishLightStemFilter.java | 2 +-
.../lucene/analysis/th/TestThaiAnalyzer.java | 2 +-
.../lucene/analysis/tr/TestTurkishAnalyzer.java | 2 +-
.../lucene/analysis/util/TestCharArrayMap.java | 244 -
.../lucene/analysis/util/TestCharArraySet.java | 429 --
.../analysis/util/TestCharacterUtils.java | 107 -
.../lucene/analysis/util/TestElision.java | 2 +-
.../util/TestFilesystemResourceLoader.java | 2 +
.../analysis/util/TestWordlistLoader.java | 79 -
lucene/analysis/icu/src/java/overview.html | 2 +-
.../segmentation/TestWithCJKBigramFilter.java | 4 +-
.../lucene/analysis/ja/JapaneseAnalyzer.java | 8 +-
.../analysis/ja/JapaneseNumberFilter.java | 4 +-
.../ja/JapanesePartOfSpeechStopFilter.java | 4 +-
.../JapanesePartOfSpeechStopFilterFactory.java | 2 +-
.../analysis/ja/TestJapaneseBaseFormFilter.java | 2 +-
.../ja/TestJapaneseKatakanaStemFilter.java | 6 +-
.../analysis/ja/TestJapaneseNumberFilter.java | 2 +-
.../analysis/morfologik/MorfologikFilter.java | 12 +-
.../morfologik/TestMorfologikAnalyzer.java | 2 +-
.../analysis/cn/smart/SmartChineseAnalyzer.java | 6 +-
.../lucene/analysis/pl/PolishAnalyzer.java | 14 +-
.../lucene/analysis/pl/TestPolishAnalyzer.java | 2 +-
lucene/common-build.xml | 30 +
lucene/core/build.xml | 18 +-
.../apache/lucene/analysis/CharArrayMap.java | 669 +++
.../apache/lucene/analysis/CharArraySet.java | 196 +
.../apache/lucene/analysis/CharacterUtils.java | 251 +
.../lucene/analysis/FilteringTokenFilter.java | 76 +
.../apache/lucene/analysis/LowerCaseFilter.java | 50 +
.../org/apache/lucene/analysis/StopFilter.java | 111 +
.../lucene/analysis/StopwordAnalyzerBase.java | 138 +
.../apache/lucene/analysis/WordlistLoader.java | 244 +
.../apache/lucene/analysis/package-info.java | 2 +-
.../analysis/standard/StandardAnalyzer.java | 115 +
.../analysis/standard/StandardFilter.java | 39 +
.../analysis/standard/StandardTokenizer.java | 213 +
.../standard/StandardTokenizerImpl.java | 823 +++
.../standard/StandardTokenizerImpl.jflex | 206 +
.../lucene/analysis/standard/package-info.java | 33 +
.../apache/lucene/index/IndexWriterConfig.java | 17 +-
.../org/apache/lucene/util/packed/Direct16.java | 2 +-
.../org/apache/lucene/util/packed/Direct32.java | 2 +-
.../org/apache/lucene/util/packed/Direct64.java | 2 +-
.../org/apache/lucene/util/packed/Direct8.java | 2 +-
.../lucene/util/packed/Packed16ThreeBlocks.java | 2 +-
.../lucene/util/packed/Packed64SingleBlock.java | 2 +-
.../lucene/util/packed/Packed8ThreeBlocks.java | 2 +-
lucene/core/src/java/overview.html | 2 +-
.../lucene/analysis/TestCharArrayMap.java | 244 +
.../lucene/analysis/TestCharArraySet.java | 430 ++
.../lucene/analysis/TestCharacterUtils.java | 107 +
.../apache/lucene/analysis/TestStopFilter.java | 176 +
.../lucene/analysis/TestWordlistLoader.java | 79 +
.../analysis/standard/TestStandardAnalyzer.java | 390 ++
.../suggest/analyzing/SuggestStopFilter.java | 4 +-
.../analyzing/SuggestStopFilterFactory.java | 10 +-
.../analyzing/AnalyzingInfixSuggesterTest.java | 4 +-
.../analyzing/BlendedInfixSuggesterTest.java | 2 +-
.../analyzing/TestFreeTextSuggester.java | 6 +-
.../analyzing/TestSuggestStopFilter.java | 4 +-
.../analyzing/TestSuggestStopFilterFactory.java | 2 +-
.../standard/WordBreakTestUnicode_6_3_0.java | 5537 ++++++++++++++++++
.../generateJavaUnicodeWordBreakTest.pl | 232 +
.../lucene/analysis/standard/package.html | 26 +
.../SolrStopwordsCarrot2LexicalDataFactory.java | 4 +-
.../apache/solr/core/SolrResourceLoader.java | 10 +-
.../analysis/ManagedStopFilterFactory.java | 4 +-
.../DocumentAnalysisRequestHandlerTest.java | 8 +-
.../FieldAnalysisRequestHandlerTest.java | 10 +-
.../spelling/TestSuggestSpellingConverter.java | 2 +-
234 files changed, 10991 insertions(+), 10871 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/CHANGES.txt
----------------------------------------------------------------------
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 68716c6..c5a85d0 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -35,6 +35,10 @@ Improvements
write-once architecture, possibly catching externally caused
issues sooner (Robert Muir, Mike McCandless)
+* LUCENE-7318: StandardAnalyzer has been moved from the analysis
+ module into core and is now the default analyzer in
+ IndexWriterConfig (Robert Muir, Mike McCandless)
+
Optimizations
* LUCENE-7330: Speed up conjunction queries. (Adrien Grand)
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/common/build.xml
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/build.xml b/lucene/analysis/common/build.xml
index 670e6ab..56ca4d0 100644
--- a/lucene/analysis/common/build.xml
+++ b/lucene/analysis/common/build.xml
@@ -33,7 +33,7 @@
<property name="unicode-props-file" location="src/java/org/apache/lucene/analysis/util/UnicodeProps.java"/>
- <target name="jflex" depends="-install-jflex,clean-jflex,-jflex-StandardAnalyzer,-jflex-UAX29URLEmailTokenizer,
+ <target name="jflex" depends="-install-jflex,clean-jflex,-jflex-ClassicAnalyzer,-jflex-UAX29URLEmailTokenizer,
-jflex-wiki-tokenizer,-jflex-HTMLStripCharFilter"/>
<target name="-jflex-HTMLStripCharFilter"
@@ -62,45 +62,14 @@
<run-jflex dir="src/java/org/apache/lucene/analysis/wikipedia" name="WikipediaTokenizerImpl"/>
</target>
- <target name="-jflex-StandardAnalyzer" depends="init,-install-jflex">
- <run-jflex-and-disable-buffer-expansion
- dir="src/java/org/apache/lucene/analysis/standard" name="StandardTokenizerImpl"/>
- <run-jflex dir="src/java/org/apache/lucene/analysis/standard" name="ClassicTokenizerImpl"/>
- </target>
-
<target name="-jflex-UAX29URLEmailTokenizer" depends="init,-install-jflex">
<run-jflex-and-disable-buffer-expansion
dir="src/java/org/apache/lucene/analysis/standard" name="UAX29URLEmailTokenizerImpl"/>
</target>
- <macrodef name="run-jflex">
- <attribute name="dir"/>
- <attribute name="name"/>
- <sequential>
- <jflex file="@{dir}/@{name}.jflex" outdir="@{dir}" nobak="on" inputstreamctor="false"/>
- </sequential>
- </macrodef>
-
- <macrodef name="run-jflex-and-disable-buffer-expansion">
- <attribute name="dir"/>
- <attribute name="name"/>
- <sequential>
- <jflex file="@{dir}/@{name}.jflex" outdir="@{dir}" nobak="on" inputstreamctor="false"/>
- <!-- LUCENE-5897: Disallow scanner buffer expansion -->
- <replaceregexp file="@{dir}/@{name}.java"
- match="[ \t]*/\* is the buffer big enough\? \*/\s+if \(zzCurrentPos >= zzBuffer\.length.*?\}[ \t]*\r?\n"
- replace="" flags="s" />
- <replaceregexp file="@{dir}/@{name}.java"
- match="private static final int ZZ_BUFFERSIZE ="
- replace="private int ZZ_BUFFERSIZE ="/>
- <replaceregexp file="@{dir}/@{name}.java"
- match="int requested = zzBuffer.length - zzEndRead;"
- replace="int requested = zzBuffer.length - zzEndRead - zzFinalHighSurrogate;"/>
- <replaceregexp file="@{dir}/@{name}.java"
- match="(zzFinalHighSurrogate = 1;)(\r?\n)"
- replace="\1\2 if (totalRead == 1) { return true; }\2"/>
- </sequential>
- </macrodef>
+ <target name="-jflex-ClassicAnalyzer" depends="init,-install-jflex">
+ <run-jflex dir="src/java/org/apache/lucene/analysis/standard" name="ClassicTokenizerImpl"/>
+ </target>
<target name="clean-jflex">
<delete>
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/common/src/java/org/apache/lucene/analysis/ar/ArabicAnalyzer.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/ar/ArabicAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/ar/ArabicAnalyzer.java
index 3d36c86..71da32d 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/ar/ArabicAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/ar/ArabicAnalyzer.java
@@ -21,15 +21,15 @@ import java.io.IOException;
import java.io.Reader;
import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.CharArraySet;
+import org.apache.lucene.analysis.LowerCaseFilter;
+import org.apache.lucene.analysis.StopFilter;
+import org.apache.lucene.analysis.StopwordAnalyzerBase;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.DecimalDigitFilter;
-import org.apache.lucene.analysis.core.LowerCaseFilter;
-import org.apache.lucene.analysis.core.StopFilter;
import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
-import org.apache.lucene.analysis.util.CharArraySet;
-import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
-import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.util.Version;
/**
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/common/src/java/org/apache/lucene/analysis/bg/BulgarianAnalyzer.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/bg/BulgarianAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/bg/BulgarianAnalyzer.java
index 24746e4..9cb0657 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/bg/BulgarianAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/bg/BulgarianAnalyzer.java
@@ -21,15 +21,15 @@ import java.io.IOException;
import java.io.Reader;
import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.core.LowerCaseFilter;
-import org.apache.lucene.analysis.core.StopFilter;
-import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
+import org.apache.lucene.analysis.CharArraySet;
+import org.apache.lucene.analysis.LowerCaseFilter;
+import org.apache.lucene.analysis.StopFilter;
+import org.apache.lucene.analysis.StopwordAnalyzerBase;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
import org.apache.lucene.analysis.standard.StandardFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
-import org.apache.lucene.analysis.util.CharArraySet;
-import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
/**
* {@link Analyzer} for Bulgarian.
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/common/src/java/org/apache/lucene/analysis/br/BrazilianAnalyzer.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/br/BrazilianAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/br/BrazilianAnalyzer.java
index 3b02567..5dd0cbc 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/br/BrazilianAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/br/BrazilianAnalyzer.java
@@ -22,17 +22,17 @@ import java.io.Reader;
import java.nio.charset.StandardCharsets;
import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.CharArraySet;
+import org.apache.lucene.analysis.LowerCaseFilter;
+import org.apache.lucene.analysis.StopFilter;
+import org.apache.lucene.analysis.StopwordAnalyzerBase;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
-import org.apache.lucene.analysis.core.LowerCaseFilter;
-import org.apache.lucene.analysis.core.StopFilter;
+import org.apache.lucene.analysis.WordlistLoader;
import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.standard.StandardFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
-import org.apache.lucene.analysis.util.CharArraySet;
-import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
-import org.apache.lucene.analysis.util.WordlistLoader;
import org.apache.lucene.util.IOUtils;
/**
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/common/src/java/org/apache/lucene/analysis/ca/CatalanAnalyzer.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/ca/CatalanAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/ca/CatalanAnalyzer.java
index cb674de..739b61a 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/ca/CatalanAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/ca/CatalanAnalyzer.java
@@ -22,17 +22,17 @@ import java.io.Reader;
import java.util.Arrays;
import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.core.LowerCaseFilter;
-import org.apache.lucene.analysis.core.StopFilter;
-import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
+import org.apache.lucene.analysis.CharArraySet;
+import org.apache.lucene.analysis.LowerCaseFilter;
+import org.apache.lucene.analysis.StopFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
import org.apache.lucene.analysis.snowball.SnowballFilter;
import org.apache.lucene.analysis.standard.StandardFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
-import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.analysis.util.ElisionFilter;
-import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
+import org.apache.lucene.analysis.StopwordAnalyzerBase;
import org.tartarus.snowball.ext.CatalanStemmer;
/**
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.java
index fea84d8..68a939b 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.java
@@ -24,8 +24,8 @@ import java.util.HashMap;
import java.util.Map;
import java.util.Set;
-import org.apache.lucene.analysis.util.CharArrayMap;
-import org.apache.lucene.analysis.util.CharArraySet;
+import org.apache.lucene.analysis.CharArrayMap;
+import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.util.OpenStringBuilder;
/**
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/common/src/java/org/apache/lucene/analysis/cjk/CJKAnalyzer.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/cjk/CJKAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/cjk/CJKAnalyzer.java
index ed8eee6..d500ff9 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/cjk/CJKAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/cjk/CJKAnalyzer.java
@@ -20,13 +20,13 @@ package org.apache.lucene.analysis.cjk;
import java.io.IOException;
import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.CharArraySet;
+import org.apache.lucene.analysis.LowerCaseFilter;
+import org.apache.lucene.analysis.StopFilter;
+import org.apache.lucene.analysis.StopwordAnalyzerBase;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
-import org.apache.lucene.analysis.core.LowerCaseFilter;
-import org.apache.lucene.analysis.core.StopFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
-import org.apache.lucene.analysis.util.CharArraySet;
-import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
/**
* An {@link Analyzer} that tokenizes text with {@link StandardTokenizer},
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/common/src/java/org/apache/lucene/analysis/ckb/SoraniAnalyzer.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/ckb/SoraniAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/ckb/SoraniAnalyzer.java
index 78304c7..5fd1bec 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/ckb/SoraniAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/ckb/SoraniAnalyzer.java
@@ -22,17 +22,17 @@ import java.io.Reader;
import java.nio.charset.StandardCharsets;
import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.core.DecimalDigitFilter;
-import org.apache.lucene.analysis.core.LowerCaseFilter;
-import org.apache.lucene.analysis.core.StopFilter;
-import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
+import org.apache.lucene.analysis.CharArraySet;
+import org.apache.lucene.analysis.LowerCaseFilter;
+import org.apache.lucene.analysis.StopFilter;
+import org.apache.lucene.analysis.StopwordAnalyzerBase;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.WordlistLoader;
+import org.apache.lucene.analysis.core.DecimalDigitFilter;
+import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
import org.apache.lucene.analysis.standard.StandardFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
-import org.apache.lucene.analysis.util.CharArraySet;
-import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
-import org.apache.lucene.analysis.util.WordlistLoader;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.Version;
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/common/src/java/org/apache/lucene/analysis/commongrams/CommonGramsFilter.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/commongrams/CommonGramsFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/commongrams/CommonGramsFilter.java
index 35dedde..75e991f 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/commongrams/CommonGramsFilter.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/commongrams/CommonGramsFilter.java
@@ -18,14 +18,14 @@ package org.apache.lucene.analysis.commongrams;
import java.io.IOException;
+import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
-import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
-import org.apache.lucene.analysis.util.CharArraySet;
/*
* TODO: Consider implementing https://issues.apache.org/jira/browse/LUCENE-1688 changes to stop list and associated constructors
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/common/src/java/org/apache/lucene/analysis/commongrams/CommonGramsFilterFactory.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/commongrams/CommonGramsFilterFactory.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/commongrams/CommonGramsFilterFactory.java
index ebd5ec3..946003f 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/commongrams/CommonGramsFilterFactory.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/commongrams/CommonGramsFilterFactory.java
@@ -20,6 +20,7 @@ package org.apache.lucene.analysis.commongrams;
import java.io.IOException;
import java.util.Map;
+import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.core.StopAnalyzer;
@@ -82,4 +83,4 @@ public class CommonGramsFilterFactory extends TokenFilterFactory implements Reso
}
-
\ No newline at end of file
+
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/common/src/java/org/apache/lucene/analysis/compound/CompoundWordTokenFilterBase.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/compound/CompoundWordTokenFilterBase.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/compound/CompoundWordTokenFilterBase.java
index 1920401..680e67a 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/compound/CompoundWordTokenFilterBase.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/compound/CompoundWordTokenFilterBase.java
@@ -17,15 +17,15 @@
package org.apache.lucene.analysis.compound;
+import java.io.IOException;
+import java.util.LinkedList;
+
+import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
-import org.apache.lucene.analysis.util.CharArraySet;
-
-import java.io.IOException;
-import java.util.LinkedList;
/**
* Base class for decomposition token filters.
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/common/src/java/org/apache/lucene/analysis/compound/DictionaryCompoundWordTokenFilter.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/compound/DictionaryCompoundWordTokenFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/compound/DictionaryCompoundWordTokenFilter.java
index b81a96c..2e4b837 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/compound/DictionaryCompoundWordTokenFilter.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/compound/DictionaryCompoundWordTokenFilter.java
@@ -19,7 +19,7 @@ package org.apache.lucene.analysis.compound;
import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.util.CharArraySet;
+import org.apache.lucene.analysis.CharArraySet;
/**
* A {@link org.apache.lucene.analysis.TokenFilter} that decomposes compound words found in many Germanic languages.
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/common/src/java/org/apache/lucene/analysis/compound/DictionaryCompoundWordTokenFilterFactory.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/compound/DictionaryCompoundWordTokenFilterFactory.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/compound/DictionaryCompoundWordTokenFilterFactory.java
index 440ab5e..d31cdf8 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/compound/DictionaryCompoundWordTokenFilterFactory.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/compound/DictionaryCompoundWordTokenFilterFactory.java
@@ -17,15 +17,15 @@
package org.apache.lucene.analysis.compound;
+import java.io.IOException;
+import java.util.Map;
+
+import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.analysis.util.ResourceLoader;
import org.apache.lucene.analysis.util.ResourceLoaderAware;
import org.apache.lucene.analysis.util.TokenFilterFactory;
-import java.util.Map;
-import java.io.IOException;
-
/**
* Factory for {@link DictionaryCompoundWordTokenFilter}.
* <pre class="prettyprint">
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/common/src/java/org/apache/lucene/analysis/compound/HyphenationCompoundWordTokenFilter.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/compound/HyphenationCompoundWordTokenFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/compound/HyphenationCompoundWordTokenFilter.java
index bef438c..41f92c9 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/compound/HyphenationCompoundWordTokenFilter.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/compound/HyphenationCompoundWordTokenFilter.java
@@ -16,15 +16,14 @@
*/
package org.apache.lucene.analysis.compound;
+import java.io.IOException;
+import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.compound.hyphenation.Hyphenation;
import org.apache.lucene.analysis.compound.hyphenation.HyphenationTree;
-import org.apache.lucene.analysis.util.CharArraySet;
import org.xml.sax.InputSource;
-import java.io.IOException;
-
/**
* A {@link org.apache.lucene.analysis.TokenFilter} that decomposes compound words found in many Germanic languages.
*
@@ -82,7 +81,7 @@ public class HyphenationCompoundWordTokenFilter extends
/**
* Create a HyphenationCompoundWordTokenFilter with no dictionary.
* <p>
- * Calls {@link #HyphenationCompoundWordTokenFilter(org.apache.lucene.analysis.TokenStream, org.apache.lucene.analysis.compound.hyphenation.HyphenationTree, org.apache.lucene.analysis.util.CharArraySet, int, int, int, boolean)
+ * Calls {@link #HyphenationCompoundWordTokenFilter(org.apache.lucene.analysis.TokenStream, org.apache.lucene.analysis.compound.hyphenation.HyphenationTree, org.apache.lucene.analysis.CharArraySet, int, int, int, boolean)
* HyphenationCompoundWordTokenFilter(matchVersion, input, hyphenator,
* null, minWordSize, minSubwordSize, maxSubwordSize }
*/
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/common/src/java/org/apache/lucene/analysis/compound/HyphenationCompoundWordTokenFilterFactory.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/compound/HyphenationCompoundWordTokenFilterFactory.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/compound/HyphenationCompoundWordTokenFilterFactory.java
index 9ffe405..37421bb 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/compound/HyphenationCompoundWordTokenFilterFactory.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/compound/HyphenationCompoundWordTokenFilterFactory.java
@@ -17,19 +17,18 @@
package org.apache.lucene.analysis.compound;
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.Map;
+
+import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.compound.hyphenation.HyphenationTree;
-import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.analysis.util.ResourceLoader;
import org.apache.lucene.analysis.util.ResourceLoaderAware;
import org.apache.lucene.analysis.util.TokenFilterFactory;
import org.apache.lucene.util.IOUtils;
-
-import java.util.Map;
-import java.io.IOException;
-import java.io.InputStream;
-
import org.xml.sax.InputSource;
/**
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/LowerCaseFilter.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/LowerCaseFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/LowerCaseFilter.java
deleted file mode 100644
index ade6a58..0000000
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/LowerCaseFilter.java
+++ /dev/null
@@ -1,50 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.lucene.analysis.core;
-
-
-import java.io.IOException;
-
-import org.apache.lucene.analysis.TokenFilter;
-import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
-import org.apache.lucene.analysis.util.CharacterUtils;
-
-/**
- * Normalizes token text to lower case.
- */
-public final class LowerCaseFilter extends TokenFilter {
- private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
-
- /**
- * Create a new LowerCaseFilter, that normalizes token text to lower case.
- *
- * @param in TokenStream to filter
- */
- public LowerCaseFilter(TokenStream in) {
- super(in);
- }
-
- @Override
- public final boolean incrementToken() throws IOException {
- if (input.incrementToken()) {
- CharacterUtils.toLowerCase(termAtt.buffer(), 0, termAtt.length());
- return true;
- } else
- return false;
- }
-}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/LowerCaseFilterFactory.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/LowerCaseFilterFactory.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/LowerCaseFilterFactory.java
index 785daa5..0bd9795 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/LowerCaseFilterFactory.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/LowerCaseFilterFactory.java
@@ -20,7 +20,7 @@ package org.apache.lucene.analysis.core;
import java.util.Map;
import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.core.LowerCaseFilter;
+import org.apache.lucene.analysis.LowerCaseFilter;
import org.apache.lucene.analysis.util.AbstractAnalysisFactory;
import org.apache.lucene.analysis.util.MultiTermAwareComponent;
import org.apache.lucene.analysis.util.TokenFilterFactory;
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/SimpleAnalyzer.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/SimpleAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/SimpleAnalyzer.java
index 45c8d23..d0fdcf6 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/SimpleAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/SimpleAnalyzer.java
@@ -18,6 +18,7 @@ package org.apache.lucene.analysis.core;
import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.LowerCaseFilter;
/** An {@link Analyzer} that filters {@link LetterTokenizer}
* with {@link LowerCaseFilter}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/StopAnalyzer.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/StopAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/StopAnalyzer.java
index 0c8fdc8..3fa4982 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/StopAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/StopAnalyzer.java
@@ -20,13 +20,14 @@ package org.apache.lucene.analysis.core;
import java.io.IOException;
import java.io.Reader;
import java.nio.file.Path;
-import java.util.Arrays;
-import java.util.List;
+import org.apache.lucene.analysis.CharArraySet;
+import org.apache.lucene.analysis.LowerCaseFilter;
+import org.apache.lucene.analysis.StopFilter;
+import org.apache.lucene.analysis.StopwordAnalyzerBase;
import org.apache.lucene.analysis.Tokenizer;
-import org.apache.lucene.analysis.util.CharArraySet;
-import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
-import org.apache.lucene.analysis.util.WordlistLoader;
+import org.apache.lucene.analysis.WordlistLoader;
+import org.apache.lucene.analysis.standard.StandardAnalyzer;
/**
* Filters {@link LetterTokenizer} with {@link LowerCaseFilter} and {@link StopFilter}.
@@ -35,19 +36,7 @@ public final class StopAnalyzer extends StopwordAnalyzerBase {
/** An unmodifiable set containing some common English words that are not usually useful
for searching.*/
- public static final CharArraySet ENGLISH_STOP_WORDS_SET;
-
- static {
- final List<String> stopWords = Arrays.asList(
- "a", "an", "and", "are", "as", "at", "be", "but", "by",
- "for", "if", "in", "into", "is", "it",
- "no", "not", "of", "on", "or", "such",
- "that", "the", "their", "then", "there", "these",
- "they", "this", "to", "was", "will", "with"
- );
- final CharArraySet stopSet = new CharArraySet(stopWords, false);
- ENGLISH_STOP_WORDS_SET = CharArraySet.unmodifiableSet(stopSet);
- }
+ public static final CharArraySet ENGLISH_STOP_WORDS_SET = StandardAnalyzer.ENGLISH_STOP_WORDS_SET;
/** Builds an analyzer which removes words in
* {@link #ENGLISH_STOP_WORDS_SET}.
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/StopFilter.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/StopFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/StopFilter.java
deleted file mode 100644
index fc33a1c..0000000
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/StopFilter.java
+++ /dev/null
@@ -1,111 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.lucene.analysis.core;
-
-
-import java.util.Arrays;
-import java.util.List;
-
-import org.apache.lucene.analysis.util.FilteringTokenFilter;
-import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
-import org.apache.lucene.analysis.util.CharArraySet;
-
-/**
- * Removes stop words from a token stream.
- */
-public final class StopFilter extends FilteringTokenFilter {
-
- private final CharArraySet stopWords;
- private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
-
- /**
- * Constructs a filter which removes words from the input TokenStream that are
- * named in the Set.
- *
- * @param in
- * Input stream
- * @param stopWords
- * A {@link CharArraySet} representing the stopwords.
- * @see #makeStopSet(java.lang.String...)
- */
- public StopFilter(TokenStream in, CharArraySet stopWords) {
- super(in);
- this.stopWords = stopWords;
- }
-
- /**
- * Builds a Set from an array of stop words,
- * appropriate for passing into the StopFilter constructor.
- * This permits this stopWords construction to be cached once when
- * an Analyzer is constructed.
- *
- * @param stopWords An array of stopwords
- * @see #makeStopSet(java.lang.String[], boolean) passing false to ignoreCase
- */
- public static CharArraySet makeStopSet(String... stopWords) {
- return makeStopSet(stopWords, false);
- }
-
- /**
- * Builds a Set from an array of stop words,
- * appropriate for passing into the StopFilter constructor.
- * This permits this stopWords construction to be cached once when
- * an Analyzer is constructed.
- *
- * @param stopWords A List of Strings or char[] or any other toString()-able list representing the stopwords
- * @return A Set ({@link CharArraySet}) containing the words
- * @see #makeStopSet(java.lang.String[], boolean) passing false to ignoreCase
- */
- public static CharArraySet makeStopSet(List<?> stopWords) {
- return makeStopSet(stopWords, false);
- }
-
- /**
- * Creates a stopword set from the given stopword array.
- *
- * @param stopWords An array of stopwords
- * @param ignoreCase If true, all words are lower cased first.
- * @return a Set containing the words
- */
- public static CharArraySet makeStopSet(String[] stopWords, boolean ignoreCase) {
- CharArraySet stopSet = new CharArraySet(stopWords.length, ignoreCase);
- stopSet.addAll(Arrays.asList(stopWords));
- return stopSet;
- }
-
- /**
- * Creates a stopword set from the given stopword list.
- * @param stopWords A List of Strings or char[] or any other toString()-able list representing the stopwords
- * @param ignoreCase if true, all words are lower cased first
- * @return A Set ({@link CharArraySet}) containing the words
- */
- public static CharArraySet makeStopSet(List<?> stopWords, boolean ignoreCase){
- CharArraySet stopSet = new CharArraySet(stopWords.size(), ignoreCase);
- stopSet.addAll(stopWords);
- return stopSet;
- }
-
- /**
- * Returns the next input Token whose term() is not a stop word.
- */
- @Override
- protected boolean accept() {
- return !stopWords.contains(termAtt.buffer(), 0, termAtt.length());
- }
-
-}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/StopFilterFactory.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/StopFilterFactory.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/StopFilterFactory.java
index d3f6aff..17e2a89 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/StopFilterFactory.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/StopFilterFactory.java
@@ -17,15 +17,16 @@
package org.apache.lucene.analysis.core;
+import java.io.IOException;
+import java.util.Map;
+
+import org.apache.lucene.analysis.CharArraySet;
+import org.apache.lucene.analysis.StopFilter;
import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.util.CharArraySet;
+import org.apache.lucene.analysis.WordlistLoader; // jdocs
import org.apache.lucene.analysis.util.ResourceLoader;
import org.apache.lucene.analysis.util.ResourceLoaderAware;
import org.apache.lucene.analysis.util.TokenFilterFactory;
-import org.apache.lucene.analysis.util.WordlistLoader; // jdocs
-
-import java.util.Map;
-import java.io.IOException;
/**
* Factory for {@link StopFilter}.
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/TypeTokenFilter.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/TypeTokenFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/TypeTokenFilter.java
index d7447d6..cc1547c 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/TypeTokenFilter.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/TypeTokenFilter.java
@@ -19,9 +19,9 @@ package org.apache.lucene.analysis.core;
import java.util.Set;
+import org.apache.lucene.analysis.FilteringTokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
-import org.apache.lucene.analysis.util.FilteringTokenFilter;
/**
* Removes tokens whose types appear in a set of blocked types from a token stream.
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/UpperCaseFilter.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/UpperCaseFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/UpperCaseFilter.java
index 6d3f6bb..7b28997 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/UpperCaseFilter.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/core/UpperCaseFilter.java
@@ -19,10 +19,11 @@ package org.apache.lucene.analysis.core;
import java.io.IOException;
+import org.apache.lucene.analysis.CharacterUtils;
+import org.apache.lucene.analysis.LowerCaseFilter;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
-import org.apache.lucene.analysis.util.CharacterUtils;
/**
* Normalizes token text to UPPER CASE.
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/common/src/java/org/apache/lucene/analysis/cz/CzechAnalyzer.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/cz/CzechAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/cz/CzechAnalyzer.java
index 6b664c3..9777179 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/cz/CzechAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/cz/CzechAnalyzer.java
@@ -17,22 +17,22 @@
package org.apache.lucene.analysis.cz;
+import java.io.*;
+import java.nio.charset.StandardCharsets;
+
import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.core.LowerCaseFilter;
-import org.apache.lucene.analysis.core.StopFilter;
-import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
+import org.apache.lucene.analysis.CharArraySet;
+import org.apache.lucene.analysis.LowerCaseFilter;
+import org.apache.lucene.analysis.StopFilter;
+import org.apache.lucene.analysis.StopwordAnalyzerBase;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.WordlistLoader;
+import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
import org.apache.lucene.analysis.standard.StandardFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
-import org.apache.lucene.analysis.util.CharArraySet;
-import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
-import org.apache.lucene.analysis.util.WordlistLoader;
import org.apache.lucene.util.IOUtils;
-import java.io.*;
-import java.nio.charset.StandardCharsets;
-
/**
* {@link Analyzer} for Czech language.
* <p>
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/common/src/java/org/apache/lucene/analysis/da/DanishAnalyzer.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/da/DanishAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/da/DanishAnalyzer.java
index 1b11a1c..f9c316d 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/da/DanishAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/da/DanishAnalyzer.java
@@ -22,17 +22,17 @@ import java.io.Reader;
import java.nio.charset.StandardCharsets;
import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.core.LowerCaseFilter;
-import org.apache.lucene.analysis.core.StopFilter;
-import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
+import org.apache.lucene.analysis.CharArraySet;
+import org.apache.lucene.analysis.LowerCaseFilter;
+import org.apache.lucene.analysis.StopFilter;
+import org.apache.lucene.analysis.StopwordAnalyzerBase;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.WordlistLoader;
+import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
import org.apache.lucene.analysis.snowball.SnowballFilter;
import org.apache.lucene.analysis.standard.StandardFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
-import org.apache.lucene.analysis.util.CharArraySet;
-import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
-import org.apache.lucene.analysis.util.WordlistLoader;
import org.apache.lucene.util.IOUtils;
import org.tartarus.snowball.ext.DanishStemmer;
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/common/src/java/org/apache/lucene/analysis/de/GermanAnalyzer.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/de/GermanAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/de/GermanAnalyzer.java
index 23e01be..790fc48 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/de/GermanAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/de/GermanAnalyzer.java
@@ -23,18 +23,18 @@ import java.io.Reader;
import java.nio.charset.StandardCharsets;
import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.core.LowerCaseFilter;
-import org.apache.lucene.analysis.core.StopFilter;
-import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
+import org.apache.lucene.analysis.CharArraySet;
+import org.apache.lucene.analysis.LowerCaseFilter;
+import org.apache.lucene.analysis.StopFilter;
+import org.apache.lucene.analysis.StopwordAnalyzerBase;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.WordlistLoader;
+import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
import org.apache.lucene.analysis.snowball.SnowballFilter;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.standard.StandardFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
-import org.apache.lucene.analysis.util.CharArraySet;
-import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
-import org.apache.lucene.analysis.util.WordlistLoader;
import org.apache.lucene.util.IOUtils;
/**
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/common/src/java/org/apache/lucene/analysis/el/GreekAnalyzer.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/el/GreekAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/el/GreekAnalyzer.java
index f039edb..c85b6ec 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/el/GreekAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/el/GreekAnalyzer.java
@@ -20,14 +20,14 @@ import java.io.IOException;
import java.io.Reader;
import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.CharArraySet;
+import org.apache.lucene.analysis.StopFilter;
+import org.apache.lucene.analysis.StopwordAnalyzerBase;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
-import org.apache.lucene.analysis.core.StopFilter;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.standard.StandardFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
-import org.apache.lucene.analysis.util.CharArraySet;
-import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
/**
* {@link Analyzer} for the Greek language.
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/common/src/java/org/apache/lucene/analysis/el/GreekStemmer.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/el/GreekStemmer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/el/GreekStemmer.java
index c09cafa..75d0840 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/el/GreekStemmer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/el/GreekStemmer.java
@@ -16,7 +16,7 @@
*/
package org.apache.lucene.analysis.el;
-import org.apache.lucene.analysis.util.CharArraySet;
+import org.apache.lucene.analysis.CharArraySet;
import java.util.Arrays;
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/common/src/java/org/apache/lucene/analysis/en/EnglishAnalyzer.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/en/EnglishAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/en/EnglishAnalyzer.java
index 721d9b2..16dc0c5 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/en/EnglishAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/en/EnglishAnalyzer.java
@@ -20,16 +20,16 @@ package org.apache.lucene.analysis.en;
import java.io.Reader;
import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.CharArraySet;
+import org.apache.lucene.analysis.LowerCaseFilter;
+import org.apache.lucene.analysis.StopFilter;
+import org.apache.lucene.analysis.StopwordAnalyzerBase;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
-import org.apache.lucene.analysis.core.LowerCaseFilter;
-import org.apache.lucene.analysis.core.StopFilter;
import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.standard.StandardFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
-import org.apache.lucene.analysis.util.CharArraySet;
-import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
/**
* {@link Analyzer} for English.
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/common/src/java/org/apache/lucene/analysis/en/KStemmer.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/en/KStemmer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/en/KStemmer.java
index 3348d9a..f0bfecd 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/en/KStemmer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/en/KStemmer.java
@@ -55,7 +55,7 @@ the original shown below)
*/
package org.apache.lucene.analysis.en;
-import org.apache.lucene.analysis.util.CharArrayMap;
+import org.apache.lucene.analysis.CharArrayMap;
import org.apache.lucene.analysis.util.OpenStringBuilder;
/**
* <p>Title: Kstemmer</p>
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/common/src/java/org/apache/lucene/analysis/es/SpanishAnalyzer.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/es/SpanishAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/es/SpanishAnalyzer.java
index 0e4747f..ab5b6c3 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/es/SpanishAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/es/SpanishAnalyzer.java
@@ -22,17 +22,17 @@ import java.io.Reader;
import java.nio.charset.StandardCharsets;
import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.core.LowerCaseFilter;
-import org.apache.lucene.analysis.core.StopFilter;
-import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
+import org.apache.lucene.analysis.CharArraySet;
+import org.apache.lucene.analysis.LowerCaseFilter;
+import org.apache.lucene.analysis.StopFilter;
+import org.apache.lucene.analysis.StopwordAnalyzerBase;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.WordlistLoader;
+import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
import org.apache.lucene.analysis.snowball.SnowballFilter;
import org.apache.lucene.analysis.standard.StandardFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
-import org.apache.lucene.analysis.util.CharArraySet;
-import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
-import org.apache.lucene.analysis.util.WordlistLoader;
import org.apache.lucene.util.IOUtils;
/**
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/common/src/java/org/apache/lucene/analysis/eu/BasqueAnalyzer.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/eu/BasqueAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/eu/BasqueAnalyzer.java
index db83cfb..cff2da0 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/eu/BasqueAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/eu/BasqueAnalyzer.java
@@ -21,16 +21,16 @@ import java.io.IOException;
import java.io.Reader;
import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.core.LowerCaseFilter;
-import org.apache.lucene.analysis.core.StopFilter;
-import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
+import org.apache.lucene.analysis.CharArraySet;
+import org.apache.lucene.analysis.LowerCaseFilter;
+import org.apache.lucene.analysis.StopFilter;
+import org.apache.lucene.analysis.StopwordAnalyzerBase;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
import org.apache.lucene.analysis.snowball.SnowballFilter;
import org.apache.lucene.analysis.standard.StandardFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
-import org.apache.lucene.analysis.util.CharArraySet;
-import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
import org.tartarus.snowball.ext.BasqueStemmer;
/**
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/common/src/java/org/apache/lucene/analysis/fa/PersianAnalyzer.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/fa/PersianAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/fa/PersianAnalyzer.java
index 256c78b..2515d1e 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/fa/PersianAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/fa/PersianAnalyzer.java
@@ -21,15 +21,15 @@ import java.io.IOException;
import java.io.Reader;
import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.CharArraySet;
+import org.apache.lucene.analysis.LowerCaseFilter;
+import org.apache.lucene.analysis.StopFilter;
+import org.apache.lucene.analysis.StopwordAnalyzerBase;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.ar.ArabicNormalizationFilter;
import org.apache.lucene.analysis.core.DecimalDigitFilter;
-import org.apache.lucene.analysis.core.LowerCaseFilter;
-import org.apache.lucene.analysis.core.StopFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
-import org.apache.lucene.analysis.util.CharArraySet;
-import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
import org.apache.lucene.util.Version;
/**
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/common/src/java/org/apache/lucene/analysis/fi/FinnishAnalyzer.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/fi/FinnishAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/fi/FinnishAnalyzer.java
index 4cc62db..6b00101 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/fi/FinnishAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/fi/FinnishAnalyzer.java
@@ -22,17 +22,17 @@ import java.io.Reader;
import java.nio.charset.StandardCharsets;
import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.core.LowerCaseFilter;
-import org.apache.lucene.analysis.core.StopFilter;
-import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
+import org.apache.lucene.analysis.CharArraySet;
+import org.apache.lucene.analysis.LowerCaseFilter;
+import org.apache.lucene.analysis.StopFilter;
+import org.apache.lucene.analysis.StopwordAnalyzerBase;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.WordlistLoader;
+import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
import org.apache.lucene.analysis.snowball.SnowballFilter;
import org.apache.lucene.analysis.standard.StandardFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
-import org.apache.lucene.analysis.util.CharArraySet;
-import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
-import org.apache.lucene.analysis.util.WordlistLoader;
import org.apache.lucene.util.IOUtils;
import org.tartarus.snowball.ext.FinnishStemmer;
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/common/src/java/org/apache/lucene/analysis/fr/FrenchAnalyzer.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/fr/FrenchAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/fr/FrenchAnalyzer.java
index 86088fd..5f90246 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/fr/FrenchAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/fr/FrenchAnalyzer.java
@@ -17,27 +17,27 @@
package org.apache.lucene.analysis.fr;
+import java.io.IOException;
+import java.io.Reader;
+import java.nio.charset.StandardCharsets;
+import java.util.Arrays;
+
import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.core.LowerCaseFilter;
-import org.apache.lucene.analysis.core.StopFilter;
-import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
+import org.apache.lucene.analysis.CharArraySet;
+import org.apache.lucene.analysis.LowerCaseFilter;
+import org.apache.lucene.analysis.StopFilter;
+import org.apache.lucene.analysis.StopwordAnalyzerBase;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.WordlistLoader;
+import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
import org.apache.lucene.analysis.snowball.SnowballFilter;
+import org.apache.lucene.analysis.standard.StandardAnalyzer; // for javadoc
import org.apache.lucene.analysis.standard.StandardFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
-import org.apache.lucene.analysis.standard.StandardAnalyzer; // for javadoc
-import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.analysis.util.ElisionFilter;
-import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
-import org.apache.lucene.analysis.util.WordlistLoader;
import org.apache.lucene.util.IOUtils;
-import java.io.IOException;
-import java.io.Reader;
-import java.nio.charset.StandardCharsets;
-import java.util.Arrays;
-
/**
* {@link Analyzer} for French language.
* <p>
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/common/src/java/org/apache/lucene/analysis/ga/IrishAnalyzer.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/ga/IrishAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/ga/IrishAnalyzer.java
index 1e6d39a..1ca3455 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/ga/IrishAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/ga/IrishAnalyzer.java
@@ -22,16 +22,16 @@ import java.io.Reader;
import java.util.Arrays;
import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.core.StopFilter;
-import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
+import org.apache.lucene.analysis.CharArraySet;
+import org.apache.lucene.analysis.StopFilter;
+import org.apache.lucene.analysis.StopwordAnalyzerBase;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
import org.apache.lucene.analysis.snowball.SnowballFilter;
import org.apache.lucene.analysis.standard.StandardFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
-import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.analysis.util.ElisionFilter;
-import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
import org.tartarus.snowball.ext.IrishStemmer;
/**
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/common/src/java/org/apache/lucene/analysis/gl/GalicianAnalyzer.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/gl/GalicianAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/gl/GalicianAnalyzer.java
index b9de3fa..372a6ec 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/gl/GalicianAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/gl/GalicianAnalyzer.java
@@ -22,16 +22,16 @@ import java.io.Reader;
import java.nio.charset.StandardCharsets;
import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.core.LowerCaseFilter;
-import org.apache.lucene.analysis.core.StopFilter;
-import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
+import org.apache.lucene.analysis.CharArraySet;
+import org.apache.lucene.analysis.LowerCaseFilter;
+import org.apache.lucene.analysis.StopFilter;
+import org.apache.lucene.analysis.StopwordAnalyzerBase;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.WordlistLoader;
+import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
import org.apache.lucene.analysis.standard.StandardFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
-import org.apache.lucene.analysis.util.CharArraySet;
-import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
-import org.apache.lucene.analysis.util.WordlistLoader;
import org.apache.lucene.util.IOUtils;
/**
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/common/src/java/org/apache/lucene/analysis/hi/HindiAnalyzer.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/hi/HindiAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/hi/HindiAnalyzer.java
index 22e930b..1b57129 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/hi/HindiAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/hi/HindiAnalyzer.java
@@ -20,16 +20,16 @@ package org.apache.lucene.analysis.hi;
import java.io.IOException;
import java.io.Reader;
-import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
-import org.apache.lucene.analysis.standard.StandardTokenizer;
-import org.apache.lucene.analysis.util.CharArraySet;
-import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
+import org.apache.lucene.analysis.CharArraySet;
+import org.apache.lucene.analysis.LowerCaseFilter;
+import org.apache.lucene.analysis.StopFilter;
+import org.apache.lucene.analysis.StopwordAnalyzerBase;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.DecimalDigitFilter;
-import org.apache.lucene.analysis.core.LowerCaseFilter;
-import org.apache.lucene.analysis.core.StopFilter;
import org.apache.lucene.analysis.in.IndicNormalizationFilter;
+import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
+import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.util.Version;
/**
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/common/src/java/org/apache/lucene/analysis/hu/HungarianAnalyzer.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/hu/HungarianAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/hu/HungarianAnalyzer.java
index 31fe9e2..0615bdc 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/hu/HungarianAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/hu/HungarianAnalyzer.java
@@ -22,17 +22,17 @@ import java.io.Reader;
import java.nio.charset.StandardCharsets;
import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.core.LowerCaseFilter;
-import org.apache.lucene.analysis.core.StopFilter;
-import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
+import org.apache.lucene.analysis.CharArraySet;
+import org.apache.lucene.analysis.LowerCaseFilter;
+import org.apache.lucene.analysis.StopFilter;
+import org.apache.lucene.analysis.StopwordAnalyzerBase;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.WordlistLoader;
+import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
import org.apache.lucene.analysis.snowball.SnowballFilter;
import org.apache.lucene.analysis.standard.StandardFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
-import org.apache.lucene.analysis.util.CharArraySet;
-import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
-import org.apache.lucene.analysis.util.WordlistLoader;
import org.apache.lucene.util.IOUtils;
import org.tartarus.snowball.ext.HungarianStemmer;
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Stemmer.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Stemmer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Stemmer.java
index 748b3f1..7687d21 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Stemmer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/hunspell/Stemmer.java
@@ -22,7 +22,7 @@ import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
-import org.apache.lucene.analysis.util.CharArraySet;
+import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.store.ByteArrayDataInput;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BytesRef;
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/common/src/java/org/apache/lucene/analysis/hy/ArmenianAnalyzer.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/hy/ArmenianAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/hy/ArmenianAnalyzer.java
index 857117a..8c04639 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/hy/ArmenianAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/hy/ArmenianAnalyzer.java
@@ -21,16 +21,16 @@ import java.io.IOException;
import java.io.Reader;
import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.core.LowerCaseFilter;
-import org.apache.lucene.analysis.core.StopFilter;
-import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
+import org.apache.lucene.analysis.CharArraySet;
+import org.apache.lucene.analysis.LowerCaseFilter;
+import org.apache.lucene.analysis.StopFilter;
+import org.apache.lucene.analysis.StopwordAnalyzerBase;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
import org.apache.lucene.analysis.snowball.SnowballFilter;
import org.apache.lucene.analysis.standard.StandardFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
-import org.apache.lucene.analysis.util.CharArraySet;
-import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
import org.tartarus.snowball.ext.ArmenianStemmer;
/**
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/common/src/java/org/apache/lucene/analysis/id/IndonesianAnalyzer.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/id/IndonesianAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/id/IndonesianAnalyzer.java
index f7be17f..fc9b4d2 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/id/IndonesianAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/id/IndonesianAnalyzer.java
@@ -20,15 +20,15 @@ package org.apache.lucene.analysis.id;
import java.io.IOException;
import java.io.Reader;
-import org.apache.lucene.analysis.core.LowerCaseFilter;
-import org.apache.lucene.analysis.core.StopFilter;
-import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
+import org.apache.lucene.analysis.CharArraySet;
+import org.apache.lucene.analysis.LowerCaseFilter;
+import org.apache.lucene.analysis.StopFilter;
+import org.apache.lucene.analysis.StopwordAnalyzerBase;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
import org.apache.lucene.analysis.standard.StandardFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
-import org.apache.lucene.analysis.util.CharArraySet;
-import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
/**
* Analyzer for Indonesian (Bahasa)
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/common/src/java/org/apache/lucene/analysis/it/ItalianAnalyzer.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/it/ItalianAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/it/ItalianAnalyzer.java
index 27027fa..a18aa5d 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/it/ItalianAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/it/ItalianAnalyzer.java
@@ -23,18 +23,18 @@ import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.core.LowerCaseFilter;
-import org.apache.lucene.analysis.core.StopFilter;
-import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
+import org.apache.lucene.analysis.CharArraySet;
+import org.apache.lucene.analysis.LowerCaseFilter;
+import org.apache.lucene.analysis.StopFilter;
+import org.apache.lucene.analysis.StopwordAnalyzerBase;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.WordlistLoader;
+import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
import org.apache.lucene.analysis.snowball.SnowballFilter;
import org.apache.lucene.analysis.standard.StandardFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
-import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.analysis.util.ElisionFilter;
-import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
-import org.apache.lucene.analysis.util.WordlistLoader;
import org.apache.lucene.util.IOUtils;
/**
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/common/src/java/org/apache/lucene/analysis/lt/LithuanianAnalyzer.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/lt/LithuanianAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/lt/LithuanianAnalyzer.java
index f0424c9..5e24cf9 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/lt/LithuanianAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/lt/LithuanianAnalyzer.java
@@ -21,16 +21,16 @@ import java.io.IOException;
import java.io.Reader;
import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.core.LowerCaseFilter;
-import org.apache.lucene.analysis.core.StopFilter;
-import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
+import org.apache.lucene.analysis.CharArraySet;
+import org.apache.lucene.analysis.LowerCaseFilter;
+import org.apache.lucene.analysis.StopFilter;
+import org.apache.lucene.analysis.StopwordAnalyzerBase;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
import org.apache.lucene.analysis.snowball.SnowballFilter;
import org.apache.lucene.analysis.standard.StandardFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
-import org.apache.lucene.analysis.util.CharArraySet;
-import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
import org.tartarus.snowball.ext.LithuanianStemmer;
/**
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/common/src/java/org/apache/lucene/analysis/lv/LatvianAnalyzer.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/lv/LatvianAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/lv/LatvianAnalyzer.java
index b22339d..0a016af 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/lv/LatvianAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/lv/LatvianAnalyzer.java
@@ -22,16 +22,16 @@ import java.io.Reader;
import java.nio.charset.StandardCharsets;
import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.core.LowerCaseFilter;
-import org.apache.lucene.analysis.core.StopFilter;
-import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
+import org.apache.lucene.analysis.CharArraySet;
+import org.apache.lucene.analysis.LowerCaseFilter;
+import org.apache.lucene.analysis.StopFilter;
+import org.apache.lucene.analysis.StopwordAnalyzerBase;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.WordlistLoader;
+import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
import org.apache.lucene.analysis.standard.StandardFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
-import org.apache.lucene.analysis.util.CharArraySet;
-import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
-import org.apache.lucene.analysis.util.WordlistLoader;
import org.apache.lucene.util.IOUtils;
/**