Posted to commits@lucene.apache.org by mi...@apache.org on 2016/06/14 22:51:18 UTC

[06/12] lucene-solr:branch_6x: LUCENE-7318: graduate StandardAnalyzer and make it the default for IndexWriterConfig
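
In practical terms, this change graduates StandardAnalyzer (together with supporting
classes such as CharArraySet, StopFilter, LowerCaseFilter, StopwordAnalyzerBase, and
WordlistLoader) from lucene-analyzers-common into lucene-core, which is why the diffs
below rewrite imports from org.apache.lucene.analysis.util.* to
org.apache.lucene.analysis.*. A minimal sketch of client code after the change (the
no-argument IndexWriterConfig constructor is assumed from the commit subject, which
makes StandardAnalyzer the default):

    import org.apache.lucene.analysis.CharArraySet;   // was o.a.l.analysis.util.CharArraySet
    import org.apache.lucene.index.IndexWriter;
    import org.apache.lucene.index.IndexWriterConfig;
    import org.apache.lucene.store.Directory;
    import org.apache.lucene.store.RAMDirectory;

    public class DefaultAnalyzerDemo {
      public static void main(String[] args) throws Exception {
        Directory dir = new RAMDirectory();
        // No analyzer supplied: per this commit, IndexWriterConfig should
        // now fall back to StandardAnalyzer (which lives in lucene-core).
        IndexWriterConfig config = new IndexWriterConfig();
        try (IndexWriter writer = new IndexWriter(dir, config)) {
          // Stopword sets use the relocated CharArraySet.
          CharArraySet stopWords = new CharArraySet(16, true);
          stopWords.add("the");
        }
      }
    }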

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/common/src/test/org/apache/lucene/analysis/standard/generateJavaUnicodeWordBreakTest.pl
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/standard/generateJavaUnicodeWordBreakTest.pl b/lucene/analysis/common/src/test/org/apache/lucene/analysis/standard/generateJavaUnicodeWordBreakTest.pl
deleted file mode 100644
index ec37924..0000000
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/standard/generateJavaUnicodeWordBreakTest.pl
+++ /dev/null
@@ -1,232 +0,0 @@
-#!/usr/bin/perl
-
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements.  See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License.  You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-use warnings;
-use strict;
-use File::Spec;
-use Getopt::Long;
-use LWP::UserAgent;
-
-my ($volume, $directory, $script_name) = File::Spec->splitpath($0);
-
-my $version = '';
-unless (GetOptions("version=s" => \$version) && $version =~ /\d+\.\d+\.\d+/) {
-  print STDERR "Usage: $script_name -v <version>\n";
-  print STDERR "\tversion must be of the form X.Y.Z, e.g. 5.2.0\n"
-      if ($version);
-  exit 1;
-}
-my $url_prefix = "http://www.unicode.org/Public/${version}/ucd";
-my $scripts_url = "${url_prefix}/Scripts.txt";
-my $line_break_url = "${url_prefix}/LineBreak.txt";
-my $word_break_url = "${url_prefix}/auxiliary/WordBreakProperty.txt";
-my $word_break_test_url = "${url_prefix}/auxiliary/WordBreakTest.txt";
-my $underscore_version = $version;
-$underscore_version =~ s/\./_/g;
-my $class_name = "WordBreakTestUnicode_${underscore_version}";
-my $output_filename = "${class_name}.java";
-my $header =<<"__HEADER__";
-package org.apache.lucene.analysis.core;
-
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.BaseTokenStreamTestCase;
-import org.junit.Ignore;
-
-/**
- * This class was automatically generated by ${script_name}
- * from: ${url_prefix}/auxiliary/WordBreakTest.txt
- *
- * WordBreakTest.txt indicates the points in the provided character sequences
- * at which conforming implementations must and must not break words.  This
- * class tests for expected token extraction from each of the test sequences
- * in WordBreakTest.txt, where the expected tokens are those character
- * sequences bounded by word breaks and containing at least one character
- * from one of the following character sets:
- *
- *    \\p{Script = Han}                (From $scripts_url)
- *    \\p{Script = Hiragana}
- *    \\p{LineBreak = Complex_Context} (From $line_break_url)
- *    \\p{WordBreak = ALetter}         (From $word_break_url)
- *    \\p{WordBreak = Hebrew_Letter}
- *    \\p{WordBreak = Katakana}
- *    \\p{WordBreak = Numeric}         (Excludes full-width Arabic digits)
- *    [\\uFF10-\\uFF19]                (Full-width Arabic digits)
- */
-\@Ignore
-public class ${class_name} extends BaseTokenStreamTestCase {
-
-  public void test(Analyzer analyzer) throws Exception {
-__HEADER__
-
-my $codepoints = [];
-map { $codepoints->[$_] = 1 } (0xFF10..0xFF19);
-# Complex_Context is an alias for 'SA', which is used in LineBreak.txt
-# Using lowercase versions of property value names to allow for case-
-# insensitive comparison with the names in the Unicode data files.
-parse_Unicode_data_file($line_break_url, $codepoints, {'sa' => 1});
-parse_Unicode_data_file($scripts_url, $codepoints, 
-                        {'han' => 1, 'hiragana' => 1});
-parse_Unicode_data_file($word_break_url, $codepoints,
-                        {'aletter' => 1, 'hebrew_letter' => 1, 'katakana' => 1, 'numeric' => 1});
-my @tests = split /\r?\n/, get_URL_content($word_break_test_url);
-
-my $output_path = File::Spec->catpath($volume, $directory, $output_filename);
-open OUT, ">$output_path"
-  || die "Error opening '$output_path' for writing: $!";
-
-print STDERR "Writing '$output_path'...";
-
-print OUT $header;
-
-for my $line (@tests) {
-  next if ($line =~ /^\s*(?:|\#.*)$/); # Skip blank or comment-only lines
-  # Example line: ÷ 0001 × 0300 ÷  #  ÷ [0.2] <START OF HEADING> (Other) × [4.0] COMBINING GRAVE ACCENT (Extend_FE) ÷ [0.3]
-  my ($sequence) = $line =~ /^(.*?)\s*\#/;
-  $line =~ s/\t/  /g; # Convert tabs to two spaces (no tabs allowed in Lucene source)
-  print OUT "    // $line\n";
-  $sequence =~ s/\s*÷\s*$//; # Trim trailing break character
-  my $test_string = $sequence;
-  $test_string =~ s/\s*÷\s*/\\u/g;
-  $test_string =~ s/\s*×\s*/\\u/g;
-  $test_string =~ s/\\u([0-9A-F]{5,})/join('', map { "\\u$_" } above_BMP_char_to_surrogates($1))/ge;
-  $test_string =~ s/\\u000A/\\n/g;
-  $test_string =~ s/\\u000D/\\r/g;
-  $test_string =~ s/\\u0022/\\\"/g;
-  $sequence =~ s/^\s*÷\s*//; # Trim leading break character
-  my @tokens = ();
-  for my $candidate (split /\s*÷\s*/, $sequence) {
-    my @chars = ();
-    my $has_wanted_char = 0;
-    while ($candidate =~ /([0-9A-F]+)/gi) {
-      my $hexchar = $1;
-      if (4 == length($hexchar)) {
-        push @chars, $hexchar;
-      } else {
-        push @chars, above_BMP_char_to_surrogates($hexchar);
-      }
-      unless ($has_wanted_char) {
-        $has_wanted_char = 1 if (defined($codepoints->[hex($hexchar)]));
-      }
-    }
-    if ($has_wanted_char) {
-      push @tokens, '"'.join('', map { "\\u$_" } @chars).'"';
-    }
-  }
-  print OUT "    assertAnalyzesTo(analyzer, \"${test_string}\",\n";
-  print OUT "                     new String[] { ";
-  print OUT join(", ", @tokens), " });\n\n";
-}
-
-print OUT "  }\n}\n";
-close OUT;
-print STDERR "done.\n";
-
-
-# sub above_BMP_char_to_surrogates
-#
-# Converts hex references to chars above the BMP (i.e., greater than 0xFFFF)
-# to the corresponding UTF-16 surrogate pair
-#
-# Assumption: input string is a sequence of more than four hex digits
-#
-sub above_BMP_char_to_surrogates {
-  my $ch = hex(shift);
-  my $high_surrogate = 0xD800 + (($ch - 0x10000) >> 10);
-  my $low_surrogate  = 0xDC00 + ($ch & 0x3FF);
-  return map { sprintf("%04X", $_) } ($high_surrogate, $low_surrogate);
-}
-
-
-# sub parse_Unicode_data_file
-#
-# Downloads the specified Unicode data file, parses it, and extracts the
-# code points assigned any of the given property values, setting the
-# corresponding positions in the passed-in target array.
-#
-# Takes in the following parameters:
-#
-#  - URL of the Unicode data file to download and parse
-#  - Reference to target array
-#  - Reference to hash of property values to get code points for
-#
-sub parse_Unicode_data_file {
-  my $url = shift;
-  my $target = shift;
-  my $wanted_property_values = shift;
-  my $content = get_URL_content($url);
-  print STDERR "Parsing '$url'...";
-  my @lines = split /\r?\n/, $content;
-  for (@lines) {
-    s/\s*#.*//;         # Strip trailing comments
-    s/\s+$//;           # Strip trailing space
-    next unless (/\S/); # Skip empty lines
-    my ($start, $end, $property_value);
-    if (/^([0-9A-F]{4,5})\s*;\s*(.+)/i) {
-      # 00AA       ; LATIN
-      $start = $end = hex $1;
-      $property_value = lc $2; # Property value names are case-insensitive
-    } elsif (/^([0-9A-F]{4,5})..([0-9A-F]{4,5})\s*;\s*(.+)/i) {
-      # 0AE6..0AEF ; Gujarati
-      $start = hex $1;
-      $end = hex $2;
-      $property_value = lc $3; # Property value names are case-insensitive
-    } else {
-      next;
-    }
-    if (defined($wanted_property_values->{$property_value})) {
-      for my $code_point ($start..$end) {
-        $target->[$code_point] = 1;
-      }
-    }
-  }
-  print STDERR "done.\n";
-}
-
-# sub get_URL_content
-#
-# Retrieves and returns the content of the given URL.
-#
-sub get_URL_content {
-  my $url = shift;
-  print STDERR "Retrieving '$url'...";
-  my $user_agent = LWP::UserAgent->new;
-  my $request = HTTP::Request->new(GET => $url);
-  my $response = $user_agent->request($request);
-  unless ($response->is_success) {
-    print STDERR "Failed to download '$url':\n\t",$response->status_line,"\n";
-    exit 1;
-  }
-  print STDERR "done.\n";
-  return $response->content;
-}
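
For reference, the deleted script's above_BMP_char_to_surrogates sub encodes a
supplementary code point as a UTF-16 pair via high = 0xD800 + ((cp - 0x10000) >> 10)
and low = 0xDC00 + (cp & 0x3FF). A small cross-check of that arithmetic against the
JDK's own encoder (a throwaway sketch; the class name is illustrative):

    public class SurrogateCheck {
      // Same arithmetic as the Perl sub, transcribed to Java.
      static char[] toSurrogates(int cp) {
        char high = (char) (0xD800 + ((cp - 0x10000) >> 10));
        char low  = (char) (0xDC00 + (cp & 0x3FF));
        return new char[] { high, low };
      }

      public static void main(String[] args) {
        int cp = 0x1D11E; // MUSICAL SYMBOL G CLEF, above the BMP
        char[] ours = toSurrogates(cp);
        char[] jdk = Character.toChars(cp); // JDK reference encoding
        // Both print D834 DD1E.
        System.out.printf("%04X %04X vs %04X %04X%n",
            (int) ours[0], (int) ours[1], (int) jdk[0], (int) jdk[1]);
      }
    }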

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/common/src/test/org/apache/lucene/analysis/sv/TestSwedishAnalyzer.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/sv/TestSwedishAnalyzer.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/sv/TestSwedishAnalyzer.java
index b9d586e..b7f45cb 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/sv/TestSwedishAnalyzer.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/sv/TestSwedishAnalyzer.java
@@ -21,7 +21,7 @@ import java.io.IOException;
 
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
-import org.apache.lucene.analysis.util.CharArraySet;
+import org.apache.lucene.analysis.CharArraySet;
 
 public class TestSwedishAnalyzer extends BaseTokenStreamTestCase {
   /** This test fails with NPE when the 

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/common/src/test/org/apache/lucene/analysis/sv/TestSwedishLightStemFilter.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/sv/TestSwedishLightStemFilter.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/sv/TestSwedishLightStemFilter.java
index ebe1034..fd7aefd 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/sv/TestSwedishLightStemFilter.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/sv/TestSwedishLightStemFilter.java
@@ -21,12 +21,12 @@ import java.io.IOException;
 
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.CharArraySet;
 import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.core.KeywordTokenizer;
 import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
-import org.apache.lucene.analysis.util.CharArraySet;
 
 import static org.apache.lucene.analysis.VocabularyAssert.*;
 

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/common/src/test/org/apache/lucene/analysis/th/TestThaiAnalyzer.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/th/TestThaiAnalyzer.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/th/TestThaiAnalyzer.java
index c4b9276..580e269 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/th/TestThaiAnalyzer.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/th/TestThaiAnalyzer.java
@@ -19,10 +19,10 @@ package org.apache.lucene.analysis.th;
 
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.CharArraySet;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.core.StopAnalyzer;
 import org.apache.lucene.analysis.tokenattributes.FlagsAttribute;
-import org.apache.lucene.analysis.util.CharArraySet;
 import org.apache.lucene.util.Version;
 
 /**

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/common/src/test/org/apache/lucene/analysis/tr/TestTurkishAnalyzer.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/tr/TestTurkishAnalyzer.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/tr/TestTurkishAnalyzer.java
index 9972702..9cfc6fc 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/tr/TestTurkishAnalyzer.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/tr/TestTurkishAnalyzer.java
@@ -21,7 +21,7 @@ import java.io.IOException;
 
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
-import org.apache.lucene.analysis.util.CharArraySet;
+import org.apache.lucene.analysis.CharArraySet;
 
 public class TestTurkishAnalyzer extends BaseTokenStreamTestCase {
   /** This test fails with NPE when the 

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestCharArrayMap.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestCharArrayMap.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestCharArrayMap.java
deleted file mode 100644
index 66b0dce..0000000
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestCharArrayMap.java
+++ /dev/null
@@ -1,244 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.lucene.analysis.util;
-
-import java.util.*;
-
-import org.apache.lucene.analysis.util.CharArrayMap;
-import org.apache.lucene.analysis.util.CharArraySet;
-import org.apache.lucene.util.LuceneTestCase;
-
-public class TestCharArrayMap extends LuceneTestCase {
-  public void doRandom(int iter, boolean ignoreCase) {
-    CharArrayMap<Integer> map = new CharArrayMap<>(1, ignoreCase);
-    HashMap<String,Integer> hmap = new HashMap<>();
-
-    char[] key;
-    for (int i=0; i<iter; i++) {
-      int len = random().nextInt(5);
-      key = new char[len];
-      for (int j=0; j<key.length; j++) {
-        key[j] = (char)random().nextInt(127);
-      }
-      String keyStr = new String(key);
-      String hmapKey = ignoreCase ? keyStr.toLowerCase(Locale.ROOT) : keyStr; 
-
-      int val = random().nextInt();
-
-      Object o1 = map.put(key, val);
-      Object o2 = hmap.put(hmapKey,val);
-      assertEquals(o1,o2);
-
-      // add it again with the string method
-      assertEquals(val, map.put(keyStr,val).intValue());
-
-      assertEquals(val, map.get(key,0,key.length).intValue());
-      assertEquals(val, map.get(key).intValue());
-      assertEquals(val, map.get(keyStr).intValue());
-
-      assertEquals(hmap.size(), map.size());
-    }
-  }
-
-  public void testCharArrayMap() {
-    int num = 5 * RANDOM_MULTIPLIER;
-    for (int i = 0; i < num; i++) { // pump this up for more random testing
-      doRandom(1000,false);
-      doRandom(1000,true);      
-    }
-  }
-
-  public void testMethods() {
-    CharArrayMap<Integer> cm = new CharArrayMap<>(2, false);
-    HashMap<String,Integer> hm = new HashMap<>();
-    hm.put("foo",1);
-    hm.put("bar",2);
-    cm.putAll(hm);
-    assertEquals(hm.size(), cm.size());
-    hm.put("baz", 3);
-    cm.putAll(hm);
-    assertEquals(hm.size(), cm.size());
-
-    CharArraySet cs = cm.keySet();
-    int n=0;
-    for (Object o : cs) {
-      assertTrue(cm.containsKey(o));
-      char[] co = (char[]) o;
-      assertTrue(cm.containsKey(co, 0, co.length));
-      n++;
-    }
-    assertEquals(hm.size(), n);
-    assertEquals(hm.size(), cs.size());
-    assertEquals(cm.size(), cs.size());
-    cs.clear();
-    assertEquals(0, cs.size());
-    assertEquals(0, cm.size());
-    // keySet() should not allow adding new keys
-    expectThrows(UnsupportedOperationException.class, () -> {
-      cs.add("test");
-    });
-
-    cm.putAll(hm);
-    assertEquals(hm.size(), cs.size());
-    assertEquals(cm.size(), cs.size());
-
-    Iterator<Map.Entry<Object,Integer>> iter1 = cm.entrySet().iterator();
-    n=0;
-    while (iter1.hasNext()) {
-      Map.Entry<Object,Integer> entry = iter1.next();
-      Object key = entry.getKey();
-      Integer val = entry.getValue();
-      assertEquals(cm.get(key), val);
-      entry.setValue(val*100);
-      assertEquals(val*100, (int)cm.get(key));
-      n++;
-    }
-    assertEquals(hm.size(), n);
-    cm.clear();
-    cm.putAll(hm);
-    assertEquals(cm.size(), n);
-
-    CharArrayMap<Integer>.EntryIterator iter2 = cm.entrySet().iterator();
-    n=0;
-    while (iter2.hasNext()) {
-      char[] keyc = iter2.nextKey();
-      Integer val = iter2.currentValue();
-      assertEquals(hm.get(new String(keyc)), val);
-      iter2.setValue(val*100);
-      assertEquals(val*100, (int)cm.get(keyc));
-      n++;
-    }
-    assertEquals(hm.size(), n);
-
-    cm.entrySet().clear();
-    assertEquals(0, cm.size());
-    assertEquals(0, cm.entrySet().size());
-    assertTrue(cm.isEmpty());
-  }
-
-  // TODO: break this up into simpler test methods vs. "telling a story"
-  public void testModifyOnUnmodifiable(){
-    CharArrayMap<Integer> map = new CharArrayMap<>(2, false);
-    map.put("foo",1);
-    map.put("bar",2);
-    final int size = map.size();
-    assertEquals(2, size);
-    assertTrue(map.containsKey("foo"));  
-    assertEquals(1, map.get("foo").intValue());  
-    assertTrue(map.containsKey("bar"));  
-    assertEquals(2, map.get("bar").intValue());  
-
-    map = CharArrayMap.unmodifiableMap(map);
-    assertEquals("Map size changed due to unmodifiableMap call" , size, map.size());
-    String NOT_IN_MAP = "SirGallahad";
-    assertFalse("Test String already exists in map", map.containsKey(NOT_IN_MAP));
-    assertNull("Test String already exists in map", map.get(NOT_IN_MAP));
-    
-    try{
-      map.put(NOT_IN_MAP.toCharArray(), 3);  
-      fail("Modified unmodifiable map");
-    }catch (UnsupportedOperationException e) {
-      // expected
-      assertFalse("Test String has been added to unmodifiable map", map.containsKey(NOT_IN_MAP));
-      assertNull("Test String has been added to unmodifiable map", map.get(NOT_IN_MAP));
-      assertEquals("Size of unmodifiable map has changed", size, map.size());
-    }
-    
-    try{
-      map.put(NOT_IN_MAP, 3);  
-      fail("Modified unmodifiable map");
-    }catch (UnsupportedOperationException e) {
-      // expected
-      assertFalse("Test String has been added to unmodifiable map", map.containsKey(NOT_IN_MAP));
-      assertNull("Test String has been added to unmodifiable map", map.get(NOT_IN_MAP));
-      assertEquals("Size of unmodifiable map has changed", size, map.size());
-    }
-    
-    try{
-      map.put(new StringBuilder(NOT_IN_MAP), 3);  
-      fail("Modified unmodifiable map");
-    }catch (UnsupportedOperationException e) {
-      // expected
-      assertFalse("Test String has been added to unmodifiable map", map.containsKey(NOT_IN_MAP));
-      assertNull("Test String has been added to unmodifiable map", map.get(NOT_IN_MAP));
-      assertEquals("Size of unmodifiable map has changed", size, map.size());
-    }
-    
-    try{
-      map.clear();  
-      fail("Modified unmodifiable map");
-    }catch (UnsupportedOperationException e) {
-      // expected
-      assertEquals("Size of unmodifiable map has changed", size, map.size());
-    }
-    
-    try{
-      map.entrySet().clear();  
-      fail("Modified unmodifiable map");
-    }catch (UnsupportedOperationException e) {
-      // expected
-      assertEquals("Size of unmodifiable map has changed", size, map.size());
-    }
-    
-    try{
-      map.keySet().clear();  
-      fail("Modified unmodifiable map");
-    }catch (UnsupportedOperationException e) {
-      // expected
-      assertEquals("Size of unmodifiable map has changed", size, map.size());
-    }
-    
-    try{
-      map.put((Object) NOT_IN_MAP, 3);  
-      fail("Modified unmodifiable map");
-    }catch (UnsupportedOperationException e) {
-      // expected
-      assertFalse("Test String has been added to unmodifiable map", map.containsKey(NOT_IN_MAP));
-      assertNull("Test String has been added to unmodifiable map", map.get(NOT_IN_MAP));
-      assertEquals("Size of unmodifiable map has changed", size, map.size());
-    }
-    
-    try{
-      map.putAll(Collections.singletonMap(NOT_IN_MAP, 3));  
-      fail("Modified unmodifiable map");
-    }catch (UnsupportedOperationException e) {
-      // expected
-      assertFalse("Test String has been added to unmodifiable map", map.containsKey(NOT_IN_MAP));
-      assertNull("Test String has been added to unmodifiable map", map.get(NOT_IN_MAP));
-      assertEquals("Size of unmodifiable map has changed", size, map.size());
-    }
-    
-    assertTrue(map.containsKey("foo"));  
-    assertEquals(1, map.get("foo").intValue());  
-    assertTrue(map.containsKey("bar"));  
-    assertEquals(2, map.get("bar").intValue());  
-  }
-  
-  public void testToString() {
-    CharArrayMap<Integer> cm = new CharArrayMap<>(Collections.singletonMap("test",1), false);
-    assertEquals("[test]",cm.keySet().toString());
-    assertEquals("[1]",cm.values().toString());
-    assertEquals("[test=1]",cm.entrySet().toString());
-    assertEquals("{test=1}",cm.toString());
-    cm.put("test2", 2);
-    assertTrue(cm.keySet().toString().contains(", "));
-    assertTrue(cm.values().toString().contains(", "));
-    assertTrue(cm.entrySet().toString().contains(", "));
-    assertTrue(cm.toString().contains(", "));
-  }
-}
-

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestCharArraySet.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestCharArraySet.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestCharArraySet.java
deleted file mode 100644
index 1fcee65..0000000
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestCharArraySet.java
+++ /dev/null
@@ -1,429 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.lucene.analysis.util;
-
-
-import java.util.*;
-
-import org.apache.lucene.util.LuceneTestCase;
-
-
-public class TestCharArraySet extends LuceneTestCase {
-  
-  static final String[] TEST_STOP_WORDS = {
-    "a", "an", "and", "are", "as", "at", "be", "but", "by",
-    "for", "if", "in", "into", "is", "it",
-    "no", "not", "of", "on", "or", "such",
-    "that", "the", "their", "then", "there", "these",
-    "they", "this", "to", "was", "will", "with"
-  };
-  
-  
-  public void testRehash() throws Exception {
-    CharArraySet cas = new CharArraySet(0, true);
-    for(int i=0;i<TEST_STOP_WORDS.length;i++)
-      cas.add(TEST_STOP_WORDS[i]);
-    assertEquals(TEST_STOP_WORDS.length, cas.size());
-    for(int i=0;i<TEST_STOP_WORDS.length;i++)
-      assertTrue(cas.contains(TEST_STOP_WORDS[i]));
-  }
-
-  public void testNonZeroOffset() {
-    String[] words={"Hello","World","this","is","a","test"};
-    char[] findme="xthisy".toCharArray();   
-    CharArraySet set= new CharArraySet(10, true);
-    set.addAll(Arrays.asList(words));
-    assertTrue(set.contains(findme, 1, 4));
-    assertTrue(set.contains(new String(findme,1,4)));
-    
-    // test unmodifiable
-    set = CharArraySet.unmodifiableSet(set);
-    assertTrue(set.contains(findme, 1, 4));
-    assertTrue(set.contains(new String(findme,1,4)));
-  }
-  
-  public void testObjectContains() {
-    CharArraySet set = new CharArraySet(10, true);
-    Integer val = Integer.valueOf(1);
-    set.add(val);
-    assertTrue(set.contains(val));
-    assertTrue(set.contains(new Integer(1))); // another integer
-    assertTrue(set.contains("1"));
-    assertTrue(set.contains(new char[]{'1'}));
-    // test unmodifiable
-    set = CharArraySet.unmodifiableSet(set);
-    assertTrue(set.contains(val));
-    assertTrue(set.contains(new Integer(1))); // another integer
-    assertTrue(set.contains("1"));
-    assertTrue(set.contains(new char[]{'1'}));
-  }
-  
-  public void testClear(){
-    CharArraySet set=new CharArraySet(10,true);
-    set.addAll(Arrays.asList(TEST_STOP_WORDS));
-    assertEquals("Not all words added", TEST_STOP_WORDS.length, set.size());
-    set.clear();
-    assertEquals("not empty", 0, set.size());
-    for(int i=0;i<TEST_STOP_WORDS.length;i++)
-      assertFalse(set.contains(TEST_STOP_WORDS[i]));
-    set.addAll(Arrays.asList(TEST_STOP_WORDS));
-    assertEquals("Not all words added", TEST_STOP_WORDS.length, set.size());
-    for(int i=0;i<TEST_STOP_WORDS.length;i++)
-      assertTrue(set.contains(TEST_STOP_WORDS[i]));
-  }
-  
-  // TODO: break this up into simpler test methods, vs "telling a story"
-  public void testModifyOnUnmodifiable(){
-    CharArraySet set=new CharArraySet(10, true);
-    set.addAll(Arrays.asList(TEST_STOP_WORDS));
-    final int size = set.size();
-    set = CharArraySet.unmodifiableSet(set);
-    assertEquals("Set size changed due to unmodifiableSet call" , size, set.size());
-    String NOT_IN_SET = "SirGallahad";
-    assertFalse("Test String already exists in set", set.contains(NOT_IN_SET));
-    
-    try{
-      set.add(NOT_IN_SET.toCharArray());  
-      fail("Modified unmodifiable set");
-    }catch (UnsupportedOperationException e) {
-      // expected
-      assertFalse("Test String has been added to unmodifiable set", set.contains(NOT_IN_SET));
-      assertEquals("Size of unmodifiable set has changed", size, set.size());
-    }
-    
-    try{
-      set.add(NOT_IN_SET);  
-      fail("Modified unmodifiable set");
-    }catch (UnsupportedOperationException e) {
-      // expected
-      assertFalse("Test String has been added to unmodifiable set", set.contains(NOT_IN_SET));
-      assertEquals("Size of unmodifiable set has changed", size, set.size());
-    }
-    
-    try{
-      set.add(new StringBuilder(NOT_IN_SET));  
-      fail("Modified unmodifiable set");
-    }catch (UnsupportedOperationException e) {
-      // expected
-      assertFalse("Test String has been added to unmodifiable set", set.contains(NOT_IN_SET));
-      assertEquals("Size of unmodifiable set has changed", size, set.size());
-    }
-    
-    try{
-      set.clear();  
-      fail("Modified unmodifiable set");
-    }catch (UnsupportedOperationException e) {
-      // expected
-      assertFalse("Changed unmodifiable set", set.contains(NOT_IN_SET));
-      assertEquals("Size of unmodifiable set has changed", size, set.size());
-    }
-    try{
-      set.add((Object) NOT_IN_SET);  
-      fail("Modified unmodifiable set");
-    }catch (UnsupportedOperationException e) {
-      // expected
-      assertFalse("Test String has been added to unmodifiable set", set.contains(NOT_IN_SET));
-      assertEquals("Size of unmodifiable set has changed", size, set.size());
-    }
-    
-    // This test was changed in 3.1, as a contains() call on the given Collection using the "correct" iterator's
-    // current key (now a char[]) on a Set<String> would not hit any element of the CAS and therefore never call
-    // remove() on the iterator
-    try{
-      set.removeAll(new CharArraySet(Arrays.asList(TEST_STOP_WORDS), true));  
-      fail("Modified unmodifiable set");
-    }catch (UnsupportedOperationException e) {
-      // expected
-      assertEquals("Size of unmodifiable set has changed", size, set.size());
-    }
-    
-    try{
-      set.retainAll(new CharArraySet(Arrays.asList(NOT_IN_SET), true));  
-      fail("Modified unmodifiable set");
-    }catch (UnsupportedOperationException e) {
-      // expected
-      assertEquals("Size of unmodifiable set has changed", size, set.size());
-    }
-    
-    try{
-      set.addAll(Arrays.asList(NOT_IN_SET));
-      fail("Modified unmodifiable set");
-    }catch (UnsupportedOperationException e) {
-      // expected
-      assertFalse("Test String has been added to unmodifiable set", set.contains(NOT_IN_SET));
-    }
-    
-    for (int i = 0; i < TEST_STOP_WORDS.length; i++) {
-      assertTrue(set.contains(TEST_STOP_WORDS[i]));  
-    }
-  }
-  
-  public void testUnmodifiableSet(){
-    CharArraySet set = new CharArraySet(10,true);
-    set.addAll(Arrays.asList(TEST_STOP_WORDS));
-    set.add(Integer.valueOf(1));
-    final int size = set.size();
-    set = CharArraySet.unmodifiableSet(set);
-    assertEquals("Set size changed due to unmodifiableSet call" , size, set.size());
-    for (String stopword : TEST_STOP_WORDS) {
-      assertTrue(set.contains(stopword));
-    }
-    assertTrue(set.contains(Integer.valueOf(1)));
-    assertTrue(set.contains("1"));
-    assertTrue(set.contains(new char[]{'1'}));
-    
-    expectThrows(NullPointerException.class, () -> {
-      CharArraySet.unmodifiableSet(null);
-    });
-  }
-  
-  public void testSupplementaryChars() {
-    String missing = "Term %s is missing in the set";
-    String falsePos = "Term %s is in the set but shouldn't";
-    // for reference see
-    // http://unicode.org/cldr/utility/list-unicodeset.jsp?a=[[%3ACase_Sensitive%3DTrue%3A]%26[^[\u0000-\uFFFF]]]&esc=on
-    String[] upperArr = new String[] {"Abc\ud801\udc1c",
-        "\ud801\udc1c\ud801\udc1cCDE", "A\ud801\udc1cB"};
-    String[] lowerArr = new String[] {"abc\ud801\udc44",
-        "\ud801\udc44\ud801\udc44cde", "a\ud801\udc44b"};
-    CharArraySet set = new CharArraySet(Arrays.asList(TEST_STOP_WORDS), true);
-    for (String upper : upperArr) {
-      set.add(upper);
-    }
-    for (int i = 0; i < upperArr.length; i++) {
-      assertTrue(String.format(Locale.ROOT, missing, upperArr[i]), set.contains(upperArr[i]));
-      assertTrue(String.format(Locale.ROOT, missing, lowerArr[i]), set.contains(lowerArr[i]));
-    }
-    set = new CharArraySet(Arrays.asList(TEST_STOP_WORDS), false);
-    for (String upper : upperArr) {
-      set.add(upper);
-    }
-    for (int i = 0; i < upperArr.length; i++) {
-      assertTrue(String.format(Locale.ROOT, missing, upperArr[i]), set.contains(upperArr[i]));
-      assertFalse(String.format(Locale.ROOT, falsePos, lowerArr[i]), set.contains(lowerArr[i]));
-    }
-  }
-  
-  public void testSingleHighSurrogate() {
-    String missing = "Term %s is missing in the set";
-    String falsePos = "Term %s is in the set but shouldn't";
-    String[] upperArr = new String[] { "ABC\uD800", "ABC\uD800EfG",
-        "\uD800EfG", "\uD800\ud801\udc1cB" };
-
-    String[] lowerArr = new String[] { "abc\uD800", "abc\uD800efg",
-        "\uD800efg", "\uD800\ud801\udc44b" };
-    CharArraySet set = new CharArraySet(Arrays
-        .asList(TEST_STOP_WORDS), true);
-    for (String upper : upperArr) {
-      set.add(upper);
-    }
-    for (int i = 0; i < upperArr.length; i++) {
-      assertTrue(String.format(Locale.ROOT, missing, upperArr[i]), set.contains(upperArr[i]));
-      assertTrue(String.format(Locale.ROOT, missing, lowerArr[i]), set.contains(lowerArr[i]));
-    }
-    set = new CharArraySet(Arrays.asList(TEST_STOP_WORDS),
-        false);
-    for (String upper : upperArr) {
-      set.add(upper);
-    }
-    for (int i = 0; i < upperArr.length; i++) {
-      assertTrue(String.format(Locale.ROOT, missing, upperArr[i]), set.contains(upperArr[i]));
-      assertFalse(String.format(Locale.ROOT, falsePos, lowerArr[i]), set
-          .contains(lowerArr[i]));
-    }
-  }
-  
-  @SuppressWarnings("deprecation")
-  public void testCopyCharArraySetBWCompat() {
-    CharArraySet setIngoreCase = new CharArraySet(10, true);
-    CharArraySet setCaseSensitive = new CharArraySet(10, false);
-
-    List<String> stopwords = Arrays.asList(TEST_STOP_WORDS);
-    List<String> stopwordsUpper = new ArrayList<>();
-    for (String string : stopwords) {
-      stopwordsUpper.add(string.toUpperCase(Locale.ROOT));
-    }
-    setIngoreCase.addAll(Arrays.asList(TEST_STOP_WORDS));
-    setIngoreCase.add(Integer.valueOf(1));
-    setCaseSensitive.addAll(Arrays.asList(TEST_STOP_WORDS));
-    setCaseSensitive.add(Integer.valueOf(1));
-
-    CharArraySet copy = CharArraySet.copy(setIngoreCase);
-    CharArraySet copyCaseSens = CharArraySet.copy(setCaseSensitive);
-
-    assertEquals(setIngoreCase.size(), copy.size());
-    assertEquals(setCaseSensitive.size(), copy.size());
-
-    assertTrue(copy.containsAll(stopwords));
-    assertTrue(copy.containsAll(stopwordsUpper));
-    assertTrue(copyCaseSens.containsAll(stopwords));
-    for (String string : stopwordsUpper) {
-      assertFalse(copyCaseSens.contains(string));
-    }
-    // test adding terms to the copy
-    List<String> newWords = new ArrayList<>();
-    for (String string : stopwords) {
-      newWords.add(string+"_1");
-    }
-    copy.addAll(newWords);
-    
-    assertTrue(copy.containsAll(stopwords));
-    assertTrue(copy.containsAll(stopwordsUpper));
-    assertTrue(copy.containsAll(newWords));
-    // newly added terms are not in the source set
-    for (String string : newWords) {
-      assertFalse(setIngoreCase.contains(string));  
-      assertFalse(setCaseSensitive.contains(string));  
-
-    }
-  }
-  
-  /**
-   * Test the static #copy() function with a CharArraySet as a source
-   */
-  public void testCopyCharArraySet() {
-    CharArraySet setIngoreCase = new CharArraySet(10, true);
-    CharArraySet setCaseSensitive = new CharArraySet(10, false);
-
-    List<String> stopwords = Arrays.asList(TEST_STOP_WORDS);
-    List<String> stopwordsUpper = new ArrayList<>();
-    for (String string : stopwords) {
-      stopwordsUpper.add(string.toUpperCase(Locale.ROOT));
-    }
-    setIngoreCase.addAll(Arrays.asList(TEST_STOP_WORDS));
-    setIngoreCase.add(Integer.valueOf(1));
-    setCaseSensitive.addAll(Arrays.asList(TEST_STOP_WORDS));
-    setCaseSensitive.add(Integer.valueOf(1));
-
-    CharArraySet copy = CharArraySet.copy(setIngoreCase);
-    CharArraySet copyCaseSens = CharArraySet.copy(setCaseSensitive);
-
-    assertEquals(setIngoreCase.size(), copy.size());
-    assertEquals(setCaseSensitive.size(), copy.size());
-
-    assertTrue(copy.containsAll(stopwords));
-    assertTrue(copy.containsAll(stopwordsUpper));
-    assertTrue(copyCaseSens.containsAll(stopwords));
-    for (String string : stopwordsUpper) {
-      assertFalse(copyCaseSens.contains(string));
-    }
-    // test adding terms to the copy
-    List<String> newWords = new ArrayList<>();
-    for (String string : stopwords) {
-      newWords.add(string+"_1");
-    }
-    copy.addAll(newWords);
-    
-    assertTrue(copy.containsAll(stopwords));
-    assertTrue(copy.containsAll(stopwordsUpper));
-    assertTrue(copy.containsAll(newWords));
-    // newly added terms are not in the source set
-    for (String string : newWords) {
-      assertFalse(setIngoreCase.contains(string));  
-      assertFalse(setCaseSensitive.contains(string));  
-
-    }
-  }
-  
-  /**
-   * Test the static #copy() function with a JDK {@link Set} as a source
-   */
-  public void testCopyJDKSet() {
-    Set<String> set = new HashSet<>();
-
-    List<String> stopwords = Arrays.asList(TEST_STOP_WORDS);
-    List<String> stopwordsUpper = new ArrayList<>();
-    for (String string : stopwords) {
-      stopwordsUpper.add(string.toUpperCase(Locale.ROOT));
-    }
-    set.addAll(Arrays.asList(TEST_STOP_WORDS));
-
-    CharArraySet copy = CharArraySet.copy(set);
-
-    assertEquals(set.size(), copy.size());
-    assertEquals(set.size(), copy.size());
-
-    assertTrue(copy.containsAll(stopwords));
-    for (String string : stopwordsUpper) {
-      assertFalse(copy.contains(string));
-    }
-    
-    List<String> newWords = new ArrayList<>();
-    for (String string : stopwords) {
-      newWords.add(string+"_1");
-    }
-    copy.addAll(newWords);
-    
-    assertTrue(copy.containsAll(stopwords));
-    assertTrue(copy.containsAll(newWords));
-    // newly added terms are not in the source set
-    for (String string : newWords) {
-      assertFalse(set.contains(string));  
-    }
-  }
-  
-  /**
-   * Tests a special case of {@link CharArraySet#copy(Set)} where the
-   * set to copy is the {@link CharArraySet#EMPTY_SET}
-   */
-  public void testCopyEmptySet() {
-    assertSame(CharArraySet.EMPTY_SET, 
-        CharArraySet.copy(CharArraySet.EMPTY_SET));
-  }
-
-  /**
-   * Smoketests the static empty set
-   */
-  public void testEmptySet() {
-    assertEquals(0, CharArraySet.EMPTY_SET.size());
-    
-    assertTrue(CharArraySet.EMPTY_SET.isEmpty());
-    for (String stopword : TEST_STOP_WORDS) {
-      assertFalse(CharArraySet.EMPTY_SET.contains(stopword));
-    }
-    assertFalse(CharArraySet.EMPTY_SET.contains("foo"));
-    assertFalse(CharArraySet.EMPTY_SET.contains((Object) "foo"));
-    assertFalse(CharArraySet.EMPTY_SET.contains("foo".toCharArray()));
-    assertFalse(CharArraySet.EMPTY_SET.contains("foo".toCharArray(),0,3));
-  }
-  
-  /**
-   * Test for NPE
-   */
-  public void testContainsWithNull() {
-    CharArraySet set = new CharArraySet(1, true);
-
-    expectThrows(NullPointerException.class, () -> {
-      set.contains((char[]) null, 0, 10);
-    });
-
-    expectThrows(NullPointerException.class, () -> {
-      set.contains((CharSequence) null);
-    });
-
-    expectThrows(NullPointerException.class, () -> {
-      set.contains((Object) null);
-    });
-  }
-  
-  public void testToString() {
-    CharArraySet set = CharArraySet.copy(Collections.singleton("test"));
-    assertEquals("[test]", set.toString());
-    set.add("test2");
-    assertTrue(set.toString().contains(", "));
-  }
-}

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestCharacterUtils.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestCharacterUtils.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestCharacterUtils.java
deleted file mode 100644
index 04e96ea..0000000
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestCharacterUtils.java
+++ /dev/null
@@ -1,107 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.lucene.analysis.util;
-
-
-import java.io.IOException;
-import java.io.Reader;
-import java.io.StringReader;
-import java.util.Arrays;
-
-import org.apache.lucene.analysis.util.CharacterUtils.CharacterBuffer;
-import org.apache.lucene.util.LuceneTestCase;
-import org.apache.lucene.util.TestUtil;
-import org.junit.Test;
-
-/**
- * TestCase for the {@link CharacterUtils} class.
- */
-public class TestCharacterUtils extends LuceneTestCase {
-
-  public void testConversions() {
-    final char[] orig = TestUtil.randomUnicodeString(random(), 100).toCharArray();
-    final int[] buf = new int[orig.length];
-    final char[] restored = new char[buf.length];
-    final int o1 = TestUtil.nextInt(random(), 0, Math.min(5, orig.length));
-    final int o2 = TestUtil.nextInt(random(), 0, o1);
-    final int o3 = TestUtil.nextInt(random(), 0, o1);
-    final int codePointCount = CharacterUtils.toCodePoints(orig, o1, orig.length - o1, buf, o2);
-    final int charCount = CharacterUtils.toChars(buf, o2, codePointCount, restored, o3);
-    assertEquals(orig.length - o1, charCount);
-    assertArrayEquals(Arrays.copyOfRange(orig, o1, o1 + charCount), Arrays.copyOfRange(restored, o3, o3 + charCount));
-  }
-
-  @Test
-  public void testNewCharacterBuffer() {
-    CharacterBuffer newCharacterBuffer = CharacterUtils.newCharacterBuffer(1024);
-    assertEquals(1024, newCharacterBuffer.getBuffer().length);
-    assertEquals(0, newCharacterBuffer.getOffset());
-    assertEquals(0, newCharacterBuffer.getLength());
-
-    newCharacterBuffer = CharacterUtils.newCharacterBuffer(2);
-    assertEquals(2, newCharacterBuffer.getBuffer().length);
-    assertEquals(0, newCharacterBuffer.getOffset());
-    assertEquals(0, newCharacterBuffer.getLength());
-
-    // length must be >= 2
-    expectThrows(IllegalArgumentException.class, () -> {
-      CharacterUtils.newCharacterBuffer(1);
-    });
-  }
-
-  @Test
-  public void testFillNoHighSurrogate() throws IOException {
-    Reader reader = new StringReader("helloworld");
-    CharacterBuffer buffer = CharacterUtils.newCharacterBuffer(6);
-    assertTrue(CharacterUtils.fill(buffer,reader));
-    assertEquals(0, buffer.getOffset());
-    assertEquals(6, buffer.getLength());
-    assertEquals("hellow", new String(buffer.getBuffer()));
-    assertFalse(CharacterUtils.fill(buffer,reader));
-    assertEquals(4, buffer.getLength());
-    assertEquals(0, buffer.getOffset());
-
-    assertEquals("orld", new String(buffer.getBuffer(), buffer.getOffset(),
-        buffer.getLength()));
-    assertFalse(CharacterUtils.fill(buffer,reader));
-  }
-
-  @Test
-  public void testFill() throws IOException {
-    String input = "1234\ud801\udc1c789123\ud801\ud801\udc1c\ud801";
-    Reader reader = new StringReader(input);
-    CharacterBuffer buffer = CharacterUtils.newCharacterBuffer(5);
-    assertTrue(CharacterUtils.fill(buffer, reader));
-    assertEquals(4, buffer.getLength());
-    assertEquals("1234", new String(buffer.getBuffer(), buffer.getOffset(),
-        buffer.getLength()));
-    assertTrue(CharacterUtils.fill(buffer, reader));
-    assertEquals(5, buffer.getLength());
-    assertEquals("\ud801\udc1c789", new String(buffer.getBuffer()));
-    assertTrue(CharacterUtils.fill(buffer, reader));
-    assertEquals(4, buffer.getLength());
-    assertEquals("123\ud801", new String(buffer.getBuffer(),
-        buffer.getOffset(), buffer.getLength()));
-    assertFalse(CharacterUtils.fill(buffer, reader));
-    assertEquals(3, buffer.getLength());
-    assertEquals("\ud801\udc1c\ud801", new String(buffer.getBuffer(), buffer
-        .getOffset(), buffer.getLength()));
-    assertFalse(CharacterUtils.fill(buffer, reader));
-    assertEquals(0, buffer.getLength());
-  }
-
-}

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestElision.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestElision.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestElision.java
index 5e1d3c1..f8c1198 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestElision.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestElision.java
@@ -24,13 +24,13 @@ import java.util.List;
 
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.CharArraySet;
 import org.apache.lucene.analysis.TokenFilter;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.core.KeywordTokenizer;
 import org.apache.lucene.analysis.fr.FrenchAnalyzer;
 import org.apache.lucene.analysis.standard.StandardTokenizer;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
-import org.apache.lucene.analysis.util.CharArraySet;
 
 /**
  * 

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestFilesystemResourceLoader.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestFilesystemResourceLoader.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestFilesystemResourceLoader.java
index be90611..eaa6174 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestFilesystemResourceLoader.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestFilesystemResourceLoader.java
@@ -24,6 +24,8 @@ import java.nio.charset.StandardCharsets;
 import java.nio.file.Files;
 import java.nio.file.Path;
 
+import org.apache.lucene.analysis.CharArraySet;
+import org.apache.lucene.analysis.WordlistLoader;
 import org.apache.lucene.util.IOUtils;
 import org.apache.lucene.util.LuceneTestCase;
 

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestWordlistLoader.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestWordlistLoader.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestWordlistLoader.java
deleted file mode 100644
index b1dd1b5..0000000
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestWordlistLoader.java
+++ /dev/null
@@ -1,79 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.lucene.analysis.util;
-
-
-import java.io.BufferedReader;
-import java.io.IOException;
-import java.io.StringReader;
-
-import org.apache.lucene.util.LuceneTestCase;
-
-import org.apache.lucene.analysis.util.WordlistLoader;
-
-public class TestWordlistLoader extends LuceneTestCase {
-
-  public void testWordlistLoading() throws IOException {
-    String s = "ONE\n  two \nthree";
-    CharArraySet wordSet1 = WordlistLoader.getWordSet(new StringReader(s));
-    checkSet(wordSet1);
-    CharArraySet wordSet2 = WordlistLoader.getWordSet(new BufferedReader(new StringReader(s)));
-    checkSet(wordSet2);
-  }
-
-  public void testComments() throws Exception {
-    String s = "ONE\n  two \nthree\n#comment";
-    CharArraySet wordSet1 = WordlistLoader.getWordSet(new StringReader(s), "#");
-    checkSet(wordSet1);
-    assertFalse(wordSet1.contains("#comment"));
-    assertFalse(wordSet1.contains("comment"));
-  }
-
-
-  private void checkSet(CharArraySet wordset) {
-    assertEquals(3, wordset.size());
-    assertTrue(wordset.contains("ONE"));  // case is not modified
-    assertTrue(wordset.contains("two"));  // surrounding whitespace is removed
-    assertTrue(wordset.contains("three"));
-    assertFalse(wordset.contains("four"));
-  }
-
-  /**
-   * Test stopwords in snowball format
-   */
-  public void testSnowballListLoading() throws IOException {
-    String s = 
-      "|comment\n" + // commented line
-      " |comment\n" + // commented line with leading whitespace
-      "\n" + // blank line
-      "  \t\n" + // line with only whitespace
-      " |comment | comment\n" + // commented line with comment
-      "ONE\n" + // stopword, in uppercase
-      "   two   \n" + // stopword with leading/trailing space
-      " three   four five \n" + // multiple stopwords
-      "six seven | comment\n"; //multiple stopwords + comment
-    CharArraySet wordset = WordlistLoader.getSnowballWordSet(new StringReader(s));
-    assertEquals(7, wordset.size());
-    assertTrue(wordset.contains("ONE"));
-    assertTrue(wordset.contains("two"));
-    assertTrue(wordset.contains("three"));
-    assertTrue(wordset.contains("four"));
-    assertTrue(wordset.contains("five"));
-    assertTrue(wordset.contains("six"));
-    assertTrue(wordset.contains("seven"));
-  }
-}

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/icu/src/java/overview.html
----------------------------------------------------------------------
diff --git a/lucene/analysis/icu/src/java/overview.html b/lucene/analysis/icu/src/java/overview.html
index abb2e2a..bdace97 100644
--- a/lucene/analysis/icu/src/java/overview.html
+++ b/lucene/analysis/icu/src/java/overview.html
@@ -103,7 +103,7 @@ algorithm.
   </li>
   <li>
     Effective Locale-specific normalization (case differences, diacritics, etc.).
-    ({@link org.apache.lucene.analysis.core.LowerCaseFilter} and 
+    ({@link org.apache.lucene.analysis.LowerCaseFilter} and 
     {@link org.apache.lucene.analysis.miscellaneous.ASCIIFoldingFilter} provide these services
     in a generic way that doesn't take into account locale-specific needs.)
   </li>

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/icu/src/test/org/apache/lucene/analysis/icu/segmentation/TestWithCJKBigramFilter.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/icu/src/test/org/apache/lucene/analysis/icu/segmentation/TestWithCJKBigramFilter.java b/lucene/analysis/icu/src/test/org/apache/lucene/analysis/icu/segmentation/TestWithCJKBigramFilter.java
index f2fd50a..17ea967 100644
--- a/lucene/analysis/icu/src/test/org/apache/lucene/analysis/icu/segmentation/TestWithCJKBigramFilter.java
+++ b/lucene/analysis/icu/src/test/org/apache/lucene/analysis/icu/segmentation/TestWithCJKBigramFilter.java
@@ -21,12 +21,12 @@ import java.io.IOException;
 
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.CharArraySet;
+import org.apache.lucene.analysis.StopFilter;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.cjk.CJKBigramFilter;
-import org.apache.lucene.analysis.core.StopFilter;
 import org.apache.lucene.analysis.icu.ICUNormalizer2Filter;
-import org.apache.lucene.analysis.util.CharArraySet;
 import org.apache.lucene.util.IOUtils;
 
 /**

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseAnalyzer.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseAnalyzer.java b/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseAnalyzer.java
index bff30f1..46d40b1 100644
--- a/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseAnalyzer.java
+++ b/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseAnalyzer.java
@@ -21,15 +21,15 @@ import java.io.IOException;
 import java.util.HashSet;
 import java.util.Set;
 
+import org.apache.lucene.analysis.CharArraySet;
+import org.apache.lucene.analysis.LowerCaseFilter;
+import org.apache.lucene.analysis.StopFilter;
+import org.apache.lucene.analysis.StopwordAnalyzerBase;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.cjk.CJKWidthFilter;
-import org.apache.lucene.analysis.core.LowerCaseFilter;
-import org.apache.lucene.analysis.core.StopFilter;
 import org.apache.lucene.analysis.ja.JapaneseTokenizer.Mode;
 import org.apache.lucene.analysis.ja.dict.UserDictionary;
-import org.apache.lucene.analysis.util.CharArraySet;
-import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
 
 /**
  * Analyzer for Japanese that uses morphological analysis.

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseNumberFilter.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseNumberFilter.java b/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseNumberFilter.java
index b8d0a78..a1af95e 100644
--- a/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseNumberFilter.java
+++ b/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseNumberFilter.java
@@ -44,7 +44,7 @@ import org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute;
  * input tokens \uff13 and \uff12\u5343 and give outputs 3 and 2000 instead of 3200, which is
  * likely not the intended result. If you want to remove punctuation characters from your
  * index that are not part of normalized numbers, add a
- * {@link org.apache.lucene.analysis.core.StopFilter} with the punctuation you wish to
+ * {@link org.apache.lucene.analysis.StopFilter} with the punctuation you wish to
  * remove after {@link JapaneseNumberFilter} in your analyzer chain.
  * <p>
  * Below are some examples of normalizations this filter supports. The input is untokenized
@@ -615,4 +615,4 @@ public class JapaneseNumberFilter extends TokenFilter {
       return position;
     }
   }
-}
\ No newline at end of file
+}
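
The javadoc hunk above suggests appending a StopFilter after JapaneseNumberFilter to
drop punctuation that is not part of a normalized number. A minimal sketch of such a
chain using the relocated packages (the class name and punctuation set are
illustrative, not part of this commit):

    import org.apache.lucene.analysis.Analyzer;
    import org.apache.lucene.analysis.CharArraySet;
    import org.apache.lucene.analysis.StopFilter;   // relocated from o.a.l.analysis.core
    import org.apache.lucene.analysis.TokenStream;
    import org.apache.lucene.analysis.Tokenizer;
    import org.apache.lucene.analysis.ja.JapaneseNumberFilter;
    import org.apache.lucene.analysis.ja.JapaneseTokenizer;

    public class NumberNormalizingAnalyzer extends Analyzer {
      // Illustrative punctuation to drop once numbers are normalized.
      private static final CharArraySet PUNCTUATION =
          new CharArraySet(java.util.Arrays.asList("\uff0c", "\uff0e", "\u30fb"), false);

      @Override
      protected TokenStreamComponents createComponents(String fieldName) {
        // JapaneseNumberFilter needs punctuation kept by the tokenizer
        // (discardPunctuation = false) so grouping marks inside numbers survive.
        Tokenizer tokenizer = new JapaneseTokenizer(null, false, JapaneseTokenizer.Mode.SEARCH);
        TokenStream stream = new JapaneseNumberFilter(tokenizer);
        stream = new StopFilter(stream, PUNCTUATION);
        return new TokenStreamComponents(tokenizer, stream);
      }
    }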

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapanesePartOfSpeechStopFilter.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapanesePartOfSpeechStopFilter.java b/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapanesePartOfSpeechStopFilter.java
index 0ee9ccf..342295d 100644
--- a/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapanesePartOfSpeechStopFilter.java
+++ b/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapanesePartOfSpeechStopFilter.java
@@ -19,9 +19,9 @@ package org.apache.lucene.analysis.ja;
 
 import java.util.Set;
 
-import org.apache.lucene.analysis.ja.tokenattributes.PartOfSpeechAttribute;
-import org.apache.lucene.analysis.util.FilteringTokenFilter;
+import org.apache.lucene.analysis.FilteringTokenFilter;
 import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.ja.tokenattributes.PartOfSpeechAttribute;
 
 /**
  * Removes tokens that match a set of part-of-speech tags.

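The filter's constructor takes the stream plus a Set of part-of-speech tag strings. A brief usage sketch; the tags below are IPADIC-style strings of the kind shipped in kuromoji's stoptags.txt, used here purely as examples:

  import java.util.Arrays;
  import java.util.HashSet;
  import java.util.Set;

  import org.apache.lucene.analysis.TokenStream;
  import org.apache.lucene.analysis.ja.JapanesePartOfSpeechStopFilter;

  public class PosStopExample {
    // Drop case-marking and sentence-final particles (tag set is illustrative)
    static TokenStream dropParticles(TokenStream in) {
      Set<String> stopTags = new HashSet<>(Arrays.asList("助詞-格助詞-一般", "助詞-終助詞"));
      return new JapanesePartOfSpeechStopFilter(in, stopTags);
    }
  }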
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapanesePartOfSpeechStopFilterFactory.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapanesePartOfSpeechStopFilterFactory.java b/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapanesePartOfSpeechStopFilterFactory.java
index 8b5483c..a59de44 100644
--- a/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapanesePartOfSpeechStopFilterFactory.java
+++ b/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapanesePartOfSpeechStopFilterFactory.java
@@ -22,8 +22,8 @@ import java.util.HashSet;
 import java.util.Map;
 import java.util.Set;
 
+import org.apache.lucene.analysis.CharArraySet;
 import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.util.CharArraySet;
 import org.apache.lucene.analysis.util.ResourceLoader;
 import org.apache.lucene.analysis.util.ResourceLoaderAware;
 import org.apache.lucene.analysis.util.TokenFilterFactory;

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseBaseFormFilter.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseBaseFormFilter.java b/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseBaseFormFilter.java
index b9ebd36..ab6c473 100644
--- a/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseBaseFormFilter.java
+++ b/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseBaseFormFilter.java
@@ -21,11 +21,11 @@ import java.io.IOException;
 
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.CharArraySet;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.core.KeywordTokenizer;
 import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
-import org.apache.lucene.analysis.util.CharArraySet;
 
 public class TestJapaneseBaseFormFilter extends BaseTokenStreamTestCase {
   private Analyzer analyzer;

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseKatakanaStemFilter.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseKatakanaStemFilter.java b/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseKatakanaStemFilter.java
index bd14be3..bc57f56 100644
--- a/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseKatakanaStemFilter.java
+++ b/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseKatakanaStemFilter.java
@@ -17,16 +17,16 @@
 package org.apache.lucene.analysis.ja;
 
 
+import java.io.IOException;
+
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.CharArraySet;
 import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.core.KeywordTokenizer;
 import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
-import org.apache.lucene.analysis.util.CharArraySet;
-
-import java.io.IOException;
 
 /**
  * Tests for {@link JapaneseKatakanaStemFilter}

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseNumberFilter.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseNumberFilter.java b/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseNumberFilter.java
index 27cef33..b8a987a 100644
--- a/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseNumberFilter.java
+++ b/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseNumberFilter.java
@@ -27,11 +27,11 @@ import java.nio.file.Paths;
 
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.CharArraySet;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
-import org.apache.lucene.analysis.util.CharArraySet;
 import org.junit.Ignore;
 import org.junit.Test;
 

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/morfologik/MorfologikFilter.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/morfologik/MorfologikFilter.java b/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/morfologik/MorfologikFilter.java
index 3429d86..b35523e 100644
--- a/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/morfologik/MorfologikFilter.java
+++ b/lucene/analysis/morfologik/src/java/org/apache/lucene/analysis/morfologik/MorfologikFilter.java
@@ -23,12 +23,6 @@ import java.util.Collections;
 import java.util.List;
 import java.util.regex.Pattern;
 
-import morfologik.stemming.Dictionary;
-import morfologik.stemming.DictionaryLookup;
-import morfologik.stemming.IStemmer;
-import morfologik.stemming.WordData;
-import morfologik.stemming.polish.PolishStemmer;
-
 import org.apache.lucene.analysis.TokenFilter;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
@@ -36,6 +30,12 @@ import org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
 import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
 import org.apache.lucene.util.CharsRefBuilder;
 
+import morfologik.stemming.Dictionary;
+import morfologik.stemming.DictionaryLookup;
+import morfologik.stemming.IStemmer;
+import morfologik.stemming.WordData;
+import morfologik.stemming.polish.PolishStemmer;
+
 /**
  * {@link TokenFilter} using Morfologik library to transform input tokens into lemma and
  * morphosyntactic (POS) tokens. Applies to Polish only.  

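A short usage sketch for the filter described above, assuming the no-dictionary constructor that falls back to the bundled Polish dictionary (wrapping a WhitespaceTokenizer here is illustrative only):

  import org.apache.lucene.analysis.TokenStream;
  import org.apache.lucene.analysis.Tokenizer;
  import org.apache.lucene.analysis.core.WhitespaceTokenizer;
  import org.apache.lucene.analysis.morfologik.MorfologikFilter;

  public class MorfologikExample {
    static TokenStream polishLemmas() {
      Tokenizer source = new WhitespaceTokenizer();
      return new MorfologikFilter(source); // default dictionary: Polish
    }
  }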
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/morfologik/src/test/org/apache/lucene/analysis/morfologik/TestMorfologikAnalyzer.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/morfologik/src/test/org/apache/lucene/analysis/morfologik/TestMorfologikAnalyzer.java b/lucene/analysis/morfologik/src/test/org/apache/lucene/analysis/morfologik/TestMorfologikAnalyzer.java
index d8967c7..c4294e3 100644
--- a/lucene/analysis/morfologik/src/test/org/apache/lucene/analysis/morfologik/TestMorfologikAnalyzer.java
+++ b/lucene/analysis/morfologik/src/test/org/apache/lucene/analysis/morfologik/TestMorfologikAnalyzer.java
@@ -22,13 +22,13 @@ import java.util.TreeSet;
 
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.CharArraySet;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
 import org.apache.lucene.analysis.standard.StandardFilter;
 import org.apache.lucene.analysis.standard.StandardTokenizer;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
-import org.apache.lucene.analysis.util.CharArraySet;
 
 /**
  * TODO: The tests below rely on the order of returned lemmas, which is probably not good. 

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/SmartChineseAnalyzer.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/SmartChineseAnalyzer.java b/lucene/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/SmartChineseAnalyzer.java
index bd1fc7b..5f0347b 100644
--- a/lucene/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/SmartChineseAnalyzer.java
+++ b/lucene/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/SmartChineseAnalyzer.java
@@ -21,12 +21,12 @@ import java.nio.charset.StandardCharsets;
 import java.util.Set;
 
 import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.CharArraySet;
+import org.apache.lucene.analysis.StopFilter;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
-import org.apache.lucene.analysis.core.StopFilter;
+import org.apache.lucene.analysis.WordlistLoader;
 import org.apache.lucene.analysis.en.PorterStemFilter;
-import org.apache.lucene.analysis.util.CharArraySet;
-import org.apache.lucene.analysis.util.WordlistLoader;
 import org.apache.lucene.util.IOUtils;
 
 /**
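WordlistLoader moves with the rest of the graduated classes. A hedged sketch of loading a custom stopword set for SmartChineseAnalyzer under the new package (the file path is illustrative):

  import java.io.Reader;
  import java.nio.charset.StandardCharsets;
  import java.nio.file.Files;
  import java.nio.file.Paths;

  import org.apache.lucene.analysis.CharArraySet;
  import org.apache.lucene.analysis.WordlistLoader;
  import org.apache.lucene.analysis.cn.smart.SmartChineseAnalyzer;

  public class SmartChineseExample {
    public static SmartChineseAnalyzer create() throws Exception {
      try (Reader reader = Files.newBufferedReader(Paths.get("stopwords.txt"), StandardCharsets.UTF_8)) {
        CharArraySet stopWords = WordlistLoader.getWordSet(reader);
        return new SmartChineseAnalyzer(stopWords);
      }
    }
  }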

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/stempel/src/java/org/apache/lucene/analysis/pl/PolishAnalyzer.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/stempel/src/java/org/apache/lucene/analysis/pl/PolishAnalyzer.java b/lucene/analysis/stempel/src/java/org/apache/lucene/analysis/pl/PolishAnalyzer.java
index 999ce86..6ed4fda 100644
--- a/lucene/analysis/stempel/src/java/org/apache/lucene/analysis/pl/PolishAnalyzer.java
+++ b/lucene/analysis/stempel/src/java/org/apache/lucene/analysis/pl/PolishAnalyzer.java
@@ -22,18 +22,18 @@ import java.io.Reader;
 import java.nio.charset.StandardCharsets;
 
 import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.core.LowerCaseFilter;
-import org.apache.lucene.analysis.core.StopFilter;
-import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
+import org.apache.lucene.analysis.CharArraySet;
+import org.apache.lucene.analysis.LowerCaseFilter;
+import org.apache.lucene.analysis.StopFilter;
+import org.apache.lucene.analysis.StopwordAnalyzerBase;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.WordlistLoader;
+import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
 import org.apache.lucene.analysis.standard.StandardFilter;
 import org.apache.lucene.analysis.standard.StandardTokenizer;
-import org.apache.lucene.analysis.stempel.StempelStemmer;
 import org.apache.lucene.analysis.stempel.StempelFilter;
-import org.apache.lucene.analysis.util.CharArraySet;
-import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
-import org.apache.lucene.analysis.util.WordlistLoader;
+import org.apache.lucene.analysis.stempel.StempelStemmer;
 import org.apache.lucene.util.IOUtils;
 import org.egothor.stemmer.Trie;
 

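PolishAnalyzer's public surface is unchanged; only its imports move. For completeness, a usage sketch combining the default stop set with a custom stem-exclusion set built from the graduated CharArraySet (the excluded word is illustrative):

  import java.util.Arrays;

  import org.apache.lucene.analysis.CharArraySet;
  import org.apache.lucene.analysis.pl.PolishAnalyzer;

  public class PolishExample {
    public static PolishAnalyzer create() {
      CharArraySet stopWords = PolishAnalyzer.getDefaultStopSet();
      CharArraySet stemExclusions = new CharArraySet(Arrays.asList("lucene"), true);
      return new PolishAnalyzer(stopWords, stemExclusions);
    }
  }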
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/stempel/src/test/org/apache/lucene/analysis/pl/TestPolishAnalyzer.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/stempel/src/test/org/apache/lucene/analysis/pl/TestPolishAnalyzer.java b/lucene/analysis/stempel/src/test/org/apache/lucene/analysis/pl/TestPolishAnalyzer.java
index b0ef008..c37cedb 100644
--- a/lucene/analysis/stempel/src/test/org/apache/lucene/analysis/pl/TestPolishAnalyzer.java
+++ b/lucene/analysis/stempel/src/test/org/apache/lucene/analysis/pl/TestPolishAnalyzer.java
@@ -21,7 +21,7 @@ import java.io.IOException;
 
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
-import org.apache.lucene.analysis.util.CharArraySet;
+import org.apache.lucene.analysis.CharArraySet;
 
 public class TestPolishAnalyzer extends BaseTokenStreamTestCase {
   /** This test fails with NPE when the 

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/common-build.xml
----------------------------------------------------------------------
diff --git a/lucene/common-build.xml b/lucene/common-build.xml
index b4074ac..94b7910 100644
--- a/lucene/common-build.xml
+++ b/lucene/common-build.xml
@@ -2585,4 +2585,34 @@ The following arguments can be provided to ant to alter its behaviour and target
     </sequential>
   </macrodef>
 
+  <macrodef name="run-jflex">
+    <attribute name="dir"/>
+    <attribute name="name"/>
+    <sequential>
+      <jflex file="@{dir}/@{name}.jflex" outdir="@{dir}" nobak="on" inputstreamctor="false"/>
+    </sequential>
+  </macrodef>
+
+  <macrodef name="run-jflex-and-disable-buffer-expansion">
+    <attribute name="dir"/>
+    <attribute name="name"/>
+    <sequential>
+      <jflex file="@{dir}/@{name}.jflex" outdir="@{dir}" nobak="on" inputstreamctor="false"/>
+      <!-- LUCENE-5897: Disallow scanner buffer expansion -->
+      <replaceregexp file="@{dir}/@{name}.java"
+                     match="[ \t]*/\* is the buffer big enough\? \*/\s+if \(zzCurrentPos >= zzBuffer\.length.*?\}[ \t]*\r?\n"
+                     replace="" flags="s" />
+      <replaceregexp file="@{dir}/@{name}.java"
+                     match="private static final int ZZ_BUFFERSIZE ="
+                     replace="private int ZZ_BUFFERSIZE ="/>
+      <replaceregexp file="@{dir}/@{name}.java"
+                     match="int requested = zzBuffer.length - zzEndRead;"
+                     replace="int requested = zzBuffer.length - zzEndRead - zzFinalHighSurrogate;"/>
+      <replaceregexp file="@{dir}/@{name}.java"
+                     match="(zzFinalHighSurrogate = 1;)(\r?\n)"
+                     replace="\1\2          if (totalRead == 1) { return true; }\2"/>
+    </sequential>
+  </macrodef>
+
+
 </project>

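The second macro post-processes the JFlex output so the generated scanner can never grow its buffer (LUCENE-5897): ZZ_BUFFERSIZE becomes an instance field, the buffer-expansion branch in zzRefill is stripped, and the refill request accounts for a trailing high surrogate. At the API level, callers work with the fixed buffer through the max-token-length knobs rather than buffer growth; a hedged sketch (255 is StandardAnalyzer.DEFAULT_MAX_TOKEN_LENGTH):

  import org.apache.lucene.analysis.standard.StandardAnalyzer;

  public class MaxTokenLengthExample {
    public static StandardAnalyzer create() {
      StandardAnalyzer analyzer = new StandardAnalyzer();
      // Caps the longest token the scanner will buffer; 255 is the default
      analyzer.setMaxTokenLength(255);
      return analyzer;
    }
  }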
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/core/build.xml
----------------------------------------------------------------------
diff --git a/lucene/core/build.xml b/lucene/core/build.xml
index 90da238..4e62e1c 100644
--- a/lucene/core/build.xml
+++ b/lucene/core/build.xml
@@ -133,7 +133,7 @@
     <delete file="${build.dir}/moman.zip"/>
   </target>
 
-  <target name="regenerate" depends="createLevAutomata,createPackedIntSources"/>
+  <target name="regenerate" depends="createLevAutomata,createPackedIntSources,jflex"/>
   
   <macrodef name="startLockStressTestClient">
     <attribute name="clientId"/>
@@ -223,4 +223,20 @@
   
   <target name="test" depends="common.test, test-lock-factory"/>
 
+  <target name="clean-jflex">
+    <delete>
+      <fileset dir="src/java/org/apache/lucene/analysis/standard" includes="**/*.java">
+        <containsregexp expression="generated.*by.*JFlex"/>
+      </fileset>
+    </delete>
+  </target>
+
+  <target name="jflex" depends="-install-jflex,clean-jflex,-jflex-StandardAnalyzer"/>
+  
+  <target name="-jflex-StandardAnalyzer" depends="init,-install-jflex">
+    <run-jflex-and-disable-buffer-expansion 
+        dir="src/java/org/apache/lucene/analysis/standard" name="StandardTokenizerImpl"/>
+  </target>
+
+
 </project>
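With StandardTokenizerImpl now generated into core's org.apache.lucene.analysis.standard package, the regenerate target picks up jflex, and StandardAnalyzer is usable from lucene-core alone, matching its new role as IndexWriterConfig's default analyzer. A hedged sketch of the two now-equivalent spellings (the RAMDirectory is illustrative; any Directory works):

  import org.apache.lucene.analysis.standard.StandardAnalyzer;
  import org.apache.lucene.index.IndexWriter;
  import org.apache.lucene.index.IndexWriterConfig;
  import org.apache.lucene.store.Directory;
  import org.apache.lucene.store.RAMDirectory;

  public class DefaultAnalyzerExample {
    public static IndexWriter open() throws Exception {
      Directory dir = new RAMDirectory();
      IndexWriterConfig config = new IndexWriterConfig(); // analyzer defaults to StandardAnalyzer
      // equivalent to: new IndexWriterConfig(new StandardAnalyzer())
      return new IndexWriter(dir, config);
    }
  }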