You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by mi...@apache.org on 2016/06/14 22:51:22 UTC

[10/12] lucene-solr:branch_6x: LUCENE-7318: graduate StandardAnalyzer and make it the default for IndexWriterConfig

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/package-info.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/package-info.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/package-info.java
deleted file mode 100644
index afc68ce..0000000
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/package-info.java
+++ /dev/null
@@ -1,63 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * Fast, general-purpose grammar-based tokenizers.
- * <p>The <code>org.apache.lucene.analysis.standard</code> package contains three
- * fast grammar-based tokenizers constructed with JFlex:</p>
- * <ul>
- *     <li>{@link org.apache.lucene.analysis.standard.StandardTokenizer}:
- *         as of Lucene 3.1, implements the Word Break rules from the Unicode Text 
- *         Segmentation algorithm, as specified in 
- *         <a href="http://unicode.org/reports/tr29/">Unicode Standard Annex #29</a>.
- *         Unlike <code>UAX29URLEmailTokenizer</code>, URLs and email addresses are
- *         <b>not</b> tokenized as single tokens, but are instead split up into 
- *         tokens according to the UAX#29 word break rules.
- *         <br>
- *         {@link org.apache.lucene.analysis.standard.StandardAnalyzer StandardAnalyzer} includes
- *         {@link org.apache.lucene.analysis.standard.StandardTokenizer StandardTokenizer},
- *         {@link org.apache.lucene.analysis.standard.StandardFilter StandardFilter}, 
- *         {@link org.apache.lucene.analysis.core.LowerCaseFilter LowerCaseFilter}
- *         and {@link org.apache.lucene.analysis.core.StopFilter StopFilter}.
- *         When the <code>Version</code> specified in the constructor is lower than 
- *         3.1, the {@link org.apache.lucene.analysis.standard.ClassicTokenizer ClassicTokenizer}
- *         implementation is invoked.</li>
- *     <li>{@link org.apache.lucene.analysis.standard.ClassicTokenizer ClassicTokenizer}:
- *         this class was formerly (prior to Lucene 3.1) named 
- *         <code>StandardTokenizer</code>.  (Its tokenization rules are not
- *         based on the Unicode Text Segmentation algorithm.)
- *         {@link org.apache.lucene.analysis.standard.ClassicAnalyzer ClassicAnalyzer} includes
- *         {@link org.apache.lucene.analysis.standard.ClassicTokenizer ClassicTokenizer},
- *         {@link org.apache.lucene.analysis.standard.StandardFilter StandardFilter}, 
- *         {@link org.apache.lucene.analysis.core.LowerCaseFilter LowerCaseFilter}
- *         and {@link org.apache.lucene.analysis.core.StopFilter StopFilter}.
- *     </li>
- *     <li>{@link org.apache.lucene.analysis.standard.UAX29URLEmailTokenizer UAX29URLEmailTokenizer}:
- *         implements the Word Break rules from the Unicode Text Segmentation
- *         algorithm, as specified in 
- *         <a href="http://unicode.org/reports/tr29/">Unicode Standard Annex #29</a>.
- *         URLs and email addresses are also tokenized according to the relevant RFCs.
- *         <br>
- *         {@link org.apache.lucene.analysis.standard.UAX29URLEmailAnalyzer UAX29URLEmailAnalyzer} includes
- *         {@link org.apache.lucene.analysis.standard.UAX29URLEmailTokenizer UAX29URLEmailTokenizer},
- *         {@link org.apache.lucene.analysis.standard.StandardFilter StandardFilter},
- *         {@link org.apache.lucene.analysis.core.LowerCaseFilter LowerCaseFilter}
- *         and {@link org.apache.lucene.analysis.core.StopFilter StopFilter}.
- *     </li>
- * </ul>
- */
-package org.apache.lucene.analysis.standard;

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/package.html
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/package.html b/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/package.html
new file mode 100644
index 0000000..055d0b2
--- /dev/null
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/package.html
@@ -0,0 +1,50 @@
+<!doctype html public "-//w3c//dtd html 4.0 transitional//en">
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<!-- not a package-info.java, because we already defined this package in spatial/ -->
+<html>
+<head>
+   <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
+</head>
+<body>
+ Fast, general-purpose grammar-based tokenizers.
+ <ul>
+     <li>{@link org.apache.lucene.analysis.standard.ClassicTokenizer ClassicTokenizer}:
+         this class was formerly (prior to Lucene 3.1) named 
+         <code>StandardTokenizer</code>.  (Its tokenization rules are not
+         based on the Unicode Text Segmentation algorithm.)
+         {@link org.apache.lucene.analysis.standard.ClassicAnalyzer ClassicAnalyzer} includes
+         {@link org.apache.lucene.analysis.standard.ClassicTokenizer ClassicTokenizer},
+         {@link org.apache.lucene.analysis.standard.StandardFilter StandardFilter}, 
+         {@link org.apache.lucene.analysis.LowerCaseFilter LowerCaseFilter}
+         and {@link org.apache.lucene.analysis.StopFilter StopFilter}.
+     </li>
+     <li>{@link org.apache.lucene.analysis.standard.UAX29URLEmailTokenizer UAX29URLEmailTokenizer}:
+         implements the Word Break rules from the Unicode Text Segmentation
+         algorithm, as specified in 
+         <a href="http://unicode.org/reports/tr29/">Unicode Standard Annex #29</a>, except
+         URLs and email addresses are also tokenized according to the relevant RFCs.
+         <br>
+         {@link org.apache.lucene.analysis.standard.UAX29URLEmailAnalyzer UAX29URLEmailAnalyzer} includes
+         {@link org.apache.lucene.analysis.standard.UAX29URLEmailTokenizer UAX29URLEmailTokenizer},
+         {@link org.apache.lucene.analysis.standard.StandardFilter StandardFilter},
+         {@link org.apache.lucene.analysis.LowerCaseFilter LowerCaseFilter}
+         and {@link org.apache.lucene.analysis.StopFilter StopFilter}.
+     </li>
+ </ul>
+</body>
+</html>

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/common/src/java/org/apache/lucene/analysis/sv/SwedishAnalyzer.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/sv/SwedishAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/sv/SwedishAnalyzer.java
index fd15bbd..fd2aa2e 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/sv/SwedishAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/sv/SwedishAnalyzer.java
@@ -22,17 +22,17 @@ import java.io.Reader;
 import java.nio.charset.StandardCharsets;
 
 import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.core.LowerCaseFilter;
-import org.apache.lucene.analysis.core.StopFilter;
-import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
+import org.apache.lucene.analysis.CharArraySet;
+import org.apache.lucene.analysis.LowerCaseFilter;
+import org.apache.lucene.analysis.StopFilter;
+import org.apache.lucene.analysis.StopwordAnalyzerBase;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.WordlistLoader;
+import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
 import org.apache.lucene.analysis.snowball.SnowballFilter;
 import org.apache.lucene.analysis.standard.StandardFilter;
 import org.apache.lucene.analysis.standard.StandardTokenizer;
-import org.apache.lucene.analysis.util.CharArraySet;
-import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
-import org.apache.lucene.analysis.util.WordlistLoader;
 import org.apache.lucene.util.IOUtils;
 import org.tartarus.snowball.ext.SwedishStemmer;
 

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymFilterFactory.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymFilterFactory.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymFilterFactory.java
index 2488665..8bab9a7 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymFilterFactory.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymFilterFactory.java
@@ -30,9 +30,9 @@ import java.util.List;
 import java.util.Map;
 
 import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.LowerCaseFilter;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
-import org.apache.lucene.analysis.core.LowerCaseFilter;
 import org.apache.lucene.analysis.core.WhitespaceTokenizer;
 import org.apache.lucene.analysis.util.ResourceLoader;
 import org.apache.lucene.analysis.util.ResourceLoaderAware;

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/common/src/java/org/apache/lucene/analysis/th/ThaiAnalyzer.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/th/ThaiAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/th/ThaiAnalyzer.java
index 3f2e52a..9543c5c 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/th/ThaiAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/th/ThaiAnalyzer.java
@@ -20,13 +20,13 @@ import java.io.IOException;
 import java.io.Reader;
 
 import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.CharArraySet;
+import org.apache.lucene.analysis.LowerCaseFilter;
+import org.apache.lucene.analysis.StopFilter;
+import org.apache.lucene.analysis.StopwordAnalyzerBase;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.core.DecimalDigitFilter;
-import org.apache.lucene.analysis.core.LowerCaseFilter;
-import org.apache.lucene.analysis.core.StopFilter;
-import org.apache.lucene.analysis.util.CharArraySet;
-import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
 import org.apache.lucene.util.Version;
 
 /**

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/common/src/java/org/apache/lucene/analysis/tr/TurkishAnalyzer.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/tr/TurkishAnalyzer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/tr/TurkishAnalyzer.java
index c9ed471..a21495f 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/tr/TurkishAnalyzer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/tr/TurkishAnalyzer.java
@@ -21,15 +21,15 @@ import java.io.IOException;
 import java.io.Reader;
 
 import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.core.StopFilter;
-import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
+import org.apache.lucene.analysis.CharArraySet;
+import org.apache.lucene.analysis.StopFilter;
+import org.apache.lucene.analysis.StopwordAnalyzerBase;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
 import org.apache.lucene.analysis.snowball.SnowballFilter;
 import org.apache.lucene.analysis.standard.StandardFilter;
 import org.apache.lucene.analysis.standard.StandardTokenizer;
-import org.apache.lucene.analysis.util.CharArraySet;
-import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
 import org.tartarus.snowball.ext.TurkishStemmer;
 
 /**

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/AbstractAnalysisFactory.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/AbstractAnalysisFactory.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/AbstractAnalysisFactory.java
index 8ee809c..f8de8a7 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/AbstractAnalysisFactory.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/AbstractAnalysisFactory.java
@@ -37,7 +37,9 @@ import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 import java.util.regex.PatternSyntaxException;
 
-import org.apache.lucene.analysis.core.StopFilter;
+import org.apache.lucene.analysis.CharArraySet;
+import org.apache.lucene.analysis.StopFilter;
+import org.apache.lucene.analysis.WordlistLoader;
 import org.apache.lucene.util.IOUtils;
 import org.apache.lucene.util.Version;
 

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharArrayMap.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharArrayMap.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharArrayMap.java
deleted file mode 100644
index e414366..0000000
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharArrayMap.java
+++ /dev/null
@@ -1,669 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.lucene.analysis.util;
-
-
-import java.util.Arrays;
-import java.util.AbstractMap;
-import java.util.AbstractSet;
-import java.util.Iterator;
-import java.util.Map;
-import java.util.Set;
-
-import org.apache.lucene.analysis.util.CharacterUtils;
-
-/**
- * A simple class that stores key Strings as char[]'s in a
- * hash table. Note that this is not a general purpose
- * class.  For example, it cannot remove items from the
- * map, nor does it resize its hash table to be smaller,
- * etc.  It is designed to be quick to retrieve items
- * by char[] keys without the necessity of converting
- * to a String first.
- */
-public class CharArrayMap<V> extends AbstractMap<Object,V> {
-  // private only because missing generics
-  private static final CharArrayMap<?> EMPTY_MAP = new EmptyCharArrayMap<>();
-
-  private final static int INIT_SIZE = 8;
-  private boolean ignoreCase;  
-  private int count;
-  char[][] keys; // package private because used in CharArraySet's non Set-conform CharArraySetIterator
-  V[] values; // package private because used in CharArraySet's non Set-conform CharArraySetIterator
-
-  /**
-   * Create map with enough capacity to hold startSize terms
-   *
-   * @param startSize
-   *          the initial capacity
-   * @param ignoreCase
-   *          <code>false</code> if and only if the set should be case sensitive
-   *          otherwise <code>true</code>.
-   */
-  @SuppressWarnings("unchecked")
-  public CharArrayMap(int startSize, boolean ignoreCase) {
-    this.ignoreCase = ignoreCase;
-    int size = INIT_SIZE;
-    while(startSize + (startSize>>2) > size)
-      size <<= 1;
-    keys = new char[size][];
-    values = (V[]) new Object[size];
-  }
-
-  /**
-   * Creates a map from the mappings in another map. 
-   *
-   * @param c
-   *          a map whose mappings to be copied
-   * @param ignoreCase
-   *          <code>false</code> if and only if the set should be case sensitive
-   *          otherwise <code>true</code>.
-   */
-  public CharArrayMap(Map<?,? extends V> c, boolean ignoreCase) {
-    this(c.size(), ignoreCase);
-    putAll(c);
-  }
-  
-  /** Create set from the supplied map (used internally for readonly maps...) */
-  private CharArrayMap(CharArrayMap<V> toCopy){
-    this.keys = toCopy.keys;
-    this.values = toCopy.values;
-    this.ignoreCase = toCopy.ignoreCase;
-    this.count = toCopy.count;
-  }
-  
-  /** Clears all entries in this map. This method is supported for reusing, but not {@link Map#remove}. */
-  @Override
-  public void clear() {
-    count = 0;
-    Arrays.fill(keys, null);
-    Arrays.fill(values, null);
-  }
-
-  /** true if the <code>len</code> chars of <code>text</code> starting at <code>off</code>
-   * are in the {@link #keySet()} */
-  public boolean containsKey(char[] text, int off, int len) {
-    return keys[getSlot(text, off, len)] != null;
-  }
-
-  /** true if the <code>CharSequence</code> is in the {@link #keySet()} */
-  public boolean containsKey(CharSequence cs) {
-    return keys[getSlot(cs)] != null;
-  }
-
-  @Override
-  public boolean containsKey(Object o) {
-    if (o instanceof char[]) {
-      final char[] text = (char[])o;
-      return containsKey(text, 0, text.length);
-    } 
-    return containsKey(o.toString());
-  }
-
-  /** returns the value of the mapping of <code>len</code> chars of <code>text</code>
-   * starting at <code>off</code> */
-  public V get(char[] text, int off, int len) {
-    return values[getSlot(text, off, len)];
-  }
-
-  /** returns the value of the mapping of the chars inside this {@code CharSequence} */
-  public V get(CharSequence cs) {
-    return values[getSlot(cs)];
-  }
-
-  @Override
-  public V get(Object o) {
-    if (o instanceof char[]) {
-      final char[] text = (char[])o;
-      return get(text, 0, text.length);
-    } 
-    return get(o.toString());
-  }
-
-  private int getSlot(char[] text, int off, int len) {
-    int code = getHashCode(text, off, len);
-    int pos = code & (keys.length-1);
-    char[] text2 = keys[pos];
-    if (text2 != null && !equals(text, off, len, text2)) {
-      final int inc = ((code>>8)+code)|1;
-      do {
-        code += inc;
-        pos = code & (keys.length-1);
-        text2 = keys[pos];
-      } while (text2 != null && !equals(text, off, len, text2));
-    }
-    return pos;
-  }
-
-  /** Returns true if the String is in the set */  
-  private int getSlot(CharSequence text) {
-    int code = getHashCode(text);
-    int pos = code & (keys.length-1);
-    char[] text2 = keys[pos];
-    if (text2 != null && !equals(text, text2)) {
-      final int inc = ((code>>8)+code)|1;
-      do {
-        code += inc;
-        pos = code & (keys.length-1);
-        text2 = keys[pos];
-      } while (text2 != null && !equals(text, text2));
-    }
-    return pos;
-  }
-
-  /** Add the given mapping. */
-  public V put(CharSequence text, V value) {
-    return put(text.toString(), value); // could be more efficient
-  }
-
-  @Override
-  public V put(Object o, V value) {
-    if (o instanceof char[]) {
-      return put((char[])o, value);
-    }
-    return put(o.toString(), value);
-  }
-  
-  /** Add the given mapping. */
-  public V put(String text, V value) {
-    return put(text.toCharArray(), value);
-  }
-
-  /** Add the given mapping.
-   * If ignoreCase is true for this Set, the text array will be directly modified.
-   * The user should never modify this text array after calling this method.
-   */
-  public V put(char[] text, V value) {
-    if (ignoreCase) {
-      CharacterUtils.toLowerCase(text, 0, text.length);
-    }
-    int slot = getSlot(text, 0, text.length);
-    if (keys[slot] != null) {
-      final V oldValue = values[slot];
-      values[slot] = value;
-      return oldValue;
-    }
-    keys[slot] = text;
-    values[slot] = value;
-    count++;
-
-    if (count + (count>>2) > keys.length) {
-      rehash();
-    }
-
-    return null;
-  }
-
-  @SuppressWarnings("unchecked")
-  private void rehash() {
-    assert keys.length == values.length;
-    final int newSize = 2*keys.length;
-    final char[][] oldkeys = keys;
-    final V[] oldvalues = values;
-    keys = new char[newSize][];
-    values = (V[]) new Object[newSize];
-
-    for(int i=0; i<oldkeys.length; i++) {
-      char[] text = oldkeys[i];
-      if (text != null) {
-        // todo: could be faster... no need to compare strings on collision
-        final int slot = getSlot(text,0,text.length);
-        keys[slot] = text;
-        values[slot] = oldvalues[i];
-      }
-    }
-  }
-  
-  private boolean equals(char[] text1, int off, int len, char[] text2) {
-    if (len != text2.length)
-      return false;
-    final int limit = off+len;
-    if (ignoreCase) {
-      for(int i=0;i<len;) {
-        final int codePointAt = Character.codePointAt(text1, off+i, limit);
-        if (Character.toLowerCase(codePointAt) != Character.codePointAt(text2, i, text2.length))
-          return false;
-        i += Character.charCount(codePointAt); 
-      }
-    } else {
-      for(int i=0;i<len;i++) {
-        if (text1[off+i] != text2[i])
-          return false;
-      }
-    }
-    return true;
-  }
-
-  private boolean equals(CharSequence text1, char[] text2) {
-    int len = text1.length();
-    if (len != text2.length)
-      return false;
-    if (ignoreCase) {
-      for(int i=0;i<len;) {
-        final int codePointAt = Character.codePointAt(text1, i);
-        if (Character.toLowerCase(codePointAt) != Character.codePointAt(text2, i, text2.length))
-          return false;
-        i += Character.charCount(codePointAt);
-      }
-    } else {
-      for(int i=0;i<len;i++) {
-        if (text1.charAt(i) != text2[i])
-          return false;
-      }
-    }
-    return true;
-  }
-  
-  private int getHashCode(char[] text, int offset, int len) {
-    if (text == null)
-      throw new NullPointerException();
-    int code = 0;
-    final int stop = offset + len;
-    if (ignoreCase) {
-      for (int i=offset; i<stop;) {
-        final int codePointAt = Character.codePointAt(text, i, stop);
-        code = code*31 + Character.toLowerCase(codePointAt);
-        i += Character.charCount(codePointAt);
-      }
-    } else {
-      for (int i=offset; i<stop; i++) {
-        code = code*31 + text[i];
-      }
-    }
-    return code;
-  }
-
-  private int getHashCode(CharSequence text) {
-    if (text == null)
-      throw new NullPointerException();
-    int code = 0;
-    int len = text.length();
-    if (ignoreCase) {
-      for (int i=0; i<len;) {
-        int codePointAt = Character.codePointAt(text, i);
-        code = code*31 + Character.toLowerCase(codePointAt);
-        i += Character.charCount(codePointAt);
-      }
-    } else {
-      for (int i=0; i<len; i++) {
-        code = code*31 + text.charAt(i);
-      }
-    }
-    return code;
-  }
-
-  @Override
-  public V remove(Object key) {
-    throw new UnsupportedOperationException();
-  }
-
-  @Override
-  public int size() {
-    return count;
-  }
-
-  @Override
-  public String toString() {
-    final StringBuilder sb = new StringBuilder("{");
-    for (Map.Entry<Object,V> entry : entrySet()) {
-      if (sb.length()>1) sb.append(", ");
-      sb.append(entry);
-    }
-    return sb.append('}').toString();
-  }
-
-  private EntrySet entrySet = null;
-  private CharArraySet keySet = null;
-  
-  EntrySet createEntrySet() {
-    return new EntrySet(true);
-  }
-  
-  @Override
-  public final EntrySet entrySet() {
-    if (entrySet == null) {
-      entrySet = createEntrySet();
-    }
-    return entrySet;
-  }
-  
-  // helper for CharArraySet to not produce endless recursion
-  final Set<Object> originalKeySet() {
-    return super.keySet();
-  }
-
-  /** Returns an {@link CharArraySet} view on the map's keys.
-   * The set will use the same {@code matchVersion} as this map. */
-  @Override @SuppressWarnings({"unchecked","rawtypes"})
-  public final CharArraySet keySet() {
-    if (keySet == null) {
-      // prevent adding of entries
-      keySet = new CharArraySet((CharArrayMap) this) {
-        @Override
-        public boolean add(Object o) {
-          throw new UnsupportedOperationException();
-        }
-        @Override
-        public boolean add(CharSequence text) {
-          throw new UnsupportedOperationException();
-        }
-        @Override
-        public boolean add(String text) {
-          throw new UnsupportedOperationException();
-        }
-        @Override
-        public boolean add(char[] text) {
-          throw new UnsupportedOperationException();
-        }
-      };
-    }
-    return keySet;
-  }
-
-  /** public iterator class so efficient methods are exposed to users */
-  public class EntryIterator implements Iterator<Map.Entry<Object,V>> {
-    private int pos=-1;
-    private int lastPos;
-    private final boolean allowModify;
-    
-    private EntryIterator(boolean allowModify) {
-      this.allowModify = allowModify;
-      goNext();
-    }
-
-    private void goNext() {
-      lastPos = pos;
-      pos++;
-      while (pos < keys.length && keys[pos] == null) pos++;
-    }
-
-    @Override
-    public boolean hasNext() {
-      return pos < keys.length;
-    }
-
-    /** gets the next key... do not modify the returned char[] */
-    public char[] nextKey() {
-      goNext();
-      return keys[lastPos];
-    }
-
-    /** gets the next key as a newly created String object */
-    public String nextKeyString() {
-      return new String(nextKey());
-    }
-
-    /** returns the value associated with the last key returned */
-    public V currentValue() {
-      return values[lastPos];
-    }
-
-    /** sets the value associated with the last key returned */    
-    public V setValue(V value) {
-      if (!allowModify)
-        throw new UnsupportedOperationException();
-      V old = values[lastPos];
-      values[lastPos] = value;
-      return old;      
-    }
-
-    /** use nextCharArray() + currentValue() for better efficiency. */
-    @Override
-    public Map.Entry<Object,V> next() {
-      goNext();
-      return new MapEntry(lastPos, allowModify);
-    }
-
-    @Override
-    public void remove() {
-      throw new UnsupportedOperationException();
-    }
-  }
-
-  private final class MapEntry implements Map.Entry<Object,V> {
-    private final int pos;
-    private final boolean allowModify;
-
-    private MapEntry(int pos, boolean allowModify) {
-      this.pos = pos;
-      this.allowModify = allowModify;
-    }
-
-    @Override
-    public Object getKey() {
-      // we must clone here, as putAll to another CharArrayMap
-      // with other case sensitivity flag would corrupt the keys
-      return keys[pos].clone();
-    }
-
-    @Override
-    public V getValue() {
-      return values[pos];
-    }
-
-    @Override
-    public V setValue(V value) {
-      if (!allowModify)
-        throw new UnsupportedOperationException();
-      final V old = values[pos];
-      values[pos] = value;
-      return old;
-    }
-
-    @Override
-    public String toString() {
-      return new StringBuilder().append(keys[pos]).append('=')
-        .append((values[pos] == CharArrayMap.this) ? "(this Map)" : values[pos])
-        .toString();
-    }
-  }
-
-  /** public EntrySet class so efficient methods are exposed to users */
-  public final class EntrySet extends AbstractSet<Map.Entry<Object,V>> {
-    private final boolean allowModify;
-    
-    private EntrySet(boolean allowModify) {
-      this.allowModify = allowModify;
-    }
-  
-    @Override
-    public EntryIterator iterator() {
-      return new EntryIterator(allowModify);
-    }
-    
-    @Override
-    @SuppressWarnings("unchecked")
-    public boolean contains(Object o) {
-      if (!(o instanceof Map.Entry))
-        return false;
-      final Map.Entry<Object,V> e = (Map.Entry<Object,V>)o;
-      final Object key = e.getKey();
-      final Object val = e.getValue();
-      final Object v = get(key);
-      return v == null ? val == null : v.equals(val);
-    }
-    
-    @Override
-    public boolean remove(Object o) {
-      throw new UnsupportedOperationException();
-    }
-    
-    @Override
-    public int size() {
-      return count;
-    }
-    
-    @Override
-    public void clear() {
-      if (!allowModify)
-        throw new UnsupportedOperationException();
-      CharArrayMap.this.clear();
-    }
-  }
-  
-  /**
-   * Returns an unmodifiable {@link CharArrayMap}. This allows to provide
-   * unmodifiable views of internal map for "read-only" use.
-   * 
-   * @param map
-   *          a map for which the unmodifiable map is returned.
-   * @return an new unmodifiable {@link CharArrayMap}.
-   * @throws NullPointerException
-   *           if the given map is <code>null</code>.
-   */
-  public static <V> CharArrayMap<V> unmodifiableMap(CharArrayMap<V> map) {
-    if (map == null)
-      throw new NullPointerException("Given map is null");
-    if (map == emptyMap() || map.isEmpty())
-      return emptyMap();
-    if (map instanceof UnmodifiableCharArrayMap)
-      return map;
-    return new UnmodifiableCharArrayMap<>(map);
-  }
-
-  /**
-   * Returns a copy of the given map as a {@link CharArrayMap}. If the given map
-   * is a {@link CharArrayMap} the ignoreCase property will be preserved.
-   * 
-   * @param map
-   *          a map to copy
-   * @return a copy of the given map as a {@link CharArrayMap}. If the given map
-   *         is a {@link CharArrayMap} the ignoreCase property as well as the
-   *         matchVersion will be of the given map will be preserved.
-   */
-  @SuppressWarnings("unchecked")
-  public static <V> CharArrayMap<V> copy(final Map<?,? extends V> map) {
-    if(map == EMPTY_MAP)
-      return emptyMap();
-    if(map instanceof CharArrayMap) {
-      CharArrayMap<V> m = (CharArrayMap<V>) map;
-      // use fast path instead of iterating all values
-      // this is even on very small sets ~10 times faster than iterating
-      final char[][] keys = new char[m.keys.length][];
-      System.arraycopy(m.keys, 0, keys, 0, keys.length);
-      final V[] values = (V[]) new Object[m.values.length];
-      System.arraycopy(m.values, 0, values, 0, values.length);
-      m = new CharArrayMap<>(m);
-      m.keys = keys;
-      m.values = values;
-      return m;
-    }
-    return new CharArrayMap<>(map, false);
-  }
-  
-  /** Returns an empty, unmodifiable map. */
-  @SuppressWarnings("unchecked")
-  public static <V> CharArrayMap<V> emptyMap() {
-    return (CharArrayMap<V>) EMPTY_MAP;
-  }
-  
-  // package private CharArraySet instanceof check in CharArraySet
-  static class UnmodifiableCharArrayMap<V> extends CharArrayMap<V> {
-
-    UnmodifiableCharArrayMap(CharArrayMap<V> map) {
-      super(map);
-    }
-
-    @Override
-    public void clear() {
-      throw new UnsupportedOperationException();
-    }
-
-    @Override
-    public V put(Object o, V val){
-      throw new UnsupportedOperationException();
-    }
-    
-    @Override
-    public V put(char[] text, V val) {
-      throw new UnsupportedOperationException();
-    }
-
-    @Override
-    public V put(CharSequence text, V val) {
-      throw new UnsupportedOperationException();
-    }
-
-    @Override
-    public V put(String text, V val) {
-      throw new UnsupportedOperationException();
-    }
-    
-    @Override
-    public V remove(Object key) {
-      throw new UnsupportedOperationException();
-    }
-  
-    @Override
-    EntrySet createEntrySet() {
-      return new EntrySet(false);
-    }
-  }
-  
-  /**
-   * Empty {@link org.apache.lucene.analysis.util.CharArrayMap.UnmodifiableCharArrayMap} optimized for speed.
-   * Contains checks will always return <code>false</code> or throw
-   * NPE if necessary.
-   */
-  private static final class EmptyCharArrayMap<V> extends UnmodifiableCharArrayMap<V> {
-    EmptyCharArrayMap() {
-      super(new CharArrayMap<V>(0, false));
-    }
-    
-    @Override
-    public boolean containsKey(char[] text, int off, int len) {
-      if(text == null)
-        throw new NullPointerException();
-      return false;
-    }
-
-    @Override
-    public boolean containsKey(CharSequence cs) {
-      if(cs == null)
-        throw new NullPointerException();
-      return false;
-    }
-
-    @Override
-    public boolean containsKey(Object o) {
-      if(o == null)
-        throw new NullPointerException();
-      return false;
-    }
-    
-    @Override
-    public V get(char[] text, int off, int len) {
-      if(text == null)
-        throw new NullPointerException();
-      return null;
-    }
-
-    @Override
-    public V get(CharSequence cs) {
-      if(cs == null)
-        throw new NullPointerException();
-      return null;
-    }
-
-    @Override
-    public V get(Object o) {
-      if(o == null)
-        throw new NullPointerException();
-      return null;
-    }
-  }
-}

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharArraySet.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharArraySet.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharArraySet.java
deleted file mode 100644
index 15485bc..0000000
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharArraySet.java
+++ /dev/null
@@ -1,193 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.lucene.analysis.util;
-
-
-import java.util.AbstractSet;
-import java.util.Collection;
-import java.util.Iterator;
-import java.util.Set;
-
-/**
- * A simple class that stores Strings as char[]'s in a
- * hash table.  Note that this is not a general purpose
- * class.  For example, it cannot remove items from the
- * set, nor does it resize its hash table to be smaller,
- * etc.  It is designed to be quick to test if a char[]
- * is in the set without the necessity of converting it
- * to a String first.
- *
- * <P>
- * <em>Please note:</em> This class implements {@link java.util.Set Set} but
- * does not behave like it should in all cases. The generic type is
- * {@code Set<Object>}, because you can add any object to it,
- * that has a string representation. The add methods will use
- * {@link Object#toString} and store the result using a {@code char[]}
- * buffer. The same behavior have the {@code contains()} methods.
- * The {@link #iterator()} returns an {@code Iterator<char[]>}.
- */
-public class CharArraySet extends AbstractSet<Object> {
-  public static final CharArraySet EMPTY_SET = new CharArraySet(CharArrayMap.<Object>emptyMap());
-  private static final Object PLACEHOLDER = new Object();
-  
-  private final CharArrayMap<Object> map;
-  
-  /**
-   * Create set with enough capacity to hold startSize terms
-   * 
-   * @param startSize
-   *          the initial capacity
-   * @param ignoreCase
-   *          <code>false</code> if and only if the set should be case sensitive
-   *          otherwise <code>true</code>.
-   */
-  public CharArraySet(int startSize, boolean ignoreCase) {
-    this(new CharArrayMap<>(startSize, ignoreCase));
-  }
-
-  /**
-   * Creates a set from a Collection of objects. 
-   * 
-   * @param c
-   *          a collection whose elements to be placed into the set
-   * @param ignoreCase
-   *          <code>false</code> if and only if the set should be case sensitive
-   *          otherwise <code>true</code>.
-   */
-  public CharArraySet(Collection<?> c, boolean ignoreCase) {
-    this(c.size(), ignoreCase);
-    addAll(c);
-  }
-
-  /** Create set from the specified map (internal only), used also by {@link CharArrayMap#keySet()} */
-  CharArraySet(final CharArrayMap<Object> map){
-    this.map = map;
-  }
-  
-  /** Clears all entries in this set. This method is supported for reusing, but not {@link Set#remove}. */
-  @Override
-  public void clear() {
-    map.clear();
-  }
-
-  /** true if the <code>len</code> chars of <code>text</code> starting at <code>off</code>
-   * are in the set */
-  public boolean contains(char[] text, int off, int len) {
-    return map.containsKey(text, off, len);
-  }
-
-  /** true if the <code>CharSequence</code> is in the set */
-  public boolean contains(CharSequence cs) {
-    return map.containsKey(cs);
-  }
-
-  @Override
-  public boolean contains(Object o) {
-    return map.containsKey(o);
-  }
-
-  @Override
-  public boolean add(Object o) {
-    return map.put(o, PLACEHOLDER) == null;
-  }
-
-  /** Add this CharSequence into the set */
-  public boolean add(CharSequence text) {
-    return map.put(text, PLACEHOLDER) == null;
-  }
-  
-  /** Add this String into the set */
-  public boolean add(String text) {
-    return map.put(text, PLACEHOLDER) == null;
-  }
-
-  /** Add this char[] directly to the set.
-   * If ignoreCase is true for this Set, the text array will be directly modified.
-   * The user should never modify this text array after calling this method.
-   */
-  public boolean add(char[] text) {
-    return map.put(text, PLACEHOLDER) == null;
-  }
-
-  @Override
-  public int size() {
-    return map.size();
-  }
-  
-  /**
-   * Returns an unmodifiable {@link CharArraySet}. This allows to provide
-   * unmodifiable views of internal sets for "read-only" use.
-   * 
-   * @param set
-   *          a set for which the unmodifiable set is returned.
-   * @return an new unmodifiable {@link CharArraySet}.
-   * @throws NullPointerException
-   *           if the given set is <code>null</code>.
-   */
-  public static CharArraySet unmodifiableSet(CharArraySet set) {
-    if (set == null)
-      throw new NullPointerException("Given set is null");
-    if (set == EMPTY_SET)
-      return EMPTY_SET;
-    if (set.map instanceof CharArrayMap.UnmodifiableCharArrayMap)
-      return set;
-    return new CharArraySet(CharArrayMap.unmodifiableMap(set.map));
-  }
-
-  /**
-   * Returns a copy of the given set as a {@link CharArraySet}. If the given set
-   * is a {@link CharArraySet} the ignoreCase property will be preserved.
-   * 
-   * @param set
-   *          a set to copy
-   * @return a copy of the given set as a {@link CharArraySet}. If the given set
-   *         is a {@link CharArraySet} the ignoreCase property as well as the
-   *         matchVersion will be of the given set will be preserved.
-   */
-  public static CharArraySet copy(final Set<?> set) {
-    if(set == EMPTY_SET)
-      return EMPTY_SET;
-    if(set instanceof CharArraySet) {
-      final CharArraySet source = (CharArraySet) set;
-      return new CharArraySet(CharArrayMap.copy(source.map));
-    }
-    return new CharArraySet(set, false);
-  }
-  
-  /**
-   * Returns an {@link Iterator} for {@code char[]} instances in this set.
-   */
-  @Override @SuppressWarnings("unchecked")
-  public Iterator<Object> iterator() {
-    // use the AbstractSet#keySet()'s iterator (to not produce endless recursion)
-    return map.originalKeySet().iterator();
-  }
-  
-  @Override
-  public String toString() {
-    final StringBuilder sb = new StringBuilder("[");
-    for (Object item : this) {
-      if (sb.length()>1) sb.append(", ");
-      if (item instanceof char[]) {
-        sb.append((char[]) item);
-      } else {
-        sb.append(item);
-      }
-    }
-    return sb.append(']').toString();
-  }
-}

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharTokenizer.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharTokenizer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharTokenizer.java
index 4952f99..9100345 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharTokenizer.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharTokenizer.java
@@ -22,16 +22,16 @@ import java.util.Objects;
 import java.util.function.IntPredicate;
 import java.util.function.IntUnaryOperator;
 
+import org.apache.lucene.analysis.CharacterUtils.CharacterBuffer;
+import org.apache.lucene.analysis.CharacterUtils;
+import org.apache.lucene.analysis.LowerCaseFilter;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.core.LetterTokenizer;
-import org.apache.lucene.analysis.core.LowerCaseFilter;
 import org.apache.lucene.analysis.core.LowerCaseTokenizer;
 import org.apache.lucene.analysis.core.WhitespaceTokenizer;
-import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
 import org.apache.lucene.util.AttributeFactory;
-import org.apache.lucene.analysis.util.CharacterUtils;
-import org.apache.lucene.analysis.util.CharacterUtils.CharacterBuffer;
 
 /**
  * An abstract base class for simple, character-oriented tokenizers.
@@ -285,4 +285,4 @@ public abstract class CharTokenizer extends Tokenizer {
     finalOffset = 0;
     ioBuffer.reset(); // make sure to reset the IO buffer!!
   }
-}
\ No newline at end of file
+}

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharacterUtils.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharacterUtils.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharacterUtils.java
deleted file mode 100644
index b728523..0000000
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/CharacterUtils.java
+++ /dev/null
@@ -1,251 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.lucene.analysis.util;
-
-
-import java.io.IOException;
-import java.io.Reader;
-
-/**
- * Utility class to write tokenizers or token filters.
- * @lucene.internal
- */
-public final class CharacterUtils {
-
-  private CharacterUtils() {} // no instantiation
-
-  /**
-   * Creates a new {@link CharacterBuffer} and allocates a <code>char[]</code>
-   * of the given bufferSize.
-   * 
-   * @param bufferSize
-   *          the internal char buffer size, must be <code>&gt;= 2</code>
-   * @return a new {@link CharacterBuffer} instance.
-   */
-  public static CharacterBuffer newCharacterBuffer(final int bufferSize) {
-    if (bufferSize < 2) {
-      throw new IllegalArgumentException("buffersize must be >= 2");
-    }
-    return new CharacterBuffer(new char[bufferSize], 0, 0);
-  }
-  
-  
-  /**
-   * Converts each unicode codepoint to lowerCase via {@link Character#toLowerCase(int)} starting 
-   * at the given offset.
-   * @param buffer the char buffer to lowercase
-   * @param offset the offset to start at
-   * @param limit the max char in the buffer to lower case
-   */
-  public static void toLowerCase(final char[] buffer, final int offset, final int limit) {
-    assert buffer.length >= limit;
-    assert offset <=0 && offset <= buffer.length;
-    for (int i = offset; i < limit;) {
-      i += Character.toChars(
-              Character.toLowerCase(
-                  Character.codePointAt(buffer, i, limit)), buffer, i);
-     }
-  }
-
-  /**
-   * Converts each unicode codepoint to UpperCase via {@link Character#toUpperCase(int)} starting 
-   * at the given offset.
-   * @param buffer the char buffer to UPPERCASE
-   * @param offset the offset to start at
-   * @param limit the max char in the buffer to lower case
-   */
-  public static void toUpperCase(final char[] buffer, final int offset, final int limit) {
-    assert buffer.length >= limit;
-    assert offset <=0 && offset <= buffer.length;
-    for (int i = offset; i < limit;) {
-      i += Character.toChars(
-              Character.toUpperCase(
-                  Character.codePointAt(buffer, i, limit)), buffer, i);
-     }
-  }
-
-  /** Converts a sequence of Java characters to a sequence of unicode code points.
-   *  @return the number of code points written to the destination buffer */
-  public static int toCodePoints(char[] src, int srcOff, int srcLen, int[] dest, int destOff) {
-    if (srcLen < 0) {
-      throw new IllegalArgumentException("srcLen must be >= 0");
-    }
-    int codePointCount = 0;
-    for (int i = 0; i < srcLen; ) {
-      final int cp = Character.codePointAt(src, srcOff + i, srcOff + srcLen);
-      final int charCount = Character.charCount(cp);
-      dest[destOff + codePointCount++] = cp;
-      i += charCount;
-    }
-    return codePointCount;
-  }
-
-  /** Converts a sequence of unicode code points to a sequence of Java characters.
-   *  @return the number of chars written to the destination buffer */
-  public static int toChars(int[] src, int srcOff, int srcLen, char[] dest, int destOff) {
-    if (srcLen < 0) {
-      throw new IllegalArgumentException("srcLen must be >= 0");
-    }
-    int written = 0;
-    for (int i = 0; i < srcLen; ++i) {
-      written += Character.toChars(src[srcOff + i], dest, destOff + written);
-    }
-    return written;
-  }
-
-  /**
-   * Fills the {@link CharacterBuffer} with characters read from the given
-   * reader {@link Reader}. This method tries to read <code>numChars</code>
-   * characters into the {@link CharacterBuffer}, each call to fill will start
-   * filling the buffer from offset <code>0</code> up to <code>numChars</code>.
-   * In case code points can span across 2 java characters, this method may
-   * only fill <code>numChars - 1</code> characters in order not to split in
-   * the middle of a surrogate pair, even if there are remaining characters in
-   * the {@link Reader}.
-   * <p>
-   * This method guarantees
-   * that the given {@link CharacterBuffer} will never contain a high surrogate
-   * character as the last element in the buffer unless it is the last available
-   * character in the reader. In other words, high and low surrogate pairs will
-   * always be preserved across buffer boarders.
-   * </p>
-   * <p>
-   * A return value of <code>false</code> means that this method call exhausted
-   * the reader, but there may be some bytes which have been read, which can be
-   * verified by checking whether <code>buffer.getLength() &gt; 0</code>.
-   * </p>
-   * 
-   * @param buffer
-   *          the buffer to fill.
-   * @param reader
-   *          the reader to read characters from.
-   * @param numChars
-   *          the number of chars to read
-   * @return <code>false</code> if and only if reader.read returned -1 while trying to fill the buffer
-   * @throws IOException
-   *           if the reader throws an {@link IOException}.
-   */
-  public static boolean fill(CharacterBuffer buffer, Reader reader, int numChars) throws IOException {
-    assert buffer.buffer.length >= 2;
-    if (numChars < 2 || numChars > buffer.buffer.length) {
-      throw new IllegalArgumentException("numChars must be >= 2 and <= the buffer size");
-    }
-    final char[] charBuffer = buffer.buffer;
-    buffer.offset = 0;
-    final int offset;
-
-    // Install the previously saved ending high surrogate:
-    if (buffer.lastTrailingHighSurrogate != 0) {
-      charBuffer[0] = buffer.lastTrailingHighSurrogate;
-      buffer.lastTrailingHighSurrogate = 0;
-      offset = 1;
-    } else {
-      offset = 0;
-    }
-
-    final int read = readFully(reader, charBuffer, offset, numChars - offset);
-
-    buffer.length = offset + read;
-    final boolean result = buffer.length == numChars;
-    if (buffer.length < numChars) {
-      // We failed to fill the buffer. Even if the last char is a high
-      // surrogate, there is nothing we can do
-      return result;
-    }
-
-    if (Character.isHighSurrogate(charBuffer[buffer.length - 1])) {
-      buffer.lastTrailingHighSurrogate = charBuffer[--buffer.length];
-    }
-    return result;
-  }
-
-  /** Convenience method which calls <code>fill(buffer, reader, buffer.buffer.length)</code>. */
-  public static boolean fill(CharacterBuffer buffer, Reader reader) throws IOException {
-    return fill(buffer, reader, buffer.buffer.length);
-  }
-
-  static int readFully(Reader reader, char[] dest, int offset, int len) throws IOException {
-    int read = 0;
-    while (read < len) {
-      final int r = reader.read(dest, offset + read, len - read);
-      if (r == -1) {
-        break;
-      }
-      read += r;
-    }
-    return read;
-  }
-
-  /**
-   * A simple IO buffer to use with
-   * {@link CharacterUtils#fill(CharacterBuffer, Reader)}.
-   */
-  public static final class CharacterBuffer {
-    
-    private final char[] buffer;
-    private int offset;
-    private int length;
-    // NOTE: not private so outer class can access without
-    // $access methods:
-    char lastTrailingHighSurrogate;
-    
-    CharacterBuffer(char[] buffer, int offset, int length) {
-      this.buffer = buffer;
-      this.offset = offset;
-      this.length = length;
-    }
-    
-    /**
-     * Returns the internal buffer
-     * 
-     * @return the buffer
-     */
-    public char[] getBuffer() {
-      return buffer;
-    }
-    
-    /**
-     * Returns the data offset in the internal buffer.
-     * 
-     * @return the offset
-     */
-    public int getOffset() {
-      return offset;
-    }
-    
-    /**
-     * Return the length of the data in the internal buffer starting at
-     * {@link #getOffset()}
-     * 
-     * @return the length
-     */
-    public int getLength() {
-      return length;
-    }
-    
-    /**
-     * Resets the CharacterBuffer. All internals are reset to its default
-     * values.
-     */
-    public void reset() {
-      offset = 0;
-      length = 0;
-      lastTrailingHighSurrogate = 0;
-    }
-  }
-
-}

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/ElisionFilter.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/ElisionFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/ElisionFilter.java
index d7689f9..be5f04c 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/ElisionFilter.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/ElisionFilter.java
@@ -19,10 +19,10 @@ package org.apache.lucene.analysis.util;
 
 import java.io.IOException;
 
+import org.apache.lucene.analysis.CharArraySet;
 import org.apache.lucene.analysis.TokenFilter;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
-import org.apache.lucene.analysis.util.CharArraySet;
 
 /**
  * Removes elisions from a {@link TokenStream}. For example, "l'avion" (the plane) will be

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/ElisionFilterFactory.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/ElisionFilterFactory.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/ElisionFilterFactory.java
index 31c3027..fff3edc 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/ElisionFilterFactory.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/ElisionFilterFactory.java
@@ -20,6 +20,7 @@ package org.apache.lucene.analysis.util;
 import java.io.IOException;
 import java.util.Map;
 
+import org.apache.lucene.analysis.CharArraySet;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.fr.FrenchAnalyzer;
 

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/FilteringTokenFilter.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/FilteringTokenFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/FilteringTokenFilter.java
deleted file mode 100644
index 97d35e2..0000000
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/FilteringTokenFilter.java
+++ /dev/null
@@ -1,76 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.lucene.analysis.util;
-
-
-import java.io.IOException;
-
-import org.apache.lucene.analysis.TokenFilter;
-import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
-
-/**
- * Abstract base class for TokenFilters that may remove tokens.
- * You have to implement {@link #accept} and return a boolean if the current
- * token should be preserved. {@link #incrementToken} uses this method
- * to decide if a token should be passed to the caller.
- */
-public abstract class FilteringTokenFilter extends TokenFilter {
-
-  private final PositionIncrementAttribute posIncrAtt = addAttribute(PositionIncrementAttribute.class);
-  private int skippedPositions;
-
-  /**
-   * Create a new {@link FilteringTokenFilter}.
-   * @param in      the {@link TokenStream} to consume
-   */
-  public FilteringTokenFilter(TokenStream in) {
-    super(in);
-  }
-
-  /** Override this method and return if the current input token should be returned by {@link #incrementToken}. */
-  protected abstract boolean accept() throws IOException;
-
-  @Override
-  public final boolean incrementToken() throws IOException {
-    skippedPositions = 0;
-    while (input.incrementToken()) {
-      if (accept()) {
-        if (skippedPositions != 0) {
-          posIncrAtt.setPositionIncrement(posIncrAtt.getPositionIncrement() + skippedPositions);
-        }
-        return true;
-      }
-      skippedPositions += posIncrAtt.getPositionIncrement();
-    }
-
-    // reached EOS -- return false
-    return false;
-  }
-
-  @Override
-  public void reset() throws IOException {
-    super.reset();
-    skippedPositions = 0;
-  }
-
-  @Override
-  public void end() throws IOException {
-    super.end();
-    posIncrAtt.setPositionIncrement(posIncrAtt.getPositionIncrement() + skippedPositions);
-  }
-}

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/StopwordAnalyzerBase.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/StopwordAnalyzerBase.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/StopwordAnalyzerBase.java
deleted file mode 100644
index fc6c798..0000000
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/StopwordAnalyzerBase.java
+++ /dev/null
@@ -1,138 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.lucene.analysis.util;
-
-import java.io.IOException;
-import java.io.Reader;
-import java.nio.charset.StandardCharsets;
-import java.nio.file.Files;
-import java.nio.file.Path;
-
-import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.util.IOUtils;
-
-/**
- * Base class for Analyzers that need to make use of stopword sets. 
- * 
- */
-public abstract class StopwordAnalyzerBase extends Analyzer {
-
-  /**
-   * An immutable stopword set
-   */
-  protected final CharArraySet stopwords;
-
-  /**
-   * Returns the analyzer's stopword set or an empty set if the analyzer has no
-   * stopwords
-   * 
-   * @return the analyzer's stopword set or an empty set if the analyzer has no
-   *         stopwords
-   */
-  public CharArraySet getStopwordSet() {
-    return stopwords;
-  }
-
-  /**
-   * Creates a new instance initialized with the given stopword set
-   * 
-   * @param stopwords
-   *          the analyzer's stopword set
-   */
-  protected StopwordAnalyzerBase(final CharArraySet stopwords) {
-    // analyzers should use char array set for stopwords!
-    this.stopwords = stopwords == null ? CharArraySet.EMPTY_SET : CharArraySet
-        .unmodifiableSet(CharArraySet.copy(stopwords));
-  }
-
-  /**
-   * Creates a new Analyzer with an empty stopword set
-   */
-  protected StopwordAnalyzerBase() {
-    this(null);
-  }
-
-  /**
-   * Creates a CharArraySet from a file resource associated with a class. (See
-   * {@link Class#getResourceAsStream(String)}).
-   * 
-   * @param ignoreCase
-   *          <code>true</code> if the set should ignore the case of the
-   *          stopwords, otherwise <code>false</code>
-   * @param aClass
-   *          a class that is associated with the given stopwordResource
-   * @param resource
-   *          name of the resource file associated with the given class
-   * @param comment
-   *          comment string to ignore in the stopword file
-   * @return a CharArraySet containing the distinct stopwords from the given
-   *         file
-   * @throws IOException
-   *           if loading the stopwords throws an {@link IOException}
-   */
-  protected static CharArraySet loadStopwordSet(final boolean ignoreCase,
-      final Class<? extends Analyzer> aClass, final String resource,
-      final String comment) throws IOException {
-    Reader reader = null;
-    try {
-      reader = IOUtils.getDecodingReader(aClass.getResourceAsStream(resource), StandardCharsets.UTF_8);
-      return WordlistLoader.getWordSet(reader, comment, new CharArraySet(16, ignoreCase));
-    } finally {
-      IOUtils.close(reader);
-    }
-    
-  }
-  
-  /**
-   * Creates a CharArraySet from a path.
-   * 
-   * @param stopwords
-   *          the stopwords file to load
-   * @return a CharArraySet containing the distinct stopwords from the given
-   *         file
-   * @throws IOException
-   *           if loading the stopwords throws an {@link IOException}
-   */
-  protected static CharArraySet loadStopwordSet(Path stopwords) throws IOException {
-    Reader reader = null;
-    try {
-      reader = Files.newBufferedReader(stopwords, StandardCharsets.UTF_8);
-      return WordlistLoader.getWordSet(reader);
-    } finally {
-      IOUtils.close(reader);
-    }
-  }
-  
-  /**
-   * Creates a CharArraySet from a file.
-   * 
-   * @param stopwords
-   *          the stopwords reader to load
-   * 
-   * @return a CharArraySet containing the distinct stopwords from the given
-   *         reader
-   * @throws IOException
-   *           if loading the stopwords throws an {@link IOException}
-   */
-  protected static CharArraySet loadStopwordSet(Reader stopwords) throws IOException {
-    try {
-      return WordlistLoader.getWordSet(stopwords);
-    } finally {
-      IOUtils.close(stopwords);
-    }
-  }
-}

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/WordlistLoader.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/WordlistLoader.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/WordlistLoader.java
deleted file mode 100644
index 4d99965..0000000
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/util/WordlistLoader.java
+++ /dev/null
@@ -1,244 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.lucene.analysis.util;
-
-
-import java.io.BufferedReader;
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.Reader;
-import java.nio.charset.Charset;
-import java.util.ArrayList;
-import java.util.List;
-
-import org.apache.lucene.util.IOUtils;
-
-/**
- * Loader for text files that represent a list of stopwords.
- * 
- * @see IOUtils to obtain {@link Reader} instances
- * @lucene.internal
- */
-public class WordlistLoader {
-  
-  private static final int INITIAL_CAPACITY = 16;
-  
-  /** no instance */
-  private WordlistLoader() {}
-  
-  /**
-   * Reads lines from a Reader and adds every line as an entry to a CharArraySet (omitting
-   * leading and trailing whitespace). Every line of the Reader should contain only
-   * one word. The words need to be in lowercase if you make use of an
-   * Analyzer which uses LowerCaseFilter (like StandardAnalyzer).
-   *
-   * @param reader Reader containing the wordlist
-   * @param result the {@link CharArraySet} to fill with the reader's words
-   * @return the given {@link CharArraySet} with the reader's words
-   */
-  public static CharArraySet getWordSet(Reader reader, CharArraySet result) throws IOException {
-    BufferedReader br = null;
-    try {
-      br = getBufferedReader(reader);
-      String word = null;
-      while ((word = br.readLine()) != null) {
-        result.add(word.trim());
-      }
-    }
-    finally {
-      IOUtils.close(br);
-    }
-    return result;
-  }
-  
-  /**
-   * Reads lines from a Reader and adds every line as an entry to a CharArraySet (omitting
-   * leading and trailing whitespace). Every line of the Reader should contain only
-   * one word. The words need to be in lowercase if you make use of an
-   * Analyzer which uses LowerCaseFilter (like StandardAnalyzer).
-   *
-   * @param reader Reader containing the wordlist
-   * @return A {@link CharArraySet} with the reader's words
-   */
-  public static CharArraySet getWordSet(Reader reader) throws IOException {
-    return getWordSet(reader, new CharArraySet(INITIAL_CAPACITY, false));
-  }
-
-  /**
-   * Reads lines from a Reader and adds every non-comment line as an entry to a CharArraySet (omitting
-   * leading and trailing whitespace). Every line of the Reader should contain only
-   * one word. The words need to be in lowercase if you make use of an
-   * Analyzer which uses LowerCaseFilter (like StandardAnalyzer).
-   *
-   * @param reader Reader containing the wordlist
-   * @param comment The string representing a comment.
-   * @return A CharArraySet with the reader's words
-   */
-  public static CharArraySet getWordSet(Reader reader, String comment) throws IOException {
-    return getWordSet(reader, comment, new CharArraySet(INITIAL_CAPACITY, false));
-  }
-
-  /**
-   * Reads lines from a Reader and adds every non-comment line as an entry to a CharArraySet (omitting
-   * leading and trailing whitespace). Every line of the Reader should contain only
-   * one word. The words need to be in lowercase if you make use of an
-   * Analyzer which uses LowerCaseFilter (like StandardAnalyzer).
-   *
-   * @param reader Reader containing the wordlist
-   * @param comment The string representing a comment.
-   * @param result the {@link CharArraySet} to fill with the reader's words
-   * @return the given {@link CharArraySet} with the reader's words
-   */
-  public static CharArraySet getWordSet(Reader reader, String comment, CharArraySet result) throws IOException {
-    BufferedReader br = null;
-    try {
-      br = getBufferedReader(reader);
-      String word = null;
-      while ((word = br.readLine()) != null) {
-        if (word.startsWith(comment) == false){
-          result.add(word.trim());
-        }
-      }
-    }
-    finally {
-      IOUtils.close(br);
-    }
-    return result;
-  }
-
-  
-  /**
-   * Reads stopwords from a stopword list in Snowball format.
-   * <p>
-   * The snowball format is the following:
-   * <ul>
-   * <li>Lines may contain multiple words separated by whitespace.
-   * <li>The comment character is the vertical line (&#124;).
-   * <li>Lines may contain trailing comments.
-   * </ul>
-   * 
-   * @param reader Reader containing a Snowball stopword list
-   * @param result the {@link CharArraySet} to fill with the reader's words
-   * @return the given {@link CharArraySet} with the reader's words
-   */
-  public static CharArraySet getSnowballWordSet(Reader reader, CharArraySet result)
-      throws IOException {
-    BufferedReader br = null;
-    try {
-      br = getBufferedReader(reader);
-      String line = null;
-      while ((line = br.readLine()) != null) {
-        int comment = line.indexOf('|');
-        if (comment >= 0) line = line.substring(0, comment);
-        String words[] = line.split("\\s+");
-        for (int i = 0; i < words.length; i++)
-          if (words[i].length() > 0) result.add(words[i]);
-      }
-    } finally {
-      IOUtils.close(br);
-    }
-    return result;
-  }
-  
-  /**
-   * Reads stopwords from a stopword list in Snowball format.
-   * <p>
-   * The snowball format is the following:
-   * <ul>
-   * <li>Lines may contain multiple words separated by whitespace.
-   * <li>The comment character is the vertical line (&#124;).
-   * <li>Lines may contain trailing comments.
-   * </ul>
-   * 
-   * @param reader Reader containing a Snowball stopword list
-   * @return A {@link CharArraySet} with the reader's words
-   */
-  public static CharArraySet getSnowballWordSet(Reader reader) throws IOException {
-    return getSnowballWordSet(reader, new CharArraySet(INITIAL_CAPACITY, false));
-  }
-
-
-  /**
-   * Reads a stem dictionary. Each line contains:
-   * <pre>word<b>\t</b>stem</pre>
-   * (i.e. two tab separated words)
-   *
-   * @return stem dictionary that overrules the stemming algorithm
-   * @throws IOException If there is a low-level I/O error.
-   */
-  public static CharArrayMap<String> getStemDict(Reader reader, CharArrayMap<String> result) throws IOException {
-    BufferedReader br = null;
-    try {
-      br = getBufferedReader(reader);
-      String line;
-      while ((line = br.readLine()) != null) {
-        String[] wordstem = line.split("\t", 2);
-        result.put(wordstem[0], wordstem[1]);
-      }
-    } finally {
-      IOUtils.close(br);
-    }
-    return result;
-  }
-  
-  /**
-   * Accesses a resource by name and returns the (non-comment) lines containing
-   * data using the given character encoding.
-   *
-   * <p>
-   * A comment line is any line that starts with the character "#"
-   * </p>
-   *
-   * @return a list of non-blank non-comment lines with whitespace trimmed
-   * @throws IOException If there is a low-level I/O error.
-   */
-  public static List<String> getLines(InputStream stream, Charset charset) throws IOException{
-    BufferedReader input = null;
-    ArrayList<String> lines;
-    boolean success = false;
-    try {
-      input = getBufferedReader(IOUtils.getDecodingReader(stream, charset));
-
-      lines = new ArrayList<>();
-      for (String word=null; (word=input.readLine())!=null;) {
-        // skip initial bom marker
-        if (lines.isEmpty() && word.length() > 0 && word.charAt(0) == '\uFEFF')
-          word = word.substring(1);
-        // skip comments
-        if (word.startsWith("#")) continue;
-        word=word.trim();
-        // skip blank lines
-        if (word.length()==0) continue;
-        lines.add(word);
-      }
-      success = true;
-      return lines;
-    } finally {
-      if (success) {
-        IOUtils.close(input);
-      } else {
-        IOUtils.closeWhileHandlingException(input);
-      }
-    }
-  }
-  
-  private static BufferedReader getBufferedReader(Reader reader) {
-    return (reader instanceof BufferedReader) ? (BufferedReader) reader
-        : new BufferedReader(reader);
-  }
-  
-}

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/common/src/java/org/apache/lucene/collation/package-info.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/collation/package-info.java b/lucene/analysis/common/src/java/org/apache/lucene/collation/package-info.java
index 19a1c7e..e56071a 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/collation/package-info.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/collation/package-info.java
@@ -36,7 +36,7 @@
  *   </li>
  *   <li>
  *     Effective Locale-specific normalization (case differences, diacritics, etc.).
- *     ({@link org.apache.lucene.analysis.core.LowerCaseFilter} and 
+ *     ({@link org.apache.lucene.analysis.LowerCaseFilter} and 
  *     {@link org.apache.lucene.analysis.miscellaneous.ASCIIFoldingFilter} provide these services
  *     in a generic way that doesn't take into account locale-specific needs.)
  *   </li>

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/common/src/test/org/apache/lucene/analysis/ar/TestArabicAnalyzer.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/ar/TestArabicAnalyzer.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/ar/TestArabicAnalyzer.java
index 9842687..2a6a1c7 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/ar/TestArabicAnalyzer.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/ar/TestArabicAnalyzer.java
@@ -20,7 +20,7 @@ package org.apache.lucene.analysis.ar;
 import java.io.IOException;
 
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
-import org.apache.lucene.analysis.util.CharArraySet;
+import org.apache.lucene.analysis.CharArraySet;
 import org.apache.lucene.util.Version;
 
 /**

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/common/src/test/org/apache/lucene/analysis/ar/TestArabicStemFilter.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/ar/TestArabicStemFilter.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/ar/TestArabicStemFilter.java
index ca15485..872e7f5 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/ar/TestArabicStemFilter.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/ar/TestArabicStemFilter.java
@@ -21,11 +21,11 @@ import java.io.IOException;
 
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.CharArraySet;
 import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.core.KeywordTokenizer;
 import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
-import org.apache.lucene.analysis.util.CharArraySet;
 
 /**
  * Test the Arabic Normalization Filter

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/common/src/test/org/apache/lucene/analysis/bg/TestBulgarianAnalyzer.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/bg/TestBulgarianAnalyzer.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/bg/TestBulgarianAnalyzer.java
index 7f890b9..582d8e4 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/bg/TestBulgarianAnalyzer.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/bg/TestBulgarianAnalyzer.java
@@ -21,7 +21,7 @@ import java.io.IOException;
 
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
-import org.apache.lucene.analysis.util.CharArraySet;
+import org.apache.lucene.analysis.CharArraySet;
 
 /**
  * Test the Bulgarian analyzer

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/common/src/test/org/apache/lucene/analysis/bg/TestBulgarianStemmer.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/bg/TestBulgarianStemmer.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/bg/TestBulgarianStemmer.java
index daad7bb..2538717 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/bg/TestBulgarianStemmer.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/bg/TestBulgarianStemmer.java
@@ -22,11 +22,11 @@ import java.io.StringReader;
 
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.CharArraySet;
 import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.core.KeywordTokenizer;
 import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
-import org.apache.lucene.analysis.util.CharArraySet;
 
 /**
  * Test the Bulgarian Stemmer

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/common/src/test/org/apache/lucene/analysis/br/TestBrazilianAnalyzer.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/br/TestBrazilianAnalyzer.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/br/TestBrazilianAnalyzer.java
index a05dd0b..550a62a 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/br/TestBrazilianAnalyzer.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/br/TestBrazilianAnalyzer.java
@@ -22,11 +22,11 @@ import java.io.StringReader;
 
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.CharArraySet;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.core.KeywordTokenizer;
 import org.apache.lucene.analysis.core.LowerCaseTokenizer;
 import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
-import org.apache.lucene.analysis.util.CharArraySet;
 
 /**
  * Test the Brazilian Stem Filter, which only modifies the term text.

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/common/src/test/org/apache/lucene/analysis/ca/TestCatalanAnalyzer.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/ca/TestCatalanAnalyzer.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/ca/TestCatalanAnalyzer.java
index fd65332..289f22b 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/ca/TestCatalanAnalyzer.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/ca/TestCatalanAnalyzer.java
@@ -21,7 +21,7 @@ import java.io.IOException;
 
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
-import org.apache.lucene.analysis.util.CharArraySet;
+import org.apache.lucene.analysis.CharArraySet;
 
 public class TestCatalanAnalyzer extends BaseTokenStreamTestCase {
   /** This test fails with NPE when the 

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/common/src/test/org/apache/lucene/analysis/cjk/TestCJKAnalyzer.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/cjk/TestCJKAnalyzer.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/cjk/TestCJKAnalyzer.java
index 72c510c..1a47e42 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/cjk/TestCJKAnalyzer.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/cjk/TestCJKAnalyzer.java
@@ -22,17 +22,17 @@ import java.io.Reader;
 
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.CharArraySet;
 import org.apache.lucene.analysis.MockTokenizer;
+import org.apache.lucene.analysis.StopFilter;
 import org.apache.lucene.analysis.TokenFilter;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.charfilter.MappingCharFilter;
 import org.apache.lucene.analysis.charfilter.NormalizeCharMap;
 import org.apache.lucene.analysis.core.KeywordTokenizer;
-import org.apache.lucene.analysis.core.StopFilter;
 import org.apache.lucene.analysis.standard.StandardTokenizer;
 import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
-import org.apache.lucene.analysis.util.CharArraySet;
 
 /**
  * Most tests adopted from TestCJKTokenizer

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/common/src/test/org/apache/lucene/analysis/ckb/TestSoraniAnalyzer.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/ckb/TestSoraniAnalyzer.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/ckb/TestSoraniAnalyzer.java
index d08817c..dcb083d 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/ckb/TestSoraniAnalyzer.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/ckb/TestSoraniAnalyzer.java
@@ -21,7 +21,7 @@ import java.io.IOException;
 
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
-import org.apache.lucene.analysis.util.CharArraySet;
+import org.apache.lucene.analysis.CharArraySet;
 import org.apache.lucene.util.Version;
 
 /**

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/common/src/test/org/apache/lucene/analysis/commongrams/CommonGramsFilterTest.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/commongrams/CommonGramsFilterTest.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/commongrams/CommonGramsFilterTest.java
index 1171574..e940489 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/commongrams/CommonGramsFilterTest.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/commongrams/CommonGramsFilterTest.java
@@ -20,9 +20,9 @@ import java.io.StringReader;
 import java.util.Arrays;
 
 import org.apache.lucene.analysis.*;
+import org.apache.lucene.analysis.CharArraySet;
 import org.apache.lucene.analysis.core.WhitespaceTokenizer;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
-import org.apache.lucene.analysis.util.CharArraySet;
 
 /**
  * Tests CommonGrams(Query)Filter

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/common/src/test/org/apache/lucene/analysis/commongrams/TestCommonGramsFilterFactory.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/commongrams/TestCommonGramsFilterFactory.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/commongrams/TestCommonGramsFilterFactory.java
index 98c351e..5bcfb3d 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/commongrams/TestCommonGramsFilterFactory.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/commongrams/TestCommonGramsFilterFactory.java
@@ -17,18 +17,18 @@
 package org.apache.lucene.analysis.commongrams;
 
 
+import java.io.StringReader;
+
+import org.apache.lucene.analysis.CharArraySet;
 import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
-import org.apache.lucene.analysis.core.TestStopFilter;
+import org.apache.lucene.analysis.core.TestStopFilterFactory;
 import org.apache.lucene.analysis.util.BaseTokenStreamFactoryTestCase;
-import org.apache.lucene.analysis.util.CharArraySet;
 import org.apache.lucene.analysis.util.ClasspathResourceLoader;
 import org.apache.lucene.analysis.util.ResourceLoader;
 import org.apache.lucene.util.Version;
 
-import java.io.StringReader;
-
 /**
  * Tests pretty much copied from StopFilterFactoryTest We use the test files
  * used by the StopFilterFactoryTest TODO: consider creating separate test files
@@ -37,7 +37,7 @@ import java.io.StringReader;
 public class TestCommonGramsFilterFactory extends BaseTokenStreamFactoryTestCase {
 
   public void testInform() throws Exception {
-    ResourceLoader loader = new ClasspathResourceLoader(TestStopFilter.class);
+    ResourceLoader loader = new ClasspathResourceLoader(TestStopFilterFactory.class);
     assertTrue("loader is null and it shouldn't be", loader != null);
     CommonGramsFilterFactory factory = (CommonGramsFilterFactory) tokenFilterFactory("CommonGrams", Version.LATEST, loader,
         "words", "stop-1.txt", 

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/common/src/test/org/apache/lucene/analysis/commongrams/TestCommonGramsQueryFilterFactory.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/commongrams/TestCommonGramsQueryFilterFactory.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/commongrams/TestCommonGramsQueryFilterFactory.java
index 776365e..23d1bd4 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/commongrams/TestCommonGramsQueryFilterFactory.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/commongrams/TestCommonGramsQueryFilterFactory.java
@@ -16,12 +16,11 @@
  */
 package org.apache.lucene.analysis.commongrams;
 
-
+import org.apache.lucene.analysis.CharArraySet;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
-import org.apache.lucene.analysis.core.TestStopFilter;
+import org.apache.lucene.analysis.core.TestStopFilterFactory;
 import org.apache.lucene.analysis.util.BaseTokenStreamFactoryTestCase;
-import org.apache.lucene.analysis.util.CharArraySet;
 import org.apache.lucene.analysis.util.ClasspathResourceLoader;
 import org.apache.lucene.analysis.util.ResourceLoader;
 import org.apache.lucene.util.Version;
@@ -34,7 +33,7 @@ import org.apache.lucene.util.Version;
 public class TestCommonGramsQueryFilterFactory extends BaseTokenStreamFactoryTestCase {
 
   public void testInform() throws Exception {
-    ResourceLoader loader = new ClasspathResourceLoader(TestStopFilter.class);
+    ResourceLoader loader = new ClasspathResourceLoader(TestStopFilterFactory.class);
     assertTrue("loader is null and it shouldn't be", loader != null);
     CommonGramsQueryFilterFactory factory = (CommonGramsQueryFilterFactory) tokenFilterFactory("CommonGramsQuery", Version.LATEST, loader,
         "words", "stop-1.txt", 

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ba922148/lucene/analysis/common/src/test/org/apache/lucene/analysis/compound/TestCompoundWordTokenFilter.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/compound/TestCompoundWordTokenFilter.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/compound/TestCompoundWordTokenFilter.java
index 636d9ba..ed3abe4 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/compound/TestCompoundWordTokenFilter.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/compound/TestCompoundWordTokenFilter.java
@@ -24,6 +24,7 @@ import java.util.Arrays;
 
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.CharArraySet;
 import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.TokenFilter;
 import org.apache.lucene.analysis.TokenStream;
@@ -32,7 +33,6 @@ import org.apache.lucene.analysis.charfilter.MappingCharFilter;
 import org.apache.lucene.analysis.charfilter.NormalizeCharMap;
 import org.apache.lucene.analysis.compound.hyphenation.HyphenationTree;
 import org.apache.lucene.analysis.core.KeywordTokenizer;
-import org.apache.lucene.analysis.util.CharArraySet;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.util.Attribute;
 import org.apache.lucene.util.AttributeImpl;