You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucenenet.apache.org by sy...@apache.org on 2014/11/08 00:12:08 UTC
[04/34] lucenenet git commit: Raw porting of
Lucene.Net.Analysis.Common
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Tr/TurkishLowerCaseFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Tr/TurkishLowerCaseFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Tr/TurkishLowerCaseFilter.cs
new file mode 100644
index 0000000..c0a52c6
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Tr/TurkishLowerCaseFilter.cs
@@ -0,0 +1,151 @@
+using System;
+
+namespace org.apache.lucene.analysis.tr
+{
+
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ using CharTermAttribute = org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+
+ /// <summary>
+ /// Normalizes Turkish token text to lower case.
+ /// <para>
+ /// Turkish and Azeri have unique casing behavior for some characters. This
+ /// filter applies Turkish lowercase rules. For more information, see
+ /// <a href="http://en.wikipedia.org/wiki/Turkish_dotted_and_dotless_I">http://en.wikipedia.org/wiki/Turkish_dotted_and_dotless_I</a>.
+ /// </para>
+ /// </summary>
+ /// <remarks>
+ /// NOTE(review): raw port. The <c>char.codePointAt</c>, <c>char.getType</c>,
+ /// <c>char.NON_SPACING_MARK</c>, <c>char.toChars</c>, <c>char.ToLower(int)</c> and
+ /// <c>char.charCount</c> calls mirror <c>java.lang.Character</c>; <c>System.Char</c>
+ /// has no such members, so they still have to be mapped to .NET equivalents.
+ /// </remarks>
+ public sealed class TurkishLowerCaseFilter : TokenFilter
+ {
+     private const int LATIN_CAPITAL_LETTER_I = '\u0049';
+     private const int LATIN_SMALL_LETTER_I = '\u0069';
+     private const int LATIN_SMALL_LETTER_DOTLESS_I = '\u0131';
+     private const int COMBINING_DOT_ABOVE = '\u0307';
+
+     // Term attribute of the wrapped stream; assigned once in the constructor.
+     private readonly CharTermAttribute termAtt;
+
+     /// <summary>
+     /// Create a new TurkishLowerCaseFilter, that normalizes Turkish token text
+     /// to lower case.
+     /// </summary>
+     /// <param name="in"> TokenStream to filter </param>
+     public TurkishLowerCaseFilter(TokenStream @in) : base(@in)
+     {
+         // addAttribute is an instance member, so it cannot be called from a
+         // field initializer (compiler error CS0236); register the attribute here.
+         termAtt = addAttribute(typeof(CharTermAttribute));
+     }
+
+     /// <summary>
+     /// Advances the wrapped stream by one token and lower-cases its term text
+     /// in place using the Turkish rules for the letter I.
+     /// </summary>
+     /// <returns> <c>true</c> if a token was produced, <c>false</c> once the input is exhausted </returns>
+     public override bool incrementToken()
+     {
+         // true while we are on a capital I or on the non-spacing marks that follow it
+         bool iOrAfter = false;
+
+         if (input.incrementToken())
+         {
+             char[] buffer = termAtt.buffer();
+             int length = termAtt.length();
+             for (int i = 0; i < length;)
+             {
+                 int ch = char.codePointAt(buffer, i, length);
+
+                 iOrAfter = (ch == LATIN_CAPITAL_LETTER_I || (iOrAfter && char.getType(ch) == char.NON_SPACING_MARK));
+
+                 if (iOrAfter) // all the special I turkish handling happens here.
+                 {
+                     switch (ch)
+                     {
+                         // remove COMBINING_DOT_ABOVE to mimic composed lowercase
+                         case COMBINING_DOT_ABOVE:
+                             // i is deliberately not advanced: the next char slid into slot i
+                             length = delete(buffer, i, length);
+                             continue;
+                         // i itself, it depends if it is followed by COMBINING_DOT_ABOVE
+                         // if it is, we will make it small i and later remove the dot
+                         case LATIN_CAPITAL_LETTER_I:
+                             if (isBeforeDot(buffer, i + 1, length))
+                             {
+                                 buffer[i] = (char)LATIN_SMALL_LETTER_I;
+                             }
+                             else
+                             {
+                                 buffer[i] = (char)LATIN_SMALL_LETTER_DOTLESS_I;
+                                 // below is an optimization. no COMBINING_DOT_ABOVE follows,
+                                 // so don't waste time calculating Character.getType(), etc
+                                 iOrAfter = false;
+                             }
+                             i++;
+                             continue;
+                     }
+                 }
+
+                 // ordinary code point: lower-case it and step over the chars written
+                 i += char.toChars(char.ToLower(ch), buffer, i);
+             }
+
+             // the term may have shrunk if combining dots were deleted
+             termAtt.Length = length;
+             return true;
+         }
+         else
+         {
+             return false;
+         }
+     }
+
+
+     /// <summary>
+     /// Lookahead for a combining dot above.
+     /// Other non-spacing marks may be in between.
+     /// </summary>
+     /// <param name="s"> term buffer </param>
+     /// <param name="pos"> index at which the lookahead starts </param>
+     /// <param name="len"> number of valid chars in <paramref name="s"/> </param>
+     /// <returns> <c>true</c> if a COMBINING DOT ABOVE is reached before any character that is not a non-spacing mark </returns>
+     private bool isBeforeDot(char[] s, int pos, int len)
+     {
+         for (int i = pos; i < len;)
+         {
+             int ch = char.codePointAt(s, i, len);
+             if (char.getType(ch) != char.NON_SPACING_MARK)
+             {
+                 return false;
+             }
+             if (ch == COMBINING_DOT_ABOVE)
+             {
+                 return true;
+             }
+             i += char.charCount(ch);
+         }
+
+         return false;
+     }
+
+     /// <summary>
+     /// Delete a character in-place by shifting the tail of the buffer left by one.
+     /// Rarely happens, only if COMBINING_DOT_ABOVE is found after an i.
+     /// </summary>
+     /// <param name="s"> term buffer </param>
+     /// <param name="pos"> index of the char to drop </param>
+     /// <param name="len"> number of valid chars in <paramref name="s"/> </param>
+     /// <returns> the new logical length, always <c>len - 1</c> </returns>
+     private int delete(char[] s, int pos, int len)
+     {
+         if (pos < len)
+         {
+             Array.Copy(s, pos + 1, s, pos, len - pos - 1);
+         }
+
+         return len - 1;
+     }
+ }
+
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Tr/TurkishLowerCaseFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Tr/TurkishLowerCaseFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Tr/TurkishLowerCaseFilterFactory.cs
new file mode 100644
index 0000000..7edf5e2
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Tr/TurkishLowerCaseFilterFactory.cs
@@ -0,0 +1,64 @@
+using System.Collections.Generic;
+
+namespace org.apache.lucene.analysis.tr
+{
+
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ using AbstractAnalysisFactory = org.apache.lucene.analysis.util.AbstractAnalysisFactory;
+ using MultiTermAwareComponent = org.apache.lucene.analysis.util.MultiTermAwareComponent;
+ using TokenFilterFactory = org.apache.lucene.analysis.util.TokenFilterFactory;
+
+ /// <summary>
+ /// Factory for <see cref="TurkishLowerCaseFilter"/>.
+ /// <code>
+ /// &lt;fieldType name="text_trlwr" class="solr.TextField" positionIncrementGap="100"&gt;
+ ///   &lt;analyzer&gt;
+ ///     &lt;tokenizer class="solr.StandardTokenizerFactory"/&gt;
+ ///     &lt;filter class="solr.TurkishLowerCaseFilterFactory"/&gt;
+ ///   &lt;/analyzer&gt;
+ /// &lt;/fieldType&gt;
+ /// </code>
+ /// </summary>
+ public class TurkishLowerCaseFilterFactory : TokenFilterFactory, MultiTermAwareComponent
+ {
+
+     /// <summary>
+     /// Creates a new TurkishLowerCaseFilterFactory.
+     /// </summary>
+     /// <param name="args"> factory arguments; this factory defines none of its own </param>
+     /// <exception cref="System.ArgumentException"> if any entry is still present in
+     /// <paramref name="args"/> after the base constructor has run </exception>
+     public TurkishLowerCaseFilterFactory(IDictionary<string, string> args) : base(args)
+     {
+         if (args.Count > 0)
+         {
+             // Concatenating the dictionary itself would only print its type name,
+             // so spell out the offending entries.
+             throw new System.ArgumentException("Unknown parameters: " + DescribeArgs(args));
+         }
+     }
+
+     /// <summary>
+     /// Wraps <paramref name="input"/> in a <see cref="TurkishLowerCaseFilter"/>.
+     /// </summary>
+     public override TokenStream create(TokenStream input)
+     {
+         return new TurkishLowerCaseFilter(input);
+     }
+
+     /// <summary>
+     /// Returns this factory itself as the multi-term component.
+     /// </summary>
+     public virtual AbstractAnalysisFactory MultiTermComponent
+     {
+         get
+         {
+             return this;
+         }
+     }
+
+     /// <summary>
+     /// Renders the entries as <c>{key=value, key=value}</c> for error messages.
+     /// </summary>
+     private static string DescribeArgs(IDictionary<string, string> args)
+     {
+         System.Text.StringBuilder text = new System.Text.StringBuilder("{");
+         foreach (KeyValuePair<string, string> entry in args)
+         {
+             if (text.Length > 1)
+             {
+                 text.Append(", ");
+             }
+             text.Append(entry.Key).Append('=').Append(entry.Value);
+         }
+         return text.Append('}').ToString();
+     }
+ }
+
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Util/AbstractAnalysisFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Util/AbstractAnalysisFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Util/AbstractAnalysisFactory.cs
new file mode 100644
index 0000000..8cf5e28
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Util/AbstractAnalysisFactory.cs
@@ -0,0 +1,406 @@
+using System;
+using System.Collections.Generic;
+using Lucene.Net.Analysis.Core;
+using org.apache.lucene.analysis.util;
+
+namespace Lucene.Net.Analysis.Util
+{
+
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+ /// <summary>
+ /// Abstract parent class for the analysis factories <c>TokenizerFactory</c>,
+ /// <c>TokenFilterFactory</c> and <c>CharFilterFactory</c>.
+ /// <para>
+ /// The typical lifecycle for a factory consumer is:
+ /// <list type="number">
+ /// <item><description>Create factory via its constructor (or via XXXFactory.forName)</description></item>
+ /// <item><description>(Optional) If the factory uses resources such as files,
+ /// <c>ResourceLoaderAware.inform(ResourceLoader)</c> is called to initialize those resources.</description></item>
+ /// <item><description>Consumer calls create() to obtain instances.</description></item>
+ /// </list>
+ /// </para>
+ /// </summary>
+ /// <remarks>
+ /// All <c>require*</c>/<c>get*</c> helpers <em>consume</em> the argument: the entry is
+ /// removed from the dictionary as it is read.
+ /// NOTE(review): raw port. <c>Pattern</c>, <c>Matcher</c>, <c>ResourceLoader</c>,
+ /// <c>InputStream</c>, <c>Reader</c>, <c>CharsetDecoder</c>, <c>StandardCharsets</c>,
+ /// <c>IOUtils</c> and <c>WordlistLoader</c> are used exactly as in the original and
+ /// are assumed to be supplied elsewhere in the project - confirm.
+ /// </remarks>
+ public abstract class AbstractAnalysisFactory
+ {
+     public const string LUCENE_MATCH_VERSION_PARAM = "luceneMatchVersion";
+
+     private const string CLASS_NAME = "class";
+
+     /// <summary>
+     /// The original args, before any processing </summary>
+     private readonly IDictionary<string, string> originalArgs;
+
+     /// <summary>
+     /// the luceneVersion arg </summary>
+     protected internal readonly Lucene.Net.Util.Version luceneMatchVersion;
+     /// <summary>
+     /// whether the luceneMatchVersion arg is explicitly specified in the serialized schema </summary>
+     private bool isExplicitLuceneMatchVersion = false;
+
+     /// <summary>
+     /// Initialize this factory via a set of key-value pairs.
+     /// A read-only snapshot of <paramref name="args"/> is kept, then the
+     /// <c>luceneMatchVersion</c> and <c>class</c> entries are consumed.
+     /// </summary>
+     protected internal AbstractAnalysisFactory(IDictionary<string, string> args)
+     {
+         originalArgs = Collections.UnmodifiableMap(new Dictionary<string, string>(args));
+         // Uses the private helper rather than the virtual get(): a subclass
+         // override must not run before the subclass constructor has.
+         string version = TakeArg(args, LUCENE_MATCH_VERSION_PARAM);
+         // Qualified on purpose: with "using System;" a bare Version is System.Version.
+         luceneMatchVersion = version == null ? null : Lucene.Net.Util.Version.ParseLeniently(version);
+         args.Remove(CLASS_NAME); // consume the class arg
+     }
+
+     /// <summary>
+     /// The arguments as they were passed to the constructor, before any were consumed.
+     /// </summary>
+     public IDictionary<string, string> OriginalArgs
+     {
+         get
+         {
+             return originalArgs;
+         }
+     }
+
+     /// <summary>
+     /// This method can be called in the <c>TokenizerFactory.create</c>
+     /// or <c>TokenFilterFactory.create</c> methods,
+     /// to inform the user that a <c>luceneMatchVersion</c> is required for this factory.
+     /// </summary>
+     /// <exception cref="System.ArgumentException"> if no match version was configured </exception>
+     protected internal void assureMatchVersion()
+     {
+         if (luceneMatchVersion == null)
+         {
+             throw new System.ArgumentException("Configuration Error: Factory '" + this.GetType().FullName + "' needs a 'luceneMatchVersion' parameter");
+         }
+     }
+
+     /// <summary>
+     /// The configured match version, or <c>null</c> if none was given.
+     /// </summary>
+     public Lucene.Net.Util.Version LuceneMatchVersion
+     {
+         get
+         {
+             return this.luceneMatchVersion;
+         }
+     }
+
+     /// <summary>
+     /// Consumes and returns the mandatory argument <paramref name="name"/>.
+     /// </summary>
+     /// <exception cref="System.ArgumentException"> if the argument is missing </exception>
+     public virtual string require(IDictionary<string, string> args, string name)
+     {
+         string s = TakeArg(args, name);
+         if (s == null)
+         {
+             throw new System.ArgumentException("Configuration Error: missing parameter '" + name + "'");
+         }
+         return s;
+     }
+
+     /// <summary>
+     /// Same as the four-argument overload with a case-sensitive comparison.
+     /// </summary>
+     public virtual string require(IDictionary<string, string> args, string name, ICollection<string> allowedValues)
+     {
+         return require(args, name, allowedValues, true);
+     }
+
+     /// <summary>
+     /// Consumes the mandatory argument <paramref name="name"/> and checks it against
+     /// <paramref name="allowedValues"/>.
+     /// </summary>
+     /// <exception cref="System.ArgumentException"> if the argument is missing or not an allowed value </exception>
+     public virtual string require(IDictionary<string, string> args, string name, ICollection<string> allowedValues, bool caseSensitive)
+     {
+         string s = TakeArg(args, name);
+         if (s == null)
+         {
+             throw new System.ArgumentException("Configuration Error: missing parameter '" + name + "'");
+         }
+         if (IsAllowed(s, allowedValues, caseSensitive))
+         {
+             return s;
+         }
+         throw new System.ArgumentException("Configuration Error: '" + name + "' value must be one of " + FormatValues(allowedValues));
+     }
+
+     /// <summary>
+     /// Consumes and returns the optional argument <paramref name="name"/>, or <c>null</c> if absent.
+     /// </summary>
+     public virtual string get(IDictionary<string, string> args, string name)
+     {
+         return TakeArg(args, name); // defaultVal = null
+     }
+
+     /// <summary>
+     /// Consumes and returns the optional argument <paramref name="name"/>, or
+     /// <paramref name="defaultVal"/> if absent.
+     /// </summary>
+     public virtual string get(IDictionary<string, string> args, string name, string defaultVal)
+     {
+         string s = TakeArg(args, name);
+         return s == null ? defaultVal : s;
+     }
+
+     /// <summary>
+     /// Same as the five-argument overload with a <c>null</c> default and a case-sensitive comparison.
+     /// </summary>
+     public virtual string get(IDictionary<string, string> args, string name, ICollection<string> allowedValues)
+     {
+         return get(args, name, allowedValues, null); // defaultVal = null
+     }
+
+     /// <summary>
+     /// Same as the five-argument overload with a case-sensitive comparison.
+     /// </summary>
+     public virtual string get(IDictionary<string, string> args, string name, ICollection<string> allowedValues, string defaultVal)
+     {
+         return get(args, name, allowedValues, defaultVal, true);
+     }
+
+     /// <summary>
+     /// Consumes the optional argument <paramref name="name"/>; returns
+     /// <paramref name="defaultVal"/> if it is absent, otherwise checks it against
+     /// <paramref name="allowedValues"/>.
+     /// </summary>
+     /// <exception cref="System.ArgumentException"> if the argument is present but not an allowed value </exception>
+     public virtual string get(IDictionary<string, string> args, string name, ICollection<string> allowedValues, string defaultVal, bool caseSensitive)
+     {
+         string s = TakeArg(args, name);
+         if (s == null)
+         {
+             return defaultVal;
+         }
+         if (IsAllowed(s, allowedValues, caseSensitive))
+         {
+             return s;
+         }
+         throw new System.ArgumentException("Configuration Error: '" + name + "' value must be one of " + FormatValues(allowedValues));
+     }
+
+     /// <summary>
+     /// Consumes the mandatory argument and parses it as an integer (culture-invariant).
+     /// </summary>
+     protected internal int requireInt(IDictionary<string, string> args, string name)
+     {
+         return int.Parse(require(args, name), System.Globalization.CultureInfo.InvariantCulture);
+     }
+
+     /// <summary>
+     /// Consumes the optional argument and parses it as an integer (culture-invariant).
+     /// </summary>
+     protected internal int getInt(IDictionary<string, string> args, string name, int defaultVal)
+     {
+         string s = TakeArg(args, name);
+         return s == null ? defaultVal : int.Parse(s, System.Globalization.CultureInfo.InvariantCulture);
+     }
+
+     /// <summary>
+     /// Consumes the mandatory argument and parses it with <c>bool.Parse</c>.
+     /// </summary>
+     protected internal bool requireBoolean(IDictionary<string, string> args, string name)
+     {
+         return bool.Parse(require(args, name));
+     }
+
+     /// <summary>
+     /// Consumes the optional argument and parses it with <c>bool.Parse</c>.
+     /// </summary>
+     protected internal bool getBoolean(IDictionary<string, string> args, string name, bool defaultVal)
+     {
+         string s = TakeArg(args, name);
+         return s == null ? defaultVal : bool.Parse(s);
+     }
+
+     /// <summary>
+     /// Consumes the mandatory argument and parses it as a float. The invariant
+     /// culture is used so that "1.5" parses the same on every machine.
+     /// </summary>
+     protected internal float requireFloat(IDictionary<string, string> args, string name)
+     {
+         return float.Parse(require(args, name), System.Globalization.CultureInfo.InvariantCulture);
+     }
+
+     /// <summary>
+     /// Consumes the optional argument and parses it as a float (culture-invariant).
+     /// </summary>
+     protected internal float getFloat(IDictionary<string, string> args, string name, float defaultVal)
+     {
+         string s = TakeArg(args, name);
+         return s == null ? defaultVal : float.Parse(s, System.Globalization.CultureInfo.InvariantCulture);
+     }
+
+     /// <summary>
+     /// Consumes the mandatory argument and returns its first character.
+     /// </summary>
+     public virtual char requireChar(IDictionary<string, string> args, string name)
+     {
+         return require(args, name)[0];
+     }
+
+     /// <summary>
+     /// Consumes the optional argument, which must be exactly one character long.
+     /// </summary>
+     /// <exception cref="System.ArgumentException"> if the value is present but not a single character </exception>
+     public virtual char getChar(IDictionary<string, string> args, string name, char defaultValue)
+     {
+         string s = TakeArg(args, name);
+         if (s == null)
+         {
+             return defaultValue;
+         }
+         if (s.Length != 1)
+         {
+             throw new System.ArgumentException(name + " should be a char. \"" + s + "\" is invalid");
+         }
+         return s[0];
+     }
+
+     private static readonly Pattern ITEM_PATTERN = Pattern.compile("[^,\\s]+");
+
+     /// <summary>
+     /// Returns whitespace- and/or comma-separated set of values, or null if none are found </summary>
+     public virtual HashSet<string> getSet(IDictionary<string, string> args, string name)
+     {
+         string s = TakeArg(args, name);
+         if (s == null)
+         {
+             return null;
+         }
+
+         HashSet<string> set = null;
+         Matcher matcher = ITEM_PATTERN.matcher(s);
+         if (matcher.find())
+         {
+             // the set is only allocated once at least one item has matched
+             set = new HashSet<string>();
+             set.Add(matcher.group(0));
+             while (matcher.find())
+             {
+                 set.Add(matcher.group(0));
+             }
+         }
+         return set;
+     }
+
+     /// <summary>
+     /// Compiles a pattern for the value of the specified argument key <code>name</code>
+     /// </summary>
+     /// <exception cref="System.ArgumentException"> if the argument is missing or is not a valid pattern </exception>
+     protected internal Pattern GetPattern(IDictionary<string, string> args, string name)
+     {
+         try
+         {
+             return Pattern.compile(require(args, name));
+         }
+         catch (PatternSyntaxException e)
+         {
+             throw new System.ArgumentException("Configuration Error: '" + name + "' can not be parsed in " + this.GetType().Name, e);
+         }
+     }
+
+     /// <summary>
+     /// Returns as <c>CharArraySet</c> from wordFiles, which
+     /// can be a comma-separated list of filenames. Returns <c>null</c> when
+     /// <paramref name="wordFiles"/> yields no file names.
+     /// </summary>
+     protected internal CharArraySet GetWordSet(ResourceLoader loader, string wordFiles, bool ignoreCase)
+     {
+         assureMatchVersion();
+         IList<string> files = splitFileNames(wordFiles);
+         CharArraySet words = null;
+         if (files.Count > 0)
+         {
+             // default stopwords list has 35 or so words, but maybe don't make it that
+             // big to start
+             words = new CharArraySet(luceneMatchVersion, files.Count * 10, ignoreCase);
+             foreach (string file in files)
+             {
+                 IList<string> wlist = getLines(loader, file.Trim());
+                 words.addAll(StopFilter.makeStopSet(luceneMatchVersion, wlist, ignoreCase));
+             }
+         }
+         return words;
+     }
+
+     /// <summary>
+     /// Returns the resource's lines (with content treated as UTF-8)
+     /// </summary>
+     protected internal IList<string> getLines(ResourceLoader loader, string resource)
+     {
+         return WordlistLoader.getLines(loader.openResource(resource), StandardCharsets.UTF_8);
+     }
+
+     /// <summary>
+     /// Same as <c>GetWordSet(ResourceLoader, string, bool)</c>,
+     /// except the input is in snowball format.
+     /// </summary>
+     protected internal CharArraySet getSnowballWordSet(ResourceLoader loader, string wordFiles, bool ignoreCase)
+     {
+         assureMatchVersion();
+         IList<string> files = splitFileNames(wordFiles);
+         CharArraySet words = null;
+         if (files.Count > 0)
+         {
+             // default stopwords list has 35 or so words, but maybe don't make it that
+             // big to start
+             words = new CharArraySet(luceneMatchVersion, files.Count * 10, ignoreCase);
+             foreach (string file in files)
+             {
+                 InputStream stream = null;
+                 Reader reader = null;
+                 try
+                 {
+                     stream = loader.openResource(file.Trim());
+                     CharsetDecoder decoder = StandardCharsets.UTF_8.newDecoder().onMalformedInput(CodingErrorAction.REPORT).onUnmappableCharacter(CodingErrorAction.REPORT);
+                     reader = new InputStreamReader(stream, decoder);
+                     WordlistLoader.getSnowballWordSet(reader, words);
+                 }
+                 finally
+                 {
+                     IOUtils.closeWhileHandlingException(reader, stream);
+                 }
+             }
+         }
+         return words;
+     }
+
+     /// <summary>
+     /// Splits file names separated by comma character.
+     /// File names can contain comma characters escaped by backslash '\'
+     /// </summary>
+     /// <param name="fileNames"> the string containing file names </param>
+     /// <returns> a list of file names with the escaping backslashes removed; an empty,
+     /// fixed-size list when <paramref name="fileNames"/> is <c>null</c> </returns>
+     protected internal IList<string> splitFileNames(string fileNames)
+     {
+         if (fileNames == null)
+         {
+             return new string[0];
+         }
+
+         // split on every comma that is NOT preceded by a backslash
+         string[] parts = System.Text.RegularExpressions.Regex.Split(fileNames, "(?<!\\\\),");
+
+         // Java's String.split drops trailing empty strings (but keeps the input
+         // as the single element when nothing matched); mirror that here.
+         int count = parts.Length;
+         if (count > 1)
+         {
+             while (count > 0 && parts[count - 1].Length == 0)
+             {
+                 count--;
+             }
+         }
+
+         IList<string> result = new List<string>(count);
+         for (int i = 0; i < count; i++)
+         {
+             // un-escape "\," to ","
+             result.Add(System.Text.RegularExpressions.Regex.Replace(parts[i], "\\\\(?=,)", ""));
+         }
+
+         return result;
+     }
+
+     /// <summary>
+     /// The string used to specify the concrete class name in a serialized representation: the class arg.
+     /// If the concrete class name was not specified via a class arg, returns the full name of the runtime type.
+     /// </summary>
+     public virtual string ClassArg
+     {
+         get
+         {
+             if (null != originalArgs)
+             {
+                 // The indexer would throw KeyNotFoundException when no class arg was given.
+                 string className;
+                 if (originalArgs.TryGetValue(CLASS_NAME, out className) && null != className)
+                 {
+                     return className;
+                 }
+             }
+             return this.GetType().FullName;
+         }
+     }
+
+     /// <summary>
+     /// Whether the luceneMatchVersion arg was explicitly specified; never set by this class itself.
+     /// </summary>
+     public virtual bool ExplicitLuceneMatchVersion
+     {
+         get
+         {
+             return isExplicitLuceneMatchVersion;
+         }
+         set
+         {
+             this.isExplicitLuceneMatchVersion = value;
+         }
+     }
+
+     /// <summary>
+     /// Removes <paramref name="name"/> from <paramref name="args"/> and returns its
+     /// value, or <c>null</c> when the key is absent. (IDictionary.Remove only
+     /// reports success, it does not hand back the removed value.)
+     /// </summary>
+     private static string TakeArg(IDictionary<string, string> args, string name)
+     {
+         string value;
+         if (!args.TryGetValue(name, out value))
+         {
+             return null;
+         }
+         args.Remove(name);
+         return value;
+     }
+
+     /// <summary>
+     /// True if <paramref name="value"/> equals one of <paramref name="allowedValues"/>.
+     /// Ordinal comparison: configuration tokens must not depend on the current culture.
+     /// </summary>
+     private static bool IsAllowed(string value, ICollection<string> allowedValues, bool caseSensitive)
+     {
+         StringComparison mode = caseSensitive ? StringComparison.Ordinal : StringComparison.OrdinalIgnoreCase;
+         foreach (string allowedValue in allowedValues)
+         {
+             if (value.Equals(allowedValue, mode))
+             {
+                 return true;
+             }
+         }
+         return false;
+     }
+
+     /// <summary>
+     /// Renders the values as <c>[a, b, c]</c> for error messages.
+     /// </summary>
+     private static string FormatValues(ICollection<string> values)
+     {
+         return "[" + string.Join(", ", values) + "]";
+     }
+
+ }
+
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Util/AnalysisSPILoader.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Util/AnalysisSPILoader.cs b/src/Lucene.Net.Analysis.Common/Analysis/Util/AnalysisSPILoader.cs
new file mode 100644
index 0000000..351446f
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Util/AnalysisSPILoader.cs
@@ -0,0 +1,165 @@
+using System;
+using System.Collections;
+using System.Collections.Generic;
+using System.Threading;
+using Lucene.Net.Analysis.Util;
+
+namespace org.apache.lucene.analysis.util
+{
+
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+ using SPIClassIterator = org.apache.lucene.util.SPIClassIterator;
+
+ /// <summary>
+ /// Helper class for loading named SPIs from classpath (e.g. Tokenizers, TokenStreams).
+ /// @lucene.internal
+ /// </summary>
+ /// <remarks>
+ /// NOTE(review): raw port. <c>ClassLoader</c>, <c>Type.ClassLoader</c>,
+ /// <c>Thread.ContextClassLoader</c>, <c>SPIClassIterator</c>, <c>LinkedHashMap</c>,
+ /// <c>Collections</c>, <c>Arrays</c> and <c>ServiceConfigurationError</c> are kept exactly
+ /// as in the original; the class-loader concept has no direct .NET counterpart and
+ /// still needs a design decision.
+ /// </remarks>
+ internal sealed class AnalysisSPILoader<S> where S : AbstractAnalysisFactory
+ {
+
+     // Replaced wholesale by reload(); readers always see a complete map.
+     private volatile IDictionary<string, Type> services = Collections.emptyMap();
+     private readonly Type clazz;
+     private readonly string[] suffixes;
+
+     // Private monitor for reload(); avoids locking on the publicly reachable instance.
+     private readonly object reloadGate = new object();
+
+     public AnalysisSPILoader(Type clazz) : this(clazz, new string[] { clazz.Name })
+     {
+     }
+
+     public AnalysisSPILoader(Type clazz, ClassLoader loader) : this(clazz, new string[] { clazz.Name }, loader)
+     {
+     }
+
+     public AnalysisSPILoader(Type clazz, string[] suffixes) : this(clazz, suffixes, Thread.CurrentThread.ContextClassLoader)
+     {
+     }
+
+     /// <summary>
+     /// Creates a loader for services of type <paramref name="clazz"/> whose simple
+     /// class names end in one of <paramref name="suffixes"/>, and performs the initial scan.
+     /// </summary>
+     public AnalysisSPILoader(Type clazz, string[] suffixes, ClassLoader classloader)
+     {
+         this.clazz = clazz;
+         this.suffixes = suffixes;
+         // if clazz' classloader is not a parent of the given one, we scan clazz's classloader, too:
+         ClassLoader clazzClassloader = clazz.ClassLoader;
+         if (clazzClassloader != null && !SPIClassIterator.isParentClassLoader(clazzClassloader, classloader))
+         {
+             reload(clazzClassloader);
+         }
+         reload(classloader);
+     }
+
+     /// <summary>
+     /// Reloads the internal SPI list from the given <c>ClassLoader</c>.
+     /// Changes to the service list are visible after the method ends, all
+     /// iterators (e.g., from <see cref="availableServices()"/>,...) stay consistent.
+     ///
+     /// <para><b>NOTE:</b> Only new service providers are added, existing ones are
+     /// never removed or replaced.
+     ///
+     /// </para>
+     /// <para><em>This method is expensive and should only be called for discovery
+     /// of new service providers on the given classpath/classloader!</em>
+     /// </para>
+     /// </summary>
+     public void reload(ClassLoader classloader)
+     {
+         lock (reloadGate)
+         {
+             // work on a copy; the field is swapped only once the scan is complete
+             LinkedHashMap<string, Type> merged = new LinkedHashMap<string, Type>(this.services);
+             SPIClassIterator<S> loader = SPIClassIterator.get(clazz, classloader);
+             while (loader.hasNext())
+             {
+                 Type service = loader.next();
+                 string clazzName = service.Name;
+                 string name = null;
+                 foreach (string suffix in suffixes)
+                 {
+                     if (clazzName.EndsWith(suffix, StringComparison.Ordinal))
+                     {
+                         // service name = simple class name minus suffix, lower-cased culture-independently
+                         name = clazzName.Substring(0, clazzName.Length - suffix.Length).ToLowerInvariant();
+                         break;
+                     }
+                 }
+                 if (name == null)
+                 {
+                     throw new ServiceConfigurationError("The class name " + service.Name + " has wrong suffix, allowed are: " + Arrays.ToString(suffixes));
+                 }
+                 // only add the first one for each name, later services will be ignored
+                 // this allows to place services before others in classpath to make
+                 // them used instead of others
+                 //
+                 // TODO: Should we disallow duplicate names here?
+                 // Allowing it may get confusing on collisions, as different packages
+                 // could contain same factory class, which is a naming bug!
+                 // When changing this be careful to allow reload()!
+                 if (!merged.containsKey(name))
+                 {
+                     merged.put(name, service);
+                 }
+             }
+             this.services = Collections.unmodifiableMap(merged);
+         }
+     }
+
+     /// <summary>
+     /// Instantiates the service registered under <paramref name="name"/> through its
+     /// public constructor taking an <c>IDictionary&lt;string, string&gt;</c>.
+     /// </summary>
+     /// <exception cref="System.ArgumentException"> if the name is unknown or the instance cannot be created </exception>
+     public S newInstance(string name, IDictionary<string, string> args)
+     {
+         Type service = lookupClass(name);
+         try
+         {
+             // Factories declare a constructor over the generic dictionary interface;
+             // looking up the non-generic IDictionary would never match.
+             System.Reflection.ConstructorInfo ctor = service.GetConstructor(new Type[] { typeof(IDictionary<string, string>) });
+             if (ctor == null)
+             {
+                 throw new MissingMethodException(service.FullName, ".ctor(IDictionary<string, string>)");
+             }
+             return (S)ctor.Invoke(new object[] { args });
+         }
+         catch (Exception e)
+         {
+             throw new System.ArgumentException("SPI class of type " + clazz.Name + " with name '" + name + "' cannot be instantiated. " + "This is likely due to a misconfiguration of the java class '" + service.Name + "': ", e);
+         }
+     }
+
+     /// <summary>
+     /// Returns the type registered under <paramref name="name"/> (case-insensitive).
+     /// </summary>
+     /// <exception cref="System.ArgumentException"> if no such service is registered </exception>
+     public Type lookupClass(string name)
+     {
+         // read the volatile field once so lookup and error message agree
+         IDictionary<string, Type> snapshot = services;
+         Type service;
+         // TryGetValue: the dictionary indexer throws for unknown keys instead of yielding null
+         if (snapshot.TryGetValue(name.ToLowerInvariant(), out service) && service != null)
+         {
+             return service;
+         }
+         throw new System.ArgumentException("A SPI class of type " + clazz.Name + " with name '" + name + "' does not exist. " + "You need to add the corresponding JAR file supporting this SPI to your classpath. " + "The current classpath supports the following names: " + "[" + string.Join(", ", snapshot.Keys) + "]");
+     }
+
+     /// <summary>
+     /// Returns a copy of the currently registered service names.
+     /// </summary>
+     public HashSet<string> availableServices()
+     {
+         return new HashSet<string>(services.Keys);
+     }
+ }
+
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Util/CharArrayIterator.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Util/CharArrayIterator.cs b/src/Lucene.Net.Analysis.Common/Analysis/Util/CharArrayIterator.cs
new file mode 100644
index 0000000..1d1c44b
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Util/CharArrayIterator.cs
@@ -0,0 +1,278 @@
+using System;
+
+namespace org.apache.lucene.analysis.util
+{
+
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+ /// <summary>
+ /// A CharacterIterator used internally for use with <c>BreakIterator</c>
+ /// @lucene.internal
+ /// </summary>
+ /// <remarks>
+ /// Iterates over the window <c>[start, start + length)</c> of a char array; the
+ /// positions exposed through <see cref="Index"/>, <see cref="BeginIndex"/> and
+ /// <see cref="EndIndex"/> are relative to <c>start</c>.
+ /// NOTE(review): raw port. <c>CharacterIterator</c>, <c>BreakIterator</c>, <c>Locale</c>,
+ /// <c>CloneNotSupportedException</c> and the inherited <c>DONE</c>/<c>clone()</c> members are
+ /// assumed to be supplied elsewhere in the project - confirm.
+ /// </remarks>
+ public abstract class CharArrayIterator : CharacterIterator
+ {
+     private char[] array;
+     private int start;
+     private int index;   // absolute position inside array
+     private int length;
+     private int limit;   // start + length, exclusive end
+
+     /// <summary>
+     /// The buffer currently being examined.
+     /// </summary>
+     public virtual char[] Text
+     {
+         get
+         {
+             return array;
+         }
+     }
+
+     /// <summary>
+     /// Offset into <see cref="Text"/> at which the examined region begins.
+     /// </summary>
+     public virtual int Start
+     {
+         get
+         {
+             return start;
+         }
+     }
+
+     /// <summary>
+     /// Number of chars in the examined region.
+     /// </summary>
+     public virtual int Length
+     {
+         get
+         {
+             return length;
+         }
+     }
+
+     /// <summary>
+     /// Set a new region of text to be examined by this iterator
+     /// </summary>
+     /// <param name="array"> text buffer to examine </param>
+     /// <param name="start"> offset into buffer </param>
+     /// <param name="length"> maximum length to examine </param>
+     public virtual void setText(char[] array, int start, int length)
+     {
+         this.array = array;
+         this.start = start;
+         this.index = start;
+         this.length = length;
+         this.limit = start + length;
+     }
+
+     /// <summary>
+     /// Char at the current position (after the workaround hook), or <c>DONE</c> at the end of the region.
+     /// </summary>
+     public override char current()
+     {
+         if (index == limit)
+         {
+             return DONE;
+         }
+         return jreBugWorkaround(array[index]);
+     }
+
+     /// <summary>
+     /// Hook that lets a subclass substitute the char reported to the consumer.
+     /// </summary>
+     protected internal abstract char jreBugWorkaround(char ch);
+
+     public override char first()
+     {
+         index = start;
+         return current();
+     }
+
+     public override int BeginIndex
+     {
+         get
+         {
+             return 0;
+         }
+     }
+
+     public override int EndIndex
+     {
+         get
+         {
+             return length;
+         }
+     }
+
+     public override int Index
+     {
+         get
+         {
+             return index - start;
+         }
+     }
+
+     public override char last()
+     {
+         // empty region: stay on the limit so current() reports DONE
+         index = (limit == start) ? limit : limit - 1;
+         return current();
+     }
+
+     public override char next()
+     {
+         if (++index >= limit)
+         {
+             index = limit; // clamp so repeated calls do not run away
+             return DONE;
+         }
+         return current();
+     }
+
+     public override char previous()
+     {
+         if (--index < start)
+         {
+             index = start; // clamp at the first position
+             return DONE;
+         }
+         return current();
+     }
+
+     /// <summary>
+     /// Moves to <paramref name="position"/>, given relative to the start of the region.
+     /// </summary>
+     /// <exception cref="System.ArgumentException"> if the position lies outside <c>[BeginIndex, EndIndex]</c> </exception>
+     public override char setIndex(int position)
+     {
+         if (position < BeginIndex || position > EndIndex)
+         {
+             throw new System.ArgumentException("Illegal Position: " + position);
+         }
+         index = start + position;
+         return current();
+     }
+
+     public override CharArrayIterator clone()
+     {
+         try
+         {
+             return (CharArrayIterator)base.clone();
+         }
+         catch (CloneNotSupportedException e)
+         {
+             // CharacterIterator does not allow you to throw CloneNotSupported.
+             // System.Exception has no constructor taking only an inner exception,
+             // so wrap it explicitly and keep the cause attached.
+             throw new InvalidOperationException(e.ToString(), e);
+         }
+     }
+
+     /// <summary>
+     /// Create a new CharArrayIterator that works around JRE bugs
+     /// in a manner suitable for <c>BreakIterator.getSentenceInstance()</c>
+     /// </summary>
+     public static CharArrayIterator newSentenceInstance()
+     {
+         if (HAS_BUGGY_BREAKITERATORS)
+         {
+             return new CharArrayIteratorAnonymousInnerClassHelper();
+         }
+         return new CharArrayIteratorAnonymousInnerClassHelper2();
+     }
+
+     private class CharArrayIteratorAnonymousInnerClassHelper : CharArrayIterator
+     {
+         public CharArrayIteratorAnonymousInnerClassHelper()
+         {
+         }
+
+         // work around this for now by lying about all surrogates to
+         // the sentence tokenizer, instead we treat them all as
+         // SContinue so we won't break around them.
+         protected internal override char jreBugWorkaround(char ch)
+         {
+             // the cast is required: "cond ? 0x002C : ch" would otherwise be typed int
+             return ch >= 0xD800 && ch <= 0xDFFF ? (char)0x002C : ch;
+         }
+     }
+
+     private class CharArrayIteratorAnonymousInnerClassHelper2 : CharArrayIterator
+     {
+         public CharArrayIteratorAnonymousInnerClassHelper2()
+         {
+         }
+
+         // no bugs
+         protected internal override char jreBugWorkaround(char ch)
+         {
+             return ch;
+         }
+     }
+
+     /// <summary>
+     /// Create a new CharArrayIterator that works around JRE bugs
+     /// in a manner suitable for <c>BreakIterator.getWordInstance()</c>
+     /// </summary>
+     public static CharArrayIterator newWordInstance()
+     {
+         if (HAS_BUGGY_BREAKITERATORS)
+         {
+             return new CharArrayIteratorAnonymousInnerClassHelper3();
+         }
+         return new CharArrayIteratorAnonymousInnerClassHelper4();
+     }
+
+     private class CharArrayIteratorAnonymousInnerClassHelper3 : CharArrayIterator
+     {
+         public CharArrayIteratorAnonymousInnerClassHelper3()
+         {
+         }
+
+         // work around this for now by lying about all surrogates to the word,
+         // instead we treat them all as ALetter so we won't break around them.
+         protected internal override char jreBugWorkaround(char ch)
+         {
+             // the cast is required: "cond ? 0x0041 : ch" would otherwise be typed int
+             return ch >= 0xD800 && ch <= 0xDFFF ? (char)0x0041 : ch;
+         }
+     }
+
+     private class CharArrayIteratorAnonymousInnerClassHelper4 : CharArrayIterator
+     {
+         public CharArrayIteratorAnonymousInnerClassHelper4()
+         {
+         }
+
+         // no bugs
+         protected internal override char jreBugWorkaround(char ch)
+         {
+             return ch;
+         }
+     }
+
+     /// <summary>
+     /// True if this JRE has a buggy BreakIterator implementation
+     /// </summary>
+     public static readonly bool HAS_BUGGY_BREAKITERATORS;
+
+     // Probes the BreakIterator once with a supplementary character (a surrogate
+     // pair); any exception thrown by the probe marks the implementation as buggy.
+     static CharArrayIterator()
+     {
+         bool v;
+         try
+         {
+             BreakIterator bi = BreakIterator.getSentenceInstance(Locale.US);
+             bi.Text = "\udb40\udc53";
+             bi.next();
+             v = false;
+         }
+         catch (Exception)
+         {
+             // deliberate: failure of the probe IS the signal being detected
+             v = true;
+         }
+         HAS_BUGGY_BREAKITERATORS = v;
+     }
+ }
+
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Util/CharArrayMap.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Util/CharArrayMap.cs b/src/Lucene.Net.Analysis.Common/Analysis/Util/CharArrayMap.cs
new file mode 100644
index 0000000..1086572
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Util/CharArrayMap.cs
@@ -0,0 +1,928 @@
+using System;
+using System.Diagnostics;
+using System.Collections;
+using System.Collections.Generic;
+using System.Text;
+
+namespace org.apache.lucene.analysis.util
+{
+
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+ using Version = org.apache.lucene.util.Version;
+
+
+ /// <summary>
+ /// A simple class that stores key Strings as char[]'s in a
+ /// hash table. Note that this is not a general purpose
+ /// class. For example, it cannot remove items from the
+ /// map, nor does it resize its hash table to be smaller,
+ /// etc. It is designed to be quick to retrieve items
+ /// by char[] keys without the necessity of converting
+ /// to a String first.
+ ///
+ /// <a name="version"></a>
+ /// <para>You must specify the required <seealso cref="Version"/>
+ /// compatibility when creating <seealso cref="CharArrayMap"/>:
+ /// <ul>
+ /// <li> As of 3.1, supplementary characters are
+ /// properly lowercased.</li>
+ /// </ul>
+ /// Before 3.1 supplementary characters could not be
+ /// lowercased correctly due to the lack of Unicode 4
+ /// support in JDK 1.4. To use instances of
+ /// <seealso cref="CharArrayMap"/> with the behavior before Lucene
+ /// 3.1 pass a <seealso cref="Version"/> &lt; 3.1 to the constructors.
+ /// </para>
+ /// </summary>
+ public class CharArrayMap<V> : AbstractMap<object, V>
+ {
+ // Shared empty, unmodifiable instance handed out by emptyMap().
+ // Java declared it with a wildcard (CharArrayMap<?>). C# has no wildcard generics, and a
+ // static field of a generic class exists once per constructed type, so it is typed with V.
+//ORIGINAL LINE: private static final CharArrayMap<?> EMPTY_MAP = new EmptyCharArrayMap<>();
+ private static readonly CharArrayMap<V> EMPTY_MAP = new EmptyCharArrayMap<V>();
+
+ // Smallest table size ever allocated by the sizing constructor.
+ private const int INIT_SIZE = 8;
+ // Code point helper; used for lowercasing and code point decoding in ignore-case mode.
+ private readonly CharacterUtils charUtils;
+ // When true, keys are lowercased on put and lookups compare case-insensitively.
+ private bool ignoreCase;
+ // Number of occupied slots.
+ private int count;
+ internal readonly Version matchVersion; // package private because used in CharArraySet
+ internal char[][] keys; // package private because used in CharArraySet's non Set-conform CharArraySetIterator
+ internal V[] values; // package private because used in CharArraySet's non Set-conform CharArraySetIterator
+
+ /// <summary>
+ /// Create map with enough capacity to hold startSize terms
+ /// </summary>
+ /// <param name="matchVersion">
+ /// compatibility match version see <a href="#version">Version
+ /// note</a> above for details. </param>
+ /// <param name="startSize">
+ /// the initial capacity </param>
+ /// <param name="ignoreCase">
+ /// <code>false</code> if and only if the set should be case sensitive
+ /// otherwise <code>true</code>. </param>
+ public CharArrayMap(Version matchVersion, int startSize, bool ignoreCase)
+ {
+     this.ignoreCase = ignoreCase;
+     // Start at INIT_SIZE and double until startSize * 1.25 fits, so the table
+     // length is always INIT_SIZE times a power of two.
+     int capacity = INIT_SIZE;
+     while (startSize + (startSize >> 2) > capacity)
+     {
+         capacity <<= 1;
+     }
+     keys = new char[capacity][];
+     // Allocate V[] directly. .NET arrays know their element type, so the Java idiom
+     // of casting a freshly created object[] to V[] is not valid here.
+     values = new V[capacity];
+     this.charUtils = CharacterUtils.getInstance(matchVersion);
+     this.matchVersion = matchVersion;
+ }
+
+ /// <summary>
+ /// Creates a map from the mappings in another map.
+ /// </summary>
+ /// <param name="matchVersion">
+ /// compatibility match version see <a href="#version">Version
+ /// note</a> above for details. </param>
+ /// <param name="c">
+ /// a map whose mappings to be copied </param>
+ /// <param name="ignoreCase">
+ /// <code>false</code> if and only if the set should be case sensitive
+ /// otherwise <code>true</code>. </param>
+ // C# constructors cannot declare their own type parameters, so the Java
+ // "Map<?, ? extends V>" parameter is expressed as IDictionary<object, V>,
+ // matching the AbstractMap<object, V> base of this class.
+ // NOTE(review): putAll is not defined in this class; presumably inherited from AbstractMap - confirm.
+ public CharArrayMap(Version matchVersion, IDictionary<object, V> c, bool ignoreCase) : this(matchVersion, c.Count, ignoreCase)
+ {
+     putAll(c);
+ }
+
+ /// <summary>
+ /// Creates a map that shares state with <paramref name="toCopy"/> (used internally for readonly maps).
+ /// The key and value arrays are taken by reference, not copied, so both maps see the same table.
+ /// </summary>
+ /// <param name="toCopy"> the map whose arrays and settings are adopted </param>
+ private CharArrayMap(CharArrayMap<V> toCopy)
+ {
+     // settings
+     this.matchVersion = toCopy.matchVersion;
+     this.charUtils = toCopy.charUtils;
+     this.ignoreCase = toCopy.ignoreCase;
+     // table contents (shared references)
+     this.count = toCopy.count;
+     this.keys = toCopy.keys;
+     this.values = toCopy.values;
+ }
+
+ /// <summary>
+ /// Clears all entries in this map. This method is supported for reusing, but not <seealso cref="Map#remove"/>.
+ /// The table keeps its current size; only its contents are reset. </summary>
+ public override void clear()
+ {
+     count = 0;
+     // Array.Clear is the .NET counterpart of java.util.Arrays.fill(array, null):
+     // it resets every element to its default value.
+     Array.Clear(keys, 0, keys.Length);
+     Array.Clear(values, 0, values.Length);
+ }
+
+ /// <summary>
+ /// Tests whether the <code>len</code> chars of <code>text</code> starting at
+ /// <code>off</code> are present as a key.
+ /// </summary>
+ /// <returns> true if the slot located for the text holds a key </returns>
+ public virtual bool containsKey(char[] text, int off, int len)
+ {
+     int slot = getSlot(text, off, len);
+     return keys[slot] != null;
+ }
+
+ /// <summary>
+ /// Tests whether the <code>CharSequence</code> is present as a key. </summary>
+ /// <returns> true if the slot located for the sequence holds a key </returns>
+ public virtual bool containsKey(CharSequence cs)
+ {
+     int slot = getSlot(cs);
+     return keys[slot] != null;
+ }
+
+ /// <summary>
+ /// Tests whether the given object is present as a key. A <code>char[]</code> is probed
+ /// directly; any other object is probed by the characters of its <code>ToString()</code>.
+ /// A null argument fails on the <code>ToString()</code> call. </summary>
+ public override bool containsKey(object o)
+ {
+     char[] text = o as char[];
+     if (text == null)
+     {
+         // Probe with the characters themselves. Calling containsKey(o.ToString()) would bind
+         // to this object overload again unless CharSequence converts from System.String,
+         // which would recurse without end.
+         text = o.ToString().ToCharArray();
+     }
+     return containsKey(text, 0, text.Length);
+ }
+
+ /// <summary>
+ /// Looks up the value mapped to the <code>len</code> chars of <code>text</code>
+ /// starting at <code>off</code>.
+ /// </summary>
+ /// <returns> the content of the value table at the slot located for the text </returns>
+ public virtual V get(char[] text, int off, int len)
+ {
+     int slot = getSlot(text, off, len);
+     return values[slot];
+ }
+
+ /// <summary>
+ /// Looks up the value mapped to the chars inside this {@code CharSequence}. </summary>
+ /// <returns> the content of the value table at the slot located for the sequence </returns>
+ public virtual V get(CharSequence cs)
+ {
+     int slot = getSlot(cs);
+     return values[slot];
+ }
+
+ /// <summary>
+ /// Looks up the value mapped to the given object. A <code>char[]</code> is used directly;
+ /// any other object is looked up by the characters of its <code>ToString()</code>.
+ /// A null argument fails on the <code>ToString()</code> call. </summary>
+ public override V get(object o)
+ {
+     char[] text = o as char[];
+     if (text == null)
+     {
+         // Look up by the characters themselves. Calling get(o.ToString()) would bind to this
+         // object overload again unless CharSequence converts from System.String,
+         // which would recurse without end.
+         text = o.ToString().ToCharArray();
+     }
+     return get(text, 0, text.Length);
+ }
+
+ // Locates the table slot for text[off .. off+len): either the slot already holding an
+ // equal key, or the first empty slot met while probing. The table length is used as a
+ // bit mask (keys.Length - 1).
+ private int getSlot(char[] text, int off, int len)
+ {
+     int hash = getHashCode(text, off, len);
+     int slot = hash & (keys.Length - 1);
+     char[] occupant = keys[slot];
+     if (occupant == null || Equals(text, off, len, occupant))
+     {
+         return slot;
+     }
+     // Collision: keep stepping by a hash-derived stride; "| 1" forces the stride to be odd.
+     int stride = ((hash >> 8) + hash) | 1;
+     while (true)
+     {
+         hash += stride;
+         slot = hash & (keys.Length - 1);
+         occupant = keys[slot];
+         if (occupant == null || Equals(text, off, len, occupant))
+         {
+             return slot;
+         }
+     }
+ }
+
+ /// <summary>
+ /// Locates the table slot for the given sequence: either the slot already holding an
+ /// equal key, or the first empty slot met while probing. </summary>
+ private int getSlot(CharSequence text)
+ {
+     int hash = getHashCode(text);
+     int slot = hash & (keys.Length - 1);
+     char[] occupant = keys[slot];
+     if (occupant == null || Equals(text, occupant))
+     {
+         return slot;
+     }
+     // Collision: keep stepping by a hash-derived stride; "| 1" forces the stride to be odd.
+     int stride = ((hash >> 8) + hash) | 1;
+     while (true)
+     {
+         hash += stride;
+         slot = hash & (keys.Length - 1);
+         occupant = keys[slot];
+         if (occupant == null || Equals(text, occupant))
+         {
+             return slot;
+         }
+     }
+ }
+
+ /// <summary>
+ /// Adds the given mapping, keyed by the string form of the sequence. </summary>
+ /// <returns> whatever the string overload of put returns </returns>
+ public virtual V put(CharSequence text, V value)
+ {
+     // could be more efficient: the sequence is materialised as a string first
+     string key = text.ToString();
+     return put(key, value);
+ }
+
+ /// <summary>
+ /// Adds the given mapping. A <code>char[]</code> key goes to the char[] overload;
+ /// any other object is keyed by its <code>ToString()</code>. </summary>
+ public override V put(object o, V value)
+ {
+     char[] chars = o as char[];
+     return chars != null ? put(chars, value) : put(o.ToString(), value);
+ }
+
+ /// <summary>
+ /// Adds the given mapping, keyed by a fresh char[] copy of the string. </summary>
+ public virtual V put(string text, V value)
+ {
+     char[] chars = text.ToCharArray();
+     return put(chars, value);
+ }
+
+ /// <summary>
+ /// Add the given mapping.
+ /// If ignoreCase is true for this Set, the text array will be directly modified.
+ /// The user should never modify this text array after calling this method.
+ /// </summary>
+ /// <param name="text"> the key; stored by reference, not copied </param>
+ /// <param name="value"> the value to associate with the key </param>
+ /// <returns> the value previously stored for an equal key, or <code>default(V)</code>
+ /// (null for reference types) if the key was not present </returns>
+ public virtual V put(char[] text, V value)
+ {
+     if (ignoreCase)
+     {
+         charUtils.ToLower(text, 0, text.Length);
+     }
+     int slot = getSlot(text, 0, text.Length);
+     if (keys[slot] != null)
+     {
+         // key already present: replace the value in place, the count does not change
+         V oldValue = values[slot];
+         values[slot] = value;
+         return oldValue;
+     }
+     keys[slot] = text;
+     values[slot] = value;
+     count++;
+
+     // grow once count * 1.25 exceeds the table length
+     if (count + (count >> 2) > keys.Length)
+     {
+         rehash();
+     }
+
+     // "null" is not a valid value of an unconstrained V; default(V) is null for reference types
+     return default(V);
+ }
+
+ // Doubles the table and re-inserts every existing key/value pair at its new slot.
+ private void rehash()
+ {
+     Debug.Assert(keys.Length == values.Length);
+     int newSize = 2 * keys.Length;
+     char[][] oldkeys = keys;
+     V[] oldvalues = values;
+     keys = new char[newSize][];
+     // Allocate V[] directly; an object[] cannot be cast to V[] in .NET.
+     values = new V[newSize];
+
+     for (int i = 0; i < oldkeys.Length; i++)
+     {
+         char[] text = oldkeys[i];
+         if (text != null)
+         {
+             // todo: could be faster... no need to compare strings on collision
+             int slot = getSlot(text, 0, text.Length);
+             keys[slot] = text;
+             values[slot] = oldvalues[i];
+         }
+     }
+ }
+
+ // Compares text1[off .. off+len) with the stored key text2.
+ // Case-sensitive mode compares char by char. Ignore-case mode lowercases each code point
+ // of text1 and compares it with the code point at the same index of text2, unchanged.
+ // NOTE(review): char.ToLower(int) and char.charCount(int) mirror java.lang.Character;
+ // System.Char does not appear to offer them - confirm which helper the port should call.
+ private bool Equals(char[] text1, int off, int len, char[] text2)
+ {
+     if (text2.Length != len)
+     {
+         return false;
+     }
+     if (!ignoreCase)
+     {
+         for (int k = 0; k < len; k++)
+         {
+             if (text2[k] != text1[off + k])
+             {
+                 return false;
+             }
+         }
+         return true;
+     }
+     int end = off + len;
+     int idx = 0;
+     while (idx < len)
+     {
+         int cp = charUtils.codePointAt(text1, off + idx, end);
+         if (char.ToLower(cp) != charUtils.codePointAt(text2, idx, text2.Length))
+         {
+             return false;
+         }
+         // advance by the number of chars the code point occupies
+         idx += char.charCount(cp);
+     }
+     return true;
+ }
+
+ // Compares the sequence text1 with the stored key text2.
+ // Case-sensitive mode compares char by char. Ignore-case mode lowercases each code point
+ // of text1 and compares it with the code point at the same index of text2, unchanged.
+ // NOTE(review): char.ToLower(int) and char.charCount(int) mirror java.lang.Character;
+ // System.Char does not appear to offer them - confirm which helper the port should call.
+ private bool Equals(CharSequence text1, char[] text2)
+ {
+     int length = text1.length();
+     if (text2.Length != length)
+     {
+         return false;
+     }
+     if (!ignoreCase)
+     {
+         for (int k = 0; k < length; k++)
+         {
+             if (text2[k] != text1.charAt(k))
+             {
+                 return false;
+             }
+         }
+         return true;
+     }
+     int idx = 0;
+     while (idx < length)
+     {
+         int cp = charUtils.codePointAt(text1, idx);
+         if (char.ToLower(cp) != charUtils.codePointAt(text2, idx, text2.Length))
+         {
+             return false;
+         }
+         // advance by the number of chars the code point occupies
+         idx += char.charCount(cp);
+     }
+     return true;
+ }
+
+ // Hashes text[offset .. offset+len) with the 31-multiplier polynomial. In ignore-case
+ // mode the lowercased code points are hashed, otherwise the raw chars.
+ // Throws NullReferenceException when text is null.
+ // NOTE(review): char.ToLower(int) and char.charCount(int) mirror java.lang.Character;
+ // System.Char does not appear to offer them - confirm which helper the port should call.
+ private int getHashCode(char[] text, int offset, int len)
+ {
+     if (text == null)
+     {
+         throw new System.NullReferenceException();
+     }
+     int end = offset + len;
+     int hash = 0;
+     if (!ignoreCase)
+     {
+         for (int k = offset; k < end; k++)
+         {
+             hash = hash * 31 + text[k];
+         }
+         return hash;
+     }
+     int idx = offset;
+     while (idx < end)
+     {
+         int cp = charUtils.codePointAt(text, idx, end);
+         hash = hash * 31 + char.ToLower(cp);
+         idx += char.charCount(cp);
+     }
+     return hash;
+ }
+
+ // Hashes the sequence with the 31-multiplier polynomial. In ignore-case mode the
+ // lowercased code points are hashed, otherwise the raw chars.
+ // Throws NullReferenceException when text is null.
+ // NOTE(review): char.ToLower(int) and char.charCount(int) mirror java.lang.Character;
+ // System.Char does not appear to offer them - confirm which helper the port should call.
+ private int getHashCode(CharSequence text)
+ {
+     if (text == null)
+     {
+         throw new System.NullReferenceException();
+     }
+     int length = text.length();
+     int hash = 0;
+     if (!ignoreCase)
+     {
+         for (int k = 0; k < length; k++)
+         {
+             hash = hash * 31 + text.charAt(k);
+         }
+         return hash;
+     }
+     int idx = 0;
+     while (idx < length)
+     {
+         int cp = charUtils.codePointAt(text, idx);
+         hash = hash * 31 + char.ToLower(cp);
+         idx += char.charCount(cp);
+     }
+     return hash;
+ }
+
+ /// <summary>
+ /// Removal is not supported by this map; this method always throws. </summary>
+ /// <exception cref="System.NotSupportedException"> always </exception>
+ public override V remove(object key)
+ {
+ throw new System.NotSupportedException();
+ }
+
+ /// <summary>
+ /// Returns the number of mappings currently stored (the internal entry counter). </summary>
+ public override int size()
+ {
+ return count;
+ }
+
+ /// <summary>
+ /// Renders the map as <code>{entry, entry, ...}</code>, using each entry's own
+ /// string form and separating entries with a comma and a space. </summary>
+ public override string ToString()
+ {
+     StringBuilder text = new StringBuilder("{");
+     foreach (KeyValuePair<object, V> entry in entrySet())
+     {
+         // anything beyond the opening brace means an earlier entry was already written
+         if (text.Length > 1)
+         {
+             text.Append(", ");
+         }
+         text.Append(entry);
+     }
+     text.Append('}');
+     return text.ToString();
+ }
+
+ // Lazily created views, cached by entrySet() and keySet() respectively.
+ private EntrySet entrySet_Renamed = null;
+ private CharArraySet keySet_Renamed = null;
+
+ // Factory for the entry-set view. The base implementation creates a view that
+ // allows modification (allowModify = true).
+ internal virtual EntrySet createEntrySet()
+ {
+ return new EntrySet(this, true);
+ }
+
+ /// <summary>
+ /// Returns the entry-set view of this map. The view is created on first use through
+ /// createEntrySet() and the same instance is returned afterwards. </summary>
+ public override EntrySet entrySet()
+ {
+     return entrySet_Renamed ?? (entrySet_Renamed = createEntrySet());
+ }
+
+ // helper for CharArraySet to not produce endless recursion:
+ // exposes the base class's key collection instead of this class's keySet() view.
+ // NOTE(review): base.Keys comes from AbstractMap, which is not visible here -
+ // confirm that it is assignable to HashSet<object>.
+ internal HashSet<object> originalKeySet()
+ {
+ return base.Keys;
+ }
+
+ /// <summary>
+ /// Returns an <seealso cref="CharArraySet"/> view on the map's keys.
+ /// The set will use the same {@code matchVersion} as this map.
+ /// The view is created on first use and cached; its add methods throw
+ /// <code>NotSupportedException</code>.
+ /// </summary>
+//JAVA TO C# CONVERTER TODO TASK: Most Java annotations will not have direct .NET equivalent attributes:
+//ORIGINAL LINE: @Override @SuppressWarnings({"unchecked","rawtypes"}) public final CharArraySet keySet()
+ // NOTE(review): "(CharArrayMap) this" is a Java raw-type cast. C# has no raw types, and
+ // CharArraySet's constructor takes a CharArrayMap<object> - decide how a CharArrayMap<V>
+ // is to be handed over before this can compile.
+ public override CharArraySet keySet()
+ {
+ if (keySet_Renamed == null)
+ {
+ // prevent adding of entries
+ keySet_Renamed = new CharArraySetAnonymousInnerClassHelper(this, (CharArrayMap) this);
+ }
+ return keySet_Renamed;
+ }
+
+ // Key-set view returned by keySet(): a CharArraySet over the map in which every
+ // add overload throws NotSupportedException, so entries cannot be added through the view.
+ private class CharArraySetAnonymousInnerClassHelper : CharArraySet
+ {
+ private readonly CharArrayMap<V> outerInstance;
+
+ // NOTE(review): the second parameter and the base(...) argument are leftovers of the
+ // Java raw-type cast "(CharArrayMap) this" and are not valid C#.
+ public CharArraySetAnonymousInnerClassHelper(CharArrayMap<V> outerInstance, CharArrayMap (CharArrayMap) this) : base((CharArrayMap) this)
+ {
+ this.outerInstance = outerInstance;
+ }
+
+ // All add overloads are rejected: the view is read-only with respect to insertion.
+ public override bool add(object o)
+ {
+ throw new System.NotSupportedException();
+ }
+ public override bool add(CharSequence text)
+ {
+ throw new System.NotSupportedException();
+ }
+ public override bool add(string text)
+ {
+ throw new System.NotSupportedException();
+ }
+ public override bool add(char[] text)
+ {
+ throw new System.NotSupportedException();
+ }
+ }
+
+ /// <summary>
+ /// public iterator class so efficient methods are exposed to users.
+ /// The Java-style members (hasNext/next/nextKey/...) are kept, and the
+ /// <code>IEnumerator</code> members are implemented on top of them, so the class
+ /// fulfils the interface it declares. </summary>
+ // NOTE(review): next() builds a MapEntry, which this file declares as deriving from
+ // KeyValuePair<object, V>; that modelling is kept here unchanged.
+ public class EntryIterator : IEnumerator<KeyValuePair<object, V>>
+ {
+     private readonly CharArrayMap<V> outerInstance;
+
+     // slot of the next occupied entry; equals keys.Length once the table is exhausted
+     internal int pos = -1;
+     // slot of the entry most recently stepped over by goNext()
+     internal int lastPos;
+     internal readonly bool allowModify;
+     // entry most recently produced by MoveNext()
+     private KeyValuePair<object, V> current;
+
+     internal EntryIterator(CharArrayMap<V> outerInstance, bool allowModify)
+     {
+         this.outerInstance = outerInstance;
+         this.allowModify = allowModify;
+         goNext();
+     }
+
+     // Remembers the current slot in lastPos and advances pos to the next occupied slot.
+     internal virtual void goNext()
+     {
+         lastPos = pos;
+         pos++;
+         while (pos < outerInstance.keys.Length && outerInstance.keys[pos] == null)
+         {
+             pos++;
+         }
+     }
+
+     /// <summary>
+     /// true while another occupied slot remains </summary>
+     public virtual bool hasNext()
+     {
+         return pos < outerInstance.keys.Length;
+     }
+
+     /// <summary>
+     /// gets the next key... do not modify the returned char[] </summary>
+     public virtual char[] nextKey()
+     {
+         goNext();
+         return outerInstance.keys[lastPos];
+     }
+
+     /// <summary>
+     /// gets the next key as a newly created String object </summary>
+     public virtual string nextKeyString()
+     {
+         return new string(nextKey());
+     }
+
+     /// <summary>
+     /// returns the value associated with the last key returned </summary>
+     public virtual V currentValue()
+     {
+         return outerInstance.values[lastPos];
+     }
+
+     /// <summary>
+     /// sets the value associated with the last key returned </summary>
+     /// <exception cref="System.NotSupportedException"> if the iterator was created without allowModify </exception>
+     public virtual V setValue(V value)
+     {
+         if (!allowModify)
+         {
+             throw new System.NotSupportedException();
+         }
+         V old = outerInstance.values[lastPos];
+         outerInstance.values[lastPos] = value;
+         return old;
+     }
+
+     /// <summary>
+     /// use nextCharArray() + currentValue() for better efficiency. </summary>
+     public virtual KeyValuePair<object, V> next()
+     {
+         goNext();
+         return new MapEntry(outerInstance, lastPos, allowModify);
+     }
+
+     /// <summary>
+     /// removal is not supported; always throws </summary>
+     public virtual void remove()
+     {
+         throw new System.NotSupportedException();
+     }
+
+     /// <summary>
+     /// the entry produced by the most recent successful <code>MoveNext()</code> </summary>
+     public KeyValuePair<object, V> Current
+     {
+         get
+         {
+             return current;
+         }
+     }
+
+     object IEnumerator.Current
+     {
+         get
+         {
+             return current;
+         }
+     }
+
+     /// <summary>
+     /// advances to the next entry, if any, and stores it in <code>Current</code> </summary>
+     /// <returns> false once every occupied slot has been visited </returns>
+     public bool MoveNext()
+     {
+         if (!hasNext())
+         {
+             return false;
+         }
+         current = next();
+         return true;
+     }
+
+     /// <summary>
+     /// rewinds to the state the constructor establishes </summary>
+     public void Reset()
+     {
+         pos = -1;
+         goNext();
+         current = default(KeyValuePair<object, V>);
+     }
+
+     /// <summary>
+     /// nothing to release </summary>
+     public void Dispose()
+     {
+     }
+ }
+
+ // Live view of one table slot: Key and Value read the owning map's arrays at "pos",
+ // and setValue writes through to them when modification is allowed.
+ // NOTE(review): System.Collections.Generic.KeyValuePair is a struct and cannot be used as a
+ // base type, nor does it have overridable members - the entry base type still needs a decision.
+ private sealed class MapEntry : KeyValuePair<object, V>
+ {
+     private readonly CharArrayMap<V> outerInstance;
+
+     internal readonly int pos;
+     internal readonly bool allowModify;
+
+     internal MapEntry(CharArrayMap<V> outerInstance, int pos, bool allowModify)
+     {
+         this.outerInstance = outerInstance;
+         this.pos = pos;
+         this.allowModify = allowModify;
+     }
+
+     public override object Key
+     {
+         get
+         {
+             // we must clone here, as putAll to another CharArrayMap
+             // with other case sensitivity flag would corrupt the keys
+             // (System.Array exposes Clone(), not Java's clone())
+             return outerInstance.keys[pos].Clone();
+         }
+     }
+
+     public override V Value
+     {
+         get
+         {
+             return outerInstance.values[pos];
+         }
+     }
+
+     // Replaces the value stored in this slot and returns the previous one.
+     // Throws NotSupportedException when the entry was created without allowModify.
+     public override V setValue(V value)
+     {
+         if (!allowModify)
+         {
+             throw new System.NotSupportedException();
+         }
+         V old = outerInstance.values[pos];
+         outerInstance.values[pos] = value;
+         return old;
+     }
+
+     // Renders "key=value"; a value that is the owning map itself is shown as "(this Map)".
+     public override string ToString()
+     {
+         // View the value as object: an unconstrained V cannot be compared with the map using
+         // "==", and the two conditional branches need a common type.
+         object value = outerInstance.values[pos];
+         return (new StringBuilder())
+             .Append(outerInstance.keys[pos])
+             .Append('=')
+             .Append(object.ReferenceEquals(value, outerInstance) ? "(this Map)" : value)
+             .ToString();
+     }
+ }
+
+ /// <summary>
+ /// public EntrySet class so efficient methods are exposed to users.
+ /// A view over the owning map's entries; clearing is only permitted when the
+ /// view was created with allowModify. </summary>
+ public sealed class EntrySet : AbstractSet<KeyValuePair<object, V>>
+ {
+     private readonly CharArrayMap<V> outerInstance;
+
+     internal readonly bool allowModify;
+
+     internal EntrySet(CharArrayMap<V> outerInstance, bool allowModify)
+     {
+         this.outerInstance = outerInstance;
+         this.allowModify = allowModify;
+     }
+
+     // Creates a new iterator over the owning map, with this view's modification setting.
+     public override EntryIterator iterator()
+     {
+         return new EntryIterator(outerInstance, allowModify);
+     }
+
+     // True if o is a key/value pair whose key maps, in the owning map, to a value equal
+     // to the pair's value (two nulls count as equal).
+     public override bool contains(object o)
+     {
+         // The type test must match the cast below. Testing for DictionaryEntry rejected
+         // every KeyValuePair, and let the cast throw for an actual DictionaryEntry.
+         if (!(o is KeyValuePair<object, V>))
+         {
+             return false;
+         }
+         KeyValuePair<object, V> e = (KeyValuePair<object, V>)o;
+         object key = e.Key;
+         object val = e.Value;
+         object v = outerInstance.get(key);
+         return v == null ? val == null : v.Equals(val);
+     }
+
+     // Removal is not supported; always throws.
+     public override bool remove(object o)
+     {
+         throw new System.NotSupportedException();
+     }
+
+     public override int size()
+     {
+         return outerInstance.count;
+     }
+
+     // Clears the owning map, or throws NotSupportedException for a read-only view.
+     public override void clear()
+     {
+         if (!allowModify)
+         {
+             throw new System.NotSupportedException();
+         }
+         outerInstance.clear();
+     }
+ }
+
+ /// <summary>
+ /// Returns an unmodifiable <seealso cref="CharArrayMap"/>. This allows to provide
+ /// unmodifiable views of internal map for "read-only" use.
+ /// </summary>
+ /// <param name="map">
+ /// a map for which the unmodifiable map is returned. </param>
+ /// <returns> an new unmodifiable <seealso cref="CharArrayMap"/>; the shared empty map when
+ /// the given map is empty, or the given map itself when it is already unmodifiable. </returns>
+ /// <exception cref="System.NullReferenceException">
+ /// if the given map is <code>null</code>. </exception>
+ public static CharArrayMap<V> unmodifiableMap<V>(CharArrayMap<V> map)
+ {
+     if (map == null)
+     {
+         throw new System.NullReferenceException("Given map is null");
+     }
+     // C# does not infer type arguments for a parameterless generic method, so they are
+     // spelled out. Emptiness is tested through size(), which this class defines.
+     if (map == emptyMap<V>() || map.size() == 0)
+     {
+         return emptyMap<V>();
+     }
+     if (map is UnmodifiableCharArrayMap<V>)
+     {
+         return map;
+     }
+     // the Java diamond operator has no C# counterpart; name the type argument
+     return new UnmodifiableCharArrayMap<V>(map);
+ }
+
+ /// <summary>
+ /// Returns a copy of the given map as a <seealso cref="CharArrayMap"/>. If the given map
+ /// is a <seealso cref="CharArrayMap"/> the ignoreCase property will be preserved.
+ /// <para>
+ /// <b>Note:</b> If you intend to create a copy of another <seealso cref="CharArrayMap"/> where
+ /// the <seealso cref="Version"/> of the source map differs from its copy
+ /// <seealso cref="#CharArrayMap(Version, Map, boolean)"/> should be used instead.
+ /// The <seealso cref="#copy(Version, Map)"/> will preserve the <seealso cref="Version"/> of the
+ /// source map if it is an instance of <seealso cref="CharArrayMap"/>.
+ /// </para>
+ /// </summary>
+ /// <param name="matchVersion">
+ /// compatibility match version see <a href="#version">Version
+ /// note</a> above for details. This argument will be ignored if the
+ /// given map is a <seealso cref="CharArrayMap"/>. </param>
+ /// <param name="map">
+ /// a map to copy </param>
+ /// <returns> a copy of the given map as a <seealso cref="CharArrayMap"/>. If the given map
+ /// is a <seealso cref="CharArrayMap"/> the ignoreCase property as well as the
+ /// matchVersion of the given map will be preserved. </returns>
+//ORIGINAL LINE: @SuppressWarnings("unchecked") public static <V> CharArrayMap<V> copy(final org.apache.lucene.util.Version matchVersion, final java.util.Map<?,? extends V> map)
+ public static CharArrayMap<V> copy<V>(Version matchVersion, IDictionary<object, V> map)
+ {
+     if (object.ReferenceEquals(map, EMPTY_MAP))
+     {
+         return emptyMap<V>();
+     }
+     CharArrayMap<V> source = map as CharArrayMap<V>;
+     if (source != null)
+     {
+         // use fast path instead of iterating all values
+         // this is even on very small sets ~10 times faster than iterating
+         char[][] keys = new char[source.keys.Length][];
+         Array.Copy(source.keys, 0, keys, 0, keys.Length);
+         // allocate V[] directly; an object[] cannot be cast to V[] in .NET
+         V[] values = new V[source.values.Length];
+         Array.Copy(source.values, 0, values, 0, values.Length);
+         // Adopt the source's settings through the sharing constructor, then swap in the
+         // copied arrays so the two maps no longer share a table.
+         CharArrayMap<V> result = new CharArrayMap<V>(source);
+         result.keys = keys;
+         result.values = values;
+         return result;
+     }
+     return new CharArrayMap<V>(matchVersion, map, false);
+ }
+
+ /// <summary>
+ /// Returns an empty, unmodifiable map. </summary>
+ /// <returns> the shared <code>EMPTY_MAP</code> instance, viewed as a map of V </returns>
+//ORIGINAL LINE: @SuppressWarnings("unchecked") public static <V> CharArrayMap<V> emptyMap()
+ public static CharArrayMap<V> emptyMap<V>()
+ {
+     // C# puts the type parameter list after the method name. The cast goes through
+     // object because the field's declared type need not be CharArrayMap<V> for this method's V.
+     return (CharArrayMap<V>)(object)EMPTY_MAP;
+ }
+
+ // package private CharArraySet instanceof check in CharArraySet
+ // Read-only wrapper: it adopts the wrapped map's arrays through the sharing constructor
+ // and rejects every mutating operation with NotSupportedException.
+ internal class UnmodifiableCharArrayMap<V> : CharArrayMap<V>
+ {
+     // C# chains to the base constructor in the initializer list, not in the body.
+     internal UnmodifiableCharArrayMap(CharArrayMap<V> map) : base(map)
+     {
+     }
+
+     public override void clear()
+     {
+         throw new System.NotSupportedException();
+     }
+
+     public override V put(object o, V val)
+     {
+         throw new System.NotSupportedException();
+     }
+
+     public override V put(char[] text, V val)
+     {
+         throw new System.NotSupportedException();
+     }
+
+     public override V put(CharSequence text, V val)
+     {
+         throw new System.NotSupportedException();
+     }
+
+     public override V put(string text, V val)
+     {
+         throw new System.NotSupportedException();
+     }
+
+     public override V remove(object key)
+     {
+         throw new System.NotSupportedException();
+     }
+
+     // The entry-set view of a read-only map is itself read-only (allowModify = false).
+     internal override EntrySet createEntrySet()
+     {
+         return new EntrySet(this, false);
+     }
+ }
+
+ /// <summary>
+ /// Empty <seealso cref="org.apache.lucene.analysis.util.CharArrayMap.UnmodifiableCharArrayMap"/> optimized for speed.
+ /// Contains checks will always return <code>false</code> or throw
+ /// NullReferenceException if the argument is null. Lookups return
+ /// <code>default(V)</code> under the same null check.
+ /// </summary>
+ private sealed class EmptyCharArrayMap<V> : UnmodifiableCharArrayMap<V>
+ {
+     // Wraps a freshly created zero-capacity, case-sensitive map.
+     internal EmptyCharArrayMap() : base(new CharArrayMap<V>(Version.LUCENE_CURRENT, 0, false))
+     {
+     }
+
+     public override bool containsKey(char[] text, int off, int len)
+     {
+         if (text == null)
+         {
+             throw new System.NullReferenceException();
+         }
+         return false;
+     }
+
+     public override bool containsKey(CharSequence cs)
+     {
+         if (cs == null)
+         {
+             throw new System.NullReferenceException();
+         }
+         return false;
+     }
+
+     public override bool containsKey(object o)
+     {
+         if (o == null)
+         {
+             throw new System.NullReferenceException();
+         }
+         return false;
+     }
+
+     // "null" is not a valid value of an unconstrained V; default(V) is null for reference types.
+     public override V get(char[] text, int off, int len)
+     {
+         if (text == null)
+         {
+             throw new System.NullReferenceException();
+         }
+         return default(V);
+     }
+
+     public override V get(CharSequence cs)
+     {
+         if (cs == null)
+         {
+             throw new System.NullReferenceException();
+         }
+         return default(V);
+     }
+
+     public override V get(object o)
+     {
+         if (o == null)
+         {
+             throw new System.NullReferenceException();
+         }
+         return default(V);
+     }
+ }
+ }
+
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Util/CharArraySet.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Util/CharArraySet.cs b/src/Lucene.Net.Analysis.Common/Analysis/Util/CharArraySet.cs
new file mode 100644
index 0000000..d9253d7
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Util/CharArraySet.cs
@@ -0,0 +1,267 @@
+using System.Collections.Generic;
+using System.Text;
+
+namespace org.apache.lucene.analysis.util
+{
+
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+ using Version = org.apache.lucene.util.Version;
+
+
+ /// <summary>
+ /// A simple class that stores Strings as char[]'s in a
+ /// hash table. Note that this is not a general purpose
+ /// class. For example, it cannot remove items from the
+ /// set, nor does it resize its hash table to be smaller,
+ /// etc. It is designed to be quick to test if a char[]
+ /// is in the set without the necessity of converting it
+ /// to a String first.
+ ///
+ /// <a name="version"></a>
+ /// <para>You must specify the required <seealso cref="Version"/>
+ /// compatibility when creating <seealso cref="CharArraySet"/>:
+ /// <ul>
+ /// <li> As of 3.1, supplementary characters are
+ /// properly lowercased.</li>
+ /// </ul>
+ /// Before 3.1 supplementary characters could not be
+ /// lowercased correctly due to the lack of Unicode 4
+ /// support in JDK 1.4. To use instances of
+ /// <seealso cref="CharArraySet"/> with the behavior before Lucene
+ /// 3.1 pass a <seealso cref="Version"/> &lt; 3.1 to the constructors.
+ /// </para>
+ /// <para>
+ /// <em>Please note:</em> This class implements <seealso cref="java.util.Set Set"/> but
+ /// does not behave like it should in all cases. The generic type is
+ /// {@code Set<Object>}, because you can add any object to it,
+ /// that has a string representation. The add methods will use
+ /// <seealso cref="Object#toString"/> and store the result using a {@code char[]}
+ /// buffer. The same behavior have the {@code contains()} methods.
+ /// The <seealso cref="#iterator()"/> returns an {@code Iterator<char[]>}.
+ /// </para>
+ /// </summary>
+ public class CharArraySet : AbstractSet<object>
+ {
+ // Shared empty set, backed by CharArrayMap's empty map.
+ public static readonly CharArraySet EMPTY_SET = new CharArraySet(CharArrayMap.emptyMap<object>());
+ // Dummy value that the add methods store for every key in the backing map.
+ private static readonly object PLACEHOLDER = new object();
+
+ // Backing map: the set's elements are this map's keys.
+ private readonly CharArrayMap<object> map;
+
+ /// <summary>
+ /// Create set with enough capacity to hold startSize terms
+ /// </summary>
+ /// <param name="matchVersion">
+ /// compatibility match version see <a href="#version">Version
+ /// note</a> above for details. </param>
+ /// <param name="startSize">
+ /// the initial capacity </param>
+ /// <param name="ignoreCase">
+ /// <code>false</code> if and only if the set should be case sensitive
+ /// otherwise <code>true</code>. </param>
+ // The Java diamond operator has no C# counterpart; the backing map is a CharArrayMap<object>,
+ // matching the type of the "map" field.
+ public CharArraySet(Version matchVersion, int startSize, bool ignoreCase) : this(new CharArrayMap<object>(matchVersion, startSize, ignoreCase))
+ {
+ }
+
+ /// <summary>
+ /// Creates a set from a Collection of objects.
+ /// </summary>
+ /// <param name="matchVersion">
+ /// compatibility match version see <a href="#version">Version
+ /// note</a> above for details. </param>
+ /// <param name="c">
+ /// a collection whose elements to be placed into the set </param>
+ /// <param name="ignoreCase">
+ /// <code>false</code> if and only if the set should be case sensitive
+ /// otherwise <code>true</code>. </param>
+ // C# constructors cannot declare their own type parameters, so the Java "Collection<?>"
+ // parameter is expressed as ICollection<object>, matching the AbstractSet<object> base.
+ // NOTE(review): addAll is not defined in the visible part of this class; presumably
+ // inherited from AbstractSet - confirm.
+ public CharArraySet(Version matchVersion, ICollection<object> c, bool ignoreCase) : this(matchVersion, c.Count, ignoreCase)
+ {
+     addAll(c);
+ }
+
+ /// <summary>
+ /// Create set from the specified map (internal only), used also by <seealso cref="CharArrayMap#keySet()"/>.
+ /// The map is stored by reference and becomes the set's backing store. </summary>
+//JAVA TO C# CONVERTER WARNING: 'final' parameters are not available in .NET:
+//ORIGINAL LINE: CharArraySet(final CharArrayMap<Object> map)
+ internal CharArraySet(CharArrayMap<object> map)
+ {
+ this.map = map;
+ }
+
+ /// <summary>
+ /// Clears all entries in this set by clearing the backing map. This method is supported for reusing, but not <seealso cref="Set#remove"/>. </summary>
+ public override void clear()
+ {
+ map.clear();
+ }
+
+ /// <summary>
+ /// Tests whether the <code>len</code> chars of <code>text</code> starting at
+ /// <code>off</code> are in the set, by asking the backing map for that key.
+ /// </summary>
+ public virtual bool contains(char[] text, int off, int len)
+ {
+     bool present = map.containsKey(text, off, len);
+     return present;
+ }
+
+ /// <summary>
+ /// Tests whether the <code>CharSequence</code> is in the set, by asking the backing map for that key. </summary>
+ public virtual bool contains(CharSequence cs)
+ {
+     bool present = map.containsKey(cs);
+     return present;
+ }
+
+ /// <summary>
+ /// Tests whether the object is in the set, by asking the backing map for that key. </summary>
+ public override bool contains(object o)
+ {
+     bool present = map.containsKey(o);
+     return present;
+ }
+
+ /// <summary>
+ /// Adds the object to the set by storing the placeholder value under it in the backing map. </summary>
+ /// <returns> true if the backing map reported no previous value for the key </returns>
+ public override bool add(object o)
+ {
+     object previous = map.put(o, PLACEHOLDER);
+     return previous == null;
+ }
+
+ /// <summary>
+ /// Adds this CharSequence to the set by storing the placeholder value under it in the backing map. </summary>
+ /// <returns> true if the backing map reported no previous value for the key </returns>
+ public virtual bool add(CharSequence text)
+ {
+     object previous = map.put(text, PLACEHOLDER);
+     return previous == null;
+ }
+
+ /// <summary>
+ /// Adds this String to the set by storing the placeholder value under it in the backing map. </summary>
+ /// <returns> true if the backing map reported no previous value for the key </returns>
+ public virtual bool add(string text)
+ {
+     object previous = map.put(text, PLACEHOLDER);
+     return previous == null;
+ }
+
+ /// <summary>
+ /// Adds the given char[] directly to the set.
+ /// If ignoreCase is true for this set, the text array will be modified in place.
+ /// Callers should never modify the array after passing it to this method.
+ /// </summary>
+ /// <returns> <code>true</code> if the backing map returned <code>null</code> for the previous value </returns>
+ public virtual bool add(char[] text)
+ {
+ object previous = map.put(text, PLACEHOLDER);
+ return previous == null;
+ }
+
+ /// <summary>
+ /// Returns the size reported by the backing map. </summary>
+ public override int size()
+ {
+ int count = map.size();
+ return count;
+ }
+
+ /// <summary>
+ /// Returns an unmodifiable <seealso cref="CharArraySet"/>, so that internal sets can be
+ /// exposed as views for "read-only" use.
+ /// </summary>
+ /// <param name="set">
+ /// the set for which an unmodifiable set is returned. </param>
+ /// <returns> <code>EMPTY_SET</code> when that is what was passed in, the given set itself when its
+ /// map is already an unmodifiable map, otherwise a new <seealso cref="CharArraySet"/>
+ /// wrapping an unmodifiable view of the given set's map. </returns>
+ /// <exception cref="System.NullReferenceException">
+ /// if the given set is <code>null</code>. </exception>
+ public static CharArraySet unmodifiableSet(CharArraySet set)
+ {
+ if (set == null)
+ {
+ throw new System.NullReferenceException("Given set is null");
+ }
+
+ CharArraySet result;
+ if (set == EMPTY_SET)
+ {
+ result = EMPTY_SET;
+ }
+ else if (set.map is CharArrayMap.UnmodifiableCharArrayMap)
+ {
+ // already read-only, no need to wrap it a second time
+ result = set;
+ }
+ else
+ {
+ result = new CharArraySet(CharArrayMap.unmodifiableMap(set.map));
+ }
+ return result;
+ }
+
+ /// <summary>
+ /// Returns a copy of the given set as a <seealso cref="CharArraySet"/>. If the given set
+ /// is itself a <seealso cref="CharArraySet"/>, its map is copied using the match version of the
+ /// source map, so the source's settings are carried over.
+ /// <para>
+ /// <b>Note:</b> If you intend to create a copy of another <seealso cref="CharArraySet"/> where
+ /// the <seealso cref="Version"/> of the copy should differ from that of the source set, use the
+ /// constructor taking a version, a collection and an ignoreCase flag instead.
+ /// This method preserves the <seealso cref="Version"/> of the source set if it is an
+ /// instance of <seealso cref="CharArraySet"/>.
+ /// </para>
+ /// </summary>
+ /// <param name="matchVersion">
+ /// compatibility match version see <a href="#version">Version
+ /// note</a> above for details. This argument is ignored if the
+ /// given set is a <seealso cref="CharArraySet"/>. </param>
+ /// <param name="set">
+ /// a set to copy </param>
+ /// <returns> <code>EMPTY_SET</code> when that is what was passed in; a copy built from the
+ /// source map when the given set is a <seealso cref="CharArraySet"/>; otherwise a new
+ /// case sensitive <seealso cref="CharArraySet"/> filled from the given set. </returns>
+ public static CharArraySet copy<T1>(Version matchVersion, HashSet<T1> set)
+ {
+ if (set == EMPTY_SET)
+ {
+ return EMPTY_SET;
+ }
+ if (!(set is CharArraySet))
+ {
+ // foreign set type: build a fresh case sensitive set from its elements
+ return new CharArraySet(matchVersion, set, false);
+ }
+ CharArraySet source = (CharArraySet) set;
+ return new CharArraySet(CharArrayMap.copy(source.map.matchVersion, source.map));
+ }
+
+ /// <summary>
+ /// Returns an enumerator over the entries of this set, obtained from the original key set
+ /// of the backing map.
+ /// </summary>
+ public override IEnumerator<object> iterator()
+ {
+ // go through the map's original key set; enumerating via this set again would recurse endlessly
+ IEnumerator<object> keys = map.originalKeySet().GetEnumerator();
+ return keys;
+ }
+
+ /// <summary>
+ /// Renders the entries of this set as a bracketed list separated by ", ". Entries that are
+ /// <code>char[]</code> are appended as characters; all other entries are appended as objects.
+ /// </summary>
+ public override string ToString()
+ {
+ StringBuilder text = new StringBuilder("[");
+ foreach (object entry in this)
+ {
+ // a separator is written whenever the builder holds more than the opening bracket
+ if (text.Length > 1)
+ {
+ text.Append(", ");
+ }
+ char[] chars = entry as char[];
+ if (chars != null)
+ {
+ text.Append(chars);
+ }
+ else
+ {
+ text.Append(entry);
+ }
+ }
+ text.Append(']');
+ return text.ToString();
+ }
+ }
+
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Util/CharFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Util/CharFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Util/CharFilterFactory.cs
new file mode 100644
index 0000000..e2f5b0a
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Util/CharFilterFactory.cs
@@ -0,0 +1,86 @@
+using System;
+using System.Collections.Generic;
+using Lucene.Net.Analysis.Util;
+
+namespace org.apache.lucene.analysis.util
+{
+
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+ /// <summary>
+ /// Abstract base class for analysis factories whose products are <seealso cref="CharFilter"/>
+ /// instances. Lookup, enumeration and reloading of the known factories are delegated to a
+ /// shared <seealso cref="AnalysisSPILoader{S}"/>.
+ /// </summary>
+ public abstract class CharFilterFactory : AbstractAnalysisFactory
+ {
+
+ private static readonly AnalysisSPILoader<CharFilterFactory> loader = new AnalysisSPILoader<CharFilterFactory>(typeof(CharFilterFactory));
+
+ /// <summary>
+ /// Looks up a charfilter by name and creates an instance of it with the given arguments. </summary>
+ /// <param name="name"> name of the charfilter factory </param>
+ /// <param name="args"> key-value arguments handed to the new instance </param>
+ public static CharFilterFactory forName(string name, IDictionary<string, string> args)
+ {
+ CharFilterFactory factory = loader.newInstance(name, args);
+ return factory;
+ }
+
+ /// <summary>
+ /// Looks up the charfilter class registered under the given name. </summary>
+ /// <param name="name"> name of the charfilter factory </param>
+ public static Type lookupClass(string name)
+ {
+ Type factoryType = loader.lookupClass(name);
+ return factoryType;
+ }
+
+ /// <summary>
+ /// Returns the names of all available charfilters. </summary>
+ public static HashSet<string> availableCharFilters()
+ {
+ HashSet<string> names = loader.availableServices();
+ return names;
+ }
+
+ /// <summary>
+ /// Reloads the factory list from the given <seealso cref="ClassLoader"/>.
+ /// Changes to the factories become visible after the method ends; all
+ /// iterators (<seealso cref="availableCharFilters()"/>,...) stay consistent.
+ ///
+ /// <para><b>NOTE:</b> Only new factories are added, existing ones are
+ /// never removed or replaced.
+ ///
+ /// </para>
+ /// <para><em>This method is expensive and should only be called for discovery
+ /// of new factories on the given classpath/classloader!</em>
+ /// </para>
+ /// </summary>
+ public static void reloadCharFilters(ClassLoader classloader)
+ {
+ loader.reload(classloader);
+ }
+
+ /// <summary>
+ /// Initializes this factory from a set of key-value pairs, which are passed on to the base class.
+ /// </summary>
+ protected internal CharFilterFactory(IDictionary<string, string> args) : base(args)
+ {
+ }
+
+ /// <summary>
+ /// Wraps the given Reader with a CharFilter. </summary>
+ /// <param name="input"> the reader to wrap </param>
+ public abstract Reader create(Reader input);
+ }
+
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/99717176/src/Lucene.Net.Analysis.Common/Analysis/Util/CharTokenizer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Util/CharTokenizer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Util/CharTokenizer.cs
new file mode 100644
index 0000000..1cd6395
--- /dev/null
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Util/CharTokenizer.cs
@@ -0,0 +1,209 @@
+using System.Diagnostics;
+using System.IO;
+using Lucene.Net.Analysis.Tokenattributes;
+using Lucene.Net.Support;
+using Lucene.Net.Util;
+using org.apache.lucene.analysis.util;
+
+namespace Lucene.Net.Analysis.Util
+{
+
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+ /// <summary>
+ /// An abstract base class for simple, character-oriented tokenizers.
+ /// <para>
+ /// <a name="version"></a>You must specify the required <seealso cref="Version"/> compatibility
+ /// when creating <seealso cref="CharTokenizer"/>:
+ /// <ul>
+ /// <li>As of 3.1, <seealso cref="CharTokenizer"/> uses an int based API to normalize and
+ /// detect token codepoints. See <seealso cref="IsTokenChar"/> and
+ /// <seealso cref="Normalize"/> for details.</li>
+ /// </ul>
+ /// </para>
+ /// <para>
+ /// A new <seealso cref="CharTokenizer"/> API has been introduced with Lucene 3.1. This API
+ /// moved from UTF-16 code units to UTF-32 codepoints to eventually add support
+ /// for <a href=
+ /// "http://java.sun.com/j2se/1.5.0/docs/api/java/lang/Character.html#supplementary"
+ /// >supplementary characters</a>. The old <i>char</i> based API has been
+ /// deprecated and should be replaced with the <i>int</i> based methods
+ /// <seealso cref="IsTokenChar"/> and <seealso cref="Normalize"/>.
+ /// </para>
+ /// <para>
+ /// As of Lucene 3.1 each <seealso cref="CharTokenizer"/> - constructor expects a
+ /// <seealso cref="Version"/> argument. Based on the given <seealso cref="Version"/> either the new
+ /// API or a backwards compatibility layer is used at runtime. For
+ /// <seealso cref="Version"/> &lt; 3.1 the backwards compatibility layer ensures correct
+ /// behavior even for indexes built with previous versions of Lucene. If a
+ /// <seealso cref="Version"/> &gt;= 3.1 is used <seealso cref="CharTokenizer"/> requires the new API to
+ /// be implemented by the instantiated class. Yet, the old <i>char</i> based API
+ /// is not required anymore even if backwards compatibility must be preserved.
+ /// <seealso cref="CharTokenizer"/> subclasses implementing the new API are fully backwards
+ /// compatible if instantiated with <seealso cref="Version"/> &lt; 3.1.
+ /// </para>
+ /// <para>
+ /// <strong>Note:</strong> If you use a subclass of <seealso cref="CharTokenizer"/> with <seealso cref="Version"/> &gt;=
+ /// 3.1 on an index built with a version &lt; 3.1, created tokens might not be
+ /// compatible with the terms in your index.
+ /// </para>
+ /// <para>
+ /// NOTE(review): in this port <seealso cref="IsTokenChar"/> is declared with a <c>char</c>
+ /// parameter while <seealso cref="Normalize"/> takes an <c>int</c> code point; the text above
+ /// describes the int based API of the original - confirm which signature is intended.
+ /// </para>
+ /// </summary>
+ public abstract class CharTokenizer : Tokenizer
+ {
+ // The reader handed to the constructor. Not read anywhere in this class; IncrementToken uses
+ // the name 'input', which is not declared in this class (presumably the base class member - confirm).
+ private readonly TextReader _input;
+
+ /// <summary>
+ /// Creates a new <seealso cref="CharTokenizer"/> instance
+ /// </summary>
+ /// <param name="matchVersion">
+ /// Lucene version to match </param>
+ /// <param name="input">
+ /// the input to split up into tokens </param>
+ public CharTokenizer(Version matchVersion, TextReader input)
+ : base(input)
+ {
+ // keep the field initialised the same way by both constructors
+ _input = input;
+ charUtils = CharacterUtils.getInstance(matchVersion);
+ }
+
+ /// <summary>
+ /// Creates a new <seealso cref="CharTokenizer"/> instance
+ /// </summary>
+ /// <param name="matchVersion">
+ /// Lucene version to match </param>
+ /// <param name="factory">
+ /// the attribute factory to use for this <seealso cref="Tokenizer"/> </param>
+ /// <param name="input">
+ /// the input to split up into tokens </param>
+ public CharTokenizer(Version matchVersion, AttributeFactory factory, TextReader input)
+ : base(factory, input)
+ {
+ _input = input;
+ charUtils = CharacterUtils.getInstance(matchVersion);
+ }
+
+ // offset: number of chars consumed by earlier buffer fills; bufferIndex: read position in ioBuffer;
+ // dataLen: length of the current ioBuffer content; finalOffset: corrected offset reported by End().
+ private int offset = 0, bufferIndex = 0, dataLen = 0, finalOffset = 0;
+ // A token is cut off once its length reaches this many chars.
+ private const int MAX_WORD_LEN = 255;
+ private const int IO_BUFFER_SIZE = 4096;
+
+ // NOTE(review): if addAttribute is an instance member, calling it from a field initializer is
+ // rejected by the C# compiler (CS0236); left as ported - confirm the attribute API and
+ // move these into the constructors if needed.
+ private readonly CharTermAttribute termAtt = addAttribute(typeof(CharTermAttribute));
+ private readonly OffsetAttribute offsetAtt = addAttribute(typeof(OffsetAttribute));
+
+ private readonly CharacterUtils charUtils;
+ private readonly CharacterUtils.CharacterBuffer ioBuffer = CharacterUtils.newCharacterBuffer(IO_BUFFER_SIZE);
+
+ /// <summary>
+ /// Returns true iff a character should be included in a token. This tokenizer
+ /// generates as tokens adjacent sequences of characters which satisfy this
+ /// predicate. Characters for which this is false are used to define token
+ /// boundaries and are not included in tokens.
+ /// </summary>
+ /// <param name="c">
+ /// the character to test </param>
+ protected internal abstract bool IsTokenChar(char c);
+
+ /// <summary>
+ /// Called on each token character to normalize it before it is added to the
+ /// token. The default implementation returns its argument unchanged. Subclasses may use this to,
+ /// e.g., lowercase tokens.
+ /// </summary>
+ /// <param name="c">
+ /// the code point to normalize </param>
+ /// <returns> the normalized code point </returns>
+ protected virtual int Normalize(int c)
+ {
+ return c;
+ }
+
+ /// <summary>
+ /// Advances to the next token: reads code points from the IO buffer (refilling it from the
+ /// input when exhausted), collects those accepted by <seealso cref="IsTokenChar"/> into the term
+ /// buffer after passing them through <seealso cref="Normalize"/>, and sets the term length and offsets.
+ /// </summary>
+ /// <returns> <c>true</c> if a token was produced, <c>false</c> once the input is exhausted
+ /// and no token characters are pending </returns>
+ public override bool IncrementToken()
+ {
+ ClearAttributes();
+ int length = 0;
+ int start = -1; // this variable is always initialized
+ int end_Renamed = -1;
+ char[] buffer = termAtt.Buffer();
+ while (true)
+ {
+ if (bufferIndex >= dataLen)
+ {
+ offset += dataLen;
+ charUtils.fill(ioBuffer, input); // read supplementary char aware with CharacterUtils
+ if (ioBuffer.Length == 0)
+ {
+ dataLen = 0; // so next offset += dataLen won't decrement offset
+ if (length > 0)
+ {
+ break;
+ }
+ else
+ {
+ finalOffset = CorrectOffset(offset);
+ return false;
+ }
+ }
+ dataLen = ioBuffer.Length;
+ bufferIndex = 0;
+ }
+ // use CharacterUtils here to support < 3.1 UTF-16 code unit behavior if the char based methods are gone
+ int c = charUtils.codePointAt(ioBuffer.Buffer, bufferIndex, ioBuffer.Length);
+ int charCount = Character.CharCount(c);
+ bufferIndex += charCount;
+
+ // IsTokenChar is declared with a char parameter, so the code point has to be narrowed here.
+ // NOTE(review): the cast truncates supplementary code points (above U+FFFF); switch the
+ // predicate to an int parameter if supplementary characters must be classified correctly.
+ if (IsTokenChar((char) c)) // if it's a token char
+ {
+ if (length == 0) // start of token
+ {
+ Debug.Assert(start == -1);
+ start = offset + bufferIndex - charCount;
+ end_Renamed = start;
+ } // check if a supplementary could run out of bounds
+ else if (length >= buffer.Length - 1)
+ {
+ buffer = termAtt.ResizeBuffer(2 + length); // make sure a supplementary fits in the buffer
+ }
+ end_Renamed += charCount;
+ length += Character.ToChars(Normalize(c), buffer, length); // buffer it, normalized
+ if (length >= MAX_WORD_LEN) // buffer overflow! make sure to check for >= surrogate pair could break == test
+ {
+ break;
+ }
+ } // at non-Letter w/ chars
+ else if (length > 0)
+ {
+ break; // return 'em
+ }
+ }
+
+ termAtt.Length = length;
+ Debug.Assert(start != -1);
+ offsetAtt.SetOffset(CorrectOffset(start), finalOffset = CorrectOffset(end_Renamed));
+ return true;
+ }
+
+ /// <summary>
+ /// Ends the token stream and sets both offsets of the offset attribute to the final offset.
+ /// </summary>
+ public override void End()
+ {
+ base.End();
+ // set final offset
+ offsetAtt.SetOffset(finalOffset, finalOffset);
+ }
+
+ /// <summary>
+ /// Resets the tokenizer: all position counters go back to zero and the IO buffer is reset.
+ /// </summary>
+ public override void Reset()
+ {
+ base.Reset();
+ bufferIndex = 0;
+ offset = 0;
+ dataLen = 0;
+ finalOffset = 0;
+ ioBuffer.reset(); // make sure to reset the IO buffer!!
+ }
+ }
+}
\ No newline at end of file