You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucenenet.apache.org by sy...@apache.org on 2015/04/15 01:32:28 UTC
[2/3] lucenenet git commit: More porting work
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/b4eaf2fc/src/Lucene.Net.Analysis.Common/Analysis/Standard/StandardAnalyzer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Standard/StandardAnalyzer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Standard/StandardAnalyzer.cs
index ecb534f..d0502aa 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Standard/StandardAnalyzer.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Standard/StandardAnalyzer.cs
@@ -2,145 +2,148 @@
using Lucene.Net.Analysis.Core;
using Lucene.Net.Analysis.Util;
using Lucene.Net.Util;
-using org.apache.lucene.analysis.standard;
namespace Lucene.Net.Analysis.Standard
{
- /*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
/// <summary>
- /// Filters <seealso cref="StandardTokenizer"/> with <seealso cref="StandardFilter"/>, {@link
- /// LowerCaseFilter} and <seealso cref="StopFilter"/>, using a list of
- /// English stop words.
- ///
- /// <a name="version"/>
- /// <para>You must specify the required <seealso cref="LuceneVersion"/>
- /// compatibility when creating StandardAnalyzer:
- /// <ul>
- /// <li> As of 3.4, Hiragana and Han characters are no longer wrongly split
- /// from their combining characters. If you use a previous version number,
- /// you get the exact broken behavior for backwards compatibility.
- /// <li> As of 3.1, StandardTokenizer implements Unicode text segmentation,
- /// and StopFilter correctly handles Unicode 4.0 supplementary characters
- /// in stopwords. <seealso cref="ClassicTokenizer"/> and <seealso cref="ClassicAnalyzer"/>
- /// are the pre-3.1 implementations of StandardTokenizer and
- /// StandardAnalyzer.
- /// <li> As of 2.9, StopFilter preserves position increments
- /// <li> As of 2.4, Tokens incorrectly identified as acronyms
- /// are corrected (see <a href="https://issues.apache.org/jira/browse/LUCENE-1068">LUCENE-1068</a>)
- /// </ul>
- /// </para>
- /// </summary>
- public sealed class StandardAnalyzer : StopwordAnalyzerBase
- {
-
- /// <summary>
- /// Default maximum allowed token length </summary>
- public const int DEFAULT_MAX_TOKEN_LENGTH = 255;
-
- private int maxTokenLength = DEFAULT_MAX_TOKEN_LENGTH;
-
- /// <summary>
- /// An unmodifiable set containing some common English words that are usually not
- /// useful for searching.
- /// </summary>
- public static readonly CharArraySet STOP_WORDS_SET = StopAnalyzer.ENGLISH_STOP_WORDS_SET;
-
- /// <summary>
- /// Builds an analyzer with the given stop words. </summary>
- /// <param name="matchVersion"> Lucene version to match See {@link
- /// <a href="#version">above</a>} </param>
- /// <param name="stopWords"> stop words </param>
- public StandardAnalyzer(LuceneVersion matchVersion, CharArraySet stopWords) : base(matchVersion, stopWords)
- {
- }
-
- /// <summary>
- /// Builds an analyzer with the default stop words ({@link
- /// #STOP_WORDS_SET}). </summary>
- /// <param name="matchVersion"> Lucene version to match See {@link
- /// <a href="#version">above</a>} </param>
- public StandardAnalyzer(LuceneVersion matchVersion) : this(matchVersion, STOP_WORDS_SET)
- {
- }
-
- /// <summary>
- /// Builds an analyzer with the stop words from the given reader. </summary>
- /// <seealso cref= WordlistLoader#getWordSet(Reader, Version) </seealso>
- /// <param name="matchVersion"> Lucene version to match See {@link
- /// <a href="#version">above</a>} </param>
- /// <param name="stopwords"> Reader to read stop words from </param>
- public StandardAnalyzer(LuceneVersion matchVersion, TextReader stopwords) : this(matchVersion, loadStopwordSet(stopwords, matchVersion))
- {
- }
-
- /// <summary>
- /// Set maximum allowed token length. If a token is seen
- /// that exceeds this length then it is discarded. This
- /// setting only takes effect the next time tokenStream or
- /// tokenStream is called.
- /// </summary>
- public int MaxTokenLength
- {
- set
- {
- maxTokenLength = value;
- }
- get
- {
- return maxTokenLength;
- }
- }
-
-
- protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader)
- {
- var src = new StandardTokenizer(matchVersion, reader);
- src.MaxTokenLength = maxTokenLength;
- TokenStream tok = new StandardFilter(matchVersion, src);
- tok = new LowerCaseFilter(matchVersion, tok);
- tok = new StopFilter(matchVersion, tok, stopwords);
- return new TokenStreamComponentsAnonymousInnerClassHelper(this, src, tok, reader);
- }
-
- private class TokenStreamComponentsAnonymousInnerClassHelper : TokenStreamComponents
- {
- private readonly StandardAnalyzer outerInstance;
-
- private TextReader reader;
- private readonly StandardTokenizer src;
-
- public TokenStreamComponentsAnonymousInnerClassHelper(StandardAnalyzer outerInstance, StandardTokenizer src, TokenStream tok, Reader reader) : base(src, tok)
- {
- this.outerInstance = outerInstance;
- this.reader = reader;
- this.src = src;
- }
-
- protected internal override Reader Reader
- {
- set
- {
- src.MaxTokenLength = outerInstance.maxTokenLength;
- base.Reader = value;
- }
- }
- }
- }
+ /// Filters <seealso cref="StandardTokenizer"/> with <seealso cref="StandardFilter"/>,
+ /// <seealso cref="LowerCaseFilter"/> and <seealso cref="StopFilter"/>, using a list of
+ /// English stop words.
+ ///
+ /// <a name="version"/>
+ /// <para>You must specify the required <seealso cref="LuceneVersion"/>
+ /// compatibility when creating StandardAnalyzer:
+ /// <ul>
+ /// <li> As of 3.4, Hiragana and Han characters are no longer wrongly split
+ /// from their combining characters. If you use a previous version number,
+ /// you get the exact broken behavior for backwards compatibility.</li>
+ /// <li> As of 3.1, StandardTokenizer implements Unicode text segmentation,
+ /// and StopFilter correctly handles Unicode 4.0 supplementary characters
+ /// in stopwords. <seealso cref="ClassicTokenizer"/> and <seealso cref="ClassicAnalyzer"/>
+ /// are the pre-3.1 implementations of StandardTokenizer and
+ /// StandardAnalyzer.</li>
+ /// <li> As of 2.9, StopFilter preserves position increments.</li>
+ /// <li> As of 2.4, Tokens incorrectly identified as acronyms
+ /// are corrected (see <a href="https://issues.apache.org/jira/browse/LUCENE-1068">LUCENE-1068</a>).</li>
+ /// </ul>
+ /// </para>
+ /// </summary>
+ public sealed class StandardAnalyzer : StopwordAnalyzerBase
+ {
+
+ /// <summary>
+ /// Default maximum allowed token length </summary>
+ public const int DEFAULT_MAX_TOKEN_LENGTH = 255;
+
+ private int maxTokenLength = DEFAULT_MAX_TOKEN_LENGTH;
+
+ /// <summary>
+ /// An unmodifiable set containing some common English words that are usually not
+ /// useful for searching.
+ /// </summary>
+ public static readonly CharArraySet STOP_WORDS_SET = StopAnalyzer.ENGLISH_STOP_WORDS_SET;
+
+ /// <summary>
+ /// Builds an analyzer with the given stop words. </summary>
+ /// <param name="matchVersion"> Lucene version to match. See
+ /// <a href="#version">above</a>. </param>
+ /// <param name="stopWords"> stop words </param>
+ public StandardAnalyzer(LuceneVersion matchVersion, CharArraySet stopWords)
+ : base(matchVersion, stopWords)
+ {
+ }
+
+ /// <summary>
+ /// Builds an analyzer with the default stop words
+ /// (<see cref="STOP_WORDS_SET"/>). </summary>
+ /// <param name="matchVersion"> Lucene version to match. See
+ /// <a href="#version">above</a>. </param>
+ public StandardAnalyzer(LuceneVersion matchVersion)
+ : this(matchVersion, STOP_WORDS_SET)
+ {
+ }
+
+ /// <summary>
+ /// Builds an analyzer with the stop words from the given reader. </summary>
+ /// <remarks> See also: <c>WordlistLoader#getWordSet(Reader, Version)</c>. </remarks>
+ /// <param name="matchVersion"> Lucene version to match. See
+ /// <a href="#version">above</a>. </param>
+ /// <param name="stopwords"> Reader to read stop words from </param>
+ public StandardAnalyzer(LuceneVersion matchVersion, TextReader stopwords)
+ : this(matchVersion, loadStopwordSet(stopwords, matchVersion))
+ {
+ }
+
+ /// <summary>
+ /// Gets or sets the maximum allowed token length. If a token is seen
+ /// that exceeds this length then it is discarded. This
+ /// setting only takes effect the next time tokenStream
+ /// is called.
+ /// </summary>
+ public int MaxTokenLength
+ {
+ set
+ {
+ maxTokenLength = value;
+ }
+ get
+ {
+ return maxTokenLength;
+ }
+ }
+
+
+ public override TokenStreamComponents CreateComponents(string fieldName, TextReader reader)
+ {
+ var src = new StandardTokenizer(matchVersion, reader);
+ src.MaxTokenLength = maxTokenLength;
+ TokenStream tok = new StandardFilter(matchVersion, src);
+ tok = new LowerCaseFilter(matchVersion, tok);
+ tok = new StopFilter(matchVersion, tok, stopwords);
+ return new TokenStreamComponentsAnonymousInnerClassHelper(this, src, tok, reader);
+ }
+
+ private class TokenStreamComponentsAnonymousInnerClassHelper : TokenStreamComponents
+ {
+ private readonly StandardAnalyzer outerInstance;
+
+ private TextReader reader;
+ private readonly StandardTokenizer src;
+
+ public TokenStreamComponentsAnonymousInnerClassHelper(StandardAnalyzer outerInstance, StandardTokenizer src, TokenStream tok, TextReader reader)
+ : base(src, tok)
+ {
+ this.outerInstance = outerInstance;
+ this.reader = reader;
+ this.src = src;
+ }
+
+ protected override TextReader Reader
+ {
+ set
+ {
+ src.MaxTokenLength = outerInstance.maxTokenLength;
+ base.Reader = value;
+ }
+ }
+ }
+ }
}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/b4eaf2fc/src/Lucene.Net.Analysis.Common/Analysis/Standard/StandardFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Standard/StandardFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Standard/StandardFilter.cs
index a2641ce..6093cd6 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Standard/StandardFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Standard/StandardFilter.cs
@@ -1,102 +1,91 @@
-using Lucene.Net.Analysis.Standard;
+using Lucene.Net.Analysis.Tokenattributes;
+using Lucene.Net.Util;
-namespace org.apache.lucene.analysis.standard
+namespace Lucene.Net.Analysis.Standard
{
- /*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+ /// <summary>
+ /// Normalizes tokens extracted with <seealso cref="StandardTokenizer"/>.
+ /// </summary>
+ public class StandardFilter : TokenFilter
+ {
+ private readonly LuceneVersion matchVersion;
- using CharTermAttribute = org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
- using TypeAttribute = org.apache.lucene.analysis.tokenattributes.TypeAttribute;
- using Version = org.apache.lucene.util.Version;
+ public StandardFilter(LuceneVersion matchVersion, TokenStream @in)
+ : base(@in)
+ {
+ this.matchVersion = matchVersion;
+ typeAtt = AddAttribute<ITypeAttribute>();
+ termAtt = AddAttribute<ICharTermAttribute>();
+ }
- /// <summary>
- /// Normalizes tokens extracted with <seealso cref="StandardTokenizer"/>.
- /// </summary>
- public class StandardFilter : TokenFilter
- {
- private readonly Version matchVersion;
+ private static readonly string APOSTROPHE_TYPE = ClassicTokenizer.TOKEN_TYPES[ClassicTokenizer.APOSTROPHE];
+ private static readonly string ACRONYM_TYPE = ClassicTokenizer.TOKEN_TYPES[ClassicTokenizer.ACRONYM];
- public StandardFilter(Version matchVersion, TokenStream @in) : base(@in)
- {
- this.matchVersion = matchVersion;
- }
+ // this filter uses the type and term attributes
+ private readonly ITypeAttribute typeAtt;
+ private readonly ICharTermAttribute termAtt;
- private static readonly string APOSTROPHE_TYPE = ClassicTokenizer.TOKEN_TYPES[ClassicTokenizer.APOSTROPHE];
- private static readonly string ACRONYM_TYPE = ClassicTokenizer.TOKEN_TYPES[ClassicTokenizer.ACRONYM];
+ public override bool IncrementToken()
+ {
+ if (matchVersion.OnOrAfter(LuceneVersion.LUCENE_31))
+ {
+ return input.IncrementToken(); // TODO: add some niceties for the new grammar
+ }
+ else
+ {
+ return IncrementTokenClassic();
+ }
+ }
- // this filters uses attribute type
- private readonly TypeAttribute typeAtt = addAttribute(typeof(TypeAttribute));
- private readonly CharTermAttribute termAtt = addAttribute(typeof(CharTermAttribute));
+ public bool IncrementTokenClassic()
+ {
+ if (!input.IncrementToken())
+ {
+ return false;
+ }
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: @Override public final boolean incrementToken() throws java.io.IOException
- public override bool incrementToken()
- {
- if (matchVersion.onOrAfter(Version.LUCENE_31))
- {
- return input.incrementToken(); // TODO: add some niceties for the new grammar
- }
- else
- {
- return incrementTokenClassic();
- }
- }
+ char[] buffer = termAtt.Buffer();
+ int bufferLength = termAtt.Length;
+ string type = typeAtt.Type;
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: public final boolean incrementTokenClassic() throws java.io.IOException
- public bool incrementTokenClassic()
- {
- if (!input.incrementToken())
- {
- return false;
- }
+ if (type == APOSTROPHE_TYPE && bufferLength >= 2 && buffer[bufferLength - 2] == '\'' && (buffer[bufferLength - 1] == 's' || buffer[bufferLength - 1] == 'S')) // remove 's
+ {
+ // Strip last 2 characters off
+ termAtt.Length = bufferLength - 2;
+ } // remove dots
+ else if (type == ACRONYM_TYPE)
+ {
+ int upto = 0;
+ for (int i = 0; i < bufferLength; i++)
+ {
+ char c = buffer[i];
+ if (c != '.')
+ {
+ buffer[upto++] = c;
+ }
+ }
+ termAtt.Length = upto;
+ }
-//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
-//ORIGINAL LINE: final char[] buffer = termAtt.buffer();
- char[] buffer = termAtt.buffer();
-//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
-//ORIGINAL LINE: final int bufferLength = termAtt.length();
- int bufferLength = termAtt.length();
-//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
-//ORIGINAL LINE: final String type = typeAtt.type();
- string type = typeAtt.type();
-
- if (type == APOSTROPHE_TYPE && bufferLength >= 2 && buffer[bufferLength - 2] == '\'' && (buffer[bufferLength - 1] == 's' || buffer[bufferLength - 1] == 'S')) // remove 's
- {
- // Strip last 2 characters off
- termAtt.Length = bufferLength - 2;
- } // remove dots
- else if (type == ACRONYM_TYPE)
- {
- int upto = 0;
- for (int i = 0;i < bufferLength;i++)
- {
- char c = buffer[i];
- if (c != '.')
- {
- buffer[upto++] = c;
- }
- }
- termAtt.Length = upto;
- }
-
- return true;
- }
- }
+ return true;
+ }
+ }
}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/b4eaf2fc/src/Lucene.Net.Analysis.Common/Analysis/Standard/StandardFilterFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Standard/StandardFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Standard/StandardFilterFactory.cs
index eab0156..b634397 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Standard/StandardFilterFactory.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Standard/StandardFilterFactory.cs
@@ -1,4 +1,5 @@
using System.Collections.Generic;
+using Lucene.Net.Analysis.Standard;
using TokenFilterFactory = Lucene.Net.Analysis.Util.TokenFilterFactory;
namespace org.apache.lucene.analysis.standard
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/b4eaf2fc/src/Lucene.Net.Analysis.Common/Analysis/Standard/StandardTokenizer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Standard/StandardTokenizer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Standard/StandardTokenizer.cs
index abf55e8..e47b481 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Standard/StandardTokenizer.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Standard/StandardTokenizer.cs
@@ -16,7 +16,10 @@
*/
using System;
using Lucene.Net.Analysis.Tokenattributes;
+using Lucene.Net.Util;
using org.apache.lucene.analysis.standard;
+using Version = Lucene.Net.Util.LuceneVersion;
+using Reader = System.IO.TextReader;
namespace Lucene.Net.Analysis.Standard
{
@@ -144,15 +147,15 @@ namespace Lucene.Net.Analysis.Standard
{
this.scanner = new StandardTokenizerImpl(input);
}
- else if (matchVersion.onOrAfter(Version.LUCENE_40))
+ else if (matchVersion.OnOrAfter(Version.LUCENE_40))
{
this.scanner = new StandardTokenizerImpl40(input);
}
- else if (matchVersion.onOrAfter(Version.LUCENE_34))
+ else if (matchVersion.OnOrAfter(Version.LUCENE_34))
{
this.scanner = new StandardTokenizerImpl34(input);
}
- else if (matchVersion.onOrAfter(Version.LUCENE_31))
+ else if (matchVersion.OnOrAfter(Version.LUCENE_31))
{
this.scanner = new StandardTokenizerImpl31(input);
}
@@ -229,9 +232,9 @@ namespace Lucene.Net.Analysis.Standard
posIncrAtt.PositionIncrement = posIncrAtt.PositionIncrement + skippedPositions;
}
- public override void Close()
+ public override void Dispose()
{
- base.Close();
+ base.Dispose();
scanner.yyreset(input);
}
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/b4eaf2fc/src/Lucene.Net.Analysis.Common/Analysis/Standard/StandardTokenizerFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Standard/StandardTokenizerFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Standard/StandardTokenizerFactory.cs
index 0b6bbe6..2c4560f 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Standard/StandardTokenizerFactory.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Standard/StandardTokenizerFactory.cs
@@ -1,8 +1,9 @@
using System.Collections.Generic;
-using Lucene.Net.Analysis.Standard;
-using TokenizerFactory = Lucene.Net.Analysis.Util.TokenizerFactory;
+using System.IO;
+using Lucene.Net.Analysis.Util;
+using Lucene.Net.Util;
-namespace org.apache.lucene.analysis.standard
+namespace Lucene.Net.Analysis.Standard
{
/*
@@ -21,12 +22,7 @@ namespace org.apache.lucene.analysis.standard
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-
- using TokenizerFactory = TokenizerFactory;
- using AttributeFactory = org.apache.lucene.util.AttributeSource.AttributeFactory;
-
-
- /// <summary>
+ /// <summary>
/// Factory for <seealso cref="StandardTokenizer"/>.
/// <pre class="prettyprint">
/// <fieldType name="text_stndrd" class="solr.TextField" positionIncrementGap="100">
@@ -51,9 +47,9 @@ namespace org.apache.lucene.analysis.standard
}
}
- public override StandardTokenizer create(AttributeFactory factory, Reader input)
+ public override Tokenizer Create(AttributeSource.AttributeFactory factory, TextReader input)
{
- StandardTokenizer tokenizer = new StandardTokenizer(luceneMatchVersion, factory, input);
+ var tokenizer = new StandardTokenizer(luceneMatchVersion, factory, input);
tokenizer.MaxTokenLength = maxTokenLength;
return tokenizer;
}
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/b4eaf2fc/src/Lucene.Net.Analysis.Common/Analysis/Standard/StandardTokenizerImpl.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Standard/StandardTokenizerImpl.cs b/src/Lucene.Net.Analysis.Common/Analysis/Standard/StandardTokenizerImpl.cs
index 86ba884..44a9bbe 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Standard/StandardTokenizerImpl.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Standard/StandardTokenizerImpl.cs
@@ -22,8 +22,6 @@ namespace org.apache.lucene.analysis.standard
* limitations under the License.
*/
- using CharTermAttribute = org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
-
/// <summary>
/// This class implements Word Break rules from the Unicode Text Segmentation
/// algorithm, as specified in
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/b4eaf2fc/src/Lucene.Net.Analysis.Common/Analysis/Standard/UAX29URLEmailAnalyzer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Standard/UAX29URLEmailAnalyzer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Standard/UAX29URLEmailAnalyzer.cs
index 628ca23..273896b 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Standard/UAX29URLEmailAnalyzer.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Standard/UAX29URLEmailAnalyzer.cs
@@ -1,9 +1,10 @@
using Lucene.Net.Analysis.Core;
-using Lucene.Net.Analysis.Standard;
using Lucene.Net.Analysis.Util;
-using StopwordAnalyzerBase = Lucene.Net.Analysis.Util.StopwordAnalyzerBase;
+using Lucene.Net.Util;
+using org.apache.lucene.analysis.standard;
+using Reader = System.IO.TextReader;
-namespace org.apache.lucene.analysis.standard
+namespace Lucene.Net.Analysis.Standard
{
/*
@@ -22,18 +23,9 @@ namespace org.apache.lucene.analysis.standard
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-
- using LowerCaseFilter = LowerCaseFilter;
- using StopAnalyzer = StopAnalyzer;
- using StopFilter = StopFilter;
- using CharArraySet = CharArraySet;
- using StopwordAnalyzerBase = StopwordAnalyzerBase;
- using Version = org.apache.lucene.util.Version;
-
-
- /// <summary>
+ /// <summary>
/// Filters <seealso cref="org.apache.lucene.analysis.standard.UAX29URLEmailTokenizer"/>
- /// with <seealso cref="org.apache.lucene.analysis.standard.StandardFilter"/>,
+ /// with <seealso cref="StandardFilter"/>,
/// <seealso cref="LowerCaseFilter"/> and
/// <seealso cref="StopFilter"/>, using a list of
/// English stop words.
@@ -64,7 +56,7 @@ namespace org.apache.lucene.analysis.standard
/// <param name="matchVersion"> Lucene version to match See {@link
/// <a href="#version">above</a>} </param>
/// <param name="stopWords"> stop words </param>
- public UAX29URLEmailAnalyzer(Version matchVersion, CharArraySet stopWords) : base(matchVersion, stopWords)
+ public UAX29URLEmailAnalyzer(LuceneVersion matchVersion, CharArraySet stopWords) : base(matchVersion, stopWords)
{
}
@@ -73,7 +65,7 @@ namespace org.apache.lucene.analysis.standard
/// #STOP_WORDS_SET}). </summary>
/// <param name="matchVersion"> Lucene version to match See {@link
/// <a href="#version">above</a>} </param>
- public UAX29URLEmailAnalyzer(Version matchVersion) : this(matchVersion, STOP_WORDS_SET)
+ public UAX29URLEmailAnalyzer(LuceneVersion matchVersion) : this(matchVersion, STOP_WORDS_SET)
{
}
@@ -83,9 +75,7 @@ namespace org.apache.lucene.analysis.standard
/// <param name="matchVersion"> Lucene version to match See {@link
/// <a href="#version">above</a>} </param>
/// <param name="stopwords"> Reader to read stop words from </param>
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: public UAX29URLEmailAnalyzer(org.apache.lucene.util.Version matchVersion, java.io.Reader stopwords) throws java.io.IOException
- public UAX29URLEmailAnalyzer(Version matchVersion, Reader stopwords) : this(matchVersion, loadStopwordSet(stopwords, matchVersion))
+ public UAX29URLEmailAnalyzer(LuceneVersion matchVersion, Reader stopwords) : this(matchVersion, loadStopwordSet(stopwords, matchVersion))
{
}
@@ -108,12 +98,8 @@ namespace org.apache.lucene.analysis.standard
}
-//JAVA TO C# CONVERTER WARNING: 'final' parameters are not available in .NET:
-//ORIGINAL LINE: @Override protected TokenStreamComponents createComponents(final String fieldName, final java.io.Reader reader)
- protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader)
+ public override TokenStreamComponents CreateComponents(string fieldName, Reader reader)
{
-//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
-//ORIGINAL LINE: final UAX29URLEmailTokenizer src = new UAX29URLEmailTokenizer(matchVersion, reader);
UAX29URLEmailTokenizer src = new UAX29URLEmailTokenizer(matchVersion, reader);
src.MaxTokenLength = maxTokenLength;
TokenStream tok = new StandardFilter(matchVersion, src);
@@ -127,19 +113,16 @@ namespace org.apache.lucene.analysis.standard
private readonly UAX29URLEmailAnalyzer outerInstance;
private Reader reader;
- private org.apache.lucene.analysis.standard.UAX29URLEmailTokenizer src;
+ private UAX29URLEmailTokenizer src;
- public TokenStreamComponentsAnonymousInnerClassHelper(UAX29URLEmailAnalyzer outerInstance, org.apache.lucene.analysis.standard.UAX29URLEmailTokenizer src, TokenStream tok, Reader reader) : base(src, tok)
+ public TokenStreamComponentsAnonymousInnerClassHelper(UAX29URLEmailAnalyzer outerInstance, UAX29URLEmailTokenizer src, TokenStream tok, Reader reader) : base(src, tok)
{
this.outerInstance = outerInstance;
this.reader = reader;
this.src = src;
}
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: @Override protected void setReader(final java.io.Reader reader) throws java.io.IOException
-//JAVA TO C# CONVERTER WARNING: 'final' parameters are not available in .NET:
- protected internal override Reader Reader
+ protected override Reader Reader
{
set
{
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/b4eaf2fc/src/Lucene.Net.Analysis.Common/Analysis/Util/CharArraySet.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Util/CharArraySet.cs b/src/Lucene.Net.Analysis.Common/Analysis/Util/CharArraySet.cs
index f319675..4faa921 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Util/CharArraySet.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Util/CharArraySet.cs
@@ -1,7 +1,6 @@
using System.Collections.Generic;
using System.Text;
using Lucene.Net.Util;
-using org.apache.lucene.analysis.util;
namespace Lucene.Net.Analysis.Util
{
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/b4eaf2fc/src/Lucene.Net.Analysis.Common/Analysis/Wikipedia/WikipediaTokenizer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Wikipedia/WikipediaTokenizer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Wikipedia/WikipediaTokenizer.cs
index 1fd76f8..f4fa262 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Wikipedia/WikipediaTokenizer.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Wikipedia/WikipediaTokenizer.cs
@@ -1,7 +1,4 @@
-using System.Collections.Generic;
-using System.Text;
-
-/*
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
@@ -17,20 +14,14 @@ using System.Text;
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+using System.Collections.Generic;
+using System.Text;
+using Lucene.Net.Analysis.Tokenattributes;
+using Lucene.Net.Util;
-namespace org.apache.lucene.analysis.wikipedia
+namespace Lucene.Net.Analysis.Wikipedia
{
-
- using CharTermAttribute = org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
- using FlagsAttribute = org.apache.lucene.analysis.tokenattributes.FlagsAttribute;
- using OffsetAttribute = org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
- using PositionIncrementAttribute = org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
- using TypeAttribute = org.apache.lucene.analysis.tokenattributes.TypeAttribute;
- using AttributeSource = org.apache.lucene.util.AttributeSource;
-
-
-
- /// <summary>
+ /// <summary>
/// Extension of StandardTokenizer that is aware of Wikipedia syntax. It is based off of the
/// Wikipedia tutorial available at http://en.wikipedia.org/wiki/Wikipedia:Tutorial, but it may not be complete.
/// <p/>
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/b4eaf2fc/src/Lucene.Net.Analysis.Common/Analysis/Wikipedia/WikipediaTokenizerFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Wikipedia/WikipediaTokenizerFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Wikipedia/WikipediaTokenizerFactory.cs
index ad7027f..e320469 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Wikipedia/WikipediaTokenizerFactory.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Wikipedia/WikipediaTokenizerFactory.cs
@@ -1,4 +1,5 @@
using System.Collections.Generic;
+using Lucene.Net.Analysis.Wikipedia;
using TokenizerFactory = Lucene.Net.Analysis.Util.TokenizerFactory;
namespace org.apache.lucene.analysis.wikipedia
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/b4eaf2fc/src/Lucene.Net.Core/Util/StringHelper.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Core/Util/StringHelper.cs b/src/Lucene.Net.Core/Util/StringHelper.cs
index 9a8dc3c..74d6992 100644
--- a/src/Lucene.Net.Core/Util/StringHelper.cs
+++ b/src/Lucene.Net.Core/Util/StringHelper.cs
@@ -66,18 +66,18 @@ namespace Lucene.Net.Util
}
}
- private static IComparer<string> versionComparator = new ComparatorAnonymousInnerClassHelper();
+ private static readonly IComparer<string> versionComparator = new ComparatorAnonymousInnerClassHelper();
- private class ComparatorAnonymousInnerClassHelper : IComparer<string>
+ private sealed class ComparatorAnonymousInnerClassHelper : IComparer<string>
{
public ComparatorAnonymousInnerClassHelper()
{
}
- public virtual int Compare(string a, string b)
+ public int Compare(string a, string b)
{
- StringTokenizer aTokens = new StringTokenizer(a, ".");
- StringTokenizer bTokens = new StringTokenizer(b, ".");
+ var aTokens = new StringTokenizer(a, ".");
+ var bTokens = new StringTokenizer(b, ".");
while (aTokens.HasMoreTokens())
{