You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucenenet.apache.org by sy...@apache.org on 2015/01/07 19:33:35 UTC
lucenenet git commit: More porting work on Analysis.Common
Repository: lucenenet
Updated Branches:
refs/heads/master 78c524d26 -> 39ec9d8d0
More porting work on Analysis.Common
Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/39ec9d8d
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/39ec9d8d
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/39ec9d8d
Branch: refs/heads/master
Commit: 39ec9d8d00636ac6b868325a6d9be9d4c6c5f3f1
Parents: 78c524d
Author: Itamar Syn-Hershko <it...@code972.com>
Authored: Wed Jan 7 20:08:43 2015 +0200
Committer: Itamar Syn-Hershko <it...@code972.com>
Committed: Wed Jan 7 20:08:43 2015 +0200
----------------------------------------------------------------------
.../Lucene47WordDelimiterFilter.cs | 33 +-
.../Miscellaneous/WordDelimiterFilter.cs | 3 +-
.../Analysis/Position/PositionFilter.cs | 167 +++----
.../Analysis/Shingle/ShingleAnalyzerWrapper.cs | 10 +-
.../Analysis/Standard/ClassicAnalyzer.cs | 1 +
.../Analysis/Standard/ClassicTokenizer.cs | 2 +
.../Standard/ClassicTokenizerFactory.cs | 1 +
.../Analysis/Standard/ClassicTokenizerImpl.cs | 2 +-
.../Analysis/Standard/StandardAnalyzer.cs | 31 +-
.../Analysis/Standard/StandardFilter.cs | 4 +-
.../Analysis/Standard/StandardTokenizer.cs | 465 +++++++++----------
.../Standard/StandardTokenizerFactory.cs | 1 +
.../Analysis/Standard/StandardTokenizerImpl.cs | 2 +-
.../Analysis/Standard/UAX29URLEmailAnalyzer.cs | 1 +
.../Analysis/Standard/UAX29URLEmailTokenizer.cs | 4 +-
.../Standard/UAX29URLEmailTokenizerFactory.cs | 1 +
16 files changed, 351 insertions(+), 377 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/39ec9d8d/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/Lucene47WordDelimiterFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/Lucene47WordDelimiterFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/Lucene47WordDelimiterFilter.cs
index da3bda4..950d0ca 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/Lucene47WordDelimiterFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/Lucene47WordDelimiterFilter.cs
@@ -16,22 +16,13 @@ using System.Text;
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-using Lucene.Net.Analysis.Core;
+using Lucene.Net.Analysis;
using Lucene.Net.Analysis.Util;
+using Lucene.Net.Util;
namespace org.apache.lucene.analysis.miscellaneous
{
- using WhitespaceTokenizer = WhitespaceTokenizer;
- using StandardTokenizer = org.apache.lucene.analysis.standard.StandardTokenizer;
- using OffsetAttribute = org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
- using PositionIncrementAttribute = org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
- using CharTermAttribute = org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
- using TypeAttribute = org.apache.lucene.analysis.tokenattributes.TypeAttribute;
- using CharArraySet = CharArraySet;
- using ArrayUtil = org.apache.lucene.util.ArrayUtil;
- using RamUsageEstimator = org.apache.lucene.util.RamUsageEstimator;
-
/// <summary>
/// Old Broken version of <seealso cref="WordDelimiterFilter"/>
/// </summary>
@@ -194,21 +185,19 @@ namespace org.apache.lucene.analysis.miscellaneous
}
}
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: @Override public boolean incrementToken() throws java.io.IOException
- public override bool incrementToken()
+ public override bool IncrementToken()
{
while (true)
{
if (!hasSavedState)
{
// process a new input word
- if (!input.incrementToken())
+ if (!input.IncrementToken())
{
return false;
}
- int termLength = termAttribute.length();
+ int termLength = termAttribute.Length();
char[] termBuffer = termAttribute.buffer();
accumPosInc += posIncAttribute.PositionIncrement;
@@ -328,11 +317,9 @@ namespace org.apache.lucene.analysis.miscellaneous
/// <summary>
/// {@inheritDoc}
/// </summary>
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: @Override public void reset() throws java.io.IOException
- public override void reset()
+ public override void Reset()
{
- base.reset();
+ base.Reset();
hasSavedState = false;
concat.clear();
concatAll.clear();
@@ -355,7 +342,7 @@ namespace org.apache.lucene.analysis.miscellaneous
if (savedBuffer.Length < termAttribute.length())
{
- savedBuffer = new char[ArrayUtil.oversize(termAttribute.length(), RamUsageEstimator.NUM_BYTES_CHAR)];
+ savedBuffer = new char[ArrayUtil.Oversize(termAttribute.Length(), RamUsageEstimator.NUM_BYTES_CHAR)];
}
Array.Copy(termAttribute.buffer(), 0, savedBuffer, 0, termAttribute.length());
@@ -561,9 +548,9 @@ namespace org.apache.lucene.analysis.miscellaneous
/// <summary>
/// Writes the concatenation to the attributes
/// </summary>
- internal void write()
+ private void Write()
{
- clearAttributes();
+ ClearAttributes();
if (outerInstance.termAttribute.length() < buffer.Length)
{
outerInstance.termAttribute.resizeBuffer(buffer.Length);
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/39ec9d8d/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/WordDelimiterFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/WordDelimiterFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/WordDelimiterFilter.cs
index 2b1811c..be6ba5d 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/WordDelimiterFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/WordDelimiterFilter.cs
@@ -17,13 +17,14 @@ using System.Text;
* limitations under the License.
*/
using Lucene.Net.Analysis.Core;
+using Lucene.Net.Analysis.Standard;
using Lucene.Net.Analysis.Util;
namespace org.apache.lucene.analysis.miscellaneous
{
using WhitespaceTokenizer = WhitespaceTokenizer;
- using StandardTokenizer = org.apache.lucene.analysis.standard.StandardTokenizer;
+ using StandardTokenizer = StandardTokenizer;
using OffsetAttribute = org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
using PositionIncrementAttribute = org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
using CharTermAttribute = org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/39ec9d8d/src/Lucene.Net.Analysis.Common/Analysis/Position/PositionFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Position/PositionFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Position/PositionFilter.cs
index 104422b..56bb704 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Position/PositionFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Position/PositionFilter.cs
@@ -4,95 +4,98 @@ using Lucene.Net.Analysis.Tokenattributes;
namespace Lucene.Net.Analysis.Position
{
- /*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
/// <summary>
- /// Set the positionIncrement of all tokens to the "positionIncrement",
- /// except the first return token which retains its original positionIncrement value.
- /// The default positionIncrement value is zero. </summary>
- /// @deprecated (4.4) PositionFilter makes <seealso cref="TokenStream"/> graphs inconsistent
- /// which can cause highlighting bugs. Its main use-case being to make
- /// <a href="{@docRoot}/../queryparser/overview-summary.html">QueryParser</a>
- /// generate boolean queries instead of phrase queries, it is now advised to use
- /// {@code QueryParser.setAutoGeneratePhraseQueries(boolean)}
- /// (for simple cases) or to override {@code QueryParser.newFieldQuery}.
- [Obsolete("(4.4) PositionFilter makes <seealso cref="TokenStream"/> graphs inconsistent")]
- public sealed class PositionFilter : TokenFilter
- {
+ /// Set the positionIncrement of all tokens to the "positionIncrement",
+ /// except the first return token which retains its original positionIncrement value.
+ /// The default positionIncrement value is zero. </summary>
+ /// @deprecated (4.4) PositionFilter makes <seealso cref="TokenStream"/> graphs inconsistent
+ /// which can cause highlighting bugs. Its main use-case being to make
+ /// <a href="{@docRoot}/../queryparser/overview-summary.html">QueryParser</a>
+ /// generate boolean queries instead of phrase queries, it is now advised to use
+ /// {@code QueryParser.setAutoGeneratePhraseQueries(boolean)}
+ /// (for simple cases) or to override {@code QueryParser.newFieldQuery}.
+ [Obsolete("(4.4) PositionFilter makes <seealso cref=\"TokenStream\"/> graphs inconsistent")]
+ public sealed class PositionFilter : TokenFilter
+ {
- /// <summary>
- /// Position increment to assign to all but the first token - default = 0 </summary>
- private readonly int positionIncrement;
+ /// <summary>
+ /// Position increment to assign to all but the first token - default = 0 </summary>
+ private readonly int positionIncrement;
- /// <summary>
- /// The first token must have non-zero positionIncrement * </summary>
- private bool firstTokenPositioned = false;
+ /// <summary>
+ /// The first token must have non-zero positionIncrement </summary>
+ private bool firstTokenPositioned = false;
- private PositionIncrementAttribute posIncrAtt = addAttribute(typeof(PositionIncrementAttribute));
+ private readonly IPositionIncrementAttribute posIncrAtt;
- /// <summary>
- /// Constructs a PositionFilter that assigns a position increment of zero to
- /// all but the first token from the given input stream.
- /// </summary>
- /// <param name="input"> the input stream </param>
- public PositionFilter(TokenStream input) : this(input, 0)
- {
- }
+ /// <summary>
+ /// Constructs a PositionFilter that assigns a position increment of zero to
+ /// all but the first token from the given input stream.
+ /// </summary>
+ /// <param name="input"> the input stream </param>
+ public PositionFilter(TokenStream input)
+ : this(input, 0)
+ {
+ }
- /// <summary>
- /// Constructs a PositionFilter that assigns the given position increment to
- /// all but the first token from the given input stream.
- /// </summary>
- /// <param name="input"> the input stream </param>
- /// <param name="positionIncrement"> position increment to assign to all but the first
- /// token from the input stream </param>
- public PositionFilter(TokenStream input, int positionIncrement) : base(input)
- {
- if (positionIncrement < 0)
- {
- throw new System.ArgumentException("positionIncrement may not be negative");
- }
- this.positionIncrement = positionIncrement;
- }
+ /// <summary>
+ /// Constructs a PositionFilter that assigns the given position increment to
+ /// all but the first token from the given input stream.
+ /// </summary>
+ /// <param name="input"> the input stream </param>
+ /// <param name="positionIncrement"> position increment to assign to all but the first
+ /// token from the input stream </param>
+ public PositionFilter(TokenStream input, int positionIncrement)
+ : base(input)
+ {
+ if (positionIncrement < 0)
+ {
+ throw new System.ArgumentException("positionIncrement may not be negative");
+ }
+ this.positionIncrement = positionIncrement;
+ posIncrAtt = AddAttribute<IPositionIncrementAttribute>(); // set here (not in the one-arg ctor) so every construction path initializes it
+ }
- public override bool IncrementToken()
- {
- if (input.IncrementToken())
- {
- if (firstTokenPositioned)
- {
- posIncrAtt.PositionIncrement = positionIncrement;
- }
- else
- {
- firstTokenPositioned = true;
- }
- return true;
- }
- else
- {
- return false;
- }
- }
+ public override bool IncrementToken()
+ {
+ if (input.IncrementToken())
+ {
+ if (firstTokenPositioned)
+ {
+ posIncrAtt.PositionIncrement = positionIncrement;
+ }
+ else
+ {
+ firstTokenPositioned = true;
+ }
+ return true;
+ }
+ else
+ {
+ return false;
+ }
+ }
public override void Reset()
- {
- base.Reset();
- firstTokenPositioned = false;
- }
- }
+ {
+ base.Reset();
+ firstTokenPositioned = false;
+ }
+ }
}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/39ec9d8d/src/Lucene.Net.Analysis.Common/Analysis/Shingle/ShingleAnalyzerWrapper.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Shingle/ShingleAnalyzerWrapper.cs b/src/Lucene.Net.Analysis.Common/Analysis/Shingle/ShingleAnalyzerWrapper.cs
index 06c5e10..b7d1f5d 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Shingle/ShingleAnalyzerWrapper.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Shingle/ShingleAnalyzerWrapper.cs
@@ -1,4 +1,9 @@
-namespace org.apache.lucene.analysis.shingle
+using Lucene.Net.Analysis.Standard;
+using Lucene.Net.Util;
+using org.apache.lucene.analysis.shingle;
+using org.apache.lucene.analysis.standard;
+
+namespace Lucene.Net.Analysis.Shingle
{
/*
@@ -18,9 +23,6 @@
* limitations under the License.
*/
- using StandardAnalyzer = org.apache.lucene.analysis.standard.StandardAnalyzer;
- using Version = org.apache.lucene.util.Version;
-
/// <summary>
/// A ShingleAnalyzerWrapper wraps a <seealso cref="ShingleFilter"/> around another <seealso cref="Analyzer"/>.
/// <para>
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/39ec9d8d/src/Lucene.Net.Analysis.Common/Analysis/Standard/ClassicAnalyzer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Standard/ClassicAnalyzer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Standard/ClassicAnalyzer.cs
index dcfe368..de32d23 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Standard/ClassicAnalyzer.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Standard/ClassicAnalyzer.cs
@@ -1,4 +1,5 @@
using Lucene.Net.Analysis.Core;
+using Lucene.Net.Analysis.Standard;
using Lucene.Net.Analysis.Util;
using StopwordAnalyzerBase = Lucene.Net.Analysis.Util.StopwordAnalyzerBase;
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/39ec9d8d/src/Lucene.Net.Analysis.Common/Analysis/Standard/ClassicTokenizer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Standard/ClassicTokenizer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Standard/ClassicTokenizer.cs
index a41f48d..f9c680e 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Standard/ClassicTokenizer.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Standard/ClassicTokenizer.cs
@@ -15,6 +15,8 @@
* limitations under the License.
*/
+using Lucene.Net.Analysis.Standard;
+
namespace org.apache.lucene.analysis.standard
{
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/39ec9d8d/src/Lucene.Net.Analysis.Common/Analysis/Standard/ClassicTokenizerFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Standard/ClassicTokenizerFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Standard/ClassicTokenizerFactory.cs
index 6753039..5b6442e 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Standard/ClassicTokenizerFactory.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Standard/ClassicTokenizerFactory.cs
@@ -1,4 +1,5 @@
using System.Collections.Generic;
+using Lucene.Net.Analysis.Standard;
using TokenizerFactory = Lucene.Net.Analysis.Util.TokenizerFactory;
namespace org.apache.lucene.analysis.standard
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/39ec9d8d/src/Lucene.Net.Analysis.Common/Analysis/Standard/ClassicTokenizerImpl.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Standard/ClassicTokenizerImpl.cs b/src/Lucene.Net.Analysis.Common/Analysis/Standard/ClassicTokenizerImpl.cs
index 344d817..5db482e 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Standard/ClassicTokenizerImpl.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Standard/ClassicTokenizerImpl.cs
@@ -1,6 +1,6 @@
using System;
-
/* The following code was generated by JFlex 1.5.1 */
+using Lucene.Net.Analysis.Standard;
namespace org.apache.lucene.analysis.standard
{
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/39ec9d8d/src/Lucene.Net.Analysis.Common/Analysis/Standard/StandardAnalyzer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Standard/StandardAnalyzer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Standard/StandardAnalyzer.cs
index 392f656..80ea22b 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Standard/StandardAnalyzer.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Standard/StandardAnalyzer.cs
@@ -1,8 +1,9 @@
using Lucene.Net.Analysis.Core;
using Lucene.Net.Analysis.Util;
-using StopwordAnalyzerBase = Lucene.Net.Analysis.Util.StopwordAnalyzerBase;
+using Lucene.Net.Util;
+using org.apache.lucene.analysis.standard;
-namespace org.apache.lucene.analysis.standard
+namespace Lucene.Net.Analysis.Standard
{
/*
@@ -21,18 +22,7 @@ namespace org.apache.lucene.analysis.standard
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-
- using org.apache.lucene.analysis;
- using LowerCaseFilter = LowerCaseFilter;
- using StopAnalyzer = StopAnalyzer;
- using StopFilter = StopFilter;
- using CharArraySet = CharArraySet;
- using StopwordAnalyzerBase = StopwordAnalyzerBase;
- using WordlistLoader = WordlistLoader;
- using Version = org.apache.lucene.util.Version;
-
-
- /// <summary>
+ /// <summary>
/// Filters <seealso cref="StandardTokenizer"/> with <seealso cref="StandardFilter"/>, {@link
/// LowerCaseFilter} and <seealso cref="StopFilter"/>, using a list of
/// English stop words.
@@ -94,8 +84,6 @@ namespace org.apache.lucene.analysis.standard
/// <param name="matchVersion"> Lucene version to match See {@link
/// <a href="#version">above</a>} </param>
/// <param name="stopwords"> Reader to read stop words from </param>
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: public StandardAnalyzer(org.apache.lucene.util.Version matchVersion, java.io.Reader stopwords) throws java.io.IOException
public StandardAnalyzer(Version matchVersion, Reader stopwords) : this(matchVersion, loadStopwordSet(stopwords, matchVersion))
{
}
@@ -119,12 +107,8 @@ namespace org.apache.lucene.analysis.standard
}
-//JAVA TO C# CONVERTER WARNING: 'final' parameters are not available in .NET:
-//ORIGINAL LINE: @Override protected TokenStreamComponents createComponents(final String fieldName, final java.io.Reader reader)
protected internal override TokenStreamComponents createComponents(string fieldName, Reader reader)
{
-//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
-//ORIGINAL LINE: final StandardTokenizer src = new StandardTokenizer(matchVersion, reader);
StandardTokenizer src = new StandardTokenizer(matchVersion, reader);
src.MaxTokenLength = maxTokenLength;
TokenStream tok = new StandardFilter(matchVersion, src);
@@ -138,18 +122,15 @@ namespace org.apache.lucene.analysis.standard
private readonly StandardAnalyzer outerInstance;
private Reader reader;
- private org.apache.lucene.analysis.standard.StandardTokenizer src;
+ private StandardTokenizer src;
- public TokenStreamComponentsAnonymousInnerClassHelper(StandardAnalyzer outerInstance, org.apache.lucene.analysis.standard.StandardTokenizer src, TokenStream tok, Reader reader) : base(src, tok)
+ public TokenStreamComponentsAnonymousInnerClassHelper(StandardAnalyzer outerInstance, StandardTokenizer src, TokenStream tok, Reader reader) : base(src, tok)
{
this.outerInstance = outerInstance;
this.reader = reader;
this.src = src;
}
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: @Override protected void setReader(final java.io.Reader reader) throws java.io.IOException
-//JAVA TO C# CONVERTER WARNING: 'final' parameters are not available in .NET:
protected internal override Reader Reader
{
set
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/39ec9d8d/src/Lucene.Net.Analysis.Common/Analysis/Standard/StandardFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Standard/StandardFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Standard/StandardFilter.cs
index 20e4f64..a2641ce 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Standard/StandardFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Standard/StandardFilter.cs
@@ -1,4 +1,6 @@
-namespace org.apache.lucene.analysis.standard
+using Lucene.Net.Analysis.Standard;
+
+namespace org.apache.lucene.analysis.standard
{
/*
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/39ec9d8d/src/Lucene.Net.Analysis.Common/Analysis/Standard/StandardTokenizer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Standard/StandardTokenizer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Standard/StandardTokenizer.cs
index afde960..abf55e8 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Standard/StandardTokenizer.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Standard/StandardTokenizer.cs
@@ -1,6 +1,4 @@
-using System;
-
-/*
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
@@ -16,242 +14,233 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+using System;
+using Lucene.Net.Analysis.Tokenattributes;
+using org.apache.lucene.analysis.standard;
-namespace org.apache.lucene.analysis.standard
+namespace Lucene.Net.Analysis.Standard
{
-
-
- using StandardTokenizerImpl31 = org.apache.lucene.analysis.standard.std31.StandardTokenizerImpl31;
- using StandardTokenizerImpl34 = org.apache.lucene.analysis.standard.std34.StandardTokenizerImpl34;
- using StandardTokenizerImpl40 = org.apache.lucene.analysis.standard.std40.StandardTokenizerImpl40;
- using CharTermAttribute = org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
- using OffsetAttribute = org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
- using PositionIncrementAttribute = org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
- using TypeAttribute = org.apache.lucene.analysis.tokenattributes.TypeAttribute;
- using Version = org.apache.lucene.util.Version;
-
- /// <summary>
- /// A grammar-based tokenizer constructed with JFlex.
- /// <para>
- /// As of Lucene version 3.1, this class implements the Word Break rules from the
- /// Unicode Text Segmentation algorithm, as specified in
- /// <a href="http://unicode.org/reports/tr29/">Unicode Standard Annex #29</a>.
- /// <p/>
- /// </para>
- /// <para>Many applications have specific tokenizer needs. If this tokenizer does
- /// not suit your application, please consider copying this source code
- /// directory to your project and maintaining your own grammar-based tokenizer.
- ///
- /// <a name="version"/>
- /// </para>
- /// <para>You must specify the required <seealso cref="Version"/>
- /// compatibility when creating StandardTokenizer:
- /// <ul>
- /// <li> As of 3.4, Hiragana and Han characters are no longer wrongly split
- /// from their combining characters. If you use a previous version number,
- /// you get the exact broken behavior for backwards compatibility.
- /// <li> As of 3.1, StandardTokenizer implements Unicode text segmentation.
- /// If you use a previous version number, you get the exact behavior of
- /// <seealso cref="ClassicTokenizer"/> for backwards compatibility.
- /// </ul>
- /// </para>
- /// </summary>
-
- public sealed class StandardTokenizer : Tokenizer
- {
- /// <summary>
- /// A private instance of the JFlex-constructed scanner </summary>
- private StandardTokenizerInterface scanner;
-
- public const int ALPHANUM = 0;
- /// @deprecated (3.1)
- [Obsolete("(3.1)")]
- public const int APOSTROPHE = 1;
- /// @deprecated (3.1)
- [Obsolete("(3.1)")]
- public const int ACRONYM = 2;
- /// @deprecated (3.1)
- [Obsolete("(3.1)")]
- public const int COMPANY = 3;
- public const int EMAIL = 4;
- /// @deprecated (3.1)
- [Obsolete("(3.1)")]
- public const int HOST = 5;
- public const int NUM = 6;
- /// @deprecated (3.1)
- [Obsolete("(3.1)")]
- public const int CJ = 7;
-
- /// @deprecated (3.1)
- [Obsolete("(3.1)")]
- public const int ACRONYM_DEP = 8;
-
- public const int SOUTHEAST_ASIAN = 9;
- public const int IDEOGRAPHIC = 10;
- public const int HIRAGANA = 11;
- public const int KATAKANA = 12;
- public const int HANGUL = 13;
-
- /// <summary>
- /// String token types that correspond to token type int constants </summary>
- public static readonly string[] TOKEN_TYPES = new string [] {"<ALPHANUM>", "<APOSTROPHE>", "<ACRONYM>", "<COMPANY>", "<EMAIL>", "<HOST>", "<NUM>", "<CJ>", "<ACRONYM_DEP>", "<SOUTHEAST_ASIAN>", "<IDEOGRAPHIC>", "<HIRAGANA>", "<KATAKANA>", "<HANGUL>"};
-
- private int skippedPositions;
-
- private int maxTokenLength = StandardAnalyzer.DEFAULT_MAX_TOKEN_LENGTH;
-
- /// <summary>
- /// Set the max allowed token length. Any token longer
- /// than this is skipped.
- /// </summary>
- public int MaxTokenLength
- {
- set
- {
- if (value < 1)
- {
- throw new System.ArgumentException("maxTokenLength must be greater than zero");
- }
- this.maxTokenLength = value;
- }
- get
- {
- return maxTokenLength;
- }
- }
-
-
- /// <summary>
- /// Creates a new instance of the <seealso cref="org.apache.lucene.analysis.standard.StandardTokenizer"/>. Attaches
- /// the <code>input</code> to the newly created JFlex scanner.
- /// </summary>
- /// <param name="input"> The input reader
- ///
- /// See http://issues.apache.org/jira/browse/LUCENE-1068 </param>
- public StandardTokenizer(Version matchVersion, Reader input) : base(input)
- {
- init(matchVersion);
- }
-
- /// <summary>
- /// Creates a new StandardTokenizer with a given <seealso cref="org.apache.lucene.util.AttributeSource.AttributeFactory"/>
- /// </summary>
- public StandardTokenizer(Version matchVersion, AttributeFactory factory, Reader input) : base(factory, input)
- {
- init(matchVersion);
- }
-
- private void init(Version matchVersion)
- {
- if (matchVersion.onOrAfter(Version.LUCENE_47))
- {
- this.scanner = new StandardTokenizerImpl(input);
- }
- else if (matchVersion.onOrAfter(Version.LUCENE_40))
- {
- this.scanner = new StandardTokenizerImpl40(input);
- }
- else if (matchVersion.onOrAfter(Version.LUCENE_34))
- {
- this.scanner = new StandardTokenizerImpl34(input);
- }
- else if (matchVersion.onOrAfter(Version.LUCENE_31))
- {
- this.scanner = new StandardTokenizerImpl31(input);
- }
- else
- {
- this.scanner = new ClassicTokenizerImpl(input);
- }
- }
-
- // this tokenizer generates three attributes:
- // term offset, positionIncrement and type
- private readonly CharTermAttribute termAtt = addAttribute(typeof(CharTermAttribute));
- private readonly OffsetAttribute offsetAtt = addAttribute(typeof(OffsetAttribute));
- private readonly PositionIncrementAttribute posIncrAtt = addAttribute(typeof(PositionIncrementAttribute));
- private readonly TypeAttribute typeAtt = addAttribute(typeof(TypeAttribute));
-
- /*
- * (non-Javadoc)
- *
- * @see org.apache.lucene.analysis.TokenStream#next()
- */
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: @Override public final boolean incrementToken() throws java.io.IOException
- public override bool incrementToken()
- {
- clearAttributes();
- skippedPositions = 0;
-
- while (true)
- {
- int tokenType = scanner.NextToken;
-
- if (tokenType == StandardTokenizerInterface_Fields.YYEOF)
- {
- return false;
- }
-
- if (scanner.yylength() <= maxTokenLength)
- {
- posIncrAtt.PositionIncrement = skippedPositions + 1;
- scanner.getText(termAtt);
-//JAVA TO C# CONVERTER WARNING: The original Java variable was marked 'final':
-//ORIGINAL LINE: final int start = scanner.yychar();
- int start = scanner.yychar();
- offsetAtt.setOffset(correctOffset(start), correctOffset(start + termAtt.length()));
- // This 'if' should be removed in the next release. For now, it converts
- // invalid acronyms to HOST. When removed, only the 'else' part should
- // remain.
- if (tokenType == StandardTokenizer.ACRONYM_DEP)
- {
- typeAtt.Type = StandardTokenizer.TOKEN_TYPES[StandardTokenizer.HOST];
- termAtt.Length = termAtt.length() - 1; // remove extra '.'
- }
- else
- {
- typeAtt.Type = StandardTokenizer.TOKEN_TYPES[tokenType];
- }
- return true;
- }
- else
- // When we skip a too-long term, we still increment the
- // position increment
- {
- skippedPositions++;
- }
- }
- }
-
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: @Override public final void end() throws java.io.IOException
- public override void end()
- {
- base.end();
- // set final offset
- int finalOffset = correctOffset(scanner.yychar() + scanner.yylength());
- offsetAtt.setOffset(finalOffset, finalOffset);
- // adjust any skipped tokens
- posIncrAtt.PositionIncrement = posIncrAtt.PositionIncrement + skippedPositions;
- }
-
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: @Override public void close() throws java.io.IOException
- public override void close()
- {
- base.close();
- scanner.yyreset(input);
- }
-
-//JAVA TO C# CONVERTER WARNING: Method 'throws' clauses are not available in .NET:
-//ORIGINAL LINE: @Override public void reset() throws java.io.IOException
- public override void reset()
- {
- base.reset();
- scanner.yyreset(input);
- skippedPositions = 0;
- }
- }
+ /// <summary>
+ /// A grammar-based tokenizer constructed with JFlex.
+ /// <para>
+ /// As of Lucene version 3.1, this class implements the Word Break rules from the
+ /// Unicode Text Segmentation algorithm, as specified in
+ /// <a href="http://unicode.org/reports/tr29/">Unicode Standard Annex #29</a>.
+ /// </para>
+ /// <para>Many applications have specific tokenizer needs. If this tokenizer does
+ /// not suit your application, please consider copying this source code
+ /// directory to your project and maintaining your own grammar-based tokenizer.
+ ///
+ /// <a name="version"/>
+ /// </para>
+ /// <para>You must specify the required <see cref="Version"/>
+ /// compatibility when creating StandardTokenizer:
+ /// <list type="bullet">
+ /// <item><description> As of 3.4, Hiragana and Han characters are no longer wrongly split
+ /// from their combining characters. If you use a previous version number,
+ /// you get the exact broken behavior for backwards compatibility.</description></item>
+ /// <item><description> As of 3.1, StandardTokenizer implements Unicode text segmentation.
+ /// If you use a previous version number, you get the exact behavior of
+ /// <see cref="ClassicTokenizer"/> for backwards compatibility.</description></item>
+ /// </list>
+ /// </para>
+ /// </summary>
+
+ public sealed class StandardTokenizer : Tokenizer
+ {
+ /// <summary>
+ /// A private instance of the JFlex-constructed scanner </summary>
+ private StandardTokenizerInterface scanner;
+
+ // Integer token type ids. Each value is also the index of the matching
+ // display name in TOKEN_TYPES (IncrementToken looks the name up by id).
+ /// <summary> Token type id for <c>&lt;ALPHANUM&gt;</c>. </summary>
+ public const int ALPHANUM = 0;
+ /// <summary> Token type id for <c>&lt;APOSTROPHE&gt;</c>; deprecated (3.1). </summary>
+ [Obsolete("(3.1)")]
+ public const int APOSTROPHE = 1;
+ /// <summary> Token type id for <c>&lt;ACRONYM&gt;</c>; deprecated (3.1). </summary>
+ [Obsolete("(3.1)")]
+ public const int ACRONYM = 2;
+ /// <summary> Token type id for <c>&lt;COMPANY&gt;</c>; deprecated (3.1). </summary>
+ [Obsolete("(3.1)")]
+ public const int COMPANY = 3;
+ /// <summary> Token type id for <c>&lt;EMAIL&gt;</c>. </summary>
+ public const int EMAIL = 4;
+ /// <summary> Token type id for <c>&lt;HOST&gt;</c>; deprecated (3.1). </summary>
+ [Obsolete("(3.1)")]
+ public const int HOST = 5;
+ /// <summary> Token type id for <c>&lt;NUM&gt;</c>. </summary>
+ public const int NUM = 6;
+ /// <summary> Token type id for <c>&lt;CJ&gt;</c>; deprecated (3.1). </summary>
+ [Obsolete("(3.1)")]
+ public const int CJ = 7;
+
+ /// <summary>
+ /// Token type id for <c>&lt;ACRONYM_DEP&gt;</c>; deprecated (3.1). IncrementToken
+ /// reports tokens of this type as <see cref="HOST"/> and drops their last character. </summary>
+ [Obsolete("(3.1)")]
+ public const int ACRONYM_DEP = 8;
+
+ /// <summary> Token type id for <c>&lt;SOUTHEAST_ASIAN&gt;</c>. </summary>
+ public const int SOUTHEAST_ASIAN = 9;
+ /// <summary> Token type id for <c>&lt;IDEOGRAPHIC&gt;</c>. </summary>
+ public const int IDEOGRAPHIC = 10;
+ /// <summary> Token type id for <c>&lt;HIRAGANA&gt;</c>. </summary>
+ public const int HIRAGANA = 11;
+ /// <summary> Token type id for <c>&lt;KATAKANA&gt;</c>. </summary>
+ public const int KATAKANA = 12;
+ /// <summary> Token type id for <c>&lt;HANGUL&gt;</c>. </summary>
+ public const int HANGUL = 13;
+
+ /// <summary>
+ /// String token types that correspond to token type int constants </summary>
+ public static readonly string[] TOKEN_TYPES = { "<ALPHANUM>", "<APOSTROPHE>", "<ACRONYM>", "<COMPANY>", "<EMAIL>", "<HOST>", "<NUM>", "<CJ>", "<ACRONYM_DEP>", "<SOUTHEAST_ASIAN>", "<IDEOGRAPHIC>", "<HIRAGANA>", "<KATAKANA>", "<HANGUL>" };
+
+ /// <summary>
+ /// Number of too-long tokens skipped since the last emitted token. It is added to
+ /// the position increment of the next token (and of the end state in End). </summary>
+ private int skippedPositions;
+
+ /// <summary>
+ /// Longest token length that is still emitted; starts at
+ /// StandardAnalyzer.DEFAULT_MAX_TOKEN_LENGTH. </summary>
+ private int maxTokenLength = StandardAnalyzer.DEFAULT_MAX_TOKEN_LENGTH;
+
+ /// <summary>
+ /// Gets or sets the maximum allowed token length. Tokens longer than this
+ /// value are skipped instead of being emitted.
+ /// </summary>
+ /// <exception cref="System.ArgumentException">The assigned value is less than 1.</exception>
+ public int MaxTokenLength
+ {
+     get { return maxTokenLength; }
+     set
+     {
+         if (value >= 1)
+         {
+             maxTokenLength = value;
+             return;
+         }
+
+         throw new System.ArgumentException("maxTokenLength must be greater than zero");
+     }
+ }
+
+
+ /// <summary>
+ /// Builds a <see cref="StandardTokenizer"/> over the given reader: registers the
+ /// term, position-increment, offset and type attributes, then creates the JFlex
+ /// scanner that matches <paramref name="matchVersion"/> and attaches the input to it.
+ /// See http://issues.apache.org/jira/browse/LUCENE-1068
+ /// </summary>
+ /// <param name="matchVersion"> Compatibility version used to pick the scanner implementation </param>
+ /// <param name="input"> The input reader </param>
+ public StandardTokenizer(Version matchVersion, Reader input)
+     : base(input)
+ {
+     // Registration order is kept exactly as before.
+     this.termAtt = this.AddAttribute<ICharTermAttribute>();
+     this.posIncrAtt = this.AddAttribute<IPositionIncrementAttribute>();
+     this.offsetAtt = this.AddAttribute<IOffsetAttribute>();
+     this.typeAtt = this.AddAttribute<ITypeAttribute>();
+
+     this.Init(matchVersion);
+ }
+
+ /// <summary>
+ /// Creates a new StandardTokenizer with a given <see cref="AttributeFactory"/>.
+ /// </summary>
+ /// <param name="matchVersion"> Compatibility version used to pick the scanner implementation </param>
+ /// <param name="factory"> Factory passed to the base tokenizer for creating attribute instances </param>
+ /// <param name="input"> The input reader </param>
+ public StandardTokenizer(Version matchVersion, AttributeFactory factory, Reader input)
+     : base(factory, input)
+ {
+     // The attribute fields are readonly, so they can only be assigned in a
+     // constructor. Without these assignments they stay null and IncrementToken()
+     // / End() fail with a NullReferenceException for tokenizers built via this overload.
+     termAtt = AddAttribute<ICharTermAttribute>();
+     posIncrAtt = AddAttribute<IPositionIncrementAttribute>();
+     offsetAtt = AddAttribute<IOffsetAttribute>();
+     typeAtt = AddAttribute<ITypeAttribute>();
+
+     Init(matchVersion);
+ }
+
+ /// <summary>
+ /// Creates the scanner implementation that corresponds to
+ /// <paramref name="matchVersion"/> and attaches the current input to it.
+ /// Branches are tested from newest to oldest version; anything older than
+ /// LUCENE_31 falls back to <c>ClassicTokenizerImpl</c>.
+ /// </summary>
+ /// <param name="matchVersion"> Compatibility version requested by the caller </param>
+ private void Init(Version matchVersion)
+ {
+     // C# is case-sensitive: every branch uses OnOrAfter, not the Java-style onOrAfter.
+     if (matchVersion.OnOrAfter(Version.LUCENE_47))
+     {
+         this.scanner = new StandardTokenizerImpl(input);
+     }
+     else if (matchVersion.OnOrAfter(Version.LUCENE_40))
+     {
+         this.scanner = new StandardTokenizerImpl40(input);
+     }
+     else if (matchVersion.OnOrAfter(Version.LUCENE_34))
+     {
+         this.scanner = new StandardTokenizerImpl34(input);
+     }
+     else if (matchVersion.OnOrAfter(Version.LUCENE_31))
+     {
+         this.scanner = new StandardTokenizerImpl31(input);
+     }
+     else
+     {
+         this.scanner = new ClassicTokenizerImpl(input);
+     }
+ }
+
+ // this tokenizer generates four attributes:
+ // term, offset, positionIncrement and type
+ // (assigned once in the constructor, hence readonly)
+ private readonly ICharTermAttribute termAtt;
+ private readonly IOffsetAttribute offsetAtt;
+ private readonly IPositionIncrementAttribute posIncrAtt;
+ private readonly ITypeAttribute typeAtt;
+
+ /// <summary>
+ /// Advances the scanner to the next token whose length does not exceed the
+ /// configured maximum and fills the term, offset, position-increment and type
+ /// attributes for it. Longer tokens are skipped, but each skipped token still
+ /// adds one to the position increment of the next emitted token.
+ /// </summary>
+ /// <returns> <c>true</c> if a token was produced; <c>false</c> once the scanner reports YYEOF </returns>
+ public override bool IncrementToken()
+ {
+     ClearAttributes();
+     skippedPositions = 0;
+
+     while (true)
+     {
+         int tokenType = scanner.NextToken;
+
+         if (tokenType == StandardTokenizerInterface_Fields.YYEOF)
+         {
+             return false;
+         }
+
+         if (scanner.yylength() > maxTokenLength)
+         {
+             // When we skip a too-long term, we still increment the
+             // position increment
+             skippedPositions++;
+             continue;
+         }
+
+         posIncrAtt.PositionIncrement = skippedPositions + 1;
+         scanner.getText(termAtt);
+         int start = scanner.yychar();
+         // Use the .NET member names (CorrectOffset / Length), matching End() and
+         // the Length setter below; the Java-cased names do not resolve in C#.
+         offsetAtt.SetOffset(CorrectOffset(start), CorrectOffset(start + termAtt.Length));
+
+         // This 'if' should be removed in the next release. For now, it converts
+         // invalid acronyms to HOST. When removed, only the 'else' part should
+         // remain.
+         if (tokenType == StandardTokenizer.ACRONYM_DEP)
+         {
+             typeAtt.Type = StandardTokenizer.TOKEN_TYPES[StandardTokenizer.HOST];
+             termAtt.Length = termAtt.Length - 1; // remove extra '.'
+         }
+         else
+         {
+             typeAtt.Type = StandardTokenizer.TOKEN_TYPES[tokenType];
+         }
+
+         return true;
+     }
+ }
+
+ /// <summary>
+ /// Finishes the stream: after the base implementation runs, sets both offsets to
+ /// the corrected end of the scanned input and adds any tokens skipped since the
+ /// last emitted token to the position increment.
+ /// </summary>
+ public override void End()
+ {
+     base.End();
+
+     // final offset = corrected position just past the last scanned match
+     int scannedEnd = scanner.yychar() + scanner.yylength();
+     int finalOffset = CorrectOffset(scannedEnd);
+     offsetAtt.SetOffset(finalOffset, finalOffset);
+
+     // account for trailing skipped tokens
+     posIncrAtt.PositionIncrement += skippedPositions;
+ }
+
+ /// <summary>
+ /// Closes the tokenizer via the base implementation, then resets the scanner
+ /// against the current <c>input</c> field.
+ /// </summary>
+ public override void Close()
+ {
+     base.Close();
+     this.scanner.yyreset(this.input);
+ }
+
+ /// <summary>
+ /// Resets the tokenizer via the base implementation, resets the scanner against
+ /// the current <c>input</c> field and clears the skipped-token counter.
+ /// </summary>
+ public override void Reset()
+ {
+     base.Reset();
+     this.scanner.yyreset(this.input);
+     this.skippedPositions = 0;
+ }
+ }
}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/39ec9d8d/src/Lucene.Net.Analysis.Common/Analysis/Standard/StandardTokenizerFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Standard/StandardTokenizerFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Standard/StandardTokenizerFactory.cs
index f1fc9da..0b6bbe6 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Standard/StandardTokenizerFactory.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Standard/StandardTokenizerFactory.cs
@@ -1,4 +1,5 @@
using System.Collections.Generic;
+using Lucene.Net.Analysis.Standard;
using TokenizerFactory = Lucene.Net.Analysis.Util.TokenizerFactory;
namespace org.apache.lucene.analysis.standard
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/39ec9d8d/src/Lucene.Net.Analysis.Common/Analysis/Standard/StandardTokenizerImpl.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Standard/StandardTokenizerImpl.cs b/src/Lucene.Net.Analysis.Common/Analysis/Standard/StandardTokenizerImpl.cs
index c92b86e..86ba884 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Standard/StandardTokenizerImpl.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Standard/StandardTokenizerImpl.cs
@@ -1,6 +1,6 @@
using System;
-
/* The following code was generated by JFlex 1.5.1 */
+using Lucene.Net.Analysis.Standard;
namespace org.apache.lucene.analysis.standard
{
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/39ec9d8d/src/Lucene.Net.Analysis.Common/Analysis/Standard/UAX29URLEmailAnalyzer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Standard/UAX29URLEmailAnalyzer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Standard/UAX29URLEmailAnalyzer.cs
index fd546ce..628ca23 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Standard/UAX29URLEmailAnalyzer.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Standard/UAX29URLEmailAnalyzer.cs
@@ -1,4 +1,5 @@
using Lucene.Net.Analysis.Core;
+using Lucene.Net.Analysis.Standard;
using Lucene.Net.Analysis.Util;
using StopwordAnalyzerBase = Lucene.Net.Analysis.Util.StopwordAnalyzerBase;
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/39ec9d8d/src/Lucene.Net.Analysis.Common/Analysis/Standard/UAX29URLEmailTokenizer.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Standard/UAX29URLEmailTokenizer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Standard/UAX29URLEmailTokenizer.cs
index 83ac99c..097df6a 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Standard/UAX29URLEmailTokenizer.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Standard/UAX29URLEmailTokenizer.cs
@@ -1,4 +1,6 @@
-namespace org.apache.lucene.analysis.standard
+using Lucene.Net.Analysis.Standard;
+
+namespace org.apache.lucene.analysis.standard
{
/*
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/39ec9d8d/src/Lucene.Net.Analysis.Common/Analysis/Standard/UAX29URLEmailTokenizerFactory.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Standard/UAX29URLEmailTokenizerFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Standard/UAX29URLEmailTokenizerFactory.cs
index 350fdbb..9cd3d30 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Standard/UAX29URLEmailTokenizerFactory.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Standard/UAX29URLEmailTokenizerFactory.cs
@@ -1,4 +1,5 @@
using System.Collections.Generic;
+using Lucene.Net.Analysis.Standard;
using TokenizerFactory = Lucene.Net.Analysis.Util.TokenizerFactory;
namespace org.apache.lucene.analysis.standard