You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucenenet.apache.org by cc...@apache.org on 2013/04/03 19:40:33 UTC
[50/51] [partial] Mass convert mixed tabs to spaces
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/62f018ab/src/contrib/Analyzers/De/GermanStemmer.cs
----------------------------------------------------------------------
diff --git a/src/contrib/Analyzers/De/GermanStemmer.cs b/src/contrib/Analyzers/De/GermanStemmer.cs
index d94d604..4dc80e3 100644
--- a/src/contrib/Analyzers/De/GermanStemmer.cs
+++ b/src/contrib/Analyzers/De/GermanStemmer.cs
@@ -1,4 +1,4 @@
-/*
+/*
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
@@ -26,167 +26,167 @@ using System.Collections;
namespace Lucene.Net.Analysis.De
{
- /// <summary>
- /// A stemmer for German words. The algorithm is based on the report
- /// "A Fast and Simple Stemming Algorithm for German Words" by Jörg
- /// Caumanns (joerg.caumanns@isst.fhg.de).
- /// </summary>
- public class GermanStemmer
- {
- /// <summary>
- /// Buffer for the terms while stemming them.
- /// </summary>
- private StringBuilder sb = new StringBuilder();
+ /// <summary>
+ /// A stemmer for German words. The algorithm is based on the report
+ /// "A Fast and Simple Stemming Algorithm for German Words" by Jörg
+ /// Caumanns (joerg.caumanns@isst.fhg.de).
+ /// </summary>
+ public class GermanStemmer
+ {
+ /// <summary>
+ /// Buffer for the terms while stemming them.
+ /// </summary>
+ private StringBuilder sb = new StringBuilder();
- /// <summary>
- /// Amount of characters that are removed with <tt>Substitute()</tt> while stemming.
- /// </summary>
- protected int substCount = 0;
+ /// <summary>
+ /// Amount of characters that are removed with <tt>Substitute()</tt> while stemming.
+ /// </summary>
+ protected int substCount = 0;
- /// <summary>
- /// Stemms the given term to an unique <tt>discriminator</tt>.
- /// </summary>
- /// <param name="term">The term that should be stemmed.</param>
- /// <returns>Discriminator for <tt>term</tt></returns>
- internal String Stem( String term )
- {
- // Use lowercase for medium stemming.
- term = term.ToLower();
- if ( !IsStemmable( term ) )
- return term;
- // Reset the StringBuilder.
- sb.Remove(0, sb.Length);
- sb.Insert(0, term);
- // Stemming starts here...
- Substitute( sb );
- Strip( sb );
- Optimize( sb );
- Resubstitute( sb );
- RemoveParticleDenotion( sb );
- return sb.ToString();
- }
+ /// <summary>
+ /// Stemms the given term to an unique <tt>discriminator</tt>.
+ /// </summary>
+ /// <param name="term">The term that should be stemmed.</param>
+ /// <returns>Discriminator for <tt>term</tt></returns>
+ internal String Stem( String term )
+ {
+ // Use lowercase for medium stemming.
+ term = term.ToLower();
+ if ( !IsStemmable( term ) )
+ return term;
+ // Reset the StringBuilder.
+ sb.Remove(0, sb.Length);
+ sb.Insert(0, term);
+ // Stemming starts here...
+ Substitute( sb );
+ Strip( sb );
+ Optimize( sb );
+ Resubstitute( sb );
+ RemoveParticleDenotion( sb );
+ return sb.ToString();
+ }
- /// <summary>
- /// Checks if a term could be stemmed.
- /// </summary>
- /// <param name="term"></param>
- /// <returns>true if, and only if, the given term consists in letters.</returns>
- private bool IsStemmable( String term )
- {
- for ( int c = 0; c < term.Length; c++ )
- {
- if ( !Char.IsLetter(term[c])) return false;
- }
- return true;
- }
+ /// <summary>
+ /// Checks if a term could be stemmed.
+ /// </summary>
+ /// <param name="term"></param>
+ /// <returns>true if, and only if, the given term consists in letters.</returns>
+ private bool IsStemmable( String term )
+ {
+ for ( int c = 0; c < term.Length; c++ )
+ {
+ if ( !Char.IsLetter(term[c])) return false;
+ }
+ return true;
+ }
- /// <summary>
- /// Suffix stripping (stemming) on the current term. The stripping is reduced
- /// to the seven "base" suffixes "e", "s", "n", "t", "em", "er" and * "nd",
- /// from which all regular suffixes are build of. The simplification causes
- /// some overstemming, and way more irregular stems, but still provides unique.
- /// discriminators in the most of those cases.
- /// The algorithm is context free, except of the length restrictions.
- /// </summary>
- /// <param name="buffer"></param>
- private void Strip( StringBuilder buffer )
- {
- bool doMore = true;
- while ( doMore && buffer.Length > 3 )
- {
- if ( ( buffer.Length + substCount > 5 ) &&
- buffer.ToString().Substring(buffer.Length - 2, 2).Equals( "nd" ) )
- {
- buffer.Remove( buffer.Length - 2, 2 );
- }
- else if ( ( buffer.Length + substCount > 4 ) &&
- buffer.ToString().Substring( buffer.Length - 2, 2).Equals( "em" ) )
- {
- buffer.Remove( buffer.Length - 2, 2 );
- }
- else if ( ( buffer.Length + substCount > 4 ) &&
- buffer.ToString().Substring( buffer.Length - 2, 2).Equals( "er" ) )
- {
- buffer.Remove( buffer.Length - 2, 2 );
- }
- else if ( buffer[buffer.Length - 1] == 'e' )
- {
- buffer.Remove(buffer.Length - 1, 1);
- }
- else if ( buffer[buffer.Length - 1] == 's' )
- {
- buffer.Remove(buffer.Length - 1, 1);
- }
- else if ( buffer[buffer.Length - 1] == 'n' )
- {
- buffer.Remove(buffer.Length - 1, 1);
- }
- // "t" occurs only as suffix of verbs.
- else if ( buffer[buffer.Length - 1] == 't')
- {
- buffer.Remove(buffer.Length - 1, 1);
- }
- else
- {
- doMore = false;
- }
- }
- }
+ /// <summary>
+ /// Suffix stripping (stemming) on the current term. The stripping is reduced
+ /// to the seven "base" suffixes "e", "s", "n", "t", "em", "er" and * "nd",
+ /// from which all regular suffixes are build of. The simplification causes
+ /// some overstemming, and way more irregular stems, but still provides unique.
+ /// discriminators in the most of those cases.
+ /// The algorithm is context free, except of the length restrictions.
+ /// </summary>
+ /// <param name="buffer"></param>
+ private void Strip( StringBuilder buffer )
+ {
+ bool doMore = true;
+ while ( doMore && buffer.Length > 3 )
+ {
+ if ( ( buffer.Length + substCount > 5 ) &&
+ buffer.ToString().Substring(buffer.Length - 2, 2).Equals( "nd" ) )
+ {
+ buffer.Remove( buffer.Length - 2, 2 );
+ }
+ else if ( ( buffer.Length + substCount > 4 ) &&
+ buffer.ToString().Substring( buffer.Length - 2, 2).Equals( "em" ) )
+ {
+ buffer.Remove( buffer.Length - 2, 2 );
+ }
+ else if ( ( buffer.Length + substCount > 4 ) &&
+ buffer.ToString().Substring( buffer.Length - 2, 2).Equals( "er" ) )
+ {
+ buffer.Remove( buffer.Length - 2, 2 );
+ }
+ else if ( buffer[buffer.Length - 1] == 'e' )
+ {
+ buffer.Remove(buffer.Length - 1, 1);
+ }
+ else if ( buffer[buffer.Length - 1] == 's' )
+ {
+ buffer.Remove(buffer.Length - 1, 1);
+ }
+ else if ( buffer[buffer.Length - 1] == 'n' )
+ {
+ buffer.Remove(buffer.Length - 1, 1);
+ }
+ // "t" occurs only as suffix of verbs.
+ else if ( buffer[buffer.Length - 1] == 't')
+ {
+ buffer.Remove(buffer.Length - 1, 1);
+ }
+ else
+ {
+ doMore = false;
+ }
+ }
+ }
- /// <summary>
- /// Does some optimizations on the term. This optimisations are contextual.
- /// </summary>
- /// <param name="buffer"></param>
- private void Optimize( StringBuilder buffer )
- {
- // Additional step for female plurals of professions and inhabitants.
- if ( buffer.Length > 5 && buffer.ToString().Substring(buffer.Length - 5, 5).Equals( "erin*" ))
- {
- buffer.Remove(buffer.Length - 1, 1);
- Strip(buffer);
- }
- // Additional step for irregular plural nouns like "Matrizen -> Matrix".
- if ( buffer[buffer.Length - 1] == ('z') )
- {
- buffer[buffer.Length - 1] = 'x';
- }
- }
+ /// <summary>
+ /// Does some optimizations on the term. This optimisations are contextual.
+ /// </summary>
+ /// <param name="buffer"></param>
+ private void Optimize( StringBuilder buffer )
+ {
+ // Additional step for female plurals of professions and inhabitants.
+ if ( buffer.Length > 5 && buffer.ToString().Substring(buffer.Length - 5, 5).Equals( "erin*" ))
+ {
+ buffer.Remove(buffer.Length - 1, 1);
+ Strip(buffer);
+ }
+ // Additional step for irregular plural nouns like "Matrizen -> Matrix".
+ if ( buffer[buffer.Length - 1] == ('z') )
+ {
+ buffer[buffer.Length - 1] = 'x';
+ }
+ }
- /// <summary>
- /// Removes a particle denotion ("ge") from a term.
- /// </summary>
- /// <param name="buffer"></param>
- private void RemoveParticleDenotion( StringBuilder buffer )
- {
- if ( buffer.Length > 4 )
- {
- for ( int c = 0; c < buffer.Length - 3; c++ )
- {
- if ( buffer.ToString().Substring( c, 4 ).Equals( "gege" ) )
- {
- buffer.Remove(c, 2);
- return;
- }
- }
- }
- }
+ /// <summary>
+ /// Removes a particle denotion ("ge") from a term.
+ /// </summary>
+ /// <param name="buffer"></param>
+ private void RemoveParticleDenotion( StringBuilder buffer )
+ {
+ if ( buffer.Length > 4 )
+ {
+ for ( int c = 0; c < buffer.Length - 3; c++ )
+ {
+ if ( buffer.ToString().Substring( c, 4 ).Equals( "gege" ) )
+ {
+ buffer.Remove(c, 2);
+ return;
+ }
+ }
+ }
+ }
- /// <summary>
- /// Do some substitutions for the term to reduce overstemming:
- ///
- /// - Substitute Umlauts with their corresponding vowel: äöü -> aou,
+ /// <summary>
+ /// Do some substitutions for the term to reduce overstemming:
+ ///
+ /// - Substitute Umlauts with their corresponding vowel: äöü -> aou,
/// "ß" is substituted by "ss"
- /// - Substitute a second char of a pair of equal characters with
- /// an asterisk: ?? -> ?*
- /// - Substitute some common character combinations with a token:
+ /// - Substitute a second char of a pair of equal characters with
+ /// an asterisk: ?? -> ?*
+ /// - Substitute some common character combinations with a token:
/// sch/ch/ei/ie/ig/st -> $/§/%/&/#/!
- /// </summary>
- protected virtual void Substitute( StringBuilder buffer )
- {
- substCount = 0;
- for ( int c = 0; c < buffer.Length; c++ )
- {
- // Replace the second char of a pair of the equal characters with an asterisk
+ /// </summary>
+ protected virtual void Substitute( StringBuilder buffer )
+ {
+ substCount = 0;
+ for ( int c = 0; c < buffer.Length; c++ )
+ {
+ // Replace the second char of a pair of the equal characters with an asterisk
if (c > 0 && buffer[c] == buffer[c - 1])
{
buffer[c] = '*';
@@ -212,97 +212,97 @@ namespace Lucene.Net.Analysis.De
substCount++;
}
- // Take care that at least one character is left left side from the current one
- if ( c < buffer.Length - 1 )
- {
- // Masking several common character combinations with an token
- if ( ( c < buffer.Length - 2 ) && buffer[c] == 's' &&
- buffer[c + 1] == 'c' && buffer[c + 2] == 'h' )
- {
- buffer[c] = '$';
- buffer.Remove(c + 1, 2);
- substCount =+ 2;
- }
- else if ( buffer[c] == 'c' && buffer[c + 1] == 'h' )
- {
- buffer[c] = '§';
- buffer.Remove(c + 1, 1);
- substCount++;
- }
- else if ( buffer[c] == 'e' && buffer[c + 1] == 'i' )
- {
- buffer[c] = '%';
- buffer.Remove(c + 1, 1);
- substCount++;
- }
- else if ( buffer[c] == 'i' && buffer[c + 1] == 'e' )
- {
- buffer[c] = '&';
- buffer.Remove(c + 1, 1);
- substCount++;
- }
- else if ( buffer[c] == 'i' && buffer[c + 1] == 'g' )
- {
- buffer[c] = '#';
- buffer.Remove(c + 1, 1);
- substCount++;
- }
- else if ( buffer[c] == 's' && buffer[c + 1] == 't' )
- {
- buffer[c] = '!';
- buffer.Remove(c + 1, 1);
- substCount++;
- }
- }
- }
- }
+ // Take care that at least one character is left left side from the current one
+ if ( c < buffer.Length - 1 )
+ {
+ // Masking several common character combinations with an token
+ if ( ( c < buffer.Length - 2 ) && buffer[c] == 's' &&
+ buffer[c + 1] == 'c' && buffer[c + 2] == 'h' )
+ {
+ buffer[c] = '$';
+ buffer.Remove(c + 1, 2);
+ substCount =+ 2;
+ }
+ else if ( buffer[c] == 'c' && buffer[c + 1] == 'h' )
+ {
+ buffer[c] = '§';
+ buffer.Remove(c + 1, 1);
+ substCount++;
+ }
+ else if ( buffer[c] == 'e' && buffer[c + 1] == 'i' )
+ {
+ buffer[c] = '%';
+ buffer.Remove(c + 1, 1);
+ substCount++;
+ }
+ else if ( buffer[c] == 'i' && buffer[c + 1] == 'e' )
+ {
+ buffer[c] = '&';
+ buffer.Remove(c + 1, 1);
+ substCount++;
+ }
+ else if ( buffer[c] == 'i' && buffer[c + 1] == 'g' )
+ {
+ buffer[c] = '#';
+ buffer.Remove(c + 1, 1);
+ substCount++;
+ }
+ else if ( buffer[c] == 's' && buffer[c + 1] == 't' )
+ {
+ buffer[c] = '!';
+ buffer.Remove(c + 1, 1);
+ substCount++;
+ }
+ }
+ }
+ }
- /// <summary>
- /// Undoes the changes made by Substitute(). That are character pairs and
- /// character combinations. Umlauts will remain as their corresponding vowel,
- /// as "ß" remains as "ss".
- /// </summary>
- /// <param name="buffer"></param>
- private void Resubstitute( StringBuilder buffer )
- {
- for ( int c = 0; c < buffer.Length; c++ )
- {
- if ( buffer[c] == '*' )
- {
- char x = buffer[c - 1];
- buffer[c] = x;
- }
- else if ( buffer[c] == '$' )
- {
- buffer[c] = 's';
- buffer.Insert( c + 1, new char[]{'c', 'h'}, 0, 2);
- }
- else if ( buffer[c] == '§' )
- {
- buffer[c] = 'c';
- buffer.Insert( c + 1, 'h' );
- }
- else if ( buffer[c] == '%' )
- {
- buffer[c] = 'e';
- buffer.Insert( c + 1, 'i' );
- }
- else if ( buffer[c] == '&' )
- {
- buffer[c] = 'i';
- buffer.Insert( c + 1, 'e' );
- }
- else if ( buffer[c] == '#' )
- {
- buffer[c] = 'i';
- buffer.Insert( c + 1, 'g' );
- }
- else if ( buffer[c] == '!' )
- {
- buffer[c] = 's';
- buffer.Insert( c + 1, 't' );
- }
- }
- }
- }
+ /// <summary>
+ /// Undoes the changes made by Substitute(). That are character pairs and
+ /// character combinations. Umlauts will remain as their corresponding vowel,
+ /// as "ß" remains as "ss".
+ /// </summary>
+ /// <param name="buffer"></param>
+ private void Resubstitute( StringBuilder buffer )
+ {
+ for ( int c = 0; c < buffer.Length; c++ )
+ {
+ if ( buffer[c] == '*' )
+ {
+ char x = buffer[c - 1];
+ buffer[c] = x;
+ }
+ else if ( buffer[c] == '$' )
+ {
+ buffer[c] = 's';
+ buffer.Insert( c + 1, new char[]{'c', 'h'}, 0, 2);
+ }
+ else if ( buffer[c] == '§' )
+ {
+ buffer[c] = 'c';
+ buffer.Insert( c + 1, 'h' );
+ }
+ else if ( buffer[c] == '%' )
+ {
+ buffer[c] = 'e';
+ buffer.Insert( c + 1, 'i' );
+ }
+ else if ( buffer[c] == '&' )
+ {
+ buffer[c] = 'i';
+ buffer.Insert( c + 1, 'e' );
+ }
+ else if ( buffer[c] == '#' )
+ {
+ buffer[c] = 'i';
+ buffer.Insert( c + 1, 'g' );
+ }
+ else if ( buffer[c] == '!' )
+ {
+ buffer[c] = 's';
+ buffer.Insert( c + 1, 't' );
+ }
+ }
+ }
+ }
}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/62f018ab/src/contrib/Analyzers/El/GreekAnalyzer.cs
----------------------------------------------------------------------
diff --git a/src/contrib/Analyzers/El/GreekAnalyzer.cs b/src/contrib/Analyzers/El/GreekAnalyzer.cs
index 1242ec7..354bc0f 100644
--- a/src/contrib/Analyzers/El/GreekAnalyzer.cs
+++ b/src/contrib/Analyzers/El/GreekAnalyzer.cs
@@ -1,4 +1,4 @@
-/*
+/*
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/62f018ab/src/contrib/Analyzers/Filters/ChainedFilter.cs
----------------------------------------------------------------------
diff --git a/src/contrib/Analyzers/Filters/ChainedFilter.cs b/src/contrib/Analyzers/Filters/ChainedFilter.cs
index 8bc2ffd..0fa4e69 100644
--- a/src/contrib/Analyzers/Filters/ChainedFilter.cs
+++ b/src/contrib/Analyzers/Filters/ChainedFilter.cs
@@ -1,4 +1,4 @@
-/**
+/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/62f018ab/src/contrib/Analyzers/Fr/ElisionFilter.cs
----------------------------------------------------------------------
diff --git a/src/contrib/Analyzers/Fr/ElisionFilter.cs b/src/contrib/Analyzers/Fr/ElisionFilter.cs
index cf2d2ae..630b29d 100644
--- a/src/contrib/Analyzers/Fr/ElisionFilter.cs
+++ b/src/contrib/Analyzers/Fr/ElisionFilter.cs
@@ -1,4 +1,4 @@
-/*
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/62f018ab/src/contrib/Analyzers/Fr/FrenchAnalyzer.cs
----------------------------------------------------------------------
diff --git a/src/contrib/Analyzers/Fr/FrenchAnalyzer.cs b/src/contrib/Analyzers/Fr/FrenchAnalyzer.cs
index 9bdc94f..43bd1f9 100644
--- a/src/contrib/Analyzers/Fr/FrenchAnalyzer.cs
+++ b/src/contrib/Analyzers/Fr/FrenchAnalyzer.cs
@@ -1,4 +1,4 @@
-/*
+/*
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/62f018ab/src/contrib/Analyzers/Fr/FrenchStemmer.cs
----------------------------------------------------------------------
diff --git a/src/contrib/Analyzers/Fr/FrenchStemmer.cs b/src/contrib/Analyzers/Fr/FrenchStemmer.cs
index e2decb5..2dc3a1c 100644
--- a/src/contrib/Analyzers/Fr/FrenchStemmer.cs
+++ b/src/contrib/Analyzers/Fr/FrenchStemmer.cs
@@ -1,4 +1,4 @@
-/*
+/*
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
@@ -47,42 +47,42 @@ public class FrenchStemmer {
*/
private StringBuilder tb = new StringBuilder();
- /*
- * Region R0 is equal to the whole buffer
- */
- private String R0;
-
- /*
- * Region RV
- * "If the word begins with two vowels, RV is the region after the third letter,
- * otherwise the region after the first vowel not at the beginning of the word,
- * or the end of the word if these positions cannot be found."
- */
+ /*
+ * Region R0 is equal to the whole buffer
+ */
+ private String R0;
+
+ /*
+ * Region RV
+ * "If the word begins with two vowels, RV is the region after the third letter,
+ * otherwise the region after the first vowel not at the beginning of the word,
+ * or the end of the word if these positions cannot be found."
+ */
private String RV;
- /*
- * Region R1
- * "R1 is the region after the first non-vowel following a vowel
- * or is the null region at the end of the word if there is no such non-vowel"
- */
+ /*
+ * Region R1
+ * "R1 is the region after the first non-vowel following a vowel
+ * or is the null region at the end of the word if there is no such non-vowel"
+ */
private String R1;
- /*
- * Region R2
- * "R2 is the region after the first non-vowel in R1 following a vowel
- * or is the null region at the end of the word if there is no such non-vowel"
- */
+ /*
+ * Region R2
+ * "R2 is the region after the first non-vowel in R1 following a vowel
+ * or is the null region at the end of the word if there is no such non-vowel"
+ */
private String R2;
- /*
- * Set to true if we need to perform step 2
- */
+ /*
+ * Set to true if we need to perform step 2
+ */
private bool suite;
- /*
- * Set to true if the buffer was modified
- */
+ /*
+ * Set to true if the buffer was modified
+ */
private bool modified;
@@ -93,599 +93,599 @@ public class FrenchStemmer {
* @return java.lang.String Discriminator for <tt>term</tt>
*/
protected internal String Stem( String term ) {
- if ( !IsStemmable( term ) ) {
- return term;
- }
+ if ( !IsStemmable( term ) ) {
+ return term;
+ }
+
+ // Use lowercase for medium stemming.
+ term = term.ToLower();
- // Use lowercase for medium stemming.
- term = term.ToLower();
+ // Reset the StringBuilder.
+ sb.Length = 0;
+ sb.Insert( 0, term );
- // Reset the StringBuilder.
- sb.Length = 0;
- sb.Insert( 0, term );
+ // reset the bools
+ modified = false;
+ suite = false;
- // reset the bools
- modified = false;
- suite = false;
+ sb = TreatVowels( sb );
- sb = TreatVowels( sb );
+ SetStrings();
- SetStrings();
+ Step1();
- Step1();
+ if (!modified || suite)
+ {
+ if (RV != null)
+ {
+ suite = Step2A();
+ if (!suite)
+ Step2B();
+ }
+ }
- if (!modified || suite)
- {
- if (RV != null)
- {
- suite = Step2A();
- if (!suite)
- Step2B();
- }
- }
+ if (modified || suite)
+ Step3();
+ else
+ Step4();
- if (modified || suite)
- Step3();
- else
- Step4();
+ Step5();
- Step5();
+ Step6();
- Step6();
+ return sb.ToString();
+ }
- return sb.ToString();
+ /*
+ * Sets the search region Strings<br>
+ * it needs to be done each time the buffer was modified
+ */
+ private void SetStrings() {
+ // set the strings
+ R0 = sb.ToString();
+ RV = RetrieveRV( sb );
+ R1 = RetrieveR( sb );
+ if ( R1 != null )
+ {
+ tb.Length = 0;
+ tb.Insert( 0, R1 );
+ R2 = RetrieveR( tb );
+ }
+ else
+ R2 = null;
}
- /*
- * Sets the search region Strings<br>
- * it needs to be done each time the buffer was modified
- */
- private void SetStrings() {
- // set the strings
- R0 = sb.ToString();
- RV = RetrieveRV( sb );
- R1 = RetrieveR( sb );
- if ( R1 != null )
- {
- tb.Length = 0;
- tb.Insert( 0, R1 );
- R2 = RetrieveR( tb );
- }
- else
- R2 = null;
- }
-
- /*
- * First step of the Porter Algorithm<br>
- * refer to http://snowball.sourceforge.net/french/stemmer.html for an explanation
- */
- private void Step1( ) {
- String[] suffix = { "ances", "iqUes", "ismes", "ables", "istes", "ance", "iqUe", "isme", "able", "iste" };
- DeleteFrom( R2, suffix );
-
- ReplaceFrom( R2, new String[] { "logies", "logie" }, "log" );
- ReplaceFrom( R2, new String[] { "usions", "utions", "usion", "ution" }, "u" );
- ReplaceFrom( R2, new String[] { "ences", "ence" }, "ent" );
-
- String[] search = { "atrices", "ateurs", "ations", "atrice", "ateur", "ation"};
- DeleteButSuffixFromElseReplace( R2, search, "ic", true, R0, "iqU" );
-
- DeleteButSuffixFromElseReplace( R2, new String[] { "ements", "ement" }, "eus", false, R0, "eux" );
- DeleteButSuffixFrom( R2, new String[] { "ements", "ement" }, "ativ", false );
- DeleteButSuffixFrom( R2, new String[] { "ements", "ement" }, "iv", false );
- DeleteButSuffixFrom( R2, new String[] { "ements", "ement" }, "abl", false );
- DeleteButSuffixFrom( R2, new String[] { "ements", "ement" }, "iqU", false );
-
- DeleteFromIfTestVowelBeforeIn( R1, new String[] { "issements", "issement" }, false, R0 );
- DeleteFrom( RV, new String[] { "ements", "ement" } );
+ /*
+ * First step of the Porter Algorithm<br>
+ * refer to http://snowball.sourceforge.net/french/stemmer.html for an explanation
+ */
+ private void Step1( ) {
+ String[] suffix = { "ances", "iqUes", "ismes", "ables", "istes", "ance", "iqUe", "isme", "able", "iste" };
+ DeleteFrom( R2, suffix );
+
+ ReplaceFrom( R2, new String[] { "logies", "logie" }, "log" );
+ ReplaceFrom( R2, new String[] { "usions", "utions", "usion", "ution" }, "u" );
+ ReplaceFrom( R2, new String[] { "ences", "ence" }, "ent" );
+
+ String[] search = { "atrices", "ateurs", "ations", "atrice", "ateur", "ation"};
+ DeleteButSuffixFromElseReplace( R2, search, "ic", true, R0, "iqU" );
+
+ DeleteButSuffixFromElseReplace( R2, new String[] { "ements", "ement" }, "eus", false, R0, "eux" );
+ DeleteButSuffixFrom( R2, new String[] { "ements", "ement" }, "ativ", false );
+ DeleteButSuffixFrom( R2, new String[] { "ements", "ement" }, "iv", false );
+ DeleteButSuffixFrom( R2, new String[] { "ements", "ement" }, "abl", false );
+ DeleteButSuffixFrom( R2, new String[] { "ements", "ement" }, "iqU", false );
+
+ DeleteFromIfTestVowelBeforeIn( R1, new String[] { "issements", "issement" }, false, R0 );
+ DeleteFrom( RV, new String[] { "ements", "ement" } );
DeleteButSuffixFromElseReplace(R2, new [] { "it\u00e9s", "it\u00e9" }, "abil", false, R0, "abl");
DeleteButSuffixFromElseReplace(R2, new [] { "it\u00e9s", "it\u00e9" }, "ic", false, R0, "iqU");
DeleteButSuffixFrom(R2, new [] { "it\u00e9s", "it\u00e9" }, "iv", true);
- String[] autre = { "ifs", "ives", "if", "ive" };
- DeleteButSuffixFromElseReplace( R2, autre, "icat", false, R0, "iqU" );
- DeleteButSuffixFromElseReplace( R2, autre, "at", true, R2, "iqU" );
-
- ReplaceFrom( R0, new String[] { "eaux" }, "eau" );
-
- ReplaceFrom( R1, new String[] { "aux" }, "al" );
-
- DeleteButSuffixFromElseReplace( R2, new String[] { "euses", "euse" }, "", true, R1, "eux" );
-
- DeleteFrom( R2, new String[] { "eux" } );
-
- // if one of the next steps is performed, we will need to perform step2a
- bool temp = false;
- temp = ReplaceFrom( RV, new String[] { "amment" }, "ant" );
- if (temp == true)
- suite = true;
- temp = ReplaceFrom( RV, new String[] { "emment" }, "ent" );
- if (temp == true)
- suite = true;
- temp = DeleteFromIfTestVowelBeforeIn( RV, new String[] { "ments", "ment" }, true, RV );
- if (temp == true)
- suite = true;
-
- }
-
- /*
- * Second step (A) of the Porter Algorithm<br>
- * Will be performed if nothing changed from the first step
- * or changed were done in the amment, emment, ments or ment suffixes<br>
- * refer to http://snowball.sourceforge.net/french/stemmer.html for an explanation
- *
- * @return bool - true if something changed in the StringBuilder
- */
- private bool Step2A() {
+ String[] autre = { "ifs", "ives", "if", "ive" };
+ DeleteButSuffixFromElseReplace( R2, autre, "icat", false, R0, "iqU" );
+ DeleteButSuffixFromElseReplace( R2, autre, "at", true, R2, "iqU" );
+
+ ReplaceFrom( R0, new String[] { "eaux" }, "eau" );
+
+ ReplaceFrom( R1, new String[] { "aux" }, "al" );
+
+ DeleteButSuffixFromElseReplace( R2, new String[] { "euses", "euse" }, "", true, R1, "eux" );
+
+ DeleteFrom( R2, new String[] { "eux" } );
+
+ // if one of the next steps is performed, we will need to perform step2a
+ bool temp = false;
+ temp = ReplaceFrom( RV, new String[] { "amment" }, "ant" );
+ if (temp == true)
+ suite = true;
+ temp = ReplaceFrom( RV, new String[] { "emment" }, "ent" );
+ if (temp == true)
+ suite = true;
+ temp = DeleteFromIfTestVowelBeforeIn( RV, new String[] { "ments", "ment" }, true, RV );
+ if (temp == true)
+ suite = true;
+
+ }
+
+ /*
+ * Second step (A) of the Porter Algorithm<br>
+ * Will be performed if nothing changed from the first step
+ * or changed were done in the amment, emment, ments or ment suffixes<br>
+ * refer to http://snowball.sourceforge.net/french/stemmer.html for an explanation
+ *
+ * @return bool - true if something changed in the StringBuilder
+ */
+ private bool Step2A() {
String[] search = { "\u00eemes", "\u00eetes", "iraIent", "irait", "irais", "irai", "iras", "ira",
- "irent", "iriez", "irez", "irions", "irons", "iront",
- "issaIent", "issais", "issantes", "issante", "issants", "issant",
- "issait", "issais", "issions", "issons", "issiez", "issez", "issent",
- "isses", "isse", "ir", "is", "\u00eet", "it", "ies", "ie", "i" };
- return DeleteFromIfTestVowelBeforeIn( RV, search, false, RV );
- }
-
- /*
- * Second step (B) of the Porter Algorithm<br>
- * Will be performed if step 2 A was performed unsuccessfully<br>
- * refer to http://snowball.sourceforge.net/french/stemmer.html for an explanation
- */
- private void Step2B() {
- String[] suffix = { "eraIent", "erais", "erait", "erai", "eras", "erions", "eriez",
- "erons", "eront","erez", "\u00e8rent", "era", "\u00e9es", "iez",
- "\u00e9e", "\u00e9s", "er", "ez", "\u00e9" };
- DeleteFrom( RV, suffix );
-
- String[] search = { "assions", "assiez", "assent", "asses", "asse", "aIent",
- "antes", "aIent", "Aient", "ante", "\u00e2mes", "\u00e2tes", "ants", "ant",
- "ait", "a\u00eet", "ais", "Ait", "A\u00eet", "Ais", "\u00e2t", "as", "ai", "Ai", "a" };
- DeleteButSuffixFrom( RV, search, "e", true );
-
- DeleteFrom( R2, new String[] { "ions" } );
- }
-
- /*
- * Third step of the Porter Algorithm<br>
- * refer to http://snowball.sourceforge.net/french/stemmer.html for an explanation
- */
- private void Step3() {
- if (sb.Length>0)
- {
- char ch = sb[ sb.Length -1];
- if (ch == 'Y')
- {
- sb[sb.Length -1] = 'i' ;
- SetStrings();
- }
+ "irent", "iriez", "irez", "irions", "irons", "iront",
+ "issaIent", "issais", "issantes", "issante", "issants", "issant",
+ "issait", "issais", "issions", "issons", "issiez", "issez", "issent",
+ "isses", "isse", "ir", "is", "\u00eet", "it", "ies", "ie", "i" };
+ return DeleteFromIfTestVowelBeforeIn( RV, search, false, RV );
+ }
+
+ /*
+ * Second step (B) of the Porter Algorithm<br>
+ * Will be performed if step 2 A was performed unsuccessfully<br>
+ * refer to http://snowball.sourceforge.net/french/stemmer.html for an explanation
+ */
+ private void Step2B() {
+ String[] suffix = { "eraIent", "erais", "erait", "erai", "eras", "erions", "eriez",
+ "erons", "eront","erez", "\u00e8rent", "era", "\u00e9es", "iez",
+ "\u00e9e", "\u00e9s", "er", "ez", "\u00e9" };
+ DeleteFrom( RV, suffix );
+
+ String[] search = { "assions", "assiez", "assent", "asses", "asse", "aIent",
+ "antes", "aIent", "Aient", "ante", "\u00e2mes", "\u00e2tes", "ants", "ant",
+ "ait", "a\u00eet", "ais", "Ait", "A\u00eet", "Ais", "\u00e2t", "as", "ai", "Ai", "a" };
+ DeleteButSuffixFrom( RV, search, "e", true );
+
+ DeleteFrom( R2, new String[] { "ions" } );
+ }
+
+ /*
+ * Third step of the Porter Algorithm<br>
+ * refer to http://snowball.sourceforge.net/french/stemmer.html for an explanation
+ */
+ private void Step3() {
+ if (sb.Length>0)
+ {
+ char ch = sb[ sb.Length -1];
+ if (ch == 'Y')
+ {
+ sb[sb.Length -1] = 'i' ;
+ SetStrings();
+ }
else if (ch == 'ç')
- {
- sb[sb.Length -1] = 'c';
- SetStrings();
- }
- }
- }
-
- /*
- * Fourth step of the Porter Algorithm<br>
- * refer to http://snowball.sourceforge.net/french/stemmer.html for an explanation
- */
- private void Step4() {
- if (sb.Length > 1)
- {
- char ch = sb[ sb.Length -1];
- if (ch == 's')
- {
- char b = sb[ sb.Length -2];
- if (b != 'a' && b != 'i' && b != 'o' && b != 'u' && b != 'è' && b != 's')
- {
- sb.Length = sb.Length - 1;
- SetStrings();
- }
- }
- }
- bool found = DeleteFromIfPrecededIn( R2, new String[] { "ion" }, RV, "s" );
- if (!found)
- found = DeleteFromIfPrecededIn( R2, new String[] { "ion" }, RV, "t" );
+ {
+ sb[sb.Length -1] = 'c';
+ SetStrings();
+ }
+ }
+ }
+
+ /*
+ * Fourth step of the Porter Algorithm<br>
+ * refer to http://snowball.sourceforge.net/french/stemmer.html for an explanation
+ */
+ private void Step4() {
+ if (sb.Length > 1)
+ {
+ char ch = sb[ sb.Length -1];
+ if (ch == 's')
+ {
+ char b = sb[ sb.Length -2];
+ if (b != 'a' && b != 'i' && b != 'o' && b != 'u' && b != 'è' && b != 's')
+ {
+ sb.Length = sb.Length - 1;
+ SetStrings();
+ }
+ }
+ }
+ bool found = DeleteFromIfPrecededIn( R2, new String[] { "ion" }, RV, "s" );
+ if (!found)
+ found = DeleteFromIfPrecededIn( R2, new String[] { "ion" }, RV, "t" );
ReplaceFrom(RV, new String[] { "I\u00e8re", "i\u00e8re", "Ier", "ier" }, "i");
- DeleteFrom( RV, new String[] { "e" } );
+ DeleteFrom( RV, new String[] { "e" } );
DeleteFromIfPrecededIn(RV, new String[] { "\u00eb" }, R0, "gu");
- }
-
- /*
- * Fifth step of the Porter Algorithm<br>
- * refer to http://snowball.sourceforge.net/french/stemmer.html for an explanation
- */
- private void Step5() {
- if (R0 != null)
- {
- if (R0.EndsWith("enn") || R0.EndsWith("onn") || R0.EndsWith("ett") || R0.EndsWith("ell") || R0.EndsWith("eill"))
- {
- sb.Length = sb.Length - 1;
- SetStrings();
- }
- }
- }
-
- /*
- * Sixth (and last!) step of the Porter Algorithm<br>
- * refer to http://snowball.sourceforge.net/french/stemmer.html for an explanation
- */
- private void Step6() {
- if (R0!=null && R0.Length>0)
- {
- bool seenVowel = false;
- bool seenConson = false;
- int pos = -1;
- for (int i = R0.Length-1; i > -1; i--)
- {
- char ch = R0[i] ;
- if (IsVowel(ch))
- {
- if (!seenVowel)
- {
- if (ch == 'é' || ch == 'è')
- {
- pos = i;
- break;
- }
- }
- seenVowel = true;
- }
- else
- {
- if (seenVowel)
- break;
- else
- seenConson = true;
- }
- }
- if (pos > -1 && seenConson && !seenVowel)
- sb[pos] = 'e';
- }
- }
-
- /*
- * Delete a suffix searched in zone "source" if zone "from" contains prefix + search string
- *
- * @param source java.lang.String - the primary source zone for search
- * @param search java.lang.String[] - the strings to search for suppression
- * @param from java.lang.String - the secondary source zone for search
- * @param prefix java.lang.String - the prefix to add to the search string to test
- * @return bool - true if modified
- */
- private bool DeleteFromIfPrecededIn( String source, String[] search, String from, String prefix ) {
- bool found = false;
- if (source!=null )
- {
- for (int i = 0; i < search.Length; i++) {
- if ( source.EndsWith( search[i] ))
- {
- if (from!=null && from.EndsWith( prefix + search[i] ))
- {
- sb.Length = sb.Length - search[i].Length;
- found = true;
- SetStrings();
- break;
- }
- }
- }
- }
- return found;
- }
-
- /*
- * Delete a suffix searched in zone "source" if the preceding letter is (or isn't) a vowel
- *
- * @param source java.lang.String - the primary source zone for search
- * @param search java.lang.String[] - the strings to search for suppression
- * @param vowel bool - true if we need a vowel before the search string
- * @param from java.lang.String - the secondary source zone for search (where vowel could be)
- * @return bool - true if modified
- */
- private bool DeleteFromIfTestVowelBeforeIn( String source, String[] search, bool vowel, String from ) {
- bool found = false;
- if (source!=null && from!=null)
- {
- for (int i = 0; i < search.Length; i++) {
- if ( source.EndsWith( search[i] ))
- {
- if ((search[i].Length + 1) <= from.Length)
- {
- bool test = IsVowel(sb[sb.Length -(search[i].Length+1)]);
- if (test == vowel)
- {
- sb.Length = sb.Length - search[i].Length;
- modified = true;
- found = true;
- SetStrings();
- break;
- }
- }
- }
- }
- }
- return found;
- }
-
- /*
- * Delete a suffix searched in zone "source" if preceded by the prefix
- *
- * @param source java.lang.String - the primary source zone for search
- * @param search java.lang.String[] - the strings to search for suppression
- * @param prefix java.lang.String - the prefix to add to the search string to test
- * @param without bool - true if it will be deleted even without prefix found
- */
- private void DeleteButSuffixFrom( String source, String[] search, String prefix, bool without ) {
- if (source!=null)
- {
- for (int i = 0; i < search.Length; i++) {
- if ( source.EndsWith( prefix + search[i] ))
- {
- sb.Length = sb.Length - (prefix.Length + search[i].Length);
- modified = true;
- SetStrings();
- break;
- }
- else if ( without && source.EndsWith( search[i] ))
- {
- sb.Length = sb.Length - search[i].Length;
- modified = true;
- SetStrings();
- break;
- }
- }
- }
- }
-
- /*
- * Delete a suffix searched in zone "source" if preceded by prefix<br>
- * or replace it with the replace string if preceded by the prefix in the zone "from"<br>
- * or delete the suffix if specified
- *
- * @param source java.lang.String - the primary source zone for search
- * @param search java.lang.String[] - the strings to search for suppression
- * @param prefix java.lang.String - the prefix to add to the search string to test
- * @param without bool - true if it will be deleted even without prefix found
- */
- private void DeleteButSuffixFromElseReplace( String source, String[] search, String prefix, bool without, String from, String replace ) {
- if (source!=null)
- {
- for (int i = 0; i < search.Length; i++) {
- if ( source.EndsWith( prefix + search[i] ))
- {
- sb.Length = sb.Length - (prefix.Length + search[i].Length);
- modified = true;
- SetStrings();
- break;
- }
- else if ( from!=null && from.EndsWith( prefix + search[i] ))
- {
+ }
+
+ /*
+ * Fifth step of the Porter Algorithm<br>
+ * refer to http://snowball.sourceforge.net/french/stemmer.html for an explanation
+ */
+ private void Step5() {
+ if (R0 != null)
+ {
+ if (R0.EndsWith("enn") || R0.EndsWith("onn") || R0.EndsWith("ett") || R0.EndsWith("ell") || R0.EndsWith("eill"))
+ {
+ sb.Length = sb.Length - 1;
+ SetStrings();
+ }
+ }
+ }
+
+ /*
+ * Sixth (and last!) step of the Porter Algorithm<br>
+ * refer to http://snowball.sourceforge.net/french/stemmer.html for an explanation
+ */
+ private void Step6() {
+ if (R0!=null && R0.Length>0)
+ {
+ bool seenVowel = false;
+ bool seenConson = false;
+ int pos = -1;
+ for (int i = R0.Length-1; i > -1; i--)
+ {
+ char ch = R0[i] ;
+ if (IsVowel(ch))
+ {
+ if (!seenVowel)
+ {
+ if (ch == 'é' || ch == 'è')
+ {
+ pos = i;
+ break;
+ }
+ }
+ seenVowel = true;
+ }
+ else
+ {
+ if (seenVowel)
+ break;
+ else
+ seenConson = true;
+ }
+ }
+ if (pos > -1 && seenConson && !seenVowel)
+ sb[pos] = 'e';
+ }
+ }
+
+ /*
+ * Delete a suffix searched in zone "source" if zone "from" contains prefix + search string
+ *
+ * @param source java.lang.String - the primary source zone for search
+ * @param search java.lang.String[] - the strings to search for suppression
+ * @param from java.lang.String - the secondary source zone for search
+ * @param prefix java.lang.String - the prefix to add to the search string to test
+ * @return bool - true if modified
+ */
+ private bool DeleteFromIfPrecededIn( String source, String[] search, String from, String prefix ) {
+ bool found = false;
+ if (source!=null )
+ {
+ for (int i = 0; i < search.Length; i++) {
+ if ( source.EndsWith( search[i] ))
+ {
+ if (from!=null && from.EndsWith( prefix + search[i] ))
+ {
+ sb.Length = sb.Length - search[i].Length;
+ found = true;
+ SetStrings();
+ break;
+ }
+ }
+ }
+ }
+ return found;
+ }
+
+ /*
+ * Delete a suffix searched in zone "source" if the preceding letter is (or isn't) a vowel
+ *
+ * @param source java.lang.String - the primary source zone for search
+ * @param search java.lang.String[] - the strings to search for suppression
+ * @param vowel bool - true if we need a vowel before the search string
+ * @param from java.lang.String - the secondary source zone for search (where vowel could be)
+ * @return bool - true if modified
+ */
+ private bool DeleteFromIfTestVowelBeforeIn( String source, String[] search, bool vowel, String from ) {
+ bool found = false;
+ if (source!=null && from!=null)
+ {
+ for (int i = 0; i < search.Length; i++) {
+ if ( source.EndsWith( search[i] ))
+ {
+ if ((search[i].Length + 1) <= from.Length)
+ {
+ bool test = IsVowel(sb[sb.Length -(search[i].Length+1)]);
+ if (test == vowel)
+ {
+ sb.Length = sb.Length - search[i].Length;
+ modified = true;
+ found = true;
+ SetStrings();
+ break;
+ }
+ }
+ }
+ }
+ }
+ return found;
+ }
+
+ /*
+ * Delete a suffix searched in zone "source" if preceded by the prefix
+ *
+ * @param source java.lang.String - the primary source zone for search
+ * @param search java.lang.String[] - the strings to search for suppression
+ * @param prefix java.lang.String - the prefix to add to the search string to test
+ * @param without bool - true if it will be deleted even without prefix found
+ */
+ private void DeleteButSuffixFrom( String source, String[] search, String prefix, bool without ) {
+ if (source!=null)
+ {
+ for (int i = 0; i < search.Length; i++) {
+ if ( source.EndsWith( prefix + search[i] ))
+ {
+ sb.Length = sb.Length - (prefix.Length + search[i].Length);
+ modified = true;
+ SetStrings();
+ break;
+ }
+ else if ( without && source.EndsWith( search[i] ))
+ {
+ sb.Length = sb.Length - search[i].Length;
+ modified = true;
+ SetStrings();
+ break;
+ }
+ }
+ }
+ }
+
+ /*
+ * Delete a suffix searched in zone "source" if preceded by prefix<br>
+ * or replace it with the replace string if preceded by the prefix in the zone "from"<br>
+ * or delete the suffix if specified
+ *
+ * @param source java.lang.String - the primary source zone for search
+ * @param search java.lang.String[] - the strings to search for suppression
+ * @param prefix java.lang.String - the prefix to add to the search string to test
+ * @param without bool - true if it will be deleted even without prefix found
+ */
+ private void DeleteButSuffixFromElseReplace( String source, String[] search, String prefix, bool without, String from, String replace ) {
+ if (source!=null)
+ {
+ for (int i = 0; i < search.Length; i++) {
+ if ( source.EndsWith( prefix + search[i] ))
+ {
+ sb.Length = sb.Length - (prefix.Length + search[i].Length);
+ modified = true;
+ SetStrings();
+ break;
+ }
+ else if ( from!=null && from.EndsWith( prefix + search[i] ))
+ {
// java equivalent of replace
- sb.Length = sb.Length - (prefix.Length + search[i].Length);
+ sb.Length = sb.Length - (prefix.Length + search[i].Length);
sb.Append(replace);
- modified = true;
- SetStrings();
- break;
- }
- else if ( without && source.EndsWith( search[i] ))
- {
- sb.Length = sb.Length - search[i].Length;
- modified = true;
- SetStrings();
- break;
- }
- }
- }
- }
-
- /*
- * Replace a search string with another within the source zone
- *
- * @param source java.lang.String - the source zone for search
- * @param search java.lang.String[] - the strings to search for replacement
- * @param replace java.lang.String - the replacement string
- */
- private bool ReplaceFrom( String source, String[] search, String replace ) {
- bool found = false;
- if (source!=null)
- {
- for (int i = 0; i < search.Length; i++) {
- if ( source.EndsWith( search[i] ))
- {
+ modified = true;
+ SetStrings();
+ break;
+ }
+ else if ( without && source.EndsWith( search[i] ))
+ {
+ sb.Length = sb.Length - search[i].Length;
+ modified = true;
+ SetStrings();
+ break;
+ }
+ }
+ }
+ }
+
+ /*
+ * Replace a search string with another within the source zone
+ *
+ * @param source java.lang.String - the source zone for search
+ * @param search java.lang.String[] - the strings to search for replacement
+ * @param replace java.lang.String - the replacement string
+ */
+ private bool ReplaceFrom( String source, String[] search, String replace ) {
+ bool found = false;
+ if (source!=null)
+ {
+ for (int i = 0; i < search.Length; i++) {
+ if ( source.EndsWith( search[i] ))
+ {
// java equivalent for replace
- sb.Length = sb.Length - search[i].Length;
+ sb.Length = sb.Length - search[i].Length;
sb.Append(replace);
- modified = true;
- found = true;
- SetStrings();
- break;
- }
- }
- }
- return found;
- }
-
- /*
- * Delete a search string within the source zone
- *
- * @param source the source zone for search
- * @param suffix the strings to search for suppression
- */
- private void DeleteFrom(String source, String[] suffix ) {
- if (source!=null)
- {
- for (int i = 0; i < suffix.Length; i++) {
- if (source.EndsWith( suffix[i] ))
- {
- sb.Length = sb.Length - suffix[i].Length;
- modified = true;
- SetStrings();
- break;
- }
- }
- }
- }
-
- /*
- * Test if a char is a french vowel, including accentuated ones
- *
- * @param ch the char to test
- * @return bool - true if the char is a vowel
- */
- private bool IsVowel(char ch) {
- switch (ch)
- {
- case 'a':
- case 'e':
- case 'i':
- case 'o':
- case 'u':
- case 'y':
- case 'â':
- case 'à':
- case 'ë':
- case 'é':
- case 'ê':
- case 'è':
- case 'ï':
- case 'î':
- case 'ô':
- case 'ü':
- case 'ù':
- case 'û':
- return true;
- default:
- return false;
- }
- }
-
- /*
- * Retrieve the "R zone" (1 or 2 depending on the buffer) and return the corresponding string<br>
- * "R is the region after the first non-vowel following a vowel
- * or is the null region at the end of the word if there is no such non-vowel"<br>
- * @param buffer java.lang.StringBuilder - the in buffer
- * @return java.lang.String - the resulting string
- */
- private String RetrieveR( StringBuilder buffer ) {
- int len = buffer.Length;
- int pos = -1;
- for (int c = 0; c < len; c++) {
- if (IsVowel( buffer[ c ] ))
- {
- pos = c;
- break;
- }
- }
- if (pos > -1)
- {
- int consonne = -1;
- for (int c = pos; c < len; c++) {
- if (!IsVowel(buffer[ c ] ))
- {
- consonne = c;
- break;
- }
- }
- if (consonne > -1 && (consonne+1) < len)
+ modified = true;
+ found = true;
+ SetStrings();
+ break;
+ }
+ }
+ }
+ return found;
+ }
+
+ /*
+ * Delete a search string within the source zone
+ *
+ * @param source the source zone for search
+ * @param suffix the strings to search for suppression
+ */
+ private void DeleteFrom(String source, String[] suffix ) {
+ if (source!=null)
+ {
+ for (int i = 0; i < suffix.Length; i++) {
+ if (source.EndsWith( suffix[i] ))
+ {
+ sb.Length = sb.Length - suffix[i].Length;
+ modified = true;
+ SetStrings();
+ break;
+ }
+ }
+ }
+ }
+
+ /*
+ * Test if a char is a french vowel, including accentuated ones
+ *
+ * @param ch the char to test
+ * @return bool - true if the char is a vowel
+ */
+ private bool IsVowel(char ch) {
+ switch (ch)
+ {
+ case 'a':
+ case 'e':
+ case 'i':
+ case 'o':
+ case 'u':
+ case 'y':
+ case 'â':
+ case 'à':
+ case 'ë':
+ case 'é':
+ case 'ê':
+ case 'è':
+ case 'ï':
+ case 'î':
+ case 'ô':
+ case 'ü':
+ case 'ù':
+ case 'û':
+ return true;
+ default:
+ return false;
+ }
+ }
+
+ /*
+ * Retrieve the "R zone" (1 or 2 depending on the buffer) and return the corresponding string<br>
+ * "R is the region after the first non-vowel following a vowel
+ * or is the null region at the end of the word if there is no such non-vowel"<br>
+ * @param buffer java.lang.StringBuilder - the in buffer
+ * @return java.lang.String - the resulting string
+ */
+ private String RetrieveR( StringBuilder buffer ) {
+ int len = buffer.Length;
+ int pos = -1;
+ for (int c = 0; c < len; c++) {
+ if (IsVowel( buffer[ c ] ))
+ {
+ pos = c;
+ break;
+ }
+ }
+ if (pos > -1)
+ {
+ int consonne = -1;
+ for (int c = pos; c < len; c++) {
+ if (!IsVowel(buffer[ c ] ))
+ {
+ consonne = c;
+ break;
+ }
+ }
+ if (consonne > -1 && (consonne+1) < len)
return buffer.ToString(consonne + 1, len - (consonne+1));
- else
- return null;
- }
- else
- return null;
- }
-
- /*
- * Retrieve the "RV zone" from a buffer an return the corresponding string<br>
- * "If the word begins with two vowels, RV is the region after the third letter,
- * otherwise the region after the first vowel not at the beginning of the word,
- * or the end of the word if these positions cannot be found."<br>
- * @param buffer java.lang.StringBuilder - the in buffer
- * @return java.lang.String - the resulting string
- */
- private String RetrieveRV( StringBuilder buffer ) {
- int len = buffer.Length;
- if ( buffer.Length > 3)
- {
- if ( IsVowel(buffer[ 0 ] ) && IsVowel(buffer[ 1 ] )) {
+ else
+ return null;
+ }
+ else
+ return null;
+ }
+
+ /*
+ * Retrieve the "RV zone" from a buffer an return the corresponding string<br>
+ * "If the word begins with two vowels, RV is the region after the third letter,
+ * otherwise the region after the first vowel not at the beginning of the word,
+ * or the end of the word if these positions cannot be found."<br>
+ * @param buffer java.lang.StringBuilder - the in buffer
+ * @return java.lang.String - the resulting string
+ */
+ private String RetrieveRV( StringBuilder buffer ) {
+ int len = buffer.Length;
+ if ( buffer.Length > 3)
+ {
+ if ( IsVowel(buffer[ 0 ] ) && IsVowel(buffer[ 1 ] )) {
return buffer.ToString(3, len - 3);
- }
- else
- {
- int pos = 0;
- for (int c = 1; c < len; c++) {
- if (IsVowel( buffer[ c ] ))
- {
- pos = c;
- break;
- }
- }
- if ( pos+1 < len )
+ }
+ else
+ {
+ int pos = 0;
+ for (int c = 1; c < len; c++) {
+ if (IsVowel( buffer[ c ] ))
+ {
+ pos = c;
+ break;
+ }
+ }
+ if ( pos+1 < len )
return buffer.ToString(pos + 1, len - (pos+1));
- else
- return null;
- }
- }
- else
- return null;
- }
+ else
+ return null;
+ }
+ }
+ else
+ return null;
+ }
/*
- * Turns u and i preceded AND followed by a vowel to UpperCase<br>
- * Turns y preceded OR followed by a vowel to UpperCase<br>
- * Turns u preceded by q to UpperCase<br>
+ * Turns u and i preceded AND followed by a vowel to UpperCase<br>
+ * Turns y preceded OR followed by a vowel to UpperCase<br>
+ * Turns u preceded by q to UpperCase<br>
*
* @param buffer java.util.StringBuilder - the buffer to treat
* @return java.util.StringBuilder - the treated buffer
*/
private StringBuilder TreatVowels( StringBuilder buffer ) {
- for ( int c = 0; c < buffer.Length; c++ ) {
- char ch = buffer[ c ] ;
-
- if (c == 0) // first char
- {
- if (buffer.Length>1)
- {
- if (ch == 'y' && IsVowel(buffer[ c + 1 ] ))
- buffer[c] = 'Y';
- }
- }
- else if (c == buffer.Length-1) // last char
- {
- if (ch == 'u' && buffer[ c - 1 ] == 'q')
- buffer[c] = 'U';
- if (ch == 'y' && IsVowel(buffer[ c - 1 ] ))
- buffer[c] = 'Y';
- }
- else // other cases
- {
- if (ch == 'u')
- {
- if (buffer[ c - 1] == 'q')
- buffer[c] = 'U';
- else if (IsVowel(buffer[ c - 1 ] ) && IsVowel(buffer[ c + 1 ] ))
- buffer[c] = 'U';
- }
- if (ch == 'i')
- {
- if (IsVowel(buffer[ c - 1 ] ) && IsVowel(buffer[ c + 1 ] ))
- buffer[c] = 'I';
- }
- if (ch == 'y')
- {
- if (IsVowel(buffer[ c - 1 ] ) || IsVowel(buffer[ c + 1 ] ))
- buffer[c] = 'Y';
- }
- }
- }
-
- return buffer;
+ for ( int c = 0; c < buffer.Length; c++ ) {
+ char ch = buffer[ c ] ;
+
+ if (c == 0) // first char
+ {
+ if (buffer.Length>1)
+ {
+ if (ch == 'y' && IsVowel(buffer[ c + 1 ] ))
+ buffer[c] = 'Y';
+ }
+ }
+ else if (c == buffer.Length-1) // last char
+ {
+ if (ch == 'u' && buffer[ c - 1 ] == 'q')
+ buffer[c] = 'U';
+ if (ch == 'y' && IsVowel(buffer[ c - 1 ] ))
+ buffer[c] = 'Y';
+ }
+ else // other cases
+ {
+ if (ch == 'u')
+ {
+ if (buffer[ c - 1] == 'q')
+ buffer[c] = 'U';
+ else if (IsVowel(buffer[ c - 1 ] ) && IsVowel(buffer[ c + 1 ] ))
+ buffer[c] = 'U';
+ }
+ if (ch == 'i')
+ {
+ if (IsVowel(buffer[ c - 1 ] ) && IsVowel(buffer[ c + 1 ] ))
+ buffer[c] = 'I';
+ }
+ if (ch == 'y')
+ {
+ if (IsVowel(buffer[ c - 1 ] ) || IsVowel(buffer[ c + 1 ] ))
+ buffer[c] = 'Y';
+ }
+ }
+ }
+
+ return buffer;
}
/*
@@ -694,32 +694,32 @@ public class FrenchStemmer {
* @return bool - true if, and only if, the given term consists in letters.
*/
private bool IsStemmable( String term ) {
- bool upper = false;
- int first = -1;
- for ( int c = 0; c < term.Length; c++ ) {
- // Discard terms that contain non-letter chars.
- if ( !char.IsLetter( term[c] ) ) {
- return false;
- }
- // Discard terms that contain multiple uppercase letters.
- if ( char.IsUpper( term[ c] ) ) {
- if ( upper ) {
- return false;
- }
- // First encountered uppercase letter, set flag and save
- // position.
- else {
- first = c;
- upper = true;
- }
- }
- }
- // Discard the term if it contains a single uppercase letter that
- // is not starting the term.
- if ( first > 0 ) {
- return false;
- }
- return true;
+ bool upper = false;
+ int first = -1;
+ for ( int c = 0; c < term.Length; c++ ) {
+ // Discard terms that contain non-letter chars.
+ if ( !char.IsLetter( term[c] ) ) {
+ return false;
+ }
+ // Discard terms that contain multiple uppercase letters.
+ if ( char.IsUpper( term[ c] ) ) {
+ if ( upper ) {
+ return false;
+ }
+ // First encountered uppercase letter, set flag and save
+ // position.
+ else {
+ first = c;
+ upper = true;
+ }
+ }
+ }
+ // Discard the term if it contains a single uppercase letter that
+ // is not starting the term.
+ if ( first > 0 ) {
+ return false;
+ }
+ return true;
}
}
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/62f018ab/src/contrib/Analyzers/Hunspell/HunspellStem.cs
----------------------------------------------------------------------
diff --git a/src/contrib/Analyzers/Hunspell/HunspellStem.cs b/src/contrib/Analyzers/Hunspell/HunspellStem.cs
index 379c52f..5664304 100644
--- a/src/contrib/Analyzers/Hunspell/HunspellStem.cs
+++ b/src/contrib/Analyzers/Hunspell/HunspellStem.cs
@@ -1,4 +1,4 @@
-/*
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/62f018ab/src/contrib/Analyzers/Miscellaneous/EmptyTokenStream.cs
----------------------------------------------------------------------
diff --git a/src/contrib/Analyzers/Miscellaneous/EmptyTokenStream.cs b/src/contrib/Analyzers/Miscellaneous/EmptyTokenStream.cs
index 98d5a4b..bc70321 100644
--- a/src/contrib/Analyzers/Miscellaneous/EmptyTokenStream.cs
+++ b/src/contrib/Analyzers/Miscellaneous/EmptyTokenStream.cs
@@ -1,4 +1,4 @@
-/*
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/62f018ab/src/contrib/Analyzers/Miscellaneous/InjectablePrefixAwareTokenFilter.cs
----------------------------------------------------------------------
diff --git a/src/contrib/Analyzers/Miscellaneous/InjectablePrefixAwareTokenFilter.cs b/src/contrib/Analyzers/Miscellaneous/InjectablePrefixAwareTokenFilter.cs
index e43353e..4d3c111 100644
--- a/src/contrib/Analyzers/Miscellaneous/InjectablePrefixAwareTokenFilter.cs
+++ b/src/contrib/Analyzers/Miscellaneous/InjectablePrefixAwareTokenFilter.cs
@@ -1,4 +1,4 @@
-/*
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/62f018ab/src/contrib/Analyzers/Miscellaneous/PrefixAndSuffixAwareTokenFilter.cs
----------------------------------------------------------------------
diff --git a/src/contrib/Analyzers/Miscellaneous/PrefixAndSuffixAwareTokenFilter.cs b/src/contrib/Analyzers/Miscellaneous/PrefixAndSuffixAwareTokenFilter.cs
index 9ef8edc..0734d3c 100644
--- a/src/contrib/Analyzers/Miscellaneous/PrefixAndSuffixAwareTokenFilter.cs
+++ b/src/contrib/Analyzers/Miscellaneous/PrefixAndSuffixAwareTokenFilter.cs
@@ -1,4 +1,4 @@
-/*
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/62f018ab/src/contrib/Analyzers/Miscellaneous/PrefixAwareTokenStream.cs
----------------------------------------------------------------------
diff --git a/src/contrib/Analyzers/Miscellaneous/PrefixAwareTokenStream.cs b/src/contrib/Analyzers/Miscellaneous/PrefixAwareTokenStream.cs
index 45e1d19..127a503 100644
--- a/src/contrib/Analyzers/Miscellaneous/PrefixAwareTokenStream.cs
+++ b/src/contrib/Analyzers/Miscellaneous/PrefixAwareTokenStream.cs
@@ -1,4 +1,4 @@
-/*
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/62f018ab/src/contrib/Analyzers/Miscellaneous/SingleTokenTokenStream.cs
----------------------------------------------------------------------
diff --git a/src/contrib/Analyzers/Miscellaneous/SingleTokenTokenStream.cs b/src/contrib/Analyzers/Miscellaneous/SingleTokenTokenStream.cs
index b24c0f3..232e326 100644
--- a/src/contrib/Analyzers/Miscellaneous/SingleTokenTokenStream.cs
+++ b/src/contrib/Analyzers/Miscellaneous/SingleTokenTokenStream.cs
@@ -1,4 +1,4 @@
-/*
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/62f018ab/src/contrib/Analyzers/NGram/EdgeNGramTokenFilter.cs
----------------------------------------------------------------------
diff --git a/src/contrib/Analyzers/NGram/EdgeNGramTokenFilter.cs b/src/contrib/Analyzers/NGram/EdgeNGramTokenFilter.cs
index b2ddd36..a4a027e 100644
--- a/src/contrib/Analyzers/NGram/EdgeNGramTokenFilter.cs
+++ b/src/contrib/Analyzers/NGram/EdgeNGramTokenFilter.cs
@@ -1,4 +1,4 @@
-/*
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/62f018ab/src/contrib/Analyzers/NGram/EdgeNGramTokenizer.cs
----------------------------------------------------------------------
diff --git a/src/contrib/Analyzers/NGram/EdgeNGramTokenizer.cs b/src/contrib/Analyzers/NGram/EdgeNGramTokenizer.cs
index a925d65..c174ff9 100644
--- a/src/contrib/Analyzers/NGram/EdgeNGramTokenizer.cs
+++ b/src/contrib/Analyzers/NGram/EdgeNGramTokenizer.cs
@@ -1,4 +1,4 @@
-/*
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/62f018ab/src/contrib/Analyzers/NGram/NGramTokenFilter.cs
----------------------------------------------------------------------
diff --git a/src/contrib/Analyzers/NGram/NGramTokenFilter.cs b/src/contrib/Analyzers/NGram/NGramTokenFilter.cs
index be11de2..8bb5707 100644
--- a/src/contrib/Analyzers/NGram/NGramTokenFilter.cs
+++ b/src/contrib/Analyzers/NGram/NGramTokenFilter.cs
@@ -1,4 +1,4 @@
-/*
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/62f018ab/src/contrib/Analyzers/NGram/NGramTokenizer.cs
----------------------------------------------------------------------
diff --git a/src/contrib/Analyzers/NGram/NGramTokenizer.cs b/src/contrib/Analyzers/NGram/NGramTokenizer.cs
index 773bdb5..9616a22 100644
--- a/src/contrib/Analyzers/NGram/NGramTokenizer.cs
+++ b/src/contrib/Analyzers/NGram/NGramTokenizer.cs
@@ -1,4 +1,4 @@
-/*
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/62f018ab/src/contrib/Analyzers/Payloads/AbstractEncoder.cs
----------------------------------------------------------------------
diff --git a/src/contrib/Analyzers/Payloads/AbstractEncoder.cs b/src/contrib/Analyzers/Payloads/AbstractEncoder.cs
index 37771b6..1c9ffe8 100644
--- a/src/contrib/Analyzers/Payloads/AbstractEncoder.cs
+++ b/src/contrib/Analyzers/Payloads/AbstractEncoder.cs
@@ -1,4 +1,4 @@
-/*
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/62f018ab/src/contrib/Analyzers/Payloads/DelimitedPayloadTokenFilter.cs
----------------------------------------------------------------------
diff --git a/src/contrib/Analyzers/Payloads/DelimitedPayloadTokenFilter.cs b/src/contrib/Analyzers/Payloads/DelimitedPayloadTokenFilter.cs
index e17a5f0..b514735 100644
--- a/src/contrib/Analyzers/Payloads/DelimitedPayloadTokenFilter.cs
+++ b/src/contrib/Analyzers/Payloads/DelimitedPayloadTokenFilter.cs
@@ -1,4 +1,4 @@
-/*
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/62f018ab/src/contrib/Analyzers/Payloads/FloatEncoder.cs
----------------------------------------------------------------------
diff --git a/src/contrib/Analyzers/Payloads/FloatEncoder.cs b/src/contrib/Analyzers/Payloads/FloatEncoder.cs
index ec5e386..ca9a8a9 100644
--- a/src/contrib/Analyzers/Payloads/FloatEncoder.cs
+++ b/src/contrib/Analyzers/Payloads/FloatEncoder.cs
@@ -1,4 +1,4 @@
-/*
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/62f018ab/src/contrib/Analyzers/Payloads/IdentityEncoder.cs
----------------------------------------------------------------------
diff --git a/src/contrib/Analyzers/Payloads/IdentityEncoder.cs b/src/contrib/Analyzers/Payloads/IdentityEncoder.cs
index 9379db1..5a92eeb 100644
--- a/src/contrib/Analyzers/Payloads/IdentityEncoder.cs
+++ b/src/contrib/Analyzers/Payloads/IdentityEncoder.cs
@@ -1,4 +1,4 @@
-/*
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/62f018ab/src/contrib/Analyzers/Payloads/IntegerEncoder.cs
----------------------------------------------------------------------
diff --git a/src/contrib/Analyzers/Payloads/IntegerEncoder.cs b/src/contrib/Analyzers/Payloads/IntegerEncoder.cs
index 1179955..7b16d50 100644
--- a/src/contrib/Analyzers/Payloads/IntegerEncoder.cs
+++ b/src/contrib/Analyzers/Payloads/IntegerEncoder.cs
@@ -1,4 +1,4 @@
-/*
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/62f018ab/src/contrib/Analyzers/Payloads/PayloadEncoder.cs
----------------------------------------------------------------------
diff --git a/src/contrib/Analyzers/Payloads/PayloadEncoder.cs b/src/contrib/Analyzers/Payloads/PayloadEncoder.cs
index 5ff6637..5a8b6f6 100644
--- a/src/contrib/Analyzers/Payloads/PayloadEncoder.cs
+++ b/src/contrib/Analyzers/Payloads/PayloadEncoder.cs
@@ -1,4 +1,4 @@
-/*
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/62f018ab/src/contrib/Analyzers/Payloads/PayloadHelper.cs
----------------------------------------------------------------------
diff --git a/src/contrib/Analyzers/Payloads/PayloadHelper.cs b/src/contrib/Analyzers/Payloads/PayloadHelper.cs
index fea6676..a3c5619 100644
--- a/src/contrib/Analyzers/Payloads/PayloadHelper.cs
+++ b/src/contrib/Analyzers/Payloads/PayloadHelper.cs
@@ -1,4 +1,4 @@
-/*
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/62f018ab/src/contrib/Analyzers/Properties/AssemblyInfo.cs
----------------------------------------------------------------------
diff --git a/src/contrib/Analyzers/Properties/AssemblyInfo.cs b/src/contrib/Analyzers/Properties/AssemblyInfo.cs
index 8eb8d02..1263583 100644
--- a/src/contrib/Analyzers/Properties/AssemblyInfo.cs
+++ b/src/contrib/Analyzers/Properties/AssemblyInfo.cs
@@ -1,4 +1,4 @@
-/*
+/*
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/62f018ab/src/contrib/Analyzers/Query/QueryAutoStopWordAnalyzer.cs
----------------------------------------------------------------------
diff --git a/src/contrib/Analyzers/Query/QueryAutoStopWordAnalyzer.cs b/src/contrib/Analyzers/Query/QueryAutoStopWordAnalyzer.cs
index 54e4755..ac358c5 100644
--- a/src/contrib/Analyzers/Query/QueryAutoStopWordAnalyzer.cs
+++ b/src/contrib/Analyzers/Query/QueryAutoStopWordAnalyzer.cs
@@ -294,7 +294,7 @@ public class QueryAutoStopWordAnalyzer : Analyzer {
}
}
return allStopWords.ToArray();
- }
+ }
}
}
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/62f018ab/src/contrib/Analyzers/Ru/RussianAnalyzer.cs
----------------------------------------------------------------------
diff --git a/src/contrib/Analyzers/Ru/RussianAnalyzer.cs b/src/contrib/Analyzers/Ru/RussianAnalyzer.cs
index b37eade..21ad541 100644
--- a/src/contrib/Analyzers/Ru/RussianAnalyzer.cs
+++ b/src/contrib/Analyzers/Ru/RussianAnalyzer.cs
@@ -1,4 +1,4 @@
-/*
+/*
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/62f018ab/src/contrib/Analyzers/Shingle/Codec/OneDimensionalNonWeightedTokenSettingsCodec.cs
----------------------------------------------------------------------
diff --git a/src/contrib/Analyzers/Shingle/Codec/OneDimensionalNonWeightedTokenSettingsCodec.cs b/src/contrib/Analyzers/Shingle/Codec/OneDimensionalNonWeightedTokenSettingsCodec.cs
index a5cec14..fb5f59d 100644
--- a/src/contrib/Analyzers/Shingle/Codec/OneDimensionalNonWeightedTokenSettingsCodec.cs
+++ b/src/contrib/Analyzers/Shingle/Codec/OneDimensionalNonWeightedTokenSettingsCodec.cs
@@ -1,4 +1,4 @@
-/*
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/62f018ab/src/contrib/Analyzers/Shingle/Codec/SimpleThreeDimensionalTokenSettingsCodec.cs
----------------------------------------------------------------------
diff --git a/src/contrib/Analyzers/Shingle/Codec/SimpleThreeDimensionalTokenSettingsCodec.cs b/src/contrib/Analyzers/Shingle/Codec/SimpleThreeDimensionalTokenSettingsCodec.cs
index 5edb112..cdd401e 100644
--- a/src/contrib/Analyzers/Shingle/Codec/SimpleThreeDimensionalTokenSettingsCodec.cs
+++ b/src/contrib/Analyzers/Shingle/Codec/SimpleThreeDimensionalTokenSettingsCodec.cs
@@ -1,4 +1,4 @@
-/*
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/62f018ab/src/contrib/Analyzers/Shingle/Codec/TokenSettingsCodec.cs
----------------------------------------------------------------------
diff --git a/src/contrib/Analyzers/Shingle/Codec/TokenSettingsCodec.cs b/src/contrib/Analyzers/Shingle/Codec/TokenSettingsCodec.cs
index b25a787..47777d5 100644
--- a/src/contrib/Analyzers/Shingle/Codec/TokenSettingsCodec.cs
+++ b/src/contrib/Analyzers/Shingle/Codec/TokenSettingsCodec.cs
@@ -1,4 +1,4 @@
-/*
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/62f018ab/src/contrib/Analyzers/Shingle/Codec/TwoDimensionalNonWeightedSynonymTokenSettingsCodec.cs
----------------------------------------------------------------------
diff --git a/src/contrib/Analyzers/Shingle/Codec/TwoDimensionalNonWeightedSynonymTokenSettingsCodec.cs b/src/contrib/Analyzers/Shingle/Codec/TwoDimensionalNonWeightedSynonymTokenSettingsCodec.cs
index 5300bef..446cf26 100644
--- a/src/contrib/Analyzers/Shingle/Codec/TwoDimensionalNonWeightedSynonymTokenSettingsCodec.cs
+++ b/src/contrib/Analyzers/Shingle/Codec/TwoDimensionalNonWeightedSynonymTokenSettingsCodec.cs
@@ -1,4 +1,4 @@
-/*
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/62f018ab/src/contrib/Analyzers/Shingle/Matrix/Column.cs
----------------------------------------------------------------------
diff --git a/src/contrib/Analyzers/Shingle/Matrix/Column.cs b/src/contrib/Analyzers/Shingle/Matrix/Column.cs
index 1680f9a..8d44300 100644
--- a/src/contrib/Analyzers/Shingle/Matrix/Column.cs
+++ b/src/contrib/Analyzers/Shingle/Matrix/Column.cs
@@ -1,4 +1,4 @@
-/*
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/62f018ab/src/contrib/Analyzers/Shingle/Matrix/Matrix.cs
----------------------------------------------------------------------
diff --git a/src/contrib/Analyzers/Shingle/Matrix/Matrix.cs b/src/contrib/Analyzers/Shingle/Matrix/Matrix.cs
index f0a8ec9..0431026 100644
--- a/src/contrib/Analyzers/Shingle/Matrix/Matrix.cs
+++ b/src/contrib/Analyzers/Shingle/Matrix/Matrix.cs
@@ -1,4 +1,4 @@
-/*
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/62f018ab/src/contrib/Analyzers/Shingle/Matrix/MatrixPermutationIterator.cs
----------------------------------------------------------------------
diff --git a/src/contrib/Analyzers/Shingle/Matrix/MatrixPermutationIterator.cs b/src/contrib/Analyzers/Shingle/Matrix/MatrixPermutationIterator.cs
index 1bc6f32..2790236 100644
--- a/src/contrib/Analyzers/Shingle/Matrix/MatrixPermutationIterator.cs
+++ b/src/contrib/Analyzers/Shingle/Matrix/MatrixPermutationIterator.cs
@@ -1,4 +1,4 @@
-/*
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/62f018ab/src/contrib/Analyzers/Shingle/Matrix/Row.cs
----------------------------------------------------------------------
diff --git a/src/contrib/Analyzers/Shingle/Matrix/Row.cs b/src/contrib/Analyzers/Shingle/Matrix/Row.cs
index 464bf11..a841f50 100644
--- a/src/contrib/Analyzers/Shingle/Matrix/Row.cs
+++ b/src/contrib/Analyzers/Shingle/Matrix/Row.cs
@@ -1,4 +1,4 @@
-/*
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/62f018ab/src/contrib/Analyzers/Shingle/ShingleAnalyzerWrapper.cs
----------------------------------------------------------------------
diff --git a/src/contrib/Analyzers/Shingle/ShingleAnalyzerWrapper.cs b/src/contrib/Analyzers/Shingle/ShingleAnalyzerWrapper.cs
index c057768..afa3d0b 100644
--- a/src/contrib/Analyzers/Shingle/ShingleAnalyzerWrapper.cs
+++ b/src/contrib/Analyzers/Shingle/ShingleAnalyzerWrapper.cs
@@ -1,4 +1,4 @@
-/*
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/62f018ab/src/contrib/Analyzers/Shingle/ShingleFilter.cs
----------------------------------------------------------------------
diff --git a/src/contrib/Analyzers/Shingle/ShingleFilter.cs b/src/contrib/Analyzers/Shingle/ShingleFilter.cs
index 38c5eec..28de576 100644
--- a/src/contrib/Analyzers/Shingle/ShingleFilter.cs
+++ b/src/contrib/Analyzers/Shingle/ShingleFilter.cs
@@ -1,4 +1,4 @@
-/*
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/62f018ab/src/contrib/Analyzers/Shingle/ShingleMatrixFilter.cs
----------------------------------------------------------------------
diff --git a/src/contrib/Analyzers/Shingle/ShingleMatrixFilter.cs b/src/contrib/Analyzers/Shingle/ShingleMatrixFilter.cs
index 0b242e5..f9130e8 100644
--- a/src/contrib/Analyzers/Shingle/ShingleMatrixFilter.cs
+++ b/src/contrib/Analyzers/Shingle/ShingleMatrixFilter.cs
@@ -1,4 +1,4 @@
-/*
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/62f018ab/src/contrib/Analyzers/Shingle/TokenPositioner.cs
----------------------------------------------------------------------
diff --git a/src/contrib/Analyzers/Shingle/TokenPositioner.cs b/src/contrib/Analyzers/Shingle/TokenPositioner.cs
index 8ca65c7..9146888 100644
--- a/src/contrib/Analyzers/Shingle/TokenPositioner.cs
+++ b/src/contrib/Analyzers/Shingle/TokenPositioner.cs
@@ -1,4 +1,4 @@
-/*
+/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/62f018ab/src/contrib/Analyzers/WordlistLoader.cs
----------------------------------------------------------------------
diff --git a/src/contrib/Analyzers/WordlistLoader.cs b/src/contrib/Analyzers/WordlistLoader.cs
index fe5d24e..16fdbae 100644
--- a/src/contrib/Analyzers/WordlistLoader.cs
+++ b/src/contrib/Analyzers/WordlistLoader.cs
@@ -71,7 +71,7 @@
// if ( wordfile == null )
// {
// return new Hashtable();
-// }
+// }
// StreamReader lnr = new StreamReader(wordfile.FullName);
// return GetWordSet(lnr);
// }
@@ -86,9 +86,9 @@
// /// <returns>A Hashtable with the reader's words</returns>
// public static Hashtable GetWordSet(TextReader reader)
// {
-// Hashtable result = new Hashtable();
+// Hashtable result = new Hashtable();
// try
-// {
+// {
// ArrayList stopWords = new ArrayList();
// String word = null;
// while ( ( word = reader.ReadLine() ) != null )