You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucenenet.apache.org by ni...@apache.org on 2019/09/06 09:01:19 UTC
[lucenenet] 09/21: SWEEP: Lucene.Net.Analysis.Common: Fixed culture sensitivity issues converting numbers to strings and uppercasing/lowercasing
This is an automated email from the ASF dual-hosted git repository.
nightowl888 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/lucenenet.git
commit 444b86b83cf9977c274011538d9075f33cbc061a
Author: Shad Storhaug <sh...@shadstorhaug.com>
AuthorDate: Tue Sep 3 10:17:05 2019 +0700
SWEEP: Lucene.Net.Analysis.Common: Fixed culture sensitivity issues converting numbers to strings and uppercasing/lowercasing
---
src/Lucene.Net.Analysis.Common/Analysis/Cjk/CJKTokenizer.cs | 2 +-
.../Analysis/Cn/ChineseTokenizer.cs | 2 +-
.../Analysis/Compound/Hyphenation/HyphenationTree.cs | 2 +-
.../Analysis/Compound/Hyphenation/TernaryTree.cs | 6 +++---
.../Analysis/Hunspell/Dictionary.cs | 4 ++--
.../Analysis/Miscellaneous/CapitalizationFilter.cs | 2 +-
.../Analysis/Miscellaneous/PatternAnalyzer.cs | 4 ++--
.../Analysis/Miscellaneous/TruncateTokenFilterFactory.cs | 6 ++++--
.../Analysis/Pt/RSLPStemmerBase.cs | 13 +++++++------
.../Analysis/Util/AbstractAnalysisFactory.cs | 4 ++--
src/Lucene.Net.Analysis.Stempel/Egothor.Stemmer/DiffIt.cs | 3 ++-
11 files changed, 26 insertions(+), 22 deletions(-)
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Cjk/CJKTokenizer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Cjk/CJKTokenizer.cs
index babbee1..9388afa 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Cjk/CJKTokenizer.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Cjk/CJKTokenizer.cs
@@ -252,7 +252,7 @@ namespace Lucene.Net.Analysis.Cjk
}
// store the LowerCase(c) in the buffer
- buffer[length++] = char.ToLower(c);
+ buffer[length++] = char.ToLowerInvariant(c);
tokenType = SINGLE_TOKEN_TYPE;
// break the procedure if buffer overflowed!
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Cn/ChineseTokenizer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Cn/ChineseTokenizer.cs
index 9b127df..f60c340 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Cn/ChineseTokenizer.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Cn/ChineseTokenizer.cs
@@ -89,7 +89,7 @@ namespace Lucene.Net.Analysis.Cn
{
start = offset - 1;
}
- buffer[length++] = char.ToLower(c); // buffer it
+ buffer[length++] = char.ToLowerInvariant(c); // buffer it
}
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Compound/Hyphenation/HyphenationTree.cs b/src/Lucene.Net.Analysis.Common/Analysis/Compound/Hyphenation/HyphenationTree.cs
index 1a87cb0..2342d2c 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Compound/Hyphenation/HyphenationTree.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Compound/Hyphenation/HyphenationTree.cs
@@ -592,7 +592,7 @@ namespace Lucene.Net.Analysis.Compound.Hyphenation
// public override void printStats(PrintStream @out)
// {
- //@out.println("Value space size = " + Convert.ToString(vspace.length()));
+ //@out.println("Value space size = " + Convert.ToString(vspace.length(), CultureInfo.InvariantCulture));
//base.printStats(@out);
// }
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Compound/Hyphenation/TernaryTree.cs b/src/Lucene.Net.Analysis.Common/Analysis/Compound/Hyphenation/TernaryTree.cs
index b0cbc23..a44222b 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Compound/Hyphenation/TernaryTree.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Compound/Hyphenation/TernaryTree.cs
@@ -789,10 +789,10 @@ namespace Lucene.Net.Analysis.Compound.Hyphenation
public virtual void PrintStats(TextWriter @out)
{
- @out.WriteLine("Number of keys = " + Convert.ToString(m_length));
- @out.WriteLine("Node count = " + Convert.ToString(m_freenode));
+ @out.WriteLine("Number of keys = " + Convert.ToString(m_length)); // LUCENENET: Intentionally using current culture
+ @out.WriteLine("Node count = " + Convert.ToString(m_freenode)); // LUCENENET: Intentionally using current culture
// System.out.println("Array length = " + Integer.toString(eq.length));
- @out.WriteLine("Key Array length = " + Convert.ToString(m_kv.Length));
+ @out.WriteLine("Key Array length = " + Convert.ToString(m_kv.Length)); // LUCENENET: Intentionally using current culture
/*
* for(int i=0; i<kv.length(); i++) if ( kv.get(i) != 0 )
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Hunspell/Dictionary.cs b/src/Lucene.Net.Analysis.Common/Analysis/Hunspell/Dictionary.cs
index ba7efaa..39ddc66 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Hunspell/Dictionary.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Hunspell/Dictionary.cs
@@ -1134,7 +1134,7 @@ namespace Lucene.Net.Analysis.Hunspell
if (ignoreCase && iconv == null)
{
// if we have no input conversion mappings, do this on-the-fly
- ch = char.ToLower(ch);
+ ch = char.ToLowerInvariant(ch);
}
reuse.Append(ch);
@@ -1154,7 +1154,7 @@ namespace Lucene.Net.Analysis.Hunspell
{
for (int i = 0; i < reuse.Length; i++)
{
- reuse[i] = char.ToLower(reuse[i]);
+ reuse[i] = char.ToLowerInvariant(reuse[i]);
}
}
}
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/CapitalizationFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/CapitalizationFilter.cs
index 2eaaf54..ec92956 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/CapitalizationFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/CapitalizationFilter.cs
@@ -271,7 +271,7 @@ namespace Lucene.Net.Analysis.Miscellaneous
{
get
{
- return (culture != null) ? culture : CultureInfo.CurrentCulture;
+ return culture ?? CultureInfo.CurrentCulture;
}
}
}
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/PatternAnalyzer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/PatternAnalyzer.cs
index 70e7ce2..ac4d888 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/PatternAnalyzer.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/PatternAnalyzer.cs
@@ -411,7 +411,7 @@ namespace Lucene.Net.Analysis.Miscellaneous
string text = str.Substring(start, end - start);
if (toLowerCase)
{
- text = text.ToLower();
+ text = text.ToLower(); // LUCENENET: Since this class is obsolete, we aren't going to bother with passing culture in the constructor.
}
termAtt.SetEmpty().Append(text);
offsetAtt.SetOffset(CorrectOffset(start), CorrectOffset(end));
@@ -526,7 +526,7 @@ namespace Lucene.Net.Analysis.Miscellaneous
text = s.Substring(start, i - start);
if (toLowerCase)
{
- text = text.ToLower();
+ text = text.ToLower(); // LUCENENET: Since this class is obsolete, we aren't going to bother with passing culture in the constructor.
}
// if (toLowerCase) {
//// use next line once JDK 1.5 String.toLowerCase() performance regression is fixed
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/TruncateTokenFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/TruncateTokenFilterFactory.cs
index a1c4cec..cecbabb 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/TruncateTokenFilterFactory.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/TruncateTokenFilterFactory.cs
@@ -46,10 +46,12 @@ namespace Lucene.Net.Analysis.Miscellaneous
public TruncateTokenFilterFactory(IDictionary<string, string> args)
: base(args)
{
- prefixLength = sbyte.Parse(Get(args, PREFIX_LENGTH_KEY, "5"), NumberStyles.Integer, CultureInfo.InvariantCulture);
+ var prefixLengthString = Get(args, PREFIX_LENGTH_KEY, "5");
+ prefixLength = sbyte.Parse(prefixLengthString, NumberStyles.Integer, CultureInfo.InvariantCulture);
if (prefixLength < 1)
{
- throw new System.ArgumentException(PREFIX_LENGTH_KEY + " parameter must be a positive number: " + prefixLength.ToString(CultureInfo.InvariantCulture));
+ // LUCENENET specific - showing the original string provided by the user so we don't need to worry about culture-specific number conversion issues
+ throw new System.ArgumentException(PREFIX_LENGTH_KEY + " parameter must be a positive number: " + prefixLengthString);
}
if (args.Count > 0)
{
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Pt/RSLPStemmerBase.cs b/src/Lucene.Net.Analysis.Common/Analysis/Pt/RSLPStemmerBase.cs
index 26f3c59..885f76a 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Pt/RSLPStemmerBase.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Pt/RSLPStemmerBase.cs
@@ -3,6 +3,7 @@ using Lucene.Net.Support;
using Lucene.Net.Util;
using System;
using System.Collections.Generic;
+using System.Globalization;
using System.IO;
using System.Linq;
using System.Text;
@@ -305,8 +306,8 @@ namespace Lucene.Net.Analysis.Pt
}
//Debug.Assert(headerPattern.GetGroupNumbers().Length == 4);
string name = matcher.Groups[1].Value;
- int min = int.Parse(matcher.Groups[2].Value);
- int type = int.Parse(matcher.Groups[3].Value);
+ int min = int.Parse(matcher.Groups[2].Value, CultureInfo.InvariantCulture);
+ int type = int.Parse(matcher.Groups[3].Value, CultureInfo.InvariantCulture);
string[] suffixes = ParseList(matcher.Groups[4].Value);
Rule[] rules = ParseRules(r, type);
return new Step(name, rules, min, suffixes);
@@ -321,14 +322,14 @@ namespace Lucene.Net.Analysis.Pt
Match matcher = stripPattern.Match(line);
if (matcher.Success)
{
- rules.Add(new Rule(matcher.Groups[1].Value, int.Parse(matcher.Groups[2].Value), ""));
+ rules.Add(new Rule(matcher.Groups[1].Value, int.Parse(matcher.Groups[2].Value, CultureInfo.InvariantCulture), ""));
}
else
{
matcher = repPattern.Match(line);
if (matcher.Success)
{
- rules.Add(new Rule(matcher.Groups[1].Value, int.Parse(matcher.Groups[2].Value), matcher.Groups[3].Value));
+ rules.Add(new Rule(matcher.Groups[1].Value, int.Parse(matcher.Groups[2].Value, CultureInfo.InvariantCulture), matcher.Groups[3].Value));
}
else
{
@@ -337,11 +338,11 @@ namespace Lucene.Net.Analysis.Pt
{
if (type == 0)
{
- rules.Add(new RuleWithSuffixExceptions(matcher.Groups[1].Value, int.Parse(matcher.Groups[2].Value), matcher.Groups[3].Value, ParseList(matcher.Groups[4].Value)));
+ rules.Add(new RuleWithSuffixExceptions(matcher.Groups[1].Value, int.Parse(matcher.Groups[2].Value, CultureInfo.InvariantCulture), matcher.Groups[3].Value, ParseList(matcher.Groups[4].Value)));
}
else
{
- rules.Add(new RuleWithSetExceptions(matcher.Groups[1].Value, int.Parse(matcher.Groups[2].Value), matcher.Groups[3].Value, ParseList(matcher.Groups[4].Value)));
+ rules.Add(new RuleWithSetExceptions(matcher.Groups[1].Value, int.Parse(matcher.Groups[2].Value, CultureInfo.InvariantCulture), matcher.Groups[3].Value, ParseList(matcher.Groups[4].Value)));
}
}
else
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Util/AbstractAnalysisFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Util/AbstractAnalysisFactory.cs
index 0b76dc5..4241335 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Util/AbstractAnalysisFactory.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Util/AbstractAnalysisFactory.cs
@@ -236,7 +236,7 @@ namespace Lucene.Net.Analysis.Util
/// </summary>
protected float RequireSingle(IDictionary<string, string> args, string name)
{
- return float.Parse(Require(args, name));
+ return float.Parse(Require(args, name), CultureInfo.InvariantCulture);
}
/// <summary>
@@ -248,7 +248,7 @@ namespace Lucene.Net.Analysis.Util
if (args.TryGetValue(name, out s))
{
args.Remove(name);
- return float.Parse(s);
+ return float.Parse(s, CultureInfo.InvariantCulture);
}
return defaultVal;
}
diff --git a/src/Lucene.Net.Analysis.Stempel/Egothor.Stemmer/DiffIt.cs b/src/Lucene.Net.Analysis.Stempel/Egothor.Stemmer/DiffIt.cs
index 85a66c6..cf8a043 100644
--- a/src/Lucene.Net.Analysis.Stempel/Egothor.Stemmer/DiffIt.cs
+++ b/src/Lucene.Net.Analysis.Stempel/Egothor.Stemmer/DiffIt.cs
@@ -1,6 +1,7 @@
using Lucene.Net.Support;
using System;
using System.Collections.Generic;
+using System.Globalization;
using System.IO;
using System.Text;
using Console = Lucene.Net.Support.SystemConsole;
@@ -76,7 +77,7 @@ namespace Egothor.Stemmer
internal static int Get(int i, string s)
{
int result;
- if (!int.TryParse(s.Substring(i, 1), out result))
+ if (!int.TryParse(s.Substring(i, 1), NumberStyles.Integer, CultureInfo.InvariantCulture, out result))
{
return 1;
}