You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucenenet.apache.org by ni...@apache.org on 2019/09/06 09:01:19 UTC

[lucenenet] 09/21: SWEEP: Lucene.Net.Analysis.Common: Fixed culture sensitivity issues converting numbers to strings and uppercasing/lowercasing

This is an automated email from the ASF dual-hosted git repository.

nightowl888 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/lucenenet.git

commit 444b86b83cf9977c274011538d9075f33cbc061a
Author: Shad Storhaug <sh...@shadstorhaug.com>
AuthorDate: Tue Sep 3 10:17:05 2019 +0700

    SWEEP: Lucene.Net.Analysis.Common: Fixed culture sensitivity issues converting numbers to strings and uppercasing/lowercasing
---
 src/Lucene.Net.Analysis.Common/Analysis/Cjk/CJKTokenizer.cs |  2 +-
 .../Analysis/Cn/ChineseTokenizer.cs                         |  2 +-
 .../Analysis/Compound/Hyphenation/HyphenationTree.cs        |  2 +-
 .../Analysis/Compound/Hyphenation/TernaryTree.cs            |  6 +++---
 .../Analysis/Hunspell/Dictionary.cs                         |  4 ++--
 .../Analysis/Miscellaneous/CapitalizationFilter.cs          |  2 +-
 .../Analysis/Miscellaneous/PatternAnalyzer.cs               |  4 ++--
 .../Analysis/Miscellaneous/TruncateTokenFilterFactory.cs    |  6 ++++--
 .../Analysis/Pt/RSLPStemmerBase.cs                          | 13 +++++++------
 .../Analysis/Util/AbstractAnalysisFactory.cs                |  4 ++--
 src/Lucene.Net.Analysis.Stempel/Egothor.Stemmer/DiffIt.cs   |  3 ++-
 11 files changed, 26 insertions(+), 22 deletions(-)

diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Cjk/CJKTokenizer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Cjk/CJKTokenizer.cs
index babbee1..9388afa 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Cjk/CJKTokenizer.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Cjk/CJKTokenizer.cs
@@ -252,7 +252,7 @@ namespace Lucene.Net.Analysis.Cjk
                             }
 
                             // store the LowerCase(c) in the buffer
-                            buffer[length++] = char.ToLower(c);
+                            buffer[length++] = char.ToLowerInvariant(c);
                             tokenType = SINGLE_TOKEN_TYPE;
 
                             // break the procedure if buffer overflowed!
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Cn/ChineseTokenizer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Cn/ChineseTokenizer.cs
index 9b127df..f60c340 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Cn/ChineseTokenizer.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Cn/ChineseTokenizer.cs
@@ -89,7 +89,7 @@ namespace Lucene.Net.Analysis.Cn
             {
                 start = offset - 1;
             }
-            buffer[length++] = char.ToLower(c); // buffer it
+            buffer[length++] = char.ToLowerInvariant(c); // buffer it
 
         }
 
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Compound/Hyphenation/HyphenationTree.cs b/src/Lucene.Net.Analysis.Common/Analysis/Compound/Hyphenation/HyphenationTree.cs
index 1a87cb0..2342d2c 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Compound/Hyphenation/HyphenationTree.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Compound/Hyphenation/HyphenationTree.cs
@@ -592,7 +592,7 @@ namespace Lucene.Net.Analysis.Compound.Hyphenation
 
         // public override void printStats(PrintStream @out)
         // {
-        //@out.println("Value space size = " + Convert.ToString(vspace.length()));
+        //@out.println("Value space size = " + Convert.ToString(vspace.length(), CultureInfo.InvariantCulture));
         //base.printStats(@out);
 
         // }
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Compound/Hyphenation/TernaryTree.cs b/src/Lucene.Net.Analysis.Common/Analysis/Compound/Hyphenation/TernaryTree.cs
index b0cbc23..a44222b 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Compound/Hyphenation/TernaryTree.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Compound/Hyphenation/TernaryTree.cs
@@ -789,10 +789,10 @@ namespace Lucene.Net.Analysis.Compound.Hyphenation
 
         public virtual void PrintStats(TextWriter @out)
         {
-            @out.WriteLine("Number of keys = " + Convert.ToString(m_length));
-            @out.WriteLine("Node count = " + Convert.ToString(m_freenode));
+            @out.WriteLine("Number of keys = " + Convert.ToString(m_length)); // LUCENENET: Intentionally using current culture
+            @out.WriteLine("Node count = " + Convert.ToString(m_freenode)); // LUCENENET: Intentionally using current culture
             // System.out.println("Array length = " + Integer.toString(eq.length));
-            @out.WriteLine("Key Array length = " + Convert.ToString(m_kv.Length));
+            @out.WriteLine("Key Array length = " + Convert.ToString(m_kv.Length)); // LUCENENET: Intentionally using current culture
 
             /*
              * for(int i=0; i<kv.length(); i++) if ( kv.get(i) != 0 )
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Hunspell/Dictionary.cs b/src/Lucene.Net.Analysis.Common/Analysis/Hunspell/Dictionary.cs
index ba7efaa..39ddc66 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Hunspell/Dictionary.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Hunspell/Dictionary.cs
@@ -1134,7 +1134,7 @@ namespace Lucene.Net.Analysis.Hunspell
                 if (ignoreCase && iconv == null)
                 {
                     // if we have no input conversion mappings, do this on-the-fly
-                    ch = char.ToLower(ch);
+                    ch = char.ToLowerInvariant(ch);
                 }
 
                 reuse.Append(ch);
@@ -1154,7 +1154,7 @@ namespace Lucene.Net.Analysis.Hunspell
                 {
                     for (int i = 0; i < reuse.Length; i++)
                     {
-                        reuse[i] = char.ToLower(reuse[i]);
+                        reuse[i] = char.ToLowerInvariant(reuse[i]);
                     }
                 }
             }
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/CapitalizationFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/CapitalizationFilter.cs
index 2eaaf54..ec92956 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/CapitalizationFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/CapitalizationFilter.cs
@@ -271,7 +271,7 @@ namespace Lucene.Net.Analysis.Miscellaneous
         {
             get
             {
-                return (culture != null) ? culture : CultureInfo.CurrentCulture;
+                return culture ?? CultureInfo.CurrentCulture;
             }
         }
     }
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/PatternAnalyzer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/PatternAnalyzer.cs
index 70e7ce2..ac4d888 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/PatternAnalyzer.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/PatternAnalyzer.cs
@@ -411,7 +411,7 @@ namespace Lucene.Net.Analysis.Miscellaneous
                         string text = str.Substring(start, end - start);
                         if (toLowerCase)
                         {
-                            text = text.ToLower();
+                            text = text.ToLower(); // LUCENENET: Since this class is obsolete, we aren't going to bother with passing culture in the constructor.
                         }
                         termAtt.SetEmpty().Append(text);
                         offsetAtt.SetOffset(CorrectOffset(start), CorrectOffset(end));
@@ -526,7 +526,7 @@ namespace Lucene.Net.Analysis.Miscellaneous
                         text = s.Substring(start, i - start);
                         if (toLowerCase)
                         {
-                            text = text.ToLower();
+                            text = text.ToLower(); // LUCENENET: Since this class is obsolete, we aren't going to bother with passing culture in the constructor.
                         }
                         //          if (toLowerCase) {            
                         ////            use next line once JDK 1.5 String.toLowerCase() performance regression is fixed
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/TruncateTokenFilterFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/TruncateTokenFilterFactory.cs
index a1c4cec..cecbabb 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/TruncateTokenFilterFactory.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Miscellaneous/TruncateTokenFilterFactory.cs
@@ -46,10 +46,12 @@ namespace Lucene.Net.Analysis.Miscellaneous
         public TruncateTokenFilterFactory(IDictionary<string, string> args) 
             : base(args)
         {
-            prefixLength = sbyte.Parse(Get(args, PREFIX_LENGTH_KEY, "5"), NumberStyles.Integer, CultureInfo.InvariantCulture);
+            var prefixLengthString = Get(args, PREFIX_LENGTH_KEY, "5");
+            prefixLength = sbyte.Parse(prefixLengthString, NumberStyles.Integer, CultureInfo.InvariantCulture);
             if (prefixLength < 1)
             {
-                throw new System.ArgumentException(PREFIX_LENGTH_KEY + " parameter must be a positive number: " + prefixLength.ToString(CultureInfo.InvariantCulture));
+                // LUCENENET specific - showing the original string provided by the user so we don't need to worry about culture-specific number conversion issues
+                throw new System.ArgumentException(PREFIX_LENGTH_KEY + " parameter must be a positive number: " + prefixLengthString);
             }
             if (args.Count > 0)
             {
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Pt/RSLPStemmerBase.cs b/src/Lucene.Net.Analysis.Common/Analysis/Pt/RSLPStemmerBase.cs
index 26f3c59..885f76a 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Pt/RSLPStemmerBase.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Pt/RSLPStemmerBase.cs
@@ -3,6 +3,7 @@ using Lucene.Net.Support;
 using Lucene.Net.Util;
 using System;
 using System.Collections.Generic;
+using System.Globalization;
 using System.IO;
 using System.Linq;
 using System.Text;
@@ -305,8 +306,8 @@ namespace Lucene.Net.Analysis.Pt
             }
             //Debug.Assert(headerPattern.GetGroupNumbers().Length == 4);
             string name = matcher.Groups[1].Value;
-            int min = int.Parse(matcher.Groups[2].Value);
-            int type = int.Parse(matcher.Groups[3].Value);
+            int min = int.Parse(matcher.Groups[2].Value, CultureInfo.InvariantCulture);
+            int type = int.Parse(matcher.Groups[3].Value, CultureInfo.InvariantCulture);
             string[] suffixes = ParseList(matcher.Groups[4].Value);
             Rule[] rules = ParseRules(r, type);
             return new Step(name, rules, min, suffixes);
@@ -321,14 +322,14 @@ namespace Lucene.Net.Analysis.Pt
                 Match matcher = stripPattern.Match(line);
                 if (matcher.Success)
                 {
-                    rules.Add(new Rule(matcher.Groups[1].Value, int.Parse(matcher.Groups[2].Value), ""));
+                    rules.Add(new Rule(matcher.Groups[1].Value, int.Parse(matcher.Groups[2].Value, CultureInfo.InvariantCulture), ""));
                 }
                 else
                 {
                     matcher = repPattern.Match(line);
                     if (matcher.Success)
                     {
-                        rules.Add(new Rule(matcher.Groups[1].Value, int.Parse(matcher.Groups[2].Value), matcher.Groups[3].Value));
+                        rules.Add(new Rule(matcher.Groups[1].Value, int.Parse(matcher.Groups[2].Value, CultureInfo.InvariantCulture), matcher.Groups[3].Value));
                     }
                     else
                     {
@@ -337,11 +338,11 @@ namespace Lucene.Net.Analysis.Pt
                         {
                             if (type == 0)
                             {
-                                rules.Add(new RuleWithSuffixExceptions(matcher.Groups[1].Value, int.Parse(matcher.Groups[2].Value), matcher.Groups[3].Value, ParseList(matcher.Groups[4].Value)));
+                                rules.Add(new RuleWithSuffixExceptions(matcher.Groups[1].Value, int.Parse(matcher.Groups[2].Value, CultureInfo.InvariantCulture), matcher.Groups[3].Value, ParseList(matcher.Groups[4].Value)));
                             }
                             else
                             {
-                                rules.Add(new RuleWithSetExceptions(matcher.Groups[1].Value, int.Parse(matcher.Groups[2].Value), matcher.Groups[3].Value, ParseList(matcher.Groups[4].Value)));
+                                rules.Add(new RuleWithSetExceptions(matcher.Groups[1].Value, int.Parse(matcher.Groups[2].Value, CultureInfo.InvariantCulture), matcher.Groups[3].Value, ParseList(matcher.Groups[4].Value)));
                             }
                         }
                         else
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Util/AbstractAnalysisFactory.cs b/src/Lucene.Net.Analysis.Common/Analysis/Util/AbstractAnalysisFactory.cs
index 0b76dc5..4241335 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Util/AbstractAnalysisFactory.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Util/AbstractAnalysisFactory.cs
@@ -236,7 +236,7 @@ namespace Lucene.Net.Analysis.Util
         /// </summary>
         protected float RequireSingle(IDictionary<string, string> args, string name)
         {
-            return float.Parse(Require(args, name));
+            return float.Parse(Require(args, name), CultureInfo.InvariantCulture);
         }
 
         /// <summary>
@@ -248,7 +248,7 @@ namespace Lucene.Net.Analysis.Util
             if (args.TryGetValue(name, out s))
             {
                 args.Remove(name);
-                return float.Parse(s);
+                return float.Parse(s, CultureInfo.InvariantCulture);
             }
             return defaultVal;
         }
diff --git a/src/Lucene.Net.Analysis.Stempel/Egothor.Stemmer/DiffIt.cs b/src/Lucene.Net.Analysis.Stempel/Egothor.Stemmer/DiffIt.cs
index 85a66c6..cf8a043 100644
--- a/src/Lucene.Net.Analysis.Stempel/Egothor.Stemmer/DiffIt.cs
+++ b/src/Lucene.Net.Analysis.Stempel/Egothor.Stemmer/DiffIt.cs
@@ -1,6 +1,7 @@
 using Lucene.Net.Support;
 using System;
 using System.Collections.Generic;
+using System.Globalization;
 using System.IO;
 using System.Text;
 using Console = Lucene.Net.Support.SystemConsole;
@@ -76,7 +77,7 @@ namespace Egothor.Stemmer
         internal static int Get(int i, string s)
         {
             int result;
-            if (!int.TryParse(s.Substring(i, 1), out result))
+            if (!int.TryParse(s.Substring(i, 1), NumberStyles.Integer, CultureInfo.InvariantCulture, out result))
             {
                 return 1;
             }