You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucenenet.apache.org by sy...@apache.org on 2016/09/01 14:36:40 UTC
[20/22] lucenenet git commit: Fixed a bug in the Analysis.Tr.TurkishLowerCaseFilter that caused the Analysis.Tr.TestTurkishLowerCaseFilter_.TestTurkishLowerCaseFilter() test to fail.

Fixed a bug in the Analysis.Tr.TurkishLowerCaseFilter that caused the Analysis.Tr.TestTurkishLowerCaseFilter_.TestTurkishLowerCaseFilter() test to fail.


Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/3664f1d7
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/3664f1d7
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/3664f1d7

Branch: refs/heads/analysis-work
Commit: 3664f1d7dc4f31ce28ef36cbccc7d1cf9b79577f
Parents: bc48844
Author: Shad Storhaug <sh...@shadstorhaug.com>
Authored: Fri Aug 26 23:45:11 2016 +0700
Committer: Shad Storhaug <sh...@shadstorhaug.com>
Committed: Sat Aug 27 02:20:11 2016 +0700

----------------------------------------------------------------------
 .../Analysis/Tr/TurkishLowerCaseFilter.cs       | 34 ++++++--
 src/Lucene.Net.Core/Lucene.Net.csproj           |  1 +
 src/Lucene.Net.Core/Support/CultureContext.cs   | 81 ++++++++++++++++++++
 3 files changed, 109 insertions(+), 7 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucenenet/blob/3664f1d7/src/Lucene.Net.Analysis.Common/Analysis/Tr/TurkishLowerCaseFilter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Tr/TurkishLowerCaseFilter.cs b/src/Lucene.Net.Analysis.Common/Analysis/Tr/TurkishLowerCaseFilter.cs
index 8b53666..4aaee6a 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Tr/TurkishLowerCaseFilter.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Tr/TurkishLowerCaseFilter.cs
@@ -34,10 +34,11 @@ namespace Lucene.Net.Analysis.Tr
     public sealed class TurkishLowerCaseFilter : TokenFilter
     {
         private const int LATIN_CAPITAL_LETTER_I = '\u0049';
-        //private const int LATIN_CAPITAL_LETTER_I = '\u0130';
+        private const int LATIN_CAPITAL_LETTER_DOTTED_I = '\u0130';
         private const int LATIN_SMALL_LETTER_I = '\u0069';
         private const int LATIN_SMALL_LETTER_DOTLESS_I = '\u0131';
         private const int COMBINING_DOT_ABOVE = '\u0307';
+
         private readonly ICharTermAttribute termAtt;
 
         /// <summary>
@@ -62,12 +63,6 @@ namespace Lucene.Net.Analysis.Tr
                 int length = termAtt.Length;
                 for (int i = 0; i < length;)
                 {
-
-                    // LUCENENET TODO: This line is failing, causing the TestTurkishLowerCaseFilter() test to fail. According to the MSDN documentation
-                    // https://msdn.microsoft.com/en-us/library/system.globalization.unicodecategory(v=vs.110).aspx
-                    // a non-spacing mark is a modifier to a character. This logic is expecting the first codepoint to be an upper case Latin I,
-                    // and the second to be a non-spacing mark, but it is coming back as a single codepoint 304 that doesn't match Latin I.
-                    // Also, char.GetUnicodeCategory((char)304) returns UpperCaseLetter (not sure if that is pertinent).
                     int ch = Character.CodePointAt(buffer, i, length);
 
                     iOrAfter = (ch == LATIN_CAPITAL_LETTER_I || (iOrAfter && char.GetUnicodeCategory((char)ch) == UnicodeCategory.NonSpacingMark));
@@ -99,6 +94,31 @@ namespace Lucene.Net.Analysis.Tr
                         }
                     }
 
+                    using (var culture = new CultureContext("tr-TR"))
+                    {
+                        switch (ch)
+                        {
+                            // LUCENENET: The .NET char.ToLower() function works correctly in 
+                            // Turkish as long as the current thread is set to tr-TR (well, technically the 
+                            // culture change is only required for the LATIN_CAPITAL_LETTER_I case). .NET does 
+                            // not split these characters into separate letter/non-spacing mark characters,
+                            // but the user might still input them that way so we still need the above
+                            // block to handle that case.
+                            //
+                            // LUCENENET TODO: Oddly, the Character.ToLowerCase() function below does not work right
+                            // for Turkish. Which begs the question, should this special case be there so Turkish works
+                            // everywhere? Or should we leave it a special case here because that is the way it works in Java?
+                            //
+                            // References:
+                            // http://haacked.com/archive/2012/07/05/turkish-i-problem-and-why-you-should-care.aspx/
+                            // http://www.i18nguy.com/unicode/turkish-i18n.html
+                            case LATIN_CAPITAL_LETTER_I:
+                            case LATIN_CAPITAL_LETTER_DOTTED_I:
+                                i += Character.ToChars(char.ToLower((char)ch), buffer, i);
+                                continue;
+                        }
+                    }
+
                     i += Character.ToChars(Character.ToLowerCase(ch), buffer, i);
                 }
 

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/3664f1d7/src/Lucene.Net.Core/Lucene.Net.csproj
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Core/Lucene.Net.csproj b/src/Lucene.Net.Core/Lucene.Net.csproj
index 26c8906..9dbcd4c 100644
--- a/src/Lucene.Net.Core/Lucene.Net.csproj
+++ b/src/Lucene.Net.Core/Lucene.Net.csproj
@@ -623,6 +623,7 @@
     <Compile Include="Support\Compatibility\Collections.cs" />
     <Compile Include="Support\ConcurrentHashMapWrapper.cs" />
     <Compile Include="Support\ConcurrentHashSet.cs" />
+    <Compile Include="Support\CultureContext.cs" />
     <Compile Include="Support\ErrorHandling.cs" />
     <Compile Include="Support\FileStreamExtensions.cs" />
     <Compile Include="Support\HashCodeMerge.cs" />

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/3664f1d7/src/Lucene.Net.Core/Support/CultureContext.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Core/Support/CultureContext.cs b/src/Lucene.Net.Core/Support/CultureContext.cs
new file mode 100644
index 0000000..59f578e
--- /dev/null
+++ b/src/Lucene.Net.Core/Support/CultureContext.cs
@@ -0,0 +1,81 @@
+using System;
+using System.Globalization;
+using System.Threading;
+
+namespace Lucene.Net.Support
+{
+    /// <summary>
+    /// Allows switching the current thread to a new culture in a using block that will automatically 
+    /// return the culture to its previous state upon completion.
+    /// </summary>
+    public class CultureContext : IDisposable // meant for a using block: Dispose() puts the saved cultures back
+    {
+        public CultureContext(int culture) // culture: numeric identifier handed to the CultureInfo(int) constructor
+            : this(new CultureInfo(culture), Thread.CurrentThread.CurrentUICulture) // passes the current UI culture through, so only CurrentCulture changes
+        {
+        }
+
+        public CultureContext(int culture, int uiCulture) // both cultures given as numeric identifiers
+            : this(new CultureInfo(culture), new CultureInfo(uiCulture))
+        {
+        }
+
+        public CultureContext(string cultureName) // cultureName: culture name handed to the CultureInfo(string) constructor, e.g. "tr-TR"
+            : this(new CultureInfo(cultureName), Thread.CurrentThread.CurrentUICulture) // passes the current UI culture through, so only CurrentCulture changes
+        {
+        }
+
+        public CultureContext(string cultureName, string uiCultureName) // both cultures given by name
+            : this(new CultureInfo(cultureName), new CultureInfo(uiCultureName))
+        {
+        }
+
+        public CultureContext(CultureInfo culture) // culture: the CultureInfo to switch CurrentCulture to
+            : this(culture, Thread.CurrentThread.CurrentUICulture) // passes the current UI culture through, so only CurrentCulture changes
+        {
+        }
+
+        public CultureContext(CultureInfo culture, CultureInfo uiCulture) // the constructor every other overload chains to
+        {
+            if (culture == null)
+                throw new ArgumentNullException("culture"); // a null culture is rejected before anything on the thread is touched
+            if (uiCulture == null)
+                throw new ArgumentNullException("uiCulture"); // likewise for a null UI culture
+
+            this.currentThread = Thread.CurrentThread; // remember the constructing thread; restoration is applied to this same Thread object
+
+            // Record the current culture settings so they can be restored later.
+            this.originalCulture = this.currentThread.CurrentCulture;
+            this.originalUICulture = this.currentThread.CurrentUICulture;
+
+            // Set both the culture and UI culture for this context.
+            this.currentThread.CurrentCulture = culture;
+            this.currentThread.CurrentUICulture = uiCulture;
+        }
+
+        private readonly Thread currentThread; // the thread captured in the constructor
+        private readonly CultureInfo originalCulture; // CurrentCulture as it was when the constructor ran
+        private readonly CultureInfo originalUICulture; // CurrentUICulture as it was when the constructor ran
+
+        public CultureInfo OriginalCulture // read-only view of the CurrentCulture saved by the constructor
+        {
+            get { return this.originalCulture; }
+        }
+
+        public CultureInfo OriginalUICulture // read-only view of the CurrentUICulture saved by the constructor
+        {
+            get { return this.originalUICulture; }
+        }
+
+        public void RestoreOriginalCulture() // writes both saved cultures back onto the captured thread
+        {
+            // Restore the culture to the way it was before the constructor was called.
+            this.currentThread.CurrentCulture = this.originalCulture;
+            this.currentThread.CurrentUICulture = this.originalUICulture;
+        }
+        public void Dispose() // IDisposable entry point; does nothing beyond restoring the saved cultures
+        {
+            RestoreOriginalCulture();
+        }
+    }
+}