You are viewing a plain-text version of this content; the hyperlink to the canonical version was not preserved in this copy.
Posted to commits@lucenenet.apache.org by ni...@apache.org on 2020/08/24 21:19:34 UTC

[lucenenet] 02/09: Lucene.Net.ICU: Reverted extra locking/cloning for ThaiTokenizer

This is an automated email from the ASF dual-hosted git repository.

nightowl888 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/lucenenet.git

commit 6161f4f10cb1ddbd49cda0432dce0007d27a1891
Author: Shad Storhaug <sh...@shadstorhaug.com>
AuthorDate: Sun Aug 23 23:38:41 2020 +0700

    Lucene.Net.ICU: Reverted extra locking/cloning for ThaiTokenizer
---
 .../Analysis/Th/ThaiTokenizer.cs                   | 64 +++++++++-------------
 1 file changed, 25 insertions(+), 39 deletions(-)

diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Th/ThaiTokenizer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Th/ThaiTokenizer.cs
index 283256f..7e0754c 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Th/ThaiTokenizer.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Th/ThaiTokenizer.cs
@@ -42,12 +42,12 @@ namespace Lucene.Net.Analysis.Th
     public class ThaiTokenizer : SegmentingTokenizerBase
     {
         // LUCENENET specific - DBBI_AVAILABLE removed because ICU always has a dictionary-based BreakIterator
-        private static readonly BreakIterator proto = (BreakIterator)BreakIterator.GetWordInstance(new CultureInfo("th")).Clone();
+        private static readonly BreakIterator proto = BreakIterator.GetWordInstance(new CultureInfo("th"));
 
         /// <summary>
         /// used for breaking the text into sentences
         /// </summary>
-        private static readonly BreakIterator sentenceProto = (BreakIterator)BreakIterator.GetSentenceInstance(CultureInfo.InvariantCulture).Clone();
+        private static readonly BreakIterator sentenceProto = BreakIterator.GetSentenceInstance(CultureInfo.InvariantCulture);
 
         private readonly ThaiWordBreaker wordBreaker;
         private readonly CharArrayIterator wrapper = Analysis.Util.CharArrayIterator.NewWordInstance();
@@ -58,8 +58,6 @@ namespace Lucene.Net.Analysis.Th
         private readonly ICharTermAttribute termAtt;
         private readonly IOffsetAttribute offsetAtt;
 
-        private readonly object syncLock = new object();
-
         /// <summary>
         /// Creates a new <see cref="ThaiTokenizer"/> </summary>
         public ThaiTokenizer(TextReader reader)
@@ -81,49 +79,37 @@ namespace Lucene.Net.Analysis.Th
 
         protected override void SetNextSentence(int sentenceStart, int sentenceEnd)
         {
-            // LUCENENET TODO: This class isn't passing thread safety checks.
-            // Adding locking and extra cloning of BreakIterator seems to help, but
-            // it is not a complete fix.
-            lock (syncLock)
-            {
-                this.sentenceStart = sentenceStart;
-                this.sentenceEnd = sentenceEnd;
-                wrapper.SetText(m_buffer, sentenceStart, sentenceEnd - sentenceStart);
-                wordBreaker.SetText(new string(wrapper.Text, wrapper.Start, wrapper.Length));
-            }
+            this.sentenceStart = sentenceStart;
+            this.sentenceEnd = sentenceEnd;
+            wrapper.SetText(m_buffer, sentenceStart, sentenceEnd - sentenceStart);
+            wordBreaker.SetText(new string(wrapper.Text, wrapper.Start, wrapper.Length));
         }
 
         protected override bool IncrementWord()
         {
-            // LUCENENET TODO: This class isn't passing thread safety checks.
-            // Adding locking and extra cloning of BreakIterator seems to help, but
-            // it is not a complete fix.
-            lock (syncLock)
+            int start = wordBreaker.Current;
+            if (start == BreakIterator.Done)
             {
-                int start = wordBreaker.Current;
-                if (start == BreakIterator.Done)
-                {
-                    return false; // BreakIterator exhausted
-                }
-
-                // find the next set of boundaries, skipping over non-tokens
-                int end = wordBreaker.Next();
-                while (end != BreakIterator.Done && !Character.IsLetterOrDigit(Character.CodePointAt(m_buffer, sentenceStart + start, sentenceEnd)))
-                {
-                    start = end;
-                    end = wordBreaker.Next();
-                }
+                return false; // BreakIterator exhausted
+            }
 
-                if (end == BreakIterator.Done)
-                {
-                    return false; // BreakIterator exhausted
-                }
+            // find the next set of boundaries, skipping over non-tokens
+            int end = wordBreaker.Next();
+            while (end != BreakIterator.Done && !Character.IsLetterOrDigit(Character.CodePointAt(m_buffer, sentenceStart + start, sentenceEnd)))
+            {
+                start = end;
+                end = wordBreaker.Next();
+            }
 
-                ClearAttributes();
-                termAtt.CopyBuffer(m_buffer, sentenceStart + start, end - start);
-                offsetAtt.SetOffset(CorrectOffset(m_offset + sentenceStart + start), CorrectOffset(m_offset + sentenceStart + end));
-                return true;
+            if (end == BreakIterator.Done)
+            {
+                return false; // BreakIterator exhausted
             }
+
+            ClearAttributes();
+            termAtt.CopyBuffer(m_buffer, sentenceStart + start, end - start);
+            offsetAtt.SetOffset(CorrectOffset(m_offset + sentenceStart + start), CorrectOffset(m_offset + sentenceStart + end));
+            return true;
         }
     }