You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucenenet.apache.org by ni...@apache.org on 2020/08/24 21:19:34 UTC
[lucenenet] 02/09: Lucene.Net.ICU: Reverted extra locking/cloning for ThaiTokenizer
This is an automated email from the ASF dual-hosted git repository.
nightowl888 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/lucenenet.git
commit 6161f4f10cb1ddbd49cda0432dce0007d27a1891
Author: Shad Storhaug <sh...@shadstorhaug.com>
AuthorDate: Sun Aug 23 23:38:41 2020 +0700
Lucene.Net.ICU: Reverted extra locking/cloning for ThaiTokenizer
---
.../Analysis/Th/ThaiTokenizer.cs | 64 +++++++++-------------
1 file changed, 25 insertions(+), 39 deletions(-)
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Th/ThaiTokenizer.cs b/src/Lucene.Net.Analysis.Common/Analysis/Th/ThaiTokenizer.cs
index 283256f..7e0754c 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Th/ThaiTokenizer.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Th/ThaiTokenizer.cs
@@ -42,12 +42,12 @@ namespace Lucene.Net.Analysis.Th
public class ThaiTokenizer : SegmentingTokenizerBase
{
// LUCENENET specific - DBBI_AVAILABLE removed because ICU always has a dictionary-based BreakIterator
- private static readonly BreakIterator proto = (BreakIterator)BreakIterator.GetWordInstance(new CultureInfo("th")).Clone();
+ private static readonly BreakIterator proto = BreakIterator.GetWordInstance(new CultureInfo("th"));
/// <summary>
/// used for breaking the text into sentences
/// </summary>
- private static readonly BreakIterator sentenceProto = (BreakIterator)BreakIterator.GetSentenceInstance(CultureInfo.InvariantCulture).Clone();
+ private static readonly BreakIterator sentenceProto = BreakIterator.GetSentenceInstance(CultureInfo.InvariantCulture);
private readonly ThaiWordBreaker wordBreaker;
private readonly CharArrayIterator wrapper = Analysis.Util.CharArrayIterator.NewWordInstance();
@@ -58,8 +58,6 @@ namespace Lucene.Net.Analysis.Th
private readonly ICharTermAttribute termAtt;
private readonly IOffsetAttribute offsetAtt;
- private readonly object syncLock = new object();
-
/// <summary>
/// Creates a new <see cref="ThaiTokenizer"/> </summary>
public ThaiTokenizer(TextReader reader)
@@ -81,49 +79,37 @@ namespace Lucene.Net.Analysis.Th
protected override void SetNextSentence(int sentenceStart, int sentenceEnd)
{
- // LUCENENET TODO: This class isn't passing thread safety checks.
- // Adding locking and extra cloning of BreakIterator seems to help, but
- // it is not a complete fix.
- lock (syncLock)
- {
- this.sentenceStart = sentenceStart;
- this.sentenceEnd = sentenceEnd;
- wrapper.SetText(m_buffer, sentenceStart, sentenceEnd - sentenceStart);
- wordBreaker.SetText(new string(wrapper.Text, wrapper.Start, wrapper.Length));
- }
+ this.sentenceStart = sentenceStart;
+ this.sentenceEnd = sentenceEnd;
+ wrapper.SetText(m_buffer, sentenceStart, sentenceEnd - sentenceStart);
+ wordBreaker.SetText(new string(wrapper.Text, wrapper.Start, wrapper.Length));
}
protected override bool IncrementWord()
{
- // LUCENENET TODO: This class isn't passing thread safety checks.
- // Adding locking and extra cloning of BreakIterator seems to help, but
- // it is not a complete fix.
- lock (syncLock)
+ int start = wordBreaker.Current;
+ if (start == BreakIterator.Done)
{
- int start = wordBreaker.Current;
- if (start == BreakIterator.Done)
- {
- return false; // BreakIterator exhausted
- }
-
- // find the next set of boundaries, skipping over non-tokens
- int end = wordBreaker.Next();
- while (end != BreakIterator.Done && !Character.IsLetterOrDigit(Character.CodePointAt(m_buffer, sentenceStart + start, sentenceEnd)))
- {
- start = end;
- end = wordBreaker.Next();
- }
+ return false; // BreakIterator exhausted
+ }
- if (end == BreakIterator.Done)
- {
- return false; // BreakIterator exhausted
- }
+ // find the next set of boundaries, skipping over non-tokens
+ int end = wordBreaker.Next();
+ while (end != BreakIterator.Done && !Character.IsLetterOrDigit(Character.CodePointAt(m_buffer, sentenceStart + start, sentenceEnd)))
+ {
+ start = end;
+ end = wordBreaker.Next();
+ }
- ClearAttributes();
- termAtt.CopyBuffer(m_buffer, sentenceStart + start, end - start);
- offsetAtt.SetOffset(CorrectOffset(m_offset + sentenceStart + start), CorrectOffset(m_offset + sentenceStart + end));
- return true;
+ if (end == BreakIterator.Done)
+ {
+ return false; // BreakIterator exhausted
}
+
+ ClearAttributes();
+ termAtt.CopyBuffer(m_buffer, sentenceStart + start, end - start);
+ offsetAtt.SetOffset(CorrectOffset(m_offset + sentenceStart + start), CorrectOffset(m_offset + sentenceStart + end));
+ return true;
}
}