You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucenenet.apache.org by ni...@apache.org on 2017/02/02 12:42:44 UTC
[13/14] lucenenet git commit: IcuBreakIterator: Added a setting to
enable the hacks on demand. They are not required for Analysis.Common,
so they are disabled by default. They are only used for Highlighter.
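IcuBreakIterator: Added a setting (the EnableHacks property) to enable the text-replacement hacks on demand (replacing hyphens for word breaking and newlines for sentence breaking). The hacks are not required for Analysis.Common, so they are disabled by default. They are only used for Highlighter, where PostingsHighlighter now enables them explicitly.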
Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/fc7b5b52
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/fc7b5b52
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/fc7b5b52
Branch: refs/heads/api-work
Commit: fc7b5b52dd64877d5d63498b3d2df4e54c569bd8
Parents: 506f55a
Author: Shad Storhaug <sh...@shadstorhaug.com>
Authored: Thu Feb 2 18:02:48 2017 +0700
Committer: Shad Storhaug <sh...@shadstorhaug.com>
Committed: Thu Feb 2 18:30:37 2017 +0700
----------------------------------------------------------------------
src/IcuBreakIterator.cs | 23 +++++++++++++++-----
.../PostingsHighlight/PostingsHighlighter.cs | 5 ++++-
2 files changed, 21 insertions(+), 7 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/fc7b5b52/src/IcuBreakIterator.cs
----------------------------------------------------------------------
diff --git a/src/IcuBreakIterator.cs b/src/IcuBreakIterator.cs
index 0bf6007..6028ba1 100644
--- a/src/IcuBreakIterator.cs
+++ b/src/IcuBreakIterator.cs
@@ -50,6 +50,8 @@ namespace Lucene.Net
/// </summary>
protected int m_end;
+ private bool enableHacks = false;
+
public IcuBreakIterator(Icu.BreakIterator.UBreakIteratorType type)
: this(type, CultureInfo.CurrentCulture)
{
@@ -63,6 +65,13 @@ namespace Lucene.Net
this.type = type;
}
+
+ public virtual bool EnableHacks
+ {
+ get { return enableHacks; }
+ set { enableHacks = value; }
+ }
+
/// <summary>
/// Sets the current iteration position to the beginning of the text.
/// </summary>
@@ -280,20 +289,22 @@ namespace Lucene.Net
private void LoadBoundaries(int start, int end)
{
- //boundaries = new List<int>();
-
IEnumerable<Icu.Boundary> icuBoundaries;
string offsetText = text.Substring(start, end - start);
-
if (type == Icu.BreakIterator.UBreakIteratorType.WORD)
{
- // LUCENENET TODO: HACK - replacing hyphen with "a" so hyphenated words aren't broken
- icuBoundaries = Icu.BreakIterator.GetWordBoundaries(locale, offsetText.Replace("-", "a"), true);
+ if (enableHacks)
+ {
+ // LUCENENET TODO: HACK - replacing hyphen with "a" so hyphenated words aren't broken
+ offsetText = offsetText.Replace("-", "a");
+ }
+
+ icuBoundaries = Icu.BreakIterator.GetWordBoundaries(locale, offsetText, true);
}
else
{
- if (type == Icu.BreakIterator.UBreakIteratorType.SENTENCE)
+ if (enableHacks && type == Icu.BreakIterator.UBreakIteratorType.SENTENCE)
{
// LUCENENET TODO: HACK - newline character causes incorrect sentence breaking.
offsetText = offsetText.Replace("\n", " ");
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/fc7b5b52/src/Lucene.Net.Highlighter/PostingsHighlight/PostingsHighlighter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Highlighter/PostingsHighlight/PostingsHighlighter.cs b/src/Lucene.Net.Highlighter/PostingsHighlight/PostingsHighlighter.cs
index 63c48bc..db04ee1 100644
--- a/src/Lucene.Net.Highlighter/PostingsHighlight/PostingsHighlighter.cs
+++ b/src/Lucene.Net.Highlighter/PostingsHighlight/PostingsHighlighter.cs
@@ -131,7 +131,10 @@ namespace Lucene.Net.Search.PostingsHighlight
/// </summary>
protected virtual BreakIterator GetBreakIterator(string field)
{
- return new IcuBreakIterator(Icu.BreakIterator.UBreakIteratorType.SENTENCE, CultureInfo.InvariantCulture);
+ return new IcuBreakIterator(Icu.BreakIterator.UBreakIteratorType.SENTENCE, CultureInfo.InvariantCulture)
+ {
+ EnableHacks = true
+ };
}
/// <summary>