You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucenenet.apache.org by ni...@apache.org on 2017/02/02 12:42:44 UTC

[13/14] lucenenet git commit: IcuBreakIterator: Added a setting to enable the hacks on demand. They are not required for Analysis.Common, so they are disabled by default. They are only used for Highlighter.

IcuBreakIterator: Added a setting to enable the hacks on demand. They are not required for Analysis.Common, so they are disabled by default. They are only used for Highlighter.


Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/fc7b5b52
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/fc7b5b52
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/fc7b5b52

Branch: refs/heads/api-work
Commit: fc7b5b52dd64877d5d63498b3d2df4e54c569bd8
Parents: 506f55a
Author: Shad Storhaug <sh...@shadstorhaug.com>
Authored: Thu Feb 2 18:02:48 2017 +0700
Committer: Shad Storhaug <sh...@shadstorhaug.com>
Committed: Thu Feb 2 18:30:37 2017 +0700

----------------------------------------------------------------------
 src/IcuBreakIterator.cs                         | 23 +++++++++++++++-----
 .../PostingsHighlight/PostingsHighlighter.cs    |  5 ++++-
 2 files changed, 21 insertions(+), 7 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucenenet/blob/fc7b5b52/src/IcuBreakIterator.cs
----------------------------------------------------------------------
diff --git a/src/IcuBreakIterator.cs b/src/IcuBreakIterator.cs
index 0bf6007..6028ba1 100644
--- a/src/IcuBreakIterator.cs
+++ b/src/IcuBreakIterator.cs
@@ -50,6 +50,8 @@ namespace Lucene.Net
         /// </summary>
         protected int m_end;
 
+        private bool enableHacks = false;
+
         public IcuBreakIterator(Icu.BreakIterator.UBreakIteratorType type)
             : this(type, CultureInfo.CurrentCulture)
         {
@@ -63,6 +65,13 @@ namespace Lucene.Net
             this.type = type;
         }
 
+        
+        public virtual bool EnableHacks
+        {
+            get { return enableHacks; }
+            set { enableHacks = value; }
+        }
+
         /// <summary>
         /// Sets the current iteration position to the beginning of the text.
         /// </summary>
@@ -280,20 +289,22 @@ namespace Lucene.Net
 
         private void LoadBoundaries(int start, int end)
         {
-            //boundaries = new List<int>();
-
             IEnumerable<Icu.Boundary> icuBoundaries;
             string offsetText = text.Substring(start, end - start);
 
-
             if (type == Icu.BreakIterator.UBreakIteratorType.WORD)
             {
-                // LUCENENET TODO: HACK - replacing hyphen with "a" so hyphenated words aren't broken
-                icuBoundaries = Icu.BreakIterator.GetWordBoundaries(locale, offsetText.Replace("-", "a"), true);
+                if (enableHacks)
+                {
+                    // LUCENENET TODO: HACK - replacing hyphen with "a" so hyphenated words aren't broken
+                    offsetText = offsetText.Replace("-", "a");
+                }
+                
+                icuBoundaries = Icu.BreakIterator.GetWordBoundaries(locale, offsetText, true);
             }
             else
             {
-                if (type == Icu.BreakIterator.UBreakIteratorType.SENTENCE)
+                if (enableHacks && type == Icu.BreakIterator.UBreakIteratorType.SENTENCE)
                 {
                     // LUCENENET TODO: HACK - newline character causes incorrect sentence breaking.
                     offsetText = offsetText.Replace("\n", " ");

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/fc7b5b52/src/Lucene.Net.Highlighter/PostingsHighlight/PostingsHighlighter.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Highlighter/PostingsHighlight/PostingsHighlighter.cs b/src/Lucene.Net.Highlighter/PostingsHighlight/PostingsHighlighter.cs
index 63c48bc..db04ee1 100644
--- a/src/Lucene.Net.Highlighter/PostingsHighlight/PostingsHighlighter.cs
+++ b/src/Lucene.Net.Highlighter/PostingsHighlight/PostingsHighlighter.cs
@@ -131,7 +131,10 @@ namespace Lucene.Net.Search.PostingsHighlight
         /// </summary>
         protected virtual BreakIterator GetBreakIterator(string field)
         {
-            return new IcuBreakIterator(Icu.BreakIterator.UBreakIteratorType.SENTENCE, CultureInfo.InvariantCulture);
+            return new IcuBreakIterator(Icu.BreakIterator.UBreakIteratorType.SENTENCE, CultureInfo.InvariantCulture)
+            {
+                EnableHacks = true
+            };
         }
 
         /// <summary>