You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucenenet.apache.org by ni...@apache.org on 2017/02/02 12:42:41 UTC
[10/14] lucenenet git commit: Moved IcuBreakIterator to src\
directory and added it as a linked file to both Lucene.Net.Analysis.Common
and Lucene.Net.Highlighter. This seems like a better option than creating a
separate DLL to share this dependency or d
Moved IcuBreakIterator to src\ directory and added it as a linked file to both Lucene.Net.Analysis.Common and Lucene.Net.Highlighter. This seems like a better option than creating a separate DLL to share this dependency or duplicating it.
Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/5a7cb173
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/5a7cb173
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/5a7cb173
Branch: refs/heads/api-work
Commit: 5a7cb173489a0e22b6ff890f7c283ee50895e42d
Parents: bed1f16
Author: Shad Storhaug <sh...@shadstorhaug.com>
Authored: Thu Feb 2 16:28:04 2017 +0700
Committer: Shad Storhaug <sh...@shadstorhaug.com>
Committed: Thu Feb 2 18:30:35 2017 +0700
----------------------------------------------------------------------
src/IcuBreakIterator.cs | 368 +++++++++++++++++++
.../Lucene.Net.Analysis.Common.csproj | 3 +
src/Lucene.Net.Analysis.Common/project.json | 3 +
src/Lucene.Net.Highlighter/IcuBreakIterator.cs | 368 -------------------
.../Lucene.Net.Highlighter.csproj | 4 +-
src/Lucene.Net.Highlighter/project.json | 7 +
6 files changed, 384 insertions(+), 369 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/5a7cb173/src/IcuBreakIterator.cs
----------------------------------------------------------------------
diff --git a/src/IcuBreakIterator.cs b/src/IcuBreakIterator.cs
new file mode 100644
index 0000000..0bf6007
--- /dev/null
+++ b/src/IcuBreakIterator.cs
@@ -0,0 +1,368 @@
+\ufeffusing Lucene.Net.Support;
+using System;
+using System.Collections.Generic;
+using System.Globalization;
+using System.Linq;
+using System.Text;
+
+namespace Lucene.Net
+{
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+ /// <summary>
+ /// A <see cref="BreakIterator"/> implementation that encapsulates the functionality
+ /// of icu.net's <see cref="Icu.BreakIterator"/> static class. A <see cref="BreakIterator"/>
+ /// provides methods to move forward, reverse, and randomly through a set of text breaks
+ /// defined by the <see cref="Icu.BreakIterator.UBreakIteratorType"/> enumeration.
+ /// </summary>
+ // LUCENENET specific type
+ internal class IcuBreakIterator : BreakIterator
+ {
+ private readonly Icu.Locale locale;
+ private readonly Icu.BreakIterator.UBreakIteratorType type;
+
+ private List<int> boundaries = new List<int>();
+ private int currentBoundaryIndex; // Index (not the value) of the current boundary in boundaries
+ private string text;
+
+ /// <summary>
+ /// The start offset for the string, if supplied by a <see cref="CharacterIterator"/>
+ /// </summary>
+ protected int m_start;
+
+ /// <summary>
+ /// The end offset for the string, if supplied by a <see cref="CharacterIterator"/>
+ /// </summary>
+ protected int m_end;
+
+ public IcuBreakIterator(Icu.BreakIterator.UBreakIteratorType type)
+ : this(type, CultureInfo.CurrentCulture)
+ {
+ }
+
+ public IcuBreakIterator(Icu.BreakIterator.UBreakIteratorType type, CultureInfo locale)
+ {
+ if (locale == null)
+ throw new ArgumentNullException("locale");
+ this.locale = new Icu.Locale(locale.Name);
+ this.type = type;
+ }
+
+ /// <summary>
+ /// Sets the current iteration position to the beginning of the text.
+ /// </summary>
+ /// <returns>The offset of the beginning of the text.</returns>
+ public override int First()
+ {
+ currentBoundaryIndex = 0;
+ return ReturnCurrent();
+ }
+
+ /// <summary>
+ /// Sets the current iteration position to the end of the text.
+ /// </summary>
+ /// <returns>The text's past-the-end offset.</returns>
+ public override int Last()
+ {
+ currentBoundaryIndex = boundaries.Count - 1;
+ return ReturnCurrent();
+ }
+
+ /// <summary>
+ /// Advances the iterator either forward or backward the specified number of steps.
+ /// Negative values move backward, and positive values move forward. This is
+ /// equivalent to repeatedly calling <see cref="Next()"/> or <see cref="Previous()"/>.
+ /// </summary>
+ /// <param name="n">The number of steps to move. The sign indicates the direction
+ /// (negative is backwards, and positive is forwards).</param>
+ /// <returns>The character offset of the boundary position n boundaries away from
+ /// the current one.</returns>
+ public override int Next(int n)
+ {
+ int result = Current;
+ while (n > 0)
+ {
+ result = Next();
+ --n;
+ }
+ while (n < 0)
+ {
+ result = Previous();
+ ++n;
+ }
+ return result;
+ }
+
+ /// <summary>
+ /// Advances the iterator to the next boundary position.
+ /// </summary>
+ /// <returns>The position of the first boundary after this one.</returns>
+ public override int Next()
+ {
+ if (currentBoundaryIndex >= boundaries.Count - 1 || boundaries.Count == 0)
+ {
+ return DONE;
+ }
+ currentBoundaryIndex++;
+ return ReturnCurrent();
+ }
+
+ /// <summary>
+ /// Advances the iterator backwards, to the last boundary preceding this one.
+ /// </summary>
+ /// <returns>The position of the last boundary position preceding this one.</returns>
+ public override int Previous()
+ {
+ if (currentBoundaryIndex == 0 || boundaries.Count == 0)
+ {
+ return DONE;
+ }
+ currentBoundaryIndex--;
+ return ReturnCurrent();
+ }
+
+ /// <summary>
+ /// Throw <see cref="ArgumentException"/> unless begin <= offset < end.
+ /// </summary>
+ /// <param name="offset"></param>
+ private void CheckOffset(int offset)
+ {
+ if (offset < m_start || offset > m_end)
+ {
+ throw new ArgumentException("offset out of bounds");
+ }
+ }
+
+ /// <summary>
+ /// Sets the iterator to refer to the first boundary position following
+ /// the specified position.
+ /// </summary>
+ /// <param name="offset">The position from which to begin searching for a break position.</param>
+ /// <returns>The position of the first break after the current position.</returns>
+ public override int Following(int offset)
+ {
+ CheckOffset(offset);
+
+ if (boundaries.Count == 0)
+ {
+ return DONE;
+ }
+
+ int following = GetLowestIndexGreaterThan(offset);
+ if (following == -1)
+ {
+ currentBoundaryIndex = boundaries.Count - 1;
+ return DONE;
+ }
+ else
+ {
+ currentBoundaryIndex = following;
+ }
+ return ReturnCurrent();
+ }
+
+ private int GetLowestIndexGreaterThan(int offset)
+ {
+ int index = boundaries.BinarySearch(offset);
+ if (index < 0)
+ {
+ return ~index;
+ }
+ else if (index + 1 < boundaries.Count)
+ {
+ return index + 1;
+ }
+
+ return -1;
+ }
+
+ /// <summary>
+ /// Sets the iterator to refer to the last boundary position before the
+ /// specified position.
+ /// </summary>
+ /// <param name="offset">The position to begin searching for a break from.</param>
+ /// <returns>The position of the last boundary before the starting position.</returns>
+ public override int Preceding(int offset)
+ {
+ CheckOffset(offset);
+
+ if (boundaries.Count == 0)
+ {
+ return DONE;
+ }
+
+ int preceeding = GetHighestIndexLessThan(offset);
+ if (preceeding == -1)
+ {
+ currentBoundaryIndex = 0;
+ return DONE;
+ }
+ else
+ {
+ currentBoundaryIndex = preceeding;
+ }
+ return ReturnCurrent();
+ }
+
+ private int GetHighestIndexLessThan(int offset)
+ {
+ int index = boundaries.BinarySearch(offset);
+ if (index < 0)
+ {
+ return ~index - 1;
+ }
+ else
+ {
+ // NOTE: This is intentionally allowed to return -1 in the case
+ // where index == 0. This state indicates we are before the first boundary.
+ return index - 1;
+ }
+ }
+
+ /// <summary>
+ /// Returns the current iteration position.
+ /// </summary>
+ public override int Current
+ {
+ get { return ReturnCurrent(); }
+ }
+
+ /// <summary>
+ /// Gets the text being analyzed.
+ /// </summary>
+ public override string Text
+ {
+ get
+ {
+ return text;
+ }
+ }
+
+ /// <summary>
+ /// Set the iterator to analyze a new piece of text. This function resets
+ /// the current iteration position to the beginning of the text.
+ /// </summary>
+ /// <param name="newText">The text to analyze.</param>
+ public override void SetText(string newText)
+ {
+ text = newText;
+ currentBoundaryIndex = 0;
+ m_start = 0;
+ m_end = newText.Length;
+
+ LoadBoundaries(m_start, m_end);
+ }
+
+ public override void SetText(CharacterIterator newText)
+ {
+ text = newText.GetTextAsString();
+ currentBoundaryIndex = 0;
+ m_start = newText.BeginIndex;
+ m_end = newText.EndIndex;
+
+ LoadBoundaries(m_start, m_end);
+ }
+
+ private void LoadBoundaries(int start, int end)
+ {
+ //boundaries = new List<int>();
+
+ IEnumerable<Icu.Boundary> icuBoundaries;
+ string offsetText = text.Substring(start, end - start);
+
+
+ if (type == Icu.BreakIterator.UBreakIteratorType.WORD)
+ {
+ // LUCENENET TODO: HACK - replacing hyphen with "a" so hyphenated words aren't broken
+ icuBoundaries = Icu.BreakIterator.GetWordBoundaries(locale, offsetText.Replace("-", "a"), true);
+ }
+ else
+ {
+ if (type == Icu.BreakIterator.UBreakIteratorType.SENTENCE)
+ {
+ // LUCENENET TODO: HACK - newline character causes incorrect sentence breaking.
+ offsetText = offsetText.Replace("\n", " ");
+ // LUCENENET TODO: HACK - the ICU sentence logic doesn't work (in English anyway) when sentences don't
+ // begin with capital letters.
+ offsetText = CapitalizeFirst(offsetText);
+ }
+
+ icuBoundaries = Icu.BreakIterator.GetBoundaries(type, locale, offsetText);
+ }
+
+ boundaries = icuBoundaries
+ .Select(t => new[] { t.Start + start, t.End + start })
+ .SelectMany(b => b)
+ .Distinct()
+ .ToList();
+ }
+
+ /// <summary>
+ /// Returns true if the specified character offset is a text boundary.
+ /// </summary>
+ /// <param name="offset">the character offset to check.</param>
+ /// <returns><c>true</c> if "offset" is a boundary position, <c>false</c> otherwise.</returns>
+ public override bool IsBoundary(int offset)
+ {
+ CheckOffset(offset);
+ return boundaries.Contains(offset);
+ }
+
+ private int ReturnCurrent()
+ {
+ if (boundaries.Count > 0)
+ {
+ return currentBoundaryIndex < boundaries.Count && currentBoundaryIndex > -1
+ ? boundaries[currentBoundaryIndex]
+ : DONE;
+ }
+
+ // If there are no boundaries, we must return the start offset
+ return m_start;
+ }
+
+ /// <summary>
+ /// LUCENENET TODO: This is a temporary workaround for an issue with icu-dotnet
+ /// where it doesn't correctly break sentences unless they begin with a capital letter.
+ /// If/when ICU is fixed, this method should be deleted and the IcuBreakIterator
+ /// code changed to remove calls to this method.
+ /// </summary>
+ public static string CapitalizeFirst(string s)
+ {
+ bool isNewSentence = true;
+ var result = new StringBuilder(s.Length);
+ for (int i = 0; i < s.Length; i++)
+ {
+ if (isNewSentence && char.IsLetter(s[i]))
+ {
+ result.Append(char.ToUpper(s[i]));
+ isNewSentence = false;
+ }
+ else
+ result.Append(s[i]);
+
+ if (s[i] == '!' || s[i] == '?' || s[i] == '.')
+ {
+ isNewSentence = true;
+ }
+ }
+
+ return result.ToString();
+ }
+ }
+}
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/5a7cb173/src/Lucene.Net.Analysis.Common/Lucene.Net.Analysis.Common.csproj
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/Lucene.Net.Analysis.Common.csproj b/src/Lucene.Net.Analysis.Common/Lucene.Net.Analysis.Common.csproj
index 4e4a0e9..610125f 100644
--- a/src/Lucene.Net.Analysis.Common/Lucene.Net.Analysis.Common.csproj
+++ b/src/Lucene.Net.Analysis.Common/Lucene.Net.Analysis.Common.csproj
@@ -42,6 +42,9 @@
<Reference Include="System.Xml.Linq" />
</ItemGroup>
<ItemGroup>
+ <Compile Include="..\IcuBreakIterator.cs">
+ <Link>IcuBreakIterator.cs</Link>
+ </Compile>
<Compile Include="Analysis\Bg\BulgarianAnalyzer.cs" />
<Compile Include="Analysis\Bg\BulgarianStemFilter.cs" />
<Compile Include="Analysis\Bg\BulgarianStemFilterFactory.cs" />
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/5a7cb173/src/Lucene.Net.Analysis.Common/project.json
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Analysis.Common/project.json b/src/Lucene.Net.Analysis.Common/project.json
index 9b9b2cf..02d5a79 100644
--- a/src/Lucene.Net.Analysis.Common/project.json
+++ b/src/Lucene.Net.Analysis.Common/project.json
@@ -16,6 +16,9 @@
]
},
"compile": {
+ "includeFiles": [
+ "../IcuBreakIterator.cs"
+ ],
"exclude": [
]
}
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/5a7cb173/src/Lucene.Net.Highlighter/IcuBreakIterator.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Highlighter/IcuBreakIterator.cs b/src/Lucene.Net.Highlighter/IcuBreakIterator.cs
deleted file mode 100644
index 72e89b0..0000000
--- a/src/Lucene.Net.Highlighter/IcuBreakIterator.cs
+++ /dev/null
@@ -1,368 +0,0 @@
-\ufeffusing Lucene.Net.Support;
-using System;
-using System.Collections.Generic;
-using System.Globalization;
-using System.Linq;
-using System.Text;
-
-namespace Lucene.Net.Search
-{
- /*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
- /// <summary>
- /// A <see cref="BreakIterator"/> implementation that encapsulates the functionality
- /// of icu.net's <see cref="Icu.BreakIterator"/> static class. A <see cref="BreakIterator"/>
- /// provides methods to move forward, reverse, and randomly through a set of text breaks
- /// defined by the <see cref="Icu.BreakIterator.UBreakIteratorType"/> enumeration.
- /// </summary>
- // LUCENENET specific type
- internal class IcuBreakIterator : BreakIterator
- {
- private readonly Icu.Locale locale;
- private readonly Icu.BreakIterator.UBreakIteratorType type;
-
- private List<int> boundaries = new List<int>();
- private int currentBoundaryIndex; // Index (not the value) of the current boundary in boundaries
- private string text;
-
- /// <summary>
- /// The start offset for the string, if supplied by a <see cref="CharacterIterator"/>
- /// </summary>
- protected int m_start;
-
- /// <summary>
- /// The end offset for the string, if supplied by a <see cref="CharacterIterator"/>
- /// </summary>
- protected int m_end;
-
- public IcuBreakIterator(Icu.BreakIterator.UBreakIteratorType type)
- : this(type, CultureInfo.CurrentCulture)
- {
- }
-
- public IcuBreakIterator(Icu.BreakIterator.UBreakIteratorType type, CultureInfo locale)
- {
- if (locale == null)
- throw new ArgumentNullException("locale");
- this.locale = new Icu.Locale(locale.Name);
- this.type = type;
- }
-
- /// <summary>
- /// Sets the current iteration position to the beginning of the text.
- /// </summary>
- /// <returns>The offset of the beginning of the text.</returns>
- public override int First()
- {
- currentBoundaryIndex = 0;
- return ReturnCurrent();
- }
-
- /// <summary>
- /// Sets the current iteration position to the end of the text.
- /// </summary>
- /// <returns>The text's past-the-end offset.</returns>
- public override int Last()
- {
- currentBoundaryIndex = boundaries.Count - 1;
- return ReturnCurrent();
- }
-
- /// <summary>
- /// Advances the iterator either forward or backward the specified number of steps.
- /// Negative values move backward, and positive values move forward. This is
- /// equivalent to repeatedly calling <see cref="Next()"/> or <see cref="Previous()"/>.
- /// </summary>
- /// <param name="n">The number of steps to move. The sign indicates the direction
- /// (negative is backwards, and positive is forwards).</param>
- /// <returns>The character offset of the boundary position n boundaries away from
- /// the current one.</returns>
- public override int Next(int n)
- {
- int result = Current;
- while (n > 0)
- {
- result = Next();
- --n;
- }
- while (n < 0)
- {
- result = Previous();
- ++n;
- }
- return result;
- }
-
- /// <summary>
- /// Advances the iterator to the next boundary position.
- /// </summary>
- /// <returns>The position of the first boundary after this one.</returns>
- public override int Next()
- {
- if (currentBoundaryIndex >= boundaries.Count - 1 || boundaries.Count == 0)
- {
- return DONE;
- }
- currentBoundaryIndex++;
- return ReturnCurrent();
- }
-
- /// <summary>
- /// Advances the iterator backwards, to the last boundary preceding this one.
- /// </summary>
- /// <returns>The position of the last boundary position preceding this one.</returns>
- public override int Previous()
- {
- if (currentBoundaryIndex == 0 || boundaries.Count == 0)
- {
- return DONE;
- }
- currentBoundaryIndex--;
- return ReturnCurrent();
- }
-
- /// <summary>
- /// Throw <see cref="ArgumentException"/> unless begin <= offset < end.
- /// </summary>
- /// <param name="offset"></param>
- private void CheckOffset(int offset)
- {
- if (offset < m_start || offset > m_end)
- {
- throw new ArgumentException("offset out of bounds");
- }
- }
-
- /// <summary>
- /// Sets the iterator to refer to the first boundary position following
- /// the specified position.
- /// </summary>
- /// <param name="offset">The position from which to begin searching for a break position.</param>
- /// <returns>The position of the first break after the current position.</returns>
- public override int Following(int offset)
- {
- CheckOffset(offset);
-
- if (boundaries.Count == 0)
- {
- return DONE;
- }
-
- int following = GetLowestIndexGreaterThan(offset);
- if (following == -1)
- {
- currentBoundaryIndex = boundaries.Count - 1;
- return DONE;
- }
- else
- {
- currentBoundaryIndex = following;
- }
- return ReturnCurrent();
- }
-
- private int GetLowestIndexGreaterThan(int offset)
- {
- int index = boundaries.BinarySearch(offset);
- if (index < 0)
- {
- return ~index;
- }
- else if (index + 1 < boundaries.Count)
- {
- return index + 1;
- }
-
- return -1;
- }
-
- /// <summary>
- /// Sets the iterator to refer to the last boundary position before the
- /// specified position.
- /// </summary>
- /// <param name="offset">The position to begin searching for a break from.</param>
- /// <returns>The position of the last boundary before the starting position.</returns>
- public override int Preceding(int offset)
- {
- CheckOffset(offset);
-
- if (boundaries.Count == 0)
- {
- return DONE;
- }
-
- int preceeding = GetHighestIndexLessThan(offset);
- if (preceeding == -1)
- {
- currentBoundaryIndex = 0;
- return DONE;
- }
- else
- {
- currentBoundaryIndex = preceeding;
- }
- return ReturnCurrent();
- }
-
- private int GetHighestIndexLessThan(int offset)
- {
- int index = boundaries.BinarySearch(offset);
- if (index < 0)
- {
- return ~index - 1;
- }
- else
- {
- // NOTE: This is intentionally allowed to return -1 in the case
- // where index == 0. This state indicates we are before the first boundary.
- return index - 1;
- }
- }
-
- /// <summary>
- /// Returns the current iteration position.
- /// </summary>
- public override int Current
- {
- get { return ReturnCurrent(); }
- }
-
- /// <summary>
- /// Gets the text being analyzed.
- /// </summary>
- public override string Text
- {
- get
- {
- return text;
- }
- }
-
- /// <summary>
- /// Set the iterator to analyze a new piece of text. This function resets
- /// the current iteration position to the beginning of the text.
- /// </summary>
- /// <param name="newText">The text to analyze.</param>
- public override void SetText(string newText)
- {
- text = newText;
- currentBoundaryIndex = 0;
- m_start = 0;
- m_end = newText.Length;
-
- LoadBoundaries(m_start, m_end);
- }
-
- public override void SetText(CharacterIterator newText)
- {
- text = newText.GetTextAsString();
- currentBoundaryIndex = 0;
- m_start = newText.BeginIndex;
- m_end = newText.EndIndex;
-
- LoadBoundaries(m_start, m_end);
- }
-
- private void LoadBoundaries(int start, int end)
- {
- //boundaries = new List<int>();
-
- IEnumerable<Icu.Boundary> icuBoundaries;
- string offsetText = text.Substring(start, end - start);
-
-
- if (type == Icu.BreakIterator.UBreakIteratorType.WORD)
- {
- // LUCENENET TODO: HACK - replacing hyphen with "a" so hyphenated words aren't broken
- icuBoundaries = Icu.BreakIterator.GetWordBoundaries(locale, offsetText.Replace("-", "a"), true);
- }
- else
- {
- if (type == Icu.BreakIterator.UBreakIteratorType.SENTENCE)
- {
- // LUCENENET TODO: HACK - newline character causes incorrect sentence breaking.
- offsetText = offsetText.Replace("\n", " ");
- // LUCENENET TODO: HACK - the ICU sentence logic doesn't work (in English anyway) when sentences don't
- // begin with capital letters.
- offsetText = CapitalizeFirst(offsetText);
- }
-
- icuBoundaries = Icu.BreakIterator.GetBoundaries(type, locale, offsetText);
- }
-
- boundaries = icuBoundaries
- .Select(t => new[] { t.Start + start, t.End + start })
- .SelectMany(b => b)
- .Distinct()
- .ToList();
- }
-
- /// <summary>
- /// Returns true if the specified character offset is a text boundary.
- /// </summary>
- /// <param name="offset">the character offset to check.</param>
- /// <returns><c>true</c> if "offset" is a boundary position, <c>false</c> otherwise.</returns>
- public override bool IsBoundary(int offset)
- {
- CheckOffset(offset);
- return boundaries.Contains(offset);
- }
-
- private int ReturnCurrent()
- {
- if (boundaries.Count > 0)
- {
- return currentBoundaryIndex < boundaries.Count && currentBoundaryIndex > -1
- ? boundaries[currentBoundaryIndex]
- : DONE;
- }
-
- // If there are no boundaries, we must return the start offset
- return m_start;
- }
-
- /// <summary>
- /// LUCENENET TODO: This is a temporary workaround for an issue with icu-dotnet
- /// where it doesn't correctly break sentences unless they begin with a capital letter.
- /// If/when ICU is fixed, this method should be deleted and the IcuBreakIterator
- /// code changed to remove calls to this method.
- /// </summary>
- public static string CapitalizeFirst(string s)
- {
- bool isNewSentence = true;
- var result = new StringBuilder(s.Length);
- for (int i = 0; i < s.Length; i++)
- {
- if (isNewSentence && char.IsLetter(s[i]))
- {
- result.Append(char.ToUpper(s[i]));
- isNewSentence = false;
- }
- else
- result.Append(s[i]);
-
- if (s[i] == '!' || s[i] == '?' || s[i] == '.')
- {
- isNewSentence = true;
- }
- }
-
- return result.ToString();
- }
- }
-}
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/5a7cb173/src/Lucene.Net.Highlighter/Lucene.Net.Highlighter.csproj
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Highlighter/Lucene.Net.Highlighter.csproj b/src/Lucene.Net.Highlighter/Lucene.Net.Highlighter.csproj
index fedffc8..94eb504 100644
--- a/src/Lucene.Net.Highlighter/Lucene.Net.Highlighter.csproj
+++ b/src/Lucene.Net.Highlighter/Lucene.Net.Highlighter.csproj
@@ -41,6 +41,9 @@
<Reference Include="System.Xml" />
</ItemGroup>
<ItemGroup>
+ <Compile Include="..\IcuBreakIterator.cs">
+ <Link>IcuBreakIterator.cs</Link>
+ </Compile>
<Compile Include="Highlight\DefaultEncoder.cs" />
<Compile Include="Highlight\GradientFormatter.cs" />
<Compile Include="Highlight\Highlighter.cs" />
@@ -76,7 +79,6 @@
<Compile Include="PostingsHighlight\WholeBreakIterator.cs" />
<Compile Include="Properties\AssemblyInfo.cs" />
<Compile Include="RectangularArrays.cs" />
- <Compile Include="IcuBreakIterator.cs" />
<Compile Include="VectorHighlight\BaseFragListBuilder.cs" />
<Compile Include="VectorHighlight\BaseFragmentsBuilder.cs" />
<Compile Include="VectorHighlight\BoundaryScanner.cs" />
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/5a7cb173/src/Lucene.Net.Highlighter/project.json
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Highlighter/project.json b/src/Lucene.Net.Highlighter/project.json
index 4200936..7b59d99 100644
--- a/src/Lucene.Net.Highlighter/project.json
+++ b/src/Lucene.Net.Highlighter/project.json
@@ -6,6 +6,13 @@
"Lucene.Net.Analysis.Common": "4.8.0-alpha",
"Lucene.Net.Queries": "4.8.0-alpha"
},
+ "buildOptions": {
+ "compile": {
+ "includeFiles": [
+ "../IcuBreakIterator.cs"
+ ]
+ }
+ },
"frameworks": {
"netstandard1.5": {
"imports": "dnxcore50",