You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucenenet.apache.org by ni...@apache.org on 2017/08/06 17:59:12 UTC
[14/33] lucenenet git commit: Lucene.Net.Benchmark: Created a simple
English number formatter to spell out numbers into words. Since we don't need
localization,
this is a sufficient replacement for the ICU RuleBasedNumberFormatter.
Lucene.Net.Benchmark: Created a simple English number formatter to spell out numbers into words. Since we don't need localization, this is a sufficient replacement for the ICU RuleBasedNumberFormatter.
Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/1cfbd8b7
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/1cfbd8b7
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/1cfbd8b7
Branch: refs/heads/master
Commit: 1cfbd8b7c35c7f1ae2bd616b44d752eef4d7d180
Parents: a60c5ef
Author: Shad Storhaug <sh...@shadstorhaug.com>
Authored: Tue Aug 1 21:11:54 2017 +0700
Committer: Shad Storhaug <sh...@shadstorhaug.com>
Committed: Wed Aug 2 09:55:15 2017 +0700
----------------------------------------------------------------------
.../ByTask/Feeds/LongToEnglishContentSource.cs | 6 +-
.../ByTask/Feeds/LongToEnglishQueryMaker.cs | 4 +-
.../Lucene.Net.Benchmark.csproj | 1 +
.../Support/EnglishNumberFormatExtensions.cs | 186 +++++++++++++++++++
.../Lucene.Net.Tests.Benchmark.csproj | 1 +
.../TestEnglishNumberFormatExtensions.cs | 38 ++++
6 files changed, 231 insertions(+), 5 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1cfbd8b7/src/Lucene.Net.Benchmark/ByTask/Feeds/LongToEnglishContentSource.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Benchmark/ByTask/Feeds/LongToEnglishContentSource.cs b/src/Lucene.Net.Benchmark/ByTask/Feeds/LongToEnglishContentSource.cs
index fadab82..7c407a2 100644
--- a/src/Lucene.Net.Benchmark/ByTask/Feeds/LongToEnglishContentSource.cs
+++ b/src/Lucene.Net.Benchmark/ByTask/Feeds/LongToEnglishContentSource.cs
@@ -1,4 +1,5 @@
-using System;
+using Lucene.Net.Support;
+using System;
using System.Globalization;
namespace Lucene.Net.Benchmarks.ByTask.Feeds
@@ -55,8 +56,7 @@ namespace Lucene.Net.Benchmarks.ByTask.Feeds
}
}
- // LUCENENET TODO: Rules based number formatting...(from ICU)
- docData.Body = curCounter.ToString(); //rnbf.format(curCounter);
+ docData.Body = curCounter.ToWords(); //rnbf.format(curCounter);
docData.Name = "doc_" + curCounter.ToString(CultureInfo.InvariantCulture);
docData.Title = "title_" + curCounter.ToString(CultureInfo.InvariantCulture);
docData.SetDate(new DateTime());
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1cfbd8b7/src/Lucene.Net.Benchmark/ByTask/Feeds/LongToEnglishQueryMaker.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Benchmark/ByTask/Feeds/LongToEnglishQueryMaker.cs b/src/Lucene.Net.Benchmark/ByTask/Feeds/LongToEnglishQueryMaker.cs
index f565eb8..78ac924 100644
--- a/src/Lucene.Net.Benchmark/ByTask/Feeds/LongToEnglishQueryMaker.cs
+++ b/src/Lucene.Net.Benchmark/ByTask/Feeds/LongToEnglishQueryMaker.cs
@@ -4,6 +4,7 @@ using Lucene.Net.Benchmarks.ByTask.Tasks;
using Lucene.Net.Benchmarks.ByTask.Utils;
using Lucene.Net.QueryParsers.Classic;
using Lucene.Net.Search;
+using Lucene.Net.Support;
using Lucene.Net.Util;
using System;
@@ -48,9 +49,8 @@ namespace Lucene.Net.Benchmarks.ByTask.Feeds
{
lock (this)
{
- // LUCENENET TODO: Rules based number formatter (from ICU)
//return parser.Parse("" + rnbf.format(GetNextCounter()) + "");
- return m_parser.Parse(GetNextCounter().ToString());
+ return m_parser.Parse(GetNextCounter().ToWords());
}
}
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1cfbd8b7/src/Lucene.Net.Benchmark/Lucene.Net.Benchmark.csproj
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Benchmark/Lucene.Net.Benchmark.csproj b/src/Lucene.Net.Benchmark/Lucene.Net.Benchmark.csproj
index 0241099..f00cd18 100644
--- a/src/Lucene.Net.Benchmark/Lucene.Net.Benchmark.csproj
+++ b/src/Lucene.Net.Benchmark/Lucene.Net.Benchmark.csproj
@@ -159,6 +159,7 @@
<Compile Include="Quality\Utils\QualityQueriesFinder.cs" />
<Compile Include="Quality\Utils\SimpleQQParser.cs" />
<Compile Include="Quality\Utils\SubmissionReport.cs" />
+ <Compile Include="Support\EnglishNumberFormatExtensions.cs" />
<Compile Include="Utils\ExtractReuters.cs" />
<Compile Include="Utils\ExtractWikipedia.cs" />
<Compile Include="..\CommonAssemblyInfo.cs">
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1cfbd8b7/src/Lucene.Net.Benchmark/Support/EnglishNumberFormatExtensions.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Benchmark/Support/EnglishNumberFormatExtensions.cs b/src/Lucene.Net.Benchmark/Support/EnglishNumberFormatExtensions.cs
new file mode 100644
index 0000000..71362f0
--- /dev/null
+++ b/src/Lucene.Net.Benchmark/Support/EnglishNumberFormatExtensions.cs
@@ -0,0 +1,186 @@
+using System;
+using System.Text;
+
+namespace Lucene.Net.Support
+{
+    /// <summary>
+    /// Extension methods to spell out numbers into English.
+    /// <para/>
+    /// Negative values are prefixed with "negative". The whole <c>long</c> range is
+    /// supported, including <c>long.MinValue</c>, whose magnitude does not fit in a
+    /// <c>long</c>.
+    /// <para/>
+    /// Inspiration: https://stackoverflow.com/a/2601001
+    /// </summary>
+    public static class EnglishNumberFormatExtensions
+    {
+        private const long Quintillion = Quadrillion * 1000;
+        private const long Quadrillion = Trillion * 1000;
+        private const long Trillion = Billion * 1000;
+        private const long Billion = Million * 1000;
+        private const long Million = Thousand * 1000;
+        private const long Thousand = Hundred * 10;
+        private const long Hundred = 100;
+
+        // Scale thresholds ordered from largest to smallest; paired index-for-index with ScaleNames.
+        private static readonly ulong[] ScaleValues =
+        {
+            (ulong)Quintillion,
+            (ulong)Quadrillion,
+            (ulong)Trillion,
+            (ulong)Billion,
+            (ulong)Million,
+            (ulong)Thousand,
+            (ulong)Hundred
+        };
+
+        // Each name carries its leading space because it always follows the spelled-out multiplier.
+        private static readonly string[] ScaleNames =
+        {
+            " quintillion",
+            " quadrillion",
+            " trillion",
+            " billion",
+            " million",
+            " thousand",
+            " hundred"
+        };
+
+        // Indexed by the tens digit; entries 0 and 1 are unused because values below 20 come from Ones.
+        private static readonly string[] Tens =
+        {
+            "", "", "twenty", "thirty", "forty", "fifty", "sixty", "seventy", "eighty", "ninety"
+        };
+
+        // Indexed directly by the value, 0 through 19.
+        private static readonly string[] Ones =
+        {
+            "zero", "one", "two", "three", "four", "five", "six", "seven", "eight", "nine",
+            "ten", "eleven", "twelve", "thirteen", "fourteen", "fifteen", "sixteen",
+            "seventeen", "eighteen", "nineteen"
+        };
+
+        /// <summary>
+        /// Returns the spelled-out English words for the provided <paramref name="value"/>.
+        /// </summary>
+        /// <param name="value">The number to spell out.</param>
+        /// <returns>The English words, for example <c>"twenty-one"</c> for 21.</returns>
+        public static string ToWords(this int value)
+        {
+            return ToWords((long)value);
+        }
+
+        /// <summary>
+        /// Returns the spelled-out English words for the provided <paramref name="value"/>.
+        /// </summary>
+        /// <param name="value">The number to spell out.</param>
+        /// <returns>
+        /// The English words, for example <c>"one thousand two hundred thirty-four"</c> for 1234,
+        /// <c>"zero"</c> for 0 and <c>"negative five"</c> for -5.
+        /// </returns>
+        public static string ToWords(this long value)
+        {
+            return ToWords(value, new StringBuilder()).ToString();
+        }
+
+        /// <summary>
+        /// Appends the sign (if any) and the words for the magnitude of
+        /// <paramref name="value"/> to <paramref name="builder"/> and returns the builder.
+        /// </summary>
+        private static StringBuilder ToWords(long value, StringBuilder builder)
+        {
+            if (value >= 0)
+            {
+                return AppendMagnitude((ulong)value, builder);
+            }
+
+            builder.Append("negative ");
+
+            // Math.Abs(long.MinValue) throws OverflowException, so the magnitude is computed
+            // in two steps that never leave the range of long before widening to ulong.
+            ulong magnitude = (ulong)(-(value + 1)) + 1UL;
+            return AppendMagnitude(magnitude, builder);
+        }
+
+        /// <summary>
+        /// Appends the words for the non-negative <paramref name="value"/> to
+        /// <paramref name="builder"/> and returns the builder.
+        /// </summary>
+        private static StringBuilder AppendMagnitude(ulong value, StringBuilder builder)
+        {
+            if (value == 0)
+            {
+                return builder.Append(Ones[0]);
+            }
+
+            // Peel off each scale from quintillions down to hundreds. The multiplier of a
+            // scale is itself spelled out recursively (it is always between 1 and 999).
+            for (int i = 0; i < ScaleValues.Length; i++)
+            {
+                ulong scale = ScaleValues[i];
+                if (value < scale)
+                {
+                    continue;
+                }
+
+                ulong unit = value / scale;
+                value -= unit * scale;
+
+                AppendMagnitude(unit, builder);
+                builder.Append(ScaleNames[i]);
+                if (value > 0)
+                {
+                    builder.Append(' ');
+                }
+            }
+
+            // What remains is below one hundred: an optional tens word, then a hyphenated ones word.
+            if (value >= 20)
+            {
+                builder.Append(Tens[(int)(value / 10)]);
+                value %= 10;
+                if (value > 0)
+                {
+                    builder.Append('-');
+                }
+            }
+
+            if (value > 0)
+            {
+                builder.Append(Ones[(int)value]);
+            }
+
+            return builder;
+        }
+    }
+}
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1cfbd8b7/src/Lucene.Net.Tests.Benchmark/Lucene.Net.Tests.Benchmark.csproj
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Benchmark/Lucene.Net.Tests.Benchmark.csproj b/src/Lucene.Net.Tests.Benchmark/Lucene.Net.Tests.Benchmark.csproj
index c57a59f..5c9ffe1 100644
--- a/src/Lucene.Net.Tests.Benchmark/Lucene.Net.Tests.Benchmark.csproj
+++ b/src/Lucene.Net.Tests.Benchmark/Lucene.Net.Tests.Benchmark.csproj
@@ -68,6 +68,7 @@
<Compile Include="Properties\AssemblyInfo.cs" />
<Compile Include="Quality\TestQualityRun.cs" />
<Compile Include="Support\TestApiConsistency.cs" />
+ <Compile Include="Support\TestEnglishNumberFormatExtensions.cs" />
<Compile Include="Support\TestExceptionSerialization.cs" />
</ItemGroup>
<ItemGroup>
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/1cfbd8b7/src/Lucene.Net.Tests.Benchmark/Support/TestEnglishNumberFormatExtensions.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests.Benchmark/Support/TestEnglishNumberFormatExtensions.cs b/src/Lucene.Net.Tests.Benchmark/Support/TestEnglishNumberFormatExtensions.cs
new file mode 100644
index 0000000..68cc70a
--- /dev/null
+++ b/src/Lucene.Net.Tests.Benchmark/Support/TestEnglishNumberFormatExtensions.cs
@@ -0,0 +1,38 @@
+using Lucene.Net.Attributes;
+using NUnit.Framework;
+
+namespace Lucene.Net.Support
+{
+ /*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+    /// <summary>
+    /// Tests for the <c>ToWords</c> extension methods that spell out integers in English.
+    /// </summary>
+    [TestFixture]
+    public class TestEnglishNumberFormatExtensions
+    {
+        /// <summary>
+        /// Verifies representative positive values, from two digits up to hundreds of billions.
+        /// </summary>
+        [Test, LuceneNetSpecific]
+        public void TestToWords()
+        {
+            Assert.AreEqual("twenty-one", 21.ToWords());
+            Assert.AreEqual("one thousand two hundred thirty-four", 1234.ToWords());
+            Assert.AreEqual("six million four hundred ninety-one thousand three hundred forty-eight", 6491348.ToWords());
+            Assert.AreEqual("one hundred thirty", 130.ToWords());
+            Assert.AreEqual("one hundred thirty-seven", 137.ToWords());
+            Assert.AreEqual("seven hundred forty-nine million one hundred thirty-two thousand one hundred forty-six", 749132146.ToWords());
+            Assert.AreEqual("nine hundred ninety-nine billion seven hundred forty-nine million one hundred thirty-two thousand one hundred forty-six", 999749132146.ToWords());
+        }
+
+        /// <summary>
+        /// Verifies zero, negative values, the teens, round tens and exact scale boundaries,
+        /// where no trailing separator may be emitted.
+        /// </summary>
+        [Test, LuceneNetSpecific]
+        public void TestToWordsEdgeCases()
+        {
+            Assert.AreEqual("zero", 0.ToWords());
+            Assert.AreEqual("zero", 0L.ToWords());
+            Assert.AreEqual("negative twenty-one", (-21).ToWords());
+            Assert.AreEqual("negative one thousand", (-1000L).ToWords());
+
+            Assert.AreEqual("ten", 10.ToWords());
+            Assert.AreEqual("thirteen", 13.ToWords());
+            Assert.AreEqual("nineteen", 19.ToWords());
+            Assert.AreEqual("twenty", 20.ToWords());
+            Assert.AreEqual("ninety-nine", 99.ToWords());
+
+            Assert.AreEqual("one hundred", 100.ToWords());
+            Assert.AreEqual("one thousand", 1000.ToWords());
+            Assert.AreEqual("one million", 1000000.ToWords());
+            Assert.AreEqual("one billion one", 1000000001L.ToWords());
+        }
+    }
+}