You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucenenet.apache.org by ni...@apache.org on 2022/10/31 06:19:12 UTC
[lucenenet] 04/14: PERFORMANCE: Lucene.Net.Analysis.Util.CharacterUtils: Use spans and stackalloc to reduce heap allocations when lowercasing. Added system property named "maxStackByteLimit" that defaults to 2048 bytes.
This is an automated email from the ASF dual-hosted git repository.
nightowl888 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/lucenenet.git
commit 98c52a8649cbb7e9c6f209556a84c49f59ae3ec8
Author: Shad Storhaug <sh...@shadstorhaug.com>
AuthorDate: Thu Oct 27 01:13:34 2022 +0700
PERFORMANCE: Lucene.Net.Analysis.Util.CharacterUtils: Use spans and stackalloc to reduce heap allocations when lowercasing. Added system property named "maxStackByteLimit" that defaults to 2048 bytes.
---
.build/dependencies.props | 1 +
.../Analysis/Util/CharacterUtils.cs | 40 +++++++++++++++-------
.../Lucene.Net.Analysis.Common.csproj | 8 +++++
.../Configuration/TestConfigurationService.cs | 8 +++++
.../Startup.cs | 3 +-
src/Lucene.Net.Tests/Support/TestApiConsistency.cs | 2 +-
src/Lucene.Net/Lucene.Net.csproj | 1 +
src/Lucene.Net/Util/Constants.cs | 7 +++-
8 files changed, 55 insertions(+), 15 deletions(-)
diff --git a/.build/dependencies.props b/.build/dependencies.props
index 7a9ad2f27..1cfb3fc69 100644
--- a/.build/dependencies.props
+++ b/.build/dependencies.props
@@ -73,6 +73,7 @@
<RandomizedTestingGeneratorsPackageVersion>2.7.8</RandomizedTestingGeneratorsPackageVersion>
<SharpZipLibPackageVersion>1.1.0</SharpZipLibPackageVersion>
<Spatial4nPackageVersion>0.4.1.1</Spatial4nPackageVersion>
+ <SystemMemoryPackageVersion>4.5.4</SystemMemoryPackageVersion>
<SystemReflectionEmitPackageVersion>4.3.0</SystemReflectionEmitPackageVersion>
<SystemReflectionEmitILGenerationPackageVersion>4.3.0</SystemReflectionEmitILGenerationPackageVersion>
<SystemReflectionTypeExtensionsPackageVersion>4.3.0</SystemReflectionTypeExtensionsPackageVersion>
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Util/CharacterUtils.cs b/src/Lucene.Net.Analysis.Common/Analysis/Util/CharacterUtils.cs
index 8458ca33e..179e095b2 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Util/CharacterUtils.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Util/CharacterUtils.cs
@@ -183,10 +183,17 @@ namespace Lucene.Net.Analysis.Util
Debugging.Assert(offset <= 0 && offset <= buffer.Length);
}
- // Slight optimization, eliminating a few method calls internally
- CultureInfo.InvariantCulture.TextInfo
- .ToLower(new string(buffer, offset, length))
- .CopyTo(0, buffer, offset, length);
+ // Reduce allocations by using the stack and spans
+ var source = new ReadOnlySpan<char>(buffer, offset, length);
+ var destination = buffer.AsSpan(offset, length);
+ var spare = length * sizeof(char) <= Constants.MaxStackByteLimit ? stackalloc char[length] : new char[length];
+ source.ToLower(spare, CultureInfo.InvariantCulture);
+ spare.CopyTo(destination);
+
+ //// Slight optimization, eliminating a few method calls internally
+ //CultureInfo.InvariantCulture.TextInfo
+ // .ToLower(new string(buffer, offset, length))
+ // .CopyTo(0, buffer, offset, length);
//// Optimization provided by Vincent Van Den Berghe:
//// http://search-lucene.com/m/Lucene.Net/j1zMf1uckOzOYqsi?subj=Proposal+to+speed+up+implementation+of+LowercaseFilter+charUtils+ToLower
@@ -194,8 +201,9 @@ namespace Lucene.Net.Analysis.Util
// .ToLowerInvariant()
// .CopyTo(0, buffer, offset, length);
- // Original (slow) Lucene implementation:
- //for (int i = offset; i < limit; )
+ //// Original (slow) Lucene implementation:
+ //int limit = length - offset;
+ //for (int i = offset; i < limit;)
//{
// i += Character.ToChars(
// Character.ToLower(
@@ -217,10 +225,17 @@ namespace Lucene.Net.Analysis.Util
Debugging.Assert(offset <= 0 && offset <= buffer.Length);
}
- // Slight optimization, eliminating a few method calls internally
- CultureInfo.InvariantCulture.TextInfo
- .ToUpper(new string(buffer, offset, length))
- .CopyTo(0, buffer, offset, length);
+ // Reduce 2 heap allocations by using the stack and spans
+ var source = new ReadOnlySpan<char>(buffer, offset, length);
+ var destination = buffer.AsSpan(offset, length);
+ var spare = length * sizeof(char) <= Constants.MaxStackByteLimit ? stackalloc char[length] : new char[length];
+ source.ToUpper(spare, CultureInfo.InvariantCulture);
+ spare.CopyTo(destination);
+
+ //// Slight optimization, eliminating a few method calls internally
+ //CultureInfo.InvariantCulture.TextInfo
+ // .ToUpper(new string(buffer, offset, length))
+ // .CopyTo(0, buffer, offset, length);
//// Optimization provided by Vincent Van Den Berghe:
//// http://search-lucene.com/m/Lucene.Net/j1zMf1uckOzOYqsi?subj=Proposal+to+speed+up+implementation+of+LowercaseFilter+charUtils+ToLower
@@ -228,8 +243,9 @@ namespace Lucene.Net.Analysis.Util
// .ToUpperInvariant()
// .CopyTo(0, buffer, offset, length);
- // Original (slow) Lucene implementation:
- //for (int i = offset; i < limit; )
+ //// Original (slow) Lucene implementation:
+ //int limit = length - offset;
+ //for (int i = offset; i < limit;)
//{
// i += Character.ToChars(
// Character.ToUpper(
diff --git a/src/Lucene.Net.Analysis.Common/Lucene.Net.Analysis.Common.csproj b/src/Lucene.Net.Analysis.Common/Lucene.Net.Analysis.Common.csproj
index 4fcfaef03..5a64acc9e 100644
--- a/src/Lucene.Net.Analysis.Common/Lucene.Net.Analysis.Common.csproj
+++ b/src/Lucene.Net.Analysis.Common/Lucene.Net.Analysis.Common.csproj
@@ -50,6 +50,14 @@
<ProjectReference Include="..\Lucene.Net\Lucene.Net.csproj" />
</ItemGroup>
+ <ItemGroup Condition=" '$(TargetFramework)' == 'netstandard2.0' ">
+ <PackageReference Include="System.Memory" Version="$(SystemMemoryPackageVersion)" />
+ </ItemGroup>
+
+ <ItemGroup Condition=" '$(TargetFramework)' == 'net462' ">
+ <PackageReference Include="System.Memory" Version="$(SystemMemoryPackageVersion)" />
+ </ItemGroup>
+
<ItemGroup Condition=" '$(TargetFramework)' == 'net462' ">
<Reference Include="System.Xml" />
</ItemGroup>
diff --git a/src/Lucene.Net.Tests.TestFramework.DependencyInjection/Configuration/TestConfigurationService.cs b/src/Lucene.Net.Tests.TestFramework.DependencyInjection/Configuration/TestConfigurationService.cs
index 1090f77ad..a2d662e87 100644
--- a/src/Lucene.Net.Tests.TestFramework.DependencyInjection/Configuration/TestConfigurationService.cs
+++ b/src/Lucene.Net.Tests.TestFramework.DependencyInjection/Configuration/TestConfigurationService.cs
@@ -34,5 +34,13 @@ namespace Lucene.Net.Configuration
Assert.AreEqual("barValue", ConfigurationSettings.CurrentConfiguration["bar"]);
Assert.AreEqual("bazValue", ConfigurationSettings.CurrentConfiguration["baz"]);
}
+
+ [Test]
+ public void TestCustomMaxStackByteLimit()
+ {
+ // This custom value is configured in Startup.cs.
+ // 5000 chosen because it is not likely to ever be made a default.
+ Assert.AreEqual(5000, Constants.MaxStackByteLimit);
+ }
}
}
diff --git a/src/Lucene.Net.Tests.TestFramework.DependencyInjection/Startup.cs b/src/Lucene.Net.Tests.TestFramework.DependencyInjection/Startup.cs
index 0edb611cd..e21ad9594 100644
--- a/src/Lucene.Net.Tests.TestFramework.DependencyInjection/Startup.cs
+++ b/src/Lucene.Net.Tests.TestFramework.DependencyInjection/Startup.cs
@@ -41,7 +41,8 @@ public class Startup : LuceneTestFrameworkInitializer
{
["foo"] = "fooValue",
["bar"] = "barValue",
- ["baz"] = "bazValue"
+ ["baz"] = "bazValue",
+ ["maxStackByteLimit"] = "5000",
});
ConfigureServices(serviceCollection, configurationBuilder);
IServiceProvider services = serviceCollection.BuildServiceProvider();
diff --git a/src/Lucene.Net.Tests/Support/TestApiConsistency.cs b/src/Lucene.Net.Tests/Support/TestApiConsistency.cs
index 8d7bfc57b..6ac4ff2e3 100644
--- a/src/Lucene.Net.Tests/Support/TestApiConsistency.cs
+++ b/src/Lucene.Net.Tests/Support/TestApiConsistency.cs
@@ -38,7 +38,7 @@ namespace Lucene.Net
[TestCase(typeof(Lucene.Net.Analysis.Analyzer))]
public override void TestPrivateFieldNames(Type typeFromTargetAssembly)
{
- base.TestPrivateFieldNames(typeFromTargetAssembly, @"^Lucene\.Net\.Support\.(?:ConcurrentHashSet|PlatformHelper|DateTimeOffsetUtil)|^Lucene\.ExceptionExtensions");
+ base.TestPrivateFieldNames(typeFromTargetAssembly, @"^Lucene\.Net\.Support\.(?:ConcurrentHashSet|PlatformHelper|DateTimeOffsetUtil)|^Lucene\.ExceptionExtensions|^Lucene\.Net\.Util\.Constants\.MaxStackByteLimit");
}
[Test, LuceneNetSpecific]
diff --git a/src/Lucene.Net/Lucene.Net.csproj b/src/Lucene.Net/Lucene.Net.csproj
index f6a8bfa36..3e70f21c6 100644
--- a/src/Lucene.Net/Lucene.Net.csproj
+++ b/src/Lucene.Net/Lucene.Net.csproj
@@ -134,6 +134,7 @@
<InternalsVisibleTo Include="Lucene.Net.Tests.Spatial" />
<InternalsVisibleTo Include="Lucene.Net.Tests.Suggest" />
<InternalsVisibleTo Include="Lucene.Net.Tests.TestFramework" />
+ <InternalsVisibleTo Include="Lucene.Net.Tests.TestFramework.DependencyInjection" />
</ItemGroup>
</Project>
diff --git a/src/Lucene.Net/Util/Constants.cs b/src/Lucene.Net/Util/Constants.cs
index 60addcacb..4183d7de4 100644
--- a/src/Lucene.Net/Util/Constants.cs
+++ b/src/Lucene.Net/Util/Constants.cs
@@ -1,4 +1,4 @@
-using System;
+using System;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
#if NETFRAMEWORK
@@ -30,6 +30,11 @@ namespace Lucene.Net.Util
/// </summary>
public static class Constants // LUCENENET specific - made static because all members are static and constructor in Lucene was private
{
+ /// <summary>
+ /// The maximum stack allocation size before switching to making allocations on the heap.
+ /// </summary>
+ internal static int MaxStackByteLimit = SystemProperties.GetPropertyAsInt32("maxStackByteLimit", defaultValue: 2048); // LUCENENET specific
+
// LUCENENET NOTE: IMPORTANT - this line must be placed before RUNTIME_VERSION so it can be parsed.
private static readonly Regex VERSION = new Regex(@"(\d+\.\d+(?:\.\d+)?(?:\.\d+)?)", RegexOptions.Compiled);