You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucenenet.apache.org by ni...@apache.org on 2022/10/31 06:19:12 UTC

[lucenenet] 04/14: PERFORMANCE: Lucene.Net.Analysis.Util.CharacterUtils: Use spans and stackalloc to reduce heap allocations when lowercasing. Added system property named "maxStackByteLimit" that defaults to 2048 bytes.

This is an automated email from the ASF dual-hosted git repository.

nightowl888 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/lucenenet.git

commit 98c52a8649cbb7e9c6f209556a84c49f59ae3ec8
Author: Shad Storhaug <sh...@shadstorhaug.com>
AuthorDate: Thu Oct 27 01:13:34 2022 +0700

    PERFORMANCE: Lucene.Net.Analysis.Util.CharacterUtils: Use spans and stackalloc to reduce heap allocations when lowercasing. Added system property named "maxStackByteLimit" that defaults to 2048 bytes.
---
 .build/dependencies.props                          |  1 +
 .../Analysis/Util/CharacterUtils.cs                | 40 +++++++++++++++-------
 .../Lucene.Net.Analysis.Common.csproj              |  8 +++++
 .../Configuration/TestConfigurationService.cs      |  8 +++++
 .../Startup.cs                                     |  3 +-
 src/Lucene.Net.Tests/Support/TestApiConsistency.cs |  2 +-
 src/Lucene.Net/Lucene.Net.csproj                   |  1 +
 src/Lucene.Net/Util/Constants.cs                   |  7 +++-
 8 files changed, 55 insertions(+), 15 deletions(-)

diff --git a/.build/dependencies.props b/.build/dependencies.props
index 7a9ad2f27..1cfb3fc69 100644
--- a/.build/dependencies.props
+++ b/.build/dependencies.props
@@ -73,6 +73,7 @@
     <RandomizedTestingGeneratorsPackageVersion>2.7.8</RandomizedTestingGeneratorsPackageVersion>
     <SharpZipLibPackageVersion>1.1.0</SharpZipLibPackageVersion>
     <Spatial4nPackageVersion>0.4.1.1</Spatial4nPackageVersion>
+    <SystemMemoryPackageVersion>4.5.4</SystemMemoryPackageVersion>
     <SystemReflectionEmitPackageVersion>4.3.0</SystemReflectionEmitPackageVersion>
     <SystemReflectionEmitILGenerationPackageVersion>4.3.0</SystemReflectionEmitILGenerationPackageVersion>
     <SystemReflectionTypeExtensionsPackageVersion>4.3.0</SystemReflectionTypeExtensionsPackageVersion>
diff --git a/src/Lucene.Net.Analysis.Common/Analysis/Util/CharacterUtils.cs b/src/Lucene.Net.Analysis.Common/Analysis/Util/CharacterUtils.cs
index 8458ca33e..179e095b2 100644
--- a/src/Lucene.Net.Analysis.Common/Analysis/Util/CharacterUtils.cs
+++ b/src/Lucene.Net.Analysis.Common/Analysis/Util/CharacterUtils.cs
@@ -183,10 +183,17 @@ namespace Lucene.Net.Analysis.Util
                 Debugging.Assert(offset <= 0 && offset <= buffer.Length);
             }
 
-            // Slight optimization, eliminating a few method calls internally
-            CultureInfo.InvariantCulture.TextInfo
-                .ToLower(new string(buffer, offset, length))
-                .CopyTo(0, buffer, offset, length);
+            // Reduce allocations by using the stack and spans
+            var source = new ReadOnlySpan<char>(buffer, offset, length);
+            var destination = buffer.AsSpan(offset, length);
+            var spare = length * sizeof(char) <= Constants.MaxStackByteLimit ? stackalloc char[length] : new char[length];
+            source.ToLower(spare, CultureInfo.InvariantCulture);
+            spare.CopyTo(destination);
+
+            //// Slight optimization, eliminating a few method calls internally
+            //CultureInfo.InvariantCulture.TextInfo
+            //    .ToLower(new string(buffer, offset, length))
+            //    .CopyTo(0, buffer, offset, length);
 
             //// Optimization provided by Vincent Van Den Berghe: 
             //// http://search-lucene.com/m/Lucene.Net/j1zMf1uckOzOYqsi?subj=Proposal+to+speed+up+implementation+of+LowercaseFilter+charUtils+ToLower
@@ -194,8 +201,9 @@ namespace Lucene.Net.Analysis.Util
             //    .ToLowerInvariant()
             //    .CopyTo(0, buffer, offset, length);
 
-            // Original (slow) Lucene implementation:
-            //for (int i = offset; i < limit; )
+            //// Original (slow) Lucene implementation:
+            //int limit = offset + length;
+            //for (int i = offset; i < limit;)
             //{
             //    i += Character.ToChars(
             //        Character.ToLower(
@@ -217,10 +225,17 @@ namespace Lucene.Net.Analysis.Util
                 Debugging.Assert(offset <= 0 && offset <= buffer.Length);
             }
 
-            // Slight optimization, eliminating a few method calls internally
-            CultureInfo.InvariantCulture.TextInfo
-                .ToUpper(new string(buffer, offset, length))
-                .CopyTo(0, buffer, offset, length);
+            // Reduce 2 heap allocations by using the stack and spans
+            var source = new ReadOnlySpan<char>(buffer, offset, length);
+            var destination = buffer.AsSpan(offset, length);
+            var spare = length * sizeof(char) <= Constants.MaxStackByteLimit ? stackalloc char[length] : new char[length];
+            source.ToUpper(spare, CultureInfo.InvariantCulture);
+            spare.CopyTo(destination);
+
+            //// Slight optimization, eliminating a few method calls internally
+            //CultureInfo.InvariantCulture.TextInfo
+            //    .ToUpper(new string(buffer, offset, length))
+            //    .CopyTo(0, buffer, offset, length);
 
             //// Optimization provided by Vincent Van Den Berghe: 
             //// http://search-lucene.com/m/Lucene.Net/j1zMf1uckOzOYqsi?subj=Proposal+to+speed+up+implementation+of+LowercaseFilter+charUtils+ToLower
@@ -228,8 +243,9 @@ namespace Lucene.Net.Analysis.Util
             //    .ToUpperInvariant()
             //    .CopyTo(0, buffer, offset, length);
 
-            // Original (slow) Lucene implementation:
-            //for (int i = offset; i < limit; )
+            //// Original (slow) Lucene implementation:
+            //int limit = offset + length;
+            //for (int i = offset; i < limit;)
             //{
             //    i += Character.ToChars(
             //        Character.ToUpper(
diff --git a/src/Lucene.Net.Analysis.Common/Lucene.Net.Analysis.Common.csproj b/src/Lucene.Net.Analysis.Common/Lucene.Net.Analysis.Common.csproj
index 4fcfaef03..5a64acc9e 100644
--- a/src/Lucene.Net.Analysis.Common/Lucene.Net.Analysis.Common.csproj
+++ b/src/Lucene.Net.Analysis.Common/Lucene.Net.Analysis.Common.csproj
@@ -50,6 +50,14 @@
     <ProjectReference Include="..\Lucene.Net\Lucene.Net.csproj" />
   </ItemGroup>
 
+  <ItemGroup Condition=" '$(TargetFramework)' == 'netstandard2.0' ">
+    <PackageReference Include="System.Memory" Version="$(SystemMemoryPackageVersion)" />
+  </ItemGroup>
+
+  <ItemGroup Condition=" '$(TargetFramework)' == 'net462' ">
+    <PackageReference Include="System.Memory" Version="$(SystemMemoryPackageVersion)" />
+  </ItemGroup>
+
   <ItemGroup Condition=" '$(TargetFramework)' == 'net462' ">
     <Reference Include="System.Xml" />
   </ItemGroup>
diff --git a/src/Lucene.Net.Tests.TestFramework.DependencyInjection/Configuration/TestConfigurationService.cs b/src/Lucene.Net.Tests.TestFramework.DependencyInjection/Configuration/TestConfigurationService.cs
index 1090f77ad..a2d662e87 100644
--- a/src/Lucene.Net.Tests.TestFramework.DependencyInjection/Configuration/TestConfigurationService.cs
+++ b/src/Lucene.Net.Tests.TestFramework.DependencyInjection/Configuration/TestConfigurationService.cs
@@ -34,5 +34,13 @@ namespace Lucene.Net.Configuration
             Assert.AreEqual("barValue", ConfigurationSettings.CurrentConfiguration["bar"]);
             Assert.AreEqual("bazValue", ConfigurationSettings.CurrentConfiguration["baz"]);
         }
+
+        [Test]
+        public void TestCustomMaxStackByteLimit()
+        {
+            // This custom value is configured in Startup.cs.
+            // 5000 chosen because it is not likely to ever be made a default.
+            Assert.AreEqual(5000, Constants.MaxStackByteLimit);
+        }
     }
 }
diff --git a/src/Lucene.Net.Tests.TestFramework.DependencyInjection/Startup.cs b/src/Lucene.Net.Tests.TestFramework.DependencyInjection/Startup.cs
index 0edb611cd..e21ad9594 100644
--- a/src/Lucene.Net.Tests.TestFramework.DependencyInjection/Startup.cs
+++ b/src/Lucene.Net.Tests.TestFramework.DependencyInjection/Startup.cs
@@ -41,7 +41,8 @@ public class Startup : LuceneTestFrameworkInitializer
             {
                 ["foo"] = "fooValue",
                 ["bar"] = "barValue",
-                ["baz"] = "bazValue"
+                ["baz"] = "bazValue",
+                ["maxStackByteLimit"] = "5000",
             });
         ConfigureServices(serviceCollection, configurationBuilder);
         IServiceProvider services = serviceCollection.BuildServiceProvider();
diff --git a/src/Lucene.Net.Tests/Support/TestApiConsistency.cs b/src/Lucene.Net.Tests/Support/TestApiConsistency.cs
index 8d7bfc57b..6ac4ff2e3 100644
--- a/src/Lucene.Net.Tests/Support/TestApiConsistency.cs
+++ b/src/Lucene.Net.Tests/Support/TestApiConsistency.cs
@@ -38,7 +38,7 @@ namespace Lucene.Net
         [TestCase(typeof(Lucene.Net.Analysis.Analyzer))]
         public override void TestPrivateFieldNames(Type typeFromTargetAssembly)
         {
-            base.TestPrivateFieldNames(typeFromTargetAssembly, @"^Lucene\.Net\.Support\.(?:ConcurrentHashSet|PlatformHelper|DateTimeOffsetUtil)|^Lucene\.ExceptionExtensions");
+            base.TestPrivateFieldNames(typeFromTargetAssembly, @"^Lucene\.Net\.Support\.(?:ConcurrentHashSet|PlatformHelper|DateTimeOffsetUtil)|^Lucene\.ExceptionExtensions|^Lucene\.Net\.Util\.Constants\.MaxStackByteLimit");
         }
 
         [Test, LuceneNetSpecific]
diff --git a/src/Lucene.Net/Lucene.Net.csproj b/src/Lucene.Net/Lucene.Net.csproj
index f6a8bfa36..3e70f21c6 100644
--- a/src/Lucene.Net/Lucene.Net.csproj
+++ b/src/Lucene.Net/Lucene.Net.csproj
@@ -134,6 +134,7 @@
     <InternalsVisibleTo Include="Lucene.Net.Tests.Spatial" />
     <InternalsVisibleTo Include="Lucene.Net.Tests.Suggest" />
     <InternalsVisibleTo Include="Lucene.Net.Tests.TestFramework" />
+    <InternalsVisibleTo Include="Lucene.Net.Tests.TestFramework.DependencyInjection" />
   </ItemGroup>
 
 </Project>
diff --git a/src/Lucene.Net/Util/Constants.cs b/src/Lucene.Net/Util/Constants.cs
index 60addcacb..4183d7de4 100644
--- a/src/Lucene.Net/Util/Constants.cs
+++ b/src/Lucene.Net/Util/Constants.cs
@@ -1,4 +1,4 @@
-using System;
+using System;
 using System.Runtime.CompilerServices;
 using System.Runtime.InteropServices;
 #if NETFRAMEWORK
@@ -30,6 +30,11 @@ namespace Lucene.Net.Util
     /// </summary>
     public static class Constants // LUCENENET specific - made static because all members are static and constructor in Lucene was private
     {
+        /// <summary>
+        /// The maximum size, in bytes, of a stack allocation before switching to allocating on the heap.
+        /// </summary>
+        internal static int MaxStackByteLimit = SystemProperties.GetPropertyAsInt32("maxStackByteLimit", defaultValue: 2048); // LUCENENET specific
+
         // LUCENENET NOTE: IMPORTANT - this line must be placed before RUNTIME_VERSION so it can be parsed.
         private static readonly Regex VERSION = new Regex(@"(\d+\.\d+(?:\.\d+)?(?:\.\d+)?)", RegexOptions.Compiled);