You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@uima.apache.org by re...@apache.org on 2022/11/23 08:26:42 UTC

[uima-uimafit] branch feature/215-Improve-descriptor-scanning-performance-when-there-are-many-classloaders created (now 3196606)

This is an automated email from the ASF dual-hosted git repository.

rec pushed a change to branch feature/215-Improve-descriptor-scanning-performance-when-there-are-many-classloaders
in repository https://gitbox.apache.org/repos/asf/uima-uimafit.git


      at 3196606  Issue #215: Improve descriptor scanning performance when there are many classloaders

This branch includes the following new commits:

     new 3196606  Issue #215: Improve descriptor scanning performance when there are many classloaders

The 1 revision listed above as "new" is entirely new to this
repository and will be described in a separate email.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.



[uima-uimafit] 01/01: Issue #215: Improve descriptor scanning performance when there are many classloaders

Posted by re...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

rec pushed a commit to branch feature/215-Improve-descriptor-scanning-performance-when-there-are-many-classloaders
in repository https://gitbox.apache.org/repos/asf/uima-uimafit.git

commit 3196606a57e0fadf8ac49747797db2bc2f4cbda9
Author: Richard Eckart de Castilho <re...@apache.org>
AuthorDate: Wed Nov 23 09:26:34 2022 +0100

    Issue #215: Improve descriptor scanning performance when there are many classloaders
    
    - Implement per-location caching of pre-parsed and pre-resolved type system descriptions
---
 .../fit/factory/TypeSystemDescriptionFactory.java  | 50 +++++++++++++++++++---
 .../org/apache/uima/fit/ComponentTestBase.java     |  4 --
 .../factory/TypeSystemDescriptionFactoryTest.java  |  6 ++-
 3 files changed, 49 insertions(+), 11 deletions(-)

diff --git a/uimafit-core/src/main/java/org/apache/uima/fit/factory/TypeSystemDescriptionFactory.java b/uimafit-core/src/main/java/org/apache/uima/fit/factory/TypeSystemDescriptionFactory.java
index 74de8fb..1b611d8 100644
--- a/uimafit-core/src/main/java/org/apache/uima/fit/factory/TypeSystemDescriptionFactory.java
+++ b/uimafit-core/src/main/java/org/apache/uima/fit/factory/TypeSystemDescriptionFactory.java
@@ -24,7 +24,9 @@ import static org.apache.uima.util.CasCreationUtils.mergeTypeSystems;
 
 import java.io.IOException;
 import java.util.ArrayList;
+import java.util.HashMap;
 import java.util.List;
+import java.util.Map;
 import java.util.ServiceLoader;
 import java.util.WeakHashMap;
 
@@ -51,11 +53,16 @@ public final class TypeSystemDescriptionFactory {
 
   private static final Object CREATE_LOCK = new Object();
 
+  private static final TypeSystemDescription PLACEHOLDER = new TypeSystemDescription_impl();
+
+  private static WeakHashMap<String, TypeSystemDescription> typeDescriptors;
+
   private static WeakHashMap<ClassLoader, String[]> typeDescriptorLocationsByClassloader;
 
   private static WeakHashMap<ClassLoader, TypeSystemDescription> typeDescriptorByClassloader;
 
   static {
+    typeDescriptors = new WeakHashMap<>();
     typeDescriptorLocationsByClassloader = new WeakHashMap<>();
     typeDescriptorByClassloader = new WeakHashMap<>();
   }
@@ -124,13 +131,13 @@ public final class TypeSystemDescriptionFactory {
     TypeSystemDescription tsd = typeDescriptorByClassloader.get(cl);
     if (tsd == null) {
       synchronized (CREATE_LOCK) {
+        ResourceManager resMgr = ResourceManagerFactory.newResourceManager();
         List<TypeSystemDescription> tsdList = new ArrayList<>();
 
-        loadTypeSystemDescriptionsFromScannedLocations(tsdList);
+        loadTypeSystemDescriptionsFromScannedLocations(tsdList, resMgr);
         loadTypeSystemDescriptionsFromSPIs(tsdList);
 
         LOG.trace("Merging type systems and resolving imports...");
-        ResourceManager resMgr = ResourceManagerFactory.newResourceManager();
         tsd = mergeTypeSystems(tsdList, resMgr);
         typeDescriptorByClassloader.put(cl, tsd);
       }
@@ -138,12 +145,20 @@ public final class TypeSystemDescriptionFactory {
     return (TypeSystemDescription) tsd.clone();
   }
 
-  static void loadTypeSystemDescriptionsFromScannedLocations(List<TypeSystemDescription> tsdList)
-          throws ResourceInitializationException {
+  static void loadTypeSystemDescriptionsFromScannedLocations(List<TypeSystemDescription> tsdList,
+          ResourceManager aResMgr) throws ResourceInitializationException {
     for (String location : scanTypeDescriptors()) {
       try {
-        XMLInputSource xmlInputType1 = new XMLInputSource(location);
-        tsdList.add(getXMLParser().parseTypeSystemDescription(xmlInputType1));
+        TypeSystemDescription description = typeDescriptors.get(location);
+
+        if (description == PLACEHOLDER) {
+          // If the description has not yet been loaded, load it
+          description = getXMLParser().parseTypeSystemDescription(new XMLInputSource(location));
+          description.resolveImports(aResMgr);
+          typeDescriptors.put(location, description);
+        }
+
+        tsdList.add(description);
         LOG.debug("Detected type system at [{}]", location);
       } catch (IOException e) {
         throw new ResourceInitializationException(e);
@@ -178,14 +193,36 @@ public final class TypeSystemDescriptionFactory {
     synchronized (SCAN_LOCK) {
       ClassLoader cl = ClassLoaderUtils.findClassloader();
       String[] typeDescriptorLocations = typeDescriptorLocationsByClassloader.get(cl);
+
       if (typeDescriptorLocations == null) {
         typeDescriptorLocations = scanDescriptors(MetaDataType.TYPE_SYSTEM);
+
+        internTypeDescriptorLocations(typeDescriptorLocations);
+
         typeDescriptorLocationsByClassloader.put(cl, typeDescriptorLocations);
       }
+
       return typeDescriptorLocations;
     }
   }
 
+  private static void internTypeDescriptorLocations(String[] typeDescriptorLocations) {
+    // Canonicalize ("intern") the location strings, as they are the keys of the WeakHashMap that
+    // caches the parsed type systems: a location equal to an existing key is swapped for that key
+    // instance; a new location is registered with a PLACEHOLDER that is replaced once it is loaded
+    Map<String, String> locationStrings = new HashMap<>();
+    typeDescriptors.keySet().stream().forEach(loc -> locationStrings.put(loc, loc));
+    for (int i = 0; i < typeDescriptorLocations.length; i++) {
+      String existingLocString = locationStrings.get(typeDescriptorLocations[i]);
+      if (existingLocString == null) {
+        typeDescriptors.put(typeDescriptorLocations[i], PLACEHOLDER);
+        locationStrings.put(typeDescriptorLocations[i], typeDescriptorLocations[i]);
+      } else {
+        typeDescriptorLocations[i] = existingLocString;
+      }
+    }
+  }
+
   /**
    * Force rescan of type descriptors. The next call to {@link #scanTypeDescriptors()} will rescan
    * all auto-import locations.
@@ -195,6 +232,7 @@ public final class TypeSystemDescriptionFactory {
     synchronized (SCAN_LOCK) {
       typeDescriptorLocationsByClassloader.clear();
       typeDescriptorByClassloader.clear();
+      typeDescriptors.clear();
     }
   }
 }
diff --git a/uimafit-core/src/test/java/org/apache/uima/fit/ComponentTestBase.java b/uimafit-core/src/test/java/org/apache/uima/fit/ComponentTestBase.java
index f747d4a..13dc3cd 100644
--- a/uimafit-core/src/test/java/org/apache/uima/fit/ComponentTestBase.java
+++ b/uimafit-core/src/test/java/org/apache/uima/fit/ComponentTestBase.java
@@ -29,10 +29,6 @@ import org.apache.uima.resource.metadata.TypeSystemDescription;
 import org.apache.uima.util.CasCreationUtils;
 import org.junit.jupiter.api.BeforeEach;
 
-/**
- * 
- * 
- */
 public class ComponentTestBase {
 
   private static ThreadLocal<JCas> JCAS = new ThreadLocal<JCas>();
diff --git a/uimafit-core/src/test/java/org/apache/uima/fit/factory/TypeSystemDescriptionFactoryTest.java b/uimafit-core/src/test/java/org/apache/uima/fit/factory/TypeSystemDescriptionFactoryTest.java
index 052c36b..e7768e4 100644
--- a/uimafit-core/src/test/java/org/apache/uima/fit/factory/TypeSystemDescriptionFactoryTest.java
+++ b/uimafit-core/src/test/java/org/apache/uima/fit/factory/TypeSystemDescriptionFactoryTest.java
@@ -27,9 +27,11 @@ import java.util.ArrayList;
 import java.util.List;
 
 import org.apache.uima.fit.factory.spi.TypeSystemDescriptionProviderForTesting;
+import org.apache.uima.fit.internal.ResourceManagerFactory;
 import org.apache.uima.fit.type.AnalyzedText;
 import org.apache.uima.fit.type.Sentence;
 import org.apache.uima.fit.type.Token;
+import org.apache.uima.resource.ResourceManager;
 import org.apache.uima.resource.metadata.TypeSystemDescription;
 import org.apache.uima.util.CasCreationUtils;
 import org.junit.jupiter.api.Test;
@@ -54,8 +56,10 @@ public class TypeSystemDescriptionFactoryTest {
 
   @Test
   public void testLoadingFromScannedLocations() throws Exception {
+    ResourceManager resMgr = ResourceManagerFactory.newResourceManager();
+
     List<TypeSystemDescription> tsds = new ArrayList<>();
-    loadTypeSystemDescriptionsFromScannedLocations(tsds);
+    loadTypeSystemDescriptionsFromScannedLocations(tsds, resMgr);
     TypeSystemDescription tsd = CasCreationUtils.mergeTypeSystems(tsds);
 
     assertNotNull(tsd.getType(Token.class.getName()));