You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@uima.apache.org by re...@apache.org on 2020/03/28 14:39:47 UTC

[uima-uimaj] 01/01: [UIMA-6200] Constructing aggregate engines is slow

This is an automated email from the ASF dual-hosted git repository.

rec pushed a commit to branch feature/UIMA-6200-Constructing-aggregate-engines-is-slow
in repository https://gitbox.apache.org/repos/asf/uima-uimaj.git

commit fe73a79231caa253e58b0e27d050b72a3cb37519
Author: Richard Eckart de Castilho <re...@apache.org>
AuthorDate: Sat Mar 28 15:39:44 2020 +0100

    [UIMA-6200] Constructing aggregate engines is slow
    
    - Added a couple of different approaches and some unit tests
    - This remains work in progress
---
 uimaj-core/pom.xml                                 |   5 +
 .../apache/uima/flow/impl/FixedFlowController.java |  68 +++++++++++--
 .../uima/flow/impl/FixedFlowControllerTest.java    | 112 +++++++++++++++++++--
 3 files changed, 170 insertions(+), 15 deletions(-)

diff --git a/uimaj-core/pom.xml b/uimaj-core/pom.xml
index d78951f..7d9bc63 100644
--- a/uimaj-core/pom.xml
+++ b/uimaj-core/pom.xml
@@ -73,6 +73,11 @@
 			<version>${project.parent.version}</version>
 			<scope>test</scope>
 		</dependency>
+		<dependency>
+			<groupId>org.assertj</groupId>
+			<artifactId>assertj-core</artifactId>
+			<scope>test</scope>
+		</dependency>
 
 		<!--  Uncomment one of these to add Saxon8 or 9 to the classpath for JUnit tests. 
 		      1 fails due to different formatting and a dropped xmlns attribute, 
diff --git a/uimaj-core/src/main/java/org/apache/uima/flow/impl/FixedFlowController.java b/uimaj-core/src/main/java/org/apache/uima/flow/impl/FixedFlowController.java
index a2734ab..6938c98 100644
--- a/uimaj-core/src/main/java/org/apache/uima/flow/impl/FixedFlowController.java
+++ b/uimaj-core/src/main/java/org/apache/uima/flow/impl/FixedFlowController.java
@@ -19,6 +19,8 @@
 
 package org.apache.uima.flow.impl;
 
+import static org.apache.uima.UIMAFramework.getResourceSpecifierFactory;
+
 import java.io.IOException;
 import java.net.URL;
 import java.util.ArrayList;
@@ -42,6 +44,12 @@ import org.apache.uima.flow.FlowControllerDescription;
 import org.apache.uima.flow.SimpleStep;
 import org.apache.uima.flow.Step;
 import org.apache.uima.resource.ResourceInitializationException;
+import org.apache.uima.resource.metadata.Capability;
+import org.apache.uima.resource.metadata.ConfigurationParameter;
+import org.apache.uima.resource.metadata.ConfigurationParameterDeclarations;
+import org.apache.uima.resource.metadata.ConfigurationParameterSettings;
+import org.apache.uima.resource.metadata.NameValuePair;
+import org.apache.uima.resource.metadata.ProcessingResourceMetaData;
 import org.apache.uima.util.InvalidXMLException;
 import org.apache.uima.util.XMLInputSource;
 
@@ -71,6 +79,8 @@ public class FixedFlowController extends CasFlowController_ImplBase {
   private static final int ACTION_DROP = 2;
 
   private static final int ACTION_DROP_IF_NEW_CAS_PRODUCED = 3;
+  
+  private static FlowControllerDescription cachedDefaultDescription;
 
   // make final to work better in multi-thread case  UIMA-2373
   // working assumption: 
@@ -85,7 +95,7 @@ public class FixedFlowController extends CasFlowController_ImplBase {
   //   on different threads. However, users will not re-initialize this with a different 
   //   flowControllerContext while this object is controlling CASes from the previous Object.
   // When this was a synchronized list, some contention observed between the "reads", which can be eliminated by
-  //   swtiching this to a copy-on-write kind of final list.
+  //   switching this to a copy-on-write kind of final list.
   //      -- this has the added "benefit" (maybe eventually) of having better semantics for letting existing
   //         Flow objects continue to use the "old" settings, and only the new ones picking up the new ones.
   final private List<String> mSequence = new CopyOnWriteArrayList<String>();  //UIMA-4013
@@ -158,15 +168,61 @@ public class FixedFlowController extends CasFlowController_ImplBase {
   }
 
   public static FlowControllerDescription getDescription() {
+    synchronized (FixedFlowController.class) {
+      if (cachedDefaultDescription == null) {
+        cachedDefaultDescription = loadDefaultDescription();
+      }
+    }
+    // Null check is done under the lock so racing threads load the descriptor only once.
+    return (FlowControllerDescription) cachedDefaultDescription.clone();
+  }
+  
+  public static FlowControllerDescription makeDefaultDescription() {
+    FlowControllerDescription desc = getResourceSpecifierFactory().createFlowControllerDescription();
+    
+    desc.setImplementationName(FixedFlowController.class.getName());
+    
+    ProcessingResourceMetaData metaData = desc.getFlowControllerMetaData();
+    metaData.setName("Fixed Flow Controller");
+    metaData.setDescription("Simple FlowController that uses the FixedFlow element of the\n" + 
+        "\t\taggregate descriptor to determine a linear flow.");
+    metaData.setVendor("The Apache Software Foundation");
+    metaData.setVersion("1.0");
+    
+    Capability capability = getResourceSpecifierFactory().createCapability();
+    metaData.setCapabilities(new Capability[] { capability });
+   
+    ConfigurationParameter param = getResourceSpecifierFactory().createConfigurationParameter();
+    param.setName("ActionAfterCasMultiplier");
+    param.setType("String");
+    param.setDescription("The action to be taken after a CAS has been input to a CAS Multiplier and the CAS Multiplier has finished processing it.\n" + 
+        "\t\t Valid values are:\n" + 
+        "\t\t\tcontinue - the CAS continues on to the next element in the flow\n" + 
+        "\t\t\tstop - the CAS will no longer continue in the flow, and will be returned from the aggregate if possible.\n" + 
+        "\t\t\tdrop - the CAS will no longer continue in the flow, and will be dropped (not returned from the aggregate) if possible.\t \n" + 
+        "\t\t\tdropIfNewCasProduced (the default) - if the CAS multiplier produced a new CAS as a result of processing this CAS, then this\n" + 
+        "\t\t\t\tCAS will be dropped.  If not, then this CAS will continue.");
+    ConfigurationParameterDeclarations parameterDeclarations = getResourceSpecifierFactory().createConfigurationParameterDeclarations();
+    parameterDeclarations.setConfigurationParameters(new ConfigurationParameter[] { param });
+    metaData.setConfigurationParameterDeclarations(parameterDeclarations);
+    
+    NameValuePair paramSetting = getResourceSpecifierFactory().createNameValuePair();
+    paramSetting.setName("ActionAfterCasMultiplier");
+    paramSetting.setValue("dropIfNewCasProduced");
+    ConfigurationParameterSettings parameterSettings = getResourceSpecifierFactory().createConfigurationParameterSettings();
+    parameterSettings.setParameterSettings(new NameValuePair[] { paramSetting });
+    metaData.setConfigurationParameterSettings(parameterSettings);
+
+    return desc;
+  }
+  
+  private static FlowControllerDescription loadDefaultDescription() {
     URL descUrl = FixedFlowController.class
             .getResource("/org/apache/uima/flow/FixedFlowController.xml");
     FlowControllerDescription desc;
     try {
-      desc = (FlowControllerDescription) UIMAFramework.getXMLParser().parse(
-              new XMLInputSource(descUrl));
-    } catch (InvalidXMLException e) {
-      throw new UIMARuntimeException(e);
-    } catch (IOException e) {
+      desc = (FlowControllerDescription) UIMAFramework.getXMLParser().parse(new XMLInputSource(descUrl));
+    } catch (InvalidXMLException | IOException e) {
       throw new UIMARuntimeException(e);
     }
     return desc;
diff --git a/uimaj-core/src/test/java/org/apache/uima/flow/impl/FixedFlowControllerTest.java b/uimaj-core/src/test/java/org/apache/uima/flow/impl/FixedFlowControllerTest.java
index 2697823..3a9674d 100644
--- a/uimaj-core/src/test/java/org/apache/uima/flow/impl/FixedFlowControllerTest.java
+++ b/uimaj-core/src/test/java/org/apache/uima/flow/impl/FixedFlowControllerTest.java
@@ -18,15 +18,23 @@
  */
 package org.apache.uima.flow.impl;
 
+import static java.lang.System.currentTimeMillis;
+import static org.apache.uima.UIMAFramework.getXMLParser;
+import static org.assertj.core.api.Assertions.assertThat;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+import java.io.IOException;
+import java.io.StringWriter;
+import java.net.URL;
 import java.util.ArrayList;
 import java.util.Collections;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
 
-import junit.framework.TestCase;
-
 import org.apache.uima.UIMAFramework;
+import org.apache.uima.UIMARuntimeException;
 import org.apache.uima.UimaContextAdmin;
 import org.apache.uima.analysis_engine.metadata.AnalysisEngineMetaData;
 import org.apache.uima.analysis_engine.metadata.FixedFlow;
@@ -36,24 +44,25 @@ import org.apache.uima.cas.CAS;
 import org.apache.uima.flow.FinalStep;
 import org.apache.uima.flow.Flow;
 import org.apache.uima.flow.FlowControllerContext;
+import org.apache.uima.flow.FlowControllerDescription;
 import org.apache.uima.flow.SimpleStep;
 import org.apache.uima.flow.Step;
 import org.apache.uima.resource.metadata.OperationalProperties;
 import org.apache.uima.resource.metadata.impl.OperationalProperties_impl;
 import org.apache.uima.resource.metadata.impl.TypeSystemDescription_impl;
 import org.apache.uima.util.CasCreationUtils;
+import org.apache.uima.util.InvalidXMLException;
+import org.apache.uima.util.XMLInputSource;
+import org.junit.Before;
+import org.junit.Test;
 
-
-public class FixedFlowControllerTest extends TestCase {
+public class FixedFlowControllerTest {
 
   private Map<String, AnalysisEngineMetaData> analysisEngineMetaDataMap;
   private FixedFlowController fixedFlowController;
   
-  /* (non-Javadoc)
-   * @see junit.framework.TestCase#setUp()
-   */
-  protected void setUp() throws Exception {
-    super.setUp();
+  @Before
+  public void setUp() throws Exception {
     analysisEngineMetaDataMap = new HashMap<String, AnalysisEngineMetaData>();
     AnalysisEngineMetaData delegateMd = new AnalysisEngineMetaData_impl();
     delegateMd.setOperationalProperties(new OperationalProperties_impl());
@@ -79,6 +88,7 @@ public class FixedFlowControllerTest extends TestCase {
     fixedFlowController.initialize(fcContext);    
   }
 
+  @Test
   public void testComputeFlow() throws Exception {
     CAS cas1 = CasCreationUtils.createCas(new TypeSystemDescription_impl(), null, null);
     CAS cas2 = CasCreationUtils.createCas(new TypeSystemDescription_impl(), null, null);
@@ -119,6 +129,7 @@ public class FixedFlowControllerTest extends TestCase {
     assertTrue(step instanceof FinalStep);
   }
   
+  @Test
   public void testAddAnalysisEngines() throws Exception {
     CAS cas = CasCreationUtils.createCas(new TypeSystemDescription_impl(), null, null);
     Flow flow = fixedFlowController.computeFlow(cas);
@@ -176,6 +187,7 @@ public class FixedFlowControllerTest extends TestCase {
     assertTrue(step instanceof FinalStep);
   }
   
+  @Test
   public void testRemoveAnalysisEngines() throws Exception {
     CAS cas = CasCreationUtils.createCas(new TypeSystemDescription_impl(), null, null);
     Flow flow = fixedFlowController.computeFlow(cas);
@@ -208,4 +220,86 @@ public class FixedFlowControllerTest extends TestCase {
     step = flow.next();
     assertTrue(step instanceof FinalStep);
   }
+
+  /** Rough benchmark: counts descriptions produced per strategy within one test period each. */
+  @Test
+  public void howLongDescriptionCreationTakes() throws Exception
+  {
+    int testPeriod = 10_000;
+    long end;
+    
+    long start = currentTimeMillis();
+    int countParsedFromXml = 0;
+    while ((end = currentTimeMillis()) < start + testPeriod) {
+      getDescriptionFromXml();
+      countParsedFromXml++;
+    }
+    long elapsedParsedFromXml = end - start; // capture now: start/end are reused by the next phase
+    start = currentTimeMillis();
+    int countWithCachingAndCloning = 0;
+    while ((end = currentTimeMillis()) < start + testPeriod) {
+      FixedFlowController.getDescription();
+      countWithCachingAndCloning++;
+    }
+    long elapsedWithCachingAndCloning = end - start;
+    start = currentTimeMillis();
+    int countGeneratedInMemory = 0;
+    while ((end = currentTimeMillis()) < start + testPeriod) {
+      FixedFlowController.makeDefaultDescription();
+      countGeneratedInMemory++;
+    }
+    long elapsedGeneratedInMemory = end - start;
+    System.out.printf("[%d] FixedFlowController parsed from XML in [%d]ms%n",
+        countParsedFromXml, elapsedParsedFromXml);
+    System.out.printf("[%d] FixedFlowController parsed once, cached and cloned in [%d]ms%n",
+        countWithCachingAndCloning, elapsedWithCachingAndCloning);
+    System.out.printf("[%d] FixedFlowController generated in memory in [%d]ms%n",
+        countGeneratedInMemory, elapsedGeneratedInMemory);
+  }
+
+  @Test
+  public void thatGeneratedDefaultFlowDescriptionIsEqualToXmlDescription() throws Exception
+  {
+    FlowControllerDescription desc1 = FixedFlowController.getDescription();
+    
+    URL descUrl = FixedFlowController.class
+        .getResource("/org/apache/uima/flow/FixedFlowController.xml");
+    FlowControllerDescription desc2 = getXMLParser().parseFlowControllerDescription(
+        new XMLInputSource(descUrl));
+
+    StringWriter desc1Writer = new StringWriter();
+    desc1.toXML(desc1Writer);
+    
+    StringWriter desc2Writer = new StringWriter();
+    desc2.toXML(desc2Writer);
+    
+    assertThat(desc2.toString()).isEqualTo(desc1.toString());
+  }
+
+  @Test
+  public void thatChangesToDefaultFlowControllerDoNotCarryOver() throws Exception
+  {
+    FlowControllerDescription desc1 = FixedFlowController.getDescription();
+    
+    desc1.setImplementationName("otherImplementation");
+    desc1.getMetaData().setName("otherName");
+
+    FlowControllerDescription desc2 = FixedFlowController.getDescription();
+    
+    assertThat(desc2.getImplementationName()).isEqualTo(FixedFlowController.class.getName());
+    assertThat(desc2.getMetaData().getName()).isEqualTo("Fixed Flow Controller");
+  }
+  
+  public static FlowControllerDescription getDescriptionFromXml() {
+    URL descUrl = FixedFlowController.class
+            .getResource("/org/apache/uima/flow/FixedFlowController.xml");
+    FlowControllerDescription desc;
+    try {
+      desc = (FlowControllerDescription) UIMAFramework.getXMLParser().parse(
+              new XMLInputSource(descUrl));
+    } catch (InvalidXMLException | IOException e) {
+      throw new UIMARuntimeException(e);
+    }
+    return desc;
+  }
 }