You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@uima.apache.org by re...@apache.org on 2020/03/28 14:39:46 UTC

[uima-uimaj] branch feature/UIMA-6200-Constructing-aggregate-engines-is-slow created (now fe73a79)

This is an automated email from the ASF dual-hosted git repository.

rec pushed a change to branch feature/UIMA-6200-Constructing-aggregate-engines-is-slow
in repository https://gitbox.apache.org/repos/asf/uima-uimaj.git.


      at fe73a79  [UIMA-6200] Constructing aggregate engines is slow

This branch includes the following new commits:

     new fe73a79  [UIMA-6200] Constructing aggregate engines is slow

The 1 revision listed above as "new" is entirely new to this
repository and will be described in a separate email.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.



[uima-uimaj] 01/01: [UIMA-6200] Constructing aggregate engines is slow

Posted by re...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

rec pushed a commit to branch feature/UIMA-6200-Constructing-aggregate-engines-is-slow
in repository https://gitbox.apache.org/repos/asf/uima-uimaj.git

commit fe73a79231caa253e58b0e27d050b72a3cb37519
Author: Richard Eckart de Castilho <re...@apache.org>
AuthorDate: Sat Mar 28 15:39:44 2020 +0100

    [UIMA-6200] Constructing aggregate engines is slow
    
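    - Added two alternative ways to obtain the default description
      (parse once, then cache and clone; build in memory without XML)
      and some unit tests, including a timing comparison
    - This remains a work in progress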
---
 uimaj-core/pom.xml                                 |   5 +
 .../apache/uima/flow/impl/FixedFlowController.java |  68 +++++++++++--
 .../uima/flow/impl/FixedFlowControllerTest.java    | 112 +++++++++++++++++++--
 3 files changed, 170 insertions(+), 15 deletions(-)

diff --git a/uimaj-core/pom.xml b/uimaj-core/pom.xml
index d78951f..7d9bc63 100644
--- a/uimaj-core/pom.xml
+++ b/uimaj-core/pom.xml
@@ -73,6 +73,11 @@
 			<version>${project.parent.version}</version>
 			<scope>test</scope>
 		</dependency>
+		<dependency>
+			<groupId>org.assertj</groupId>
+			<artifactId>assertj-core</artifactId>
+			<scope>test</scope>
+		</dependency>
 
 		<!--  Uncomment one of these to add Saxon8 or 9 to the classpath for JUnit tests. 
 		      1 fails due to different formatting and a dropped xmlns attribute, 
diff --git a/uimaj-core/src/main/java/org/apache/uima/flow/impl/FixedFlowController.java b/uimaj-core/src/main/java/org/apache/uima/flow/impl/FixedFlowController.java
index a2734ab..6938c98 100644
--- a/uimaj-core/src/main/java/org/apache/uima/flow/impl/FixedFlowController.java
+++ b/uimaj-core/src/main/java/org/apache/uima/flow/impl/FixedFlowController.java
@@ -19,6 +19,8 @@
 
 package org.apache.uima.flow.impl;
 
+import static org.apache.uima.UIMAFramework.getResourceSpecifierFactory;
+
 import java.io.IOException;
 import java.net.URL;
 import java.util.ArrayList;
@@ -42,6 +44,12 @@ import org.apache.uima.flow.FlowControllerDescription;
 import org.apache.uima.flow.SimpleStep;
 import org.apache.uima.flow.Step;
 import org.apache.uima.resource.ResourceInitializationException;
+import org.apache.uima.resource.metadata.Capability;
+import org.apache.uima.resource.metadata.ConfigurationParameter;
+import org.apache.uima.resource.metadata.ConfigurationParameterDeclarations;
+import org.apache.uima.resource.metadata.ConfigurationParameterSettings;
+import org.apache.uima.resource.metadata.NameValuePair;
+import org.apache.uima.resource.metadata.ProcessingResourceMetaData;
 import org.apache.uima.util.InvalidXMLException;
 import org.apache.uima.util.XMLInputSource;
 
@@ -71,6 +79,8 @@ public class FixedFlowController extends CasFlowController_ImplBase {
   private static final int ACTION_DROP = 2;
 
   private static final int ACTION_DROP_IF_NEW_CAS_PRODUCED = 3;
+  
+  private static FlowControllerDescription cachedDefaultDescription;
 
   // make final to work better in multi-thread case  UIMA-2373
   // working assumption: 
@@ -85,7 +95,7 @@ public class FixedFlowController extends CasFlowController_ImplBase {
   //   on different threads. However, users will not re-initialize this with a different 
   //   flowControllerContext while this object is controlling CASes from the previous Object.
   // When this was a synchronized list, some contention observed between the "reads", which can be eliminated by
-  //   swtiching this to a copy-on-write kind of final list.
+  //   switching this to a copy-on-write kind of final list.
   //      -- this has the added "benefit" (maybe eventually) of having better semantics for letting existing
   //         Flow objects continue to use the "old" settings, and only the new ones picking up the new ones.
   final private List<String> mSequence = new CopyOnWriteArrayList<String>();  //UIMA-4013
@@ -158,15 +168,61 @@ public class FixedFlowController extends CasFlowController_ImplBase {
   }
 
   public static FlowControllerDescription getDescription() {
+    if (cachedDefaultDescription == null) {
+      synchronized (FixedFlowController.class) {
+        cachedDefaultDescription = loadDefaultDescription();
+      }
+    }
+    
+    return (FlowControllerDescription) cachedDefaultDescription.clone();
+  }
+  
+  public static FlowControllerDescription makeDefaultDescription() {
+    FlowControllerDescription desc = getResourceSpecifierFactory().createFlowControllerDescription();
+    
+    desc.setImplementationName(FixedFlowController.class.getName());
+    
+    ProcessingResourceMetaData metaData = desc.getFlowControllerMetaData();
+    metaData.setName("Fixed Flow Controller");
+    metaData.setDescription("Simple FlowController that uses the FixedFlow element of the\n" + 
+        "\t\taggregate descriptor to determine a linear flow.");
+    metaData.setVendor("The Apache Software Foundation");
+    metaData.setVersion("1.0");
+    
+    Capability capability = getResourceSpecifierFactory().createCapability();
+    metaData.setCapabilities(new Capability[] { capability });
+   
+    ConfigurationParameter param = getResourceSpecifierFactory().createConfigurationParameter();
+    param.setName("ActionAfterCasMultiplier");
+    param.setType("String");
+    param.setDescription("The action to be taken after a CAS has been input to a CAS Multiplier and the CAS Multiplier has finished processing it.\n" + 
+        "\t\t Valid values are:\n" + 
+        "\t\t\tcontinue - the CAS continues on to the next element in the flow\n" + 
+        "\t\t\tstop - the CAS will no longer continue in the flow, and will be returned from the aggregate if possible.\n" + 
+        "\t\t\tdrop - the CAS will no longer continue in the flow, and will be dropped (not returned from the aggregate) if possible.\t \n" + 
+        "\t\t\tdropIfNewCasProduced (the default) - if the CAS multiplier produced a new CAS as a result of processing this CAS, then this\n" + 
+        "\t\t\t\tCAS will be dropped.  If not, then this CAS will continue.");
+    ConfigurationParameterDeclarations parameterDeclarations = getResourceSpecifierFactory().createConfigurationParameterDeclarations();
+    parameterDeclarations.setConfigurationParameters(new ConfigurationParameter[] { param });
+    metaData.setConfigurationParameterDeclarations(parameterDeclarations);
+    
+    NameValuePair paramSetting = getResourceSpecifierFactory().createNameValuePair();
+    paramSetting.setName("ActionAfterCasMultiplier");
+    paramSetting.setValue("dropIfNewCasProduced");
+    ConfigurationParameterSettings parameterSettings = getResourceSpecifierFactory().createConfigurationParameterSettings();
+    parameterSettings.setParameterSettings(new NameValuePair[] { paramSetting });
+    metaData.setConfigurationParameterSettings(parameterSettings);
+
+    return desc;
+  }
+  
+  private static FlowControllerDescription loadDefaultDescription() {
     URL descUrl = FixedFlowController.class
             .getResource("/org/apache/uima/flow/FixedFlowController.xml");
     FlowControllerDescription desc;
     try {
-      desc = (FlowControllerDescription) UIMAFramework.getXMLParser().parse(
-              new XMLInputSource(descUrl));
-    } catch (InvalidXMLException e) {
-      throw new UIMARuntimeException(e);
-    } catch (IOException e) {
+      desc = (FlowControllerDescription) UIMAFramework.getXMLParser().parse(new XMLInputSource(descUrl));
+    } catch (InvalidXMLException | IOException e) {
       throw new UIMARuntimeException(e);
     }
     return desc;
diff --git a/uimaj-core/src/test/java/org/apache/uima/flow/impl/FixedFlowControllerTest.java b/uimaj-core/src/test/java/org/apache/uima/flow/impl/FixedFlowControllerTest.java
index 2697823..3a9674d 100644
--- a/uimaj-core/src/test/java/org/apache/uima/flow/impl/FixedFlowControllerTest.java
+++ b/uimaj-core/src/test/java/org/apache/uima/flow/impl/FixedFlowControllerTest.java
@@ -18,15 +18,23 @@
  */
 package org.apache.uima.flow.impl;
 
+import static java.lang.System.currentTimeMillis;
+import static org.apache.uima.UIMAFramework.getXMLParser;
+import static org.assertj.core.api.Assertions.assertThat;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+import java.io.IOException;
+import java.io.StringWriter;
+import java.net.URL;
 import java.util.ArrayList;
 import java.util.Collections;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
 
-import junit.framework.TestCase;
-
 import org.apache.uima.UIMAFramework;
+import org.apache.uima.UIMARuntimeException;
 import org.apache.uima.UimaContextAdmin;
 import org.apache.uima.analysis_engine.metadata.AnalysisEngineMetaData;
 import org.apache.uima.analysis_engine.metadata.FixedFlow;
@@ -36,24 +44,25 @@ import org.apache.uima.cas.CAS;
 import org.apache.uima.flow.FinalStep;
 import org.apache.uima.flow.Flow;
 import org.apache.uima.flow.FlowControllerContext;
+import org.apache.uima.flow.FlowControllerDescription;
 import org.apache.uima.flow.SimpleStep;
 import org.apache.uima.flow.Step;
 import org.apache.uima.resource.metadata.OperationalProperties;
 import org.apache.uima.resource.metadata.impl.OperationalProperties_impl;
 import org.apache.uima.resource.metadata.impl.TypeSystemDescription_impl;
 import org.apache.uima.util.CasCreationUtils;
+import org.apache.uima.util.InvalidXMLException;
+import org.apache.uima.util.XMLInputSource;
+import org.junit.Before;
+import org.junit.Test;
 
-
-public class FixedFlowControllerTest extends TestCase {
+public class FixedFlowControllerTest {
 
   private Map<String, AnalysisEngineMetaData> analysisEngineMetaDataMap;
   private FixedFlowController fixedFlowController;
   
-  /* (non-Javadoc)
-   * @see junit.framework.TestCase#setUp()
-   */
-  protected void setUp() throws Exception {
-    super.setUp();
+  @Before
+  public void setUp() throws Exception {
     analysisEngineMetaDataMap = new HashMap<String, AnalysisEngineMetaData>();
     AnalysisEngineMetaData delegateMd = new AnalysisEngineMetaData_impl();
     delegateMd.setOperationalProperties(new OperationalProperties_impl());
@@ -79,6 +88,7 @@ public class FixedFlowControllerTest extends TestCase {
     fixedFlowController.initialize(fcContext);    
   }
 
+  @Test
   public void testComputeFlow() throws Exception {
     CAS cas1 = CasCreationUtils.createCas(new TypeSystemDescription_impl(), null, null);
     CAS cas2 = CasCreationUtils.createCas(new TypeSystemDescription_impl(), null, null);
@@ -119,6 +129,7 @@ public class FixedFlowControllerTest extends TestCase {
     assertTrue(step instanceof FinalStep);
   }
   
+  @Test
   public void testAddAnalysisEngines() throws Exception {
     CAS cas = CasCreationUtils.createCas(new TypeSystemDescription_impl(), null, null);
     Flow flow = fixedFlowController.computeFlow(cas);
@@ -176,6 +187,7 @@ public class FixedFlowControllerTest extends TestCase {
     assertTrue(step instanceof FinalStep);
   }
   
+  @Test
   public void testRemoveAnalysisEngines() throws Exception {
     CAS cas = CasCreationUtils.createCas(new TypeSystemDescription_impl(), null, null);
     Flow flow = fixedFlowController.computeFlow(cas);
@@ -208,4 +220,86 @@ public class FixedFlowControllerTest extends TestCase {
     step = flow.next();
     assertTrue(step instanceof FinalStep);
   }
+
+  @Test
+  public void howLongDescriptionCreationTakes() throws Exception
+  {
+    int testPeriod = 10_000;
+    long start;
+    long end;
+    
+    start = currentTimeMillis();
+    int countParsedFromXml = 0;
+    while ((end = currentTimeMillis()) < start + testPeriod) {
+      getDescriptionFromXml();
+      countParsedFromXml++;
+    }
+
+    start = currentTimeMillis();
+    int countWithCachingAndCloning = 0;
+    while ((end = currentTimeMillis()) < start + testPeriod) {
+      FixedFlowController.getDescription();
+      countWithCachingAndCloning++;
+    }
+
+    start = currentTimeMillis();
+    int countGeneratedInMemory = 0;
+    while ((end = currentTimeMillis()) < start + testPeriod) {
+      FixedFlowController.makeDefaultDescription();
+      countGeneratedInMemory++;
+    }
+
+    System.out.printf("[%d] FixedFlowController parsed from XML in [%d]ms%n",
+        countParsedFromXml, end - start );
+    System.out.printf("[%d] FixedFlowController parsed once, cached and cloned in [%d]ms%n",
+        countWithCachingAndCloning, end - start );
+    System.out.printf("[%d] FixedFlowController generated in memory in [%d]ms%n",
+        countGeneratedInMemory, end - start );
+  }
+
+  @Test
+  public void thatGeneratedDefaultFlowDescriptionIsEqualToXmlDescription() throws Exception
+  {
+    FlowControllerDescription desc1 = FixedFlowController.getDescription();
+    
+    URL descUrl = FixedFlowController.class
+        .getResource("/org/apache/uima/flow/FixedFlowController.xml");
+    FlowControllerDescription desc2 = getXMLParser().parseFlowControllerDescription(
+        new XMLInputSource(descUrl));
+
+    StringWriter desc1Writer = new StringWriter();
+    desc1.toXML(desc1Writer);
+    
+    StringWriter desc2Writer = new StringWriter();
+    desc2.toXML(desc2Writer);
+    
+    assertThat(desc2.toString()).isEqualTo(desc1.toString());
+  }
+
+  @Test
+  public void thatChangesToDefaultFlowControllerDoNotCarryOver() throws Exception
+  {
+    FlowControllerDescription desc1 = FixedFlowController.getDescription();
+    
+    desc1.setImplementationName("otherImplementation");
+    desc1.getMetaData().setName("otherName");
+
+    FlowControllerDescription desc2 = FixedFlowController.getDescription();
+    
+    assertThat(desc2.getImplementationName()).isEqualTo(FixedFlowController.class.getName());
+    assertThat(desc2.getMetaData().getName()).isEqualTo("Fixed Flow Controller");
+  }
+  
+  public static FlowControllerDescription getDescriptionFromXml() {
+    URL descUrl = FixedFlowController.class
+            .getResource("/org/apache/uima/flow/FixedFlowController.xml");
+    FlowControllerDescription desc;
+    try {
+      desc = (FlowControllerDescription) UIMAFramework.getXMLParser().parse(
+              new XMLInputSource(descUrl));
+    } catch (InvalidXMLException | IOException e) {
+      throw new UIMARuntimeException(e);
+    }
+    return desc;
+  }
 }