You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@uima.apache.org by re...@apache.org on 2020/03/28 14:39:47 UTC
[uima-uimaj] 01/01: [UIMA-6200] Constructing aggregate engines is slow
This is an automated email from the ASF dual-hosted git repository.
rec pushed a commit to branch feature/UIMA-6200-Constructing-aggregate-engines-is-slow
in repository https://gitbox.apache.org/repos/asf/uima-uimaj.git
commit fe73a79231caa253e58b0e27d050b72a3cb37519
Author: Richard Eckart de Castilho <re...@apache.org>
AuthorDate: Sat Mar 28 15:39:44 2020 +0100
[UIMA-6200] Constructing aggregate engines is slow
- Added a couple of different approaches and some unit tests
- This remains work in progress
---
uimaj-core/pom.xml | 5 +
.../apache/uima/flow/impl/FixedFlowController.java | 68 +++++++++++--
.../uima/flow/impl/FixedFlowControllerTest.java | 112 +++++++++++++++++++--
3 files changed, 170 insertions(+), 15 deletions(-)
diff --git a/uimaj-core/pom.xml b/uimaj-core/pom.xml
index d78951f..7d9bc63 100644
--- a/uimaj-core/pom.xml
+++ b/uimaj-core/pom.xml
@@ -73,6 +73,11 @@
<version>${project.parent.version}</version>
<scope>test</scope>
</dependency>
+ <dependency>
+ <groupId>org.assertj</groupId>
+ <artifactId>assertj-core</artifactId>
+ <scope>test</scope>
+ </dependency>
<!-- Uncomment one of these to add Saxon8 or 9 to the classpath for JUnit tests.
1 fails due to different formatting and a dropped xmlns attribute,
diff --git a/uimaj-core/src/main/java/org/apache/uima/flow/impl/FixedFlowController.java b/uimaj-core/src/main/java/org/apache/uima/flow/impl/FixedFlowController.java
index a2734ab..6938c98 100644
--- a/uimaj-core/src/main/java/org/apache/uima/flow/impl/FixedFlowController.java
+++ b/uimaj-core/src/main/java/org/apache/uima/flow/impl/FixedFlowController.java
@@ -19,6 +19,8 @@
package org.apache.uima.flow.impl;
+import static org.apache.uima.UIMAFramework.getResourceSpecifierFactory;
+
import java.io.IOException;
import java.net.URL;
import java.util.ArrayList;
@@ -42,6 +44,12 @@ import org.apache.uima.flow.FlowControllerDescription;
import org.apache.uima.flow.SimpleStep;
import org.apache.uima.flow.Step;
import org.apache.uima.resource.ResourceInitializationException;
+import org.apache.uima.resource.metadata.Capability;
+import org.apache.uima.resource.metadata.ConfigurationParameter;
+import org.apache.uima.resource.metadata.ConfigurationParameterDeclarations;
+import org.apache.uima.resource.metadata.ConfigurationParameterSettings;
+import org.apache.uima.resource.metadata.NameValuePair;
+import org.apache.uima.resource.metadata.ProcessingResourceMetaData;
import org.apache.uima.util.InvalidXMLException;
import org.apache.uima.util.XMLInputSource;
@@ -71,6 +79,8 @@ public class FixedFlowController extends CasFlowController_ImplBase {
private static final int ACTION_DROP = 2;
private static final int ACTION_DROP_IF_NEW_CAS_PRODUCED = 3;
+
+ private static FlowControllerDescription cachedDefaultDescription;
// make final to work better in multi-thread case UIMA-2373
// working assumption:
@@ -85,7 +95,7 @@ public class FixedFlowController extends CasFlowController_ImplBase {
// on different threads. However, users will not re-initialize this with a different
// flowControllerContext while this object is controlling CASes from the previous Object.
// When this was a synchronized list, some contention observed between the "reads", which can be eliminated by
- // swtiching this to a copy-on-write kind of final list.
+ // switching this to a copy-on-write kind of final list.
// -- this has the added "benefit" (maybe eventually) of having better semantics for letting existing
// Flow objects continue to use the "old" settings, and only the new ones picking up the new ones.
final private List<String> mSequence = new CopyOnWriteArrayList<String>(); //UIMA-4013
@@ -158,15 +168,61 @@ public class FixedFlowController extends CasFlowController_ImplBase {
}
public static FlowControllerDescription getDescription() {
+   // Load the default description at most once. The null check has to happen while holding
+   // the lock: the cache field is not volatile, so checking it outside the lock gives no
+   // visibility guarantee, and without a check inside the lock racing threads would each
+   // load the description again and overwrite the cached instance.
+   FlowControllerDescription cached;
+   synchronized (FixedFlowController.class) {
+     if (cachedDefaultDescription == null) {
+       cachedDefaultDescription = loadDefaultDescription();
+     }
+     cached = cachedDefaultDescription;
+   }
+
+   // Return a clone rather than the cached instance itself, so that changes a caller makes
+   // to the returned description are not applied to the cached object.
+   return (FlowControllerDescription) cached.clone();
+ }
+
+ /**
+  * Assembles the default description of this flow controller in memory, using the resource
+  * specifier factory instead of reading an XML descriptor.
+  *
+  * @return a newly created description carrying this class as implementation name, the
+  *         descriptive meta data, a single capability, and the
+  *         {@code ActionAfterCasMultiplier} parameter declared and set to
+  *         {@code dropIfNewCasProduced}
+  */
+ public static FlowControllerDescription makeDefaultDescription() {
+   // Declaration of the only configuration parameter.
+   ConfigurationParameter actionParam = getResourceSpecifierFactory().createConfigurationParameter();
+   actionParam.setName("ActionAfterCasMultiplier");
+   actionParam.setType("String");
+   actionParam.setDescription("The action to be taken after a CAS has been input to a CAS Multiplier and the CAS Multiplier has finished processing it.\n" +
+           "\t\t Valid values are:\n" +
+           "\t\t\tcontinue - the CAS continues on to the next element in the flow\n" +
+           "\t\t\tstop - the CAS will no longer continue in the flow, and will be returned from the aggregate if possible.\n" +
+           "\t\t\tdrop - the CAS will no longer continue in the flow, and will be dropped (not returned from the aggregate) if possible.\t \n" +
+           "\t\t\tdropIfNewCasProduced (the default) - if the CAS multiplier produced a new CAS as a result of processing this CAS, then this\n" +
+           "\t\t\t\tCAS will be dropped. If not, then this CAS will continue.");
+   ConfigurationParameterDeclarations declarations = getResourceSpecifierFactory()
+           .createConfigurationParameterDeclarations();
+   declarations.setConfigurationParameters(new ConfigurationParameter[] { actionParam });
+
+   // Value assigned to that parameter.
+   NameValuePair actionSetting = getResourceSpecifierFactory().createNameValuePair();
+   actionSetting.setName("ActionAfterCasMultiplier");
+   actionSetting.setValue("dropIfNewCasProduced");
+   ConfigurationParameterSettings settings = getResourceSpecifierFactory()
+           .createConfigurationParameterSettings();
+   settings.setParameterSettings(new NameValuePair[] { actionSetting });
+
+   // The description itself and its meta data.
+   FlowControllerDescription description = getResourceSpecifierFactory()
+           .createFlowControllerDescription();
+   description.setImplementationName(FixedFlowController.class.getName());
+
+   ProcessingResourceMetaData md = description.getFlowControllerMetaData();
+   md.setName("Fixed Flow Controller");
+   md.setDescription("Simple FlowController that uses the FixedFlow element of the\n" +
+           "\t\taggregate descriptor to determine a linear flow.");
+   md.setVendor("The Apache Software Foundation");
+   md.setVersion("1.0");
+   md.setCapabilities(new Capability[] { getResourceSpecifierFactory().createCapability() });
+   md.setConfigurationParameterDeclarations(declarations);
+   md.setConfigurationParameterSettings(settings);
+
+   return description;
+ }
+
+ private static FlowControllerDescription loadDefaultDescription() {
URL descUrl = FixedFlowController.class
.getResource("/org/apache/uima/flow/FixedFlowController.xml");
FlowControllerDescription desc;
try {
- desc = (FlowControllerDescription) UIMAFramework.getXMLParser().parse(
- new XMLInputSource(descUrl));
- } catch (InvalidXMLException e) {
- throw new UIMARuntimeException(e);
- } catch (IOException e) {
+ desc = (FlowControllerDescription) UIMAFramework.getXMLParser().parse(new XMLInputSource(descUrl));
+ } catch (InvalidXMLException | IOException e) {
throw new UIMARuntimeException(e);
}
return desc;
diff --git a/uimaj-core/src/test/java/org/apache/uima/flow/impl/FixedFlowControllerTest.java b/uimaj-core/src/test/java/org/apache/uima/flow/impl/FixedFlowControllerTest.java
index 2697823..3a9674d 100644
--- a/uimaj-core/src/test/java/org/apache/uima/flow/impl/FixedFlowControllerTest.java
+++ b/uimaj-core/src/test/java/org/apache/uima/flow/impl/FixedFlowControllerTest.java
@@ -18,15 +18,23 @@
*/
package org.apache.uima.flow.impl;
+import static java.lang.System.currentTimeMillis;
+import static org.apache.uima.UIMAFramework.getXMLParser;
+import static org.assertj.core.api.Assertions.assertThat;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+import java.io.IOException;
+import java.io.StringWriter;
+import java.net.URL;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
-import junit.framework.TestCase;
-
import org.apache.uima.UIMAFramework;
+import org.apache.uima.UIMARuntimeException;
import org.apache.uima.UimaContextAdmin;
import org.apache.uima.analysis_engine.metadata.AnalysisEngineMetaData;
import org.apache.uima.analysis_engine.metadata.FixedFlow;
@@ -36,24 +44,25 @@ import org.apache.uima.cas.CAS;
import org.apache.uima.flow.FinalStep;
import org.apache.uima.flow.Flow;
import org.apache.uima.flow.FlowControllerContext;
+import org.apache.uima.flow.FlowControllerDescription;
import org.apache.uima.flow.SimpleStep;
import org.apache.uima.flow.Step;
import org.apache.uima.resource.metadata.OperationalProperties;
import org.apache.uima.resource.metadata.impl.OperationalProperties_impl;
import org.apache.uima.resource.metadata.impl.TypeSystemDescription_impl;
import org.apache.uima.util.CasCreationUtils;
+import org.apache.uima.util.InvalidXMLException;
+import org.apache.uima.util.XMLInputSource;
+import org.junit.Before;
+import org.junit.Test;
-
-public class FixedFlowControllerTest extends TestCase {
+public class FixedFlowControllerTest {
private Map<String, AnalysisEngineMetaData> analysisEngineMetaDataMap;
private FixedFlowController fixedFlowController;
- /* (non-Javadoc)
- * @see junit.framework.TestCase#setUp()
- */
- protected void setUp() throws Exception {
- super.setUp();
+ @Before
+ public void setUp() throws Exception {
analysisEngineMetaDataMap = new HashMap<String, AnalysisEngineMetaData>();
AnalysisEngineMetaData delegateMd = new AnalysisEngineMetaData_impl();
delegateMd.setOperationalProperties(new OperationalProperties_impl());
@@ -79,6 +88,7 @@ public class FixedFlowControllerTest extends TestCase {
fixedFlowController.initialize(fcContext);
}
+ @Test
public void testComputeFlow() throws Exception {
CAS cas1 = CasCreationUtils.createCas(new TypeSystemDescription_impl(), null, null);
CAS cas2 = CasCreationUtils.createCas(new TypeSystemDescription_impl(), null, null);
@@ -119,6 +129,7 @@ public class FixedFlowControllerTest extends TestCase {
assertTrue(step instanceof FinalStep);
}
+ @Test
public void testAddAnalysisEngines() throws Exception {
CAS cas = CasCreationUtils.createCas(new TypeSystemDescription_impl(), null, null);
Flow flow = fixedFlowController.computeFlow(cas);
@@ -176,6 +187,7 @@ public class FixedFlowControllerTest extends TestCase {
assertTrue(step instanceof FinalStep);
}
+ @Test
public void testRemoveAnalysisEngines() throws Exception {
CAS cas = CasCreationUtils.createCas(new TypeSystemDescription_impl(), null, null);
Flow flow = fixedFlowController.computeFlow(cas);
@@ -208,4 +220,86 @@ public class FixedFlowControllerTest extends TestCase {
step = flow.next();
assertTrue(step instanceof FinalStep);
}
+
+ /**
+  * Informal throughput comparison of the three ways to obtain the default description:
+  * parsing the XML each time, cloning the cached description, and building it in memory.
+  * Each variant is called in a loop for {@code testPeriod} milliseconds and the number of
+  * completed calls is printed; nothing is asserted.
+  */
+ @Test
+ public void howLongDescriptionCreationTakes() throws Exception
+ {
+   int testPeriod = 10_000;
+   long start;
+   long end;
+
+   start = currentTimeMillis();
+   int countParsedFromXml = 0;
+   while ((end = currentTimeMillis()) < start + testPeriod) {
+     getDescriptionFromXml();
+     countParsedFromXml++;
+   }
+   // Capture the elapsed time of this phase before start/end are reused by the next one.
+   long durationParsedFromXml = end - start;
+
+   start = currentTimeMillis();
+   int countWithCachingAndCloning = 0;
+   while ((end = currentTimeMillis()) < start + testPeriod) {
+     FixedFlowController.getDescription();
+     countWithCachingAndCloning++;
+   }
+   long durationWithCachingAndCloning = end - start;
+
+   start = currentTimeMillis();
+   int countGeneratedInMemory = 0;
+   while ((end = currentTimeMillis()) < start + testPeriod) {
+     FixedFlowController.makeDefaultDescription();
+     countGeneratedInMemory++;
+   }
+   long durationGeneratedInMemory = end - start;
+
+   System.out.printf("[%d] FixedFlowController parsed from XML in [%d]ms%n",
+           countParsedFromXml, durationParsedFromXml);
+   System.out.printf("[%d] FixedFlowController parsed once, cached and cloned in [%d]ms%n",
+           countWithCachingAndCloning, durationWithCachingAndCloning);
+   System.out.printf("[%d] FixedFlowController generated in memory in [%d]ms%n",
+           countGeneratedInMemory, durationGeneratedInMemory);
+ }
+
+ /**
+  * Checks that the description returned by {@link FixedFlowController#getDescription()}
+  * serializes to the same XML as the description parsed directly from
+  * {@code FixedFlowController.xml}.
+  */
+ // NOTE(review): despite the method name, this exercises getDescription() and not
+ // makeDefaultDescription() - confirm which of the two was meant to be compared.
+ @Test
+ public void thatGeneratedDefaultFlowDescriptionIsEqualToXmlDescription() throws Exception
+ {
+   FlowControllerDescription desc1 = FixedFlowController.getDescription();
+
+   URL descUrl = FixedFlowController.class
+           .getResource("/org/apache/uima/flow/FixedFlowController.xml");
+   FlowControllerDescription desc2 = getXMLParser().parseFlowControllerDescription(
+           new XMLInputSource(descUrl));
+
+   StringWriter desc1Writer = new StringWriter();
+   desc1.toXML(desc1Writer);
+
+   StringWriter desc2Writer = new StringWriter();
+   desc2.toXML(desc2Writer);
+
+   // Compare the serialized XML that was written above; desc1 is the object under test and
+   // the freshly parsed desc2 is the reference.
+   assertThat(desc1Writer.toString()).isEqualTo(desc2Writer.toString());
+ }
+
+ /**
+  * Modifies a description obtained from {@link FixedFlowController#getDescription()} and
+  * checks that a description obtained afterwards still carries the default values.
+  */
+ @Test
+ public void thatChangesToDefaultFlowControllerDoNotCarryOver() throws Exception
+ {
+   // Tamper with the first description that is handed out ...
+   FlowControllerDescription modified = FixedFlowController.getDescription();
+   modified.setImplementationName("otherImplementation");
+   modified.getMetaData().setName("otherName");
+
+   // ... and verify that the next one is unaffected.
+   FlowControllerDescription fresh = FixedFlowController.getDescription();
+   String implementationName = fresh.getImplementationName();
+   String name = fresh.getMetaData().getName();
+
+   assertThat(implementationName).isEqualTo(FixedFlowController.class.getName());
+   assertThat(name).isEqualTo("Fixed Flow Controller");
+ }
+
+ /**
+  * Parses the fixed flow controller description from the {@code FixedFlowController.xml}
+  * class path resource.
+  *
+  * @return the parsed description
+  * @throws UIMARuntimeException
+  *           wrapping the {@link InvalidXMLException} or {@link IOException} raised while
+  *           reading or parsing the descriptor
+  */
+ public static FlowControllerDescription getDescriptionFromXml() {
+   URL xmlLocation = FixedFlowController.class
+           .getResource("/org/apache/uima/flow/FixedFlowController.xml");
+   try {
+     XMLInputSource input = new XMLInputSource(xmlLocation);
+     return (FlowControllerDescription) UIMAFramework.getXMLParser().parse(input);
+   } catch (InvalidXMLException | IOException e) {
+     throw new UIMARuntimeException(e);
+   }
+ }
}