You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@uima.apache.org by al...@apache.org on 2007/01/11 22:32:05 UTC
svn commit: r495384 - in /incubator/uima/uimaj/trunk/uimaj-core/src:
main/java/org/apache/uima/util/CasCreationUtils.java
test/java/org/apache/uima/util/CasCreationUtilsTest.java
Author: alally
Date: Thu Jan 11 13:32:05 2007
New Revision: 495384
URL: http://svn.apache.org/viewvc?view=rev&rev=495384
Log:
Extended CasCreationUtils methods for type system merging to return list of types where feature-merge was required.
UIMA-182: https://issues.apache.org/jira/browse/UIMA-182
Modified:
incubator/uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/util/CasCreationUtils.java
incubator/uima/uimaj/trunk/uimaj-core/src/test/java/org/apache/uima/util/CasCreationUtilsTest.java
Modified: incubator/uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/util/CasCreationUtils.java
URL: http://svn.apache.org/viewvc/incubator/uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/util/CasCreationUtils.java?view=diff&rev=495384&r1=495383&r2=495384
==============================================================================
--- incubator/uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/util/CasCreationUtils.java (original)
+++ incubator/uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/util/CasCreationUtils.java Thu Jan 11 13:32:05 2007
@@ -165,13 +165,15 @@
* Creates a new CAS instance for a collection of CAS Processors. This method correctly handles
* aggregate as well as primitive analysis engines
* <p>
- * If you pass this method objects of type {@link AnalysisEngineDescription},
- * {@link CollectionReaderDescription}, {@link CasInitializerDescription}, or {@link CasConsumerDescription},
- * it will not instantiate the components. It will just extract the type system information from the
- * descriptor. For any other kind of {@link ResourceSpecifier}, it will call
- * {@link UIMAFramework#produceResource(org.apache.uima.resource.ResourceSpecifier, Map)}. For example,
- * if a {@link URISpecifier} is passed, a remote connection will be established and the service will be
- * queries for its metadata. An exception will be thrown if the connection can not be opened.
+ * If you pass this method objects of type {@link AnalysisEngineDescription},
+ * {@link CollectionReaderDescription}, {@link CasInitializerDescription}, or
+ * {@link CasConsumerDescription}, it will not instantiate the components. It will just extract
+ * the type system information from the descriptor. For any other kind of
+ * {@link ResourceSpecifier}, it will call
+ * {@link UIMAFramework#produceResource(org.apache.uima.resource.ResourceSpecifier, Map)}. For
+ * example, if a {@link URISpecifier} is passed, a remote connection will be established and the
+ * service will be queried for its metadata. An exception will be thrown if the connection cannot
+ * be opened.
*
* @param aComponentDescriptionsOrMetaData
* a collection of {@link ResourceSpecifier}, {@link ProcessingResourceMetaData},
@@ -193,13 +195,15 @@
* Creates a new CAS instance for a collection of CAS Processors. This method correctly handles
* aggregate as well as primitive analysis engines
* <p>
- * If you pass this method objects of type {@link AnalysisEngineDescription},
- * {@link CollectionReaderDescription}, {@link CasInitializerDescription}, or {@link CasConsumerDescription},
- * it will not instantiate the components. It will just extract the type system information from the
- * descriptor. For any other kind of {@link ResourceSpecifier}, it will call
- * {@link UIMAFramework#produceResource(org.apache.uima.resource.ResourceSpecifier, Map)}. For example,
- * if a {@link URISpecifier} is passed, a remote connection will be established and the service will be
- * queries for its metadata. An exception will be thrown if the connection can not be opened.
+ * If you pass this method objects of type {@link AnalysisEngineDescription},
+ * {@link CollectionReaderDescription}, {@link CasInitializerDescription}, or
+ * {@link CasConsumerDescription}, it will not instantiate the components. It will just extract
+ * the type system information from the descriptor. For any other kind of
+ * {@link ResourceSpecifier}, it will call
+ * {@link UIMAFramework#produceResource(org.apache.uima.resource.ResourceSpecifier, Map)}. For
+ * example, if a {@link URISpecifier} is passed, a remote connection will be established and the
+ * service will be queried for its metadata. An exception will be thrown if the connection cannot
+ * be opened.
*
* @param aComponentDescriptionsOrMetaData
* a collection of {@link ResourceSpecifier}, {@link ProcessingResourceMetaData},
@@ -223,14 +227,16 @@
/**
* Creates a new CAS instance for a collection of CAS Processors. This method correctly handles
* aggregate as well as primitive analysis engines
- * <p>
- * If you pass this method objects of type {@link AnalysisEngineDescription},
- * {@link CollectionReaderDescription}, {@link CasInitializerDescription}, or {@link CasConsumerDescription},
- * it will not instantiate the components. It will just extract the type system information from the
- * descriptor. For any other kind of {@link ResourceSpecifier}, it will call
- * {@link UIMAFramework#produceResource(org.apache.uima.resource.ResourceSpecifier, Map)}. For example,
- * if a {@link URISpecifier} is passed, a remote connection will be established and the service will be
- * queries for its metadata. An exception will be thrown if the connection can not be opened.
+ * <p>
+ * If you pass this method objects of type {@link AnalysisEngineDescription},
+ * {@link CollectionReaderDescription}, {@link CasInitializerDescription}, or
+ * {@link CasConsumerDescription}, it will not instantiate the components. It will just extract
+ * the type system information from the descriptor. For any other kind of
+ * {@link ResourceSpecifier}, it will call
+ * {@link UIMAFramework#produceResource(org.apache.uima.resource.ResourceSpecifier, Map)}. For
+ * example, if a {@link URISpecifier} is passed, a remote connection will be established and the
+ * service will be queried for its metadata. An exception will be thrown if the connection cannot
+ * be opened.
*
* @param aComponentDescriptionsOrMetaData
* a collection of {@link ResourceSpecifier}, {@link ProcessingResourceMetaData},
@@ -351,13 +357,15 @@
* system. Using this method allows several CASes to all share the exact same type system object.
* This method correctly handles aggregate as well as primitive analysis engines.
* <p>
- * If you pass this method objects of type {@link AnalysisEngineDescription},
- * {@link CollectionReaderDescription}, {@link CasInitializerDescription}, or {@link CasConsumerDescription},
- * it will not instantiate the components. It will just extract the type system information from the
- * descriptor. For any other kind of {@link ResourceSpecifier}, it will call
- * {@link UIMAFramework#produceResource(org.apache.uima.resource.ResourceSpecifier, Map)}. For example,
- * if a {@link URISpecifier} is passed, a remote connection will be established and the service will be
- * queries for its metadata. An exception will be thrown if the connection can not be opened.
+ * If you pass this method objects of type {@link AnalysisEngineDescription},
+ * {@link CollectionReaderDescription}, {@link CasInitializerDescription}, or
+ * {@link CasConsumerDescription}, it will not instantiate the components. It will just extract
+ * the type system information from the descriptor. For any other kind of
+ * {@link ResourceSpecifier}, it will call
+ * {@link UIMAFramework#produceResource(org.apache.uima.resource.ResourceSpecifier, Map)}. For
+ * example, if a {@link URISpecifier} is passed, a remote connection will be established and the
+ * service will be queried for its metadata. An exception will be thrown if the connection cannot
+ * be opened.
*
* @param aComponentDescriptionsOrMetaData
* a collection of {@link ResourceSpecifier}, {@link ProcessingResourceMetaData},
@@ -385,13 +393,15 @@
* system. Using this method allows several CASes to all share the exact same type system object.
* This method correctly handles aggregate as well as primitive analysis engines.
* <p>
- * If you pass this method objects of type {@link AnalysisEngineDescription},
- * {@link CollectionReaderDescription}, {@link CasInitializerDescription}, or {@link CasConsumerDescription},
- * it will not instantiate the components. It will just extract the type system information from the
- * descriptor. For any other kind of {@link ResourceSpecifier}, it will call
- * {@link UIMAFramework#produceResource(org.apache.uima.resource.ResourceSpecifier, Map)}. For example,
- * if a {@link URISpecifier} is passed, a remote connection will be established and the service will be
- * queries for its metadata. An exception will be thrown if the connection can not be opened.
+ * If you pass this method objects of type {@link AnalysisEngineDescription},
+ * {@link CollectionReaderDescription}, {@link CasInitializerDescription}, or
+ * {@link CasConsumerDescription}, it will not instantiate the components. It will just extract
+ * the type system information from the descriptor. For any other kind of
+ * {@link ResourceSpecifier}, it will call
+ * {@link UIMAFramework#produceResource(org.apache.uima.resource.ResourceSpecifier, Map)}. For
+ * example, if a {@link URISpecifier} is passed, a remote connection will be established and the
+ * service will be queried for its metadata. An exception will be thrown if the connection cannot
+ * be opened.
*
* @param aComponentDescriptionsOrMetaData
* a collection of {@link ResourceSpecifier}, {@link ProcessingResourceMetaData},
@@ -1197,6 +1207,38 @@
*/
public static TypeSystemDescription mergeTypeSystems(Collection aTypeSystems,
ResourceManager aResourceManager) throws ResourceInitializationException {
+ return mergeTypeSystems(aTypeSystems, aResourceManager, null);
+ }
+
+ /**
+ * Merges several TypeSystemDescriptions into one. Also resolves imports in the
+ * TypeSystemDescription objects.
+ * <p>
+ * This version of this method takes an argument <code>aOutputMergedTypeNames</code>, to which
+ * this method will add the names of any types whose definitions have been merged from multiple
+ * non-identical sources. That is, types that are declared more than once, with different (but
+ * compatible) sets of features in each declaration, or with different (but compatible)
+ * supertypes.
+ *
+ * @param aTypeSystems
+ * a collection of TypeSystems to be merged
+ * @param aResourceManager
+ * Resource Manager to use to locate type systems imported by name
+ * @param aOutputMergedTypeNames
+ * A Collection to which this method will add the names of any types whose definitions
+ * were merged from multiple non-identical sources. The same type name may be added more
+ * than once, so if you want to eliminate duplicates, pass a Set, not a List. You may
+ * pass null if you are not interested in this information.
+ *
+ * @return a new TypeSystemDescription that is the result of merging all of the type systems
+ * together
+ *
+ * @throws ResourceInitializationException
+ * if an incompatibility exists or if an import could not be resolved
+ */
+ public static TypeSystemDescription mergeTypeSystems(Collection aTypeSystems,
+ ResourceManager aResourceManager, Collection aOutputMergedTypeNames)
+ throws ResourceInitializationException {
// create the type system into which we are merging
TypeSystemDescription result = UIMAFramework.getResourceSpecifierFactory()
.createTypeSystemDescription();
@@ -1226,6 +1268,10 @@
existingType.setAllowedValues(types[i].getAllowedValues());
existingType.setSourceUrl(types[i].getSourceUrl());
typeNameMap.put(types[i].getName(), existingType);
+ FeatureDescription[] features = types[i].getFeatures();
+ if (features != null) {
+ mergeFeatures(existingType, types[i].getFeatures());
+ }
} else {
// type already existed - check that supertypes are compatible
String supertypeName = types[i].getSupertypeName();
@@ -1236,9 +1282,15 @@
// existing supertype subsumes newly specified supertype -
// reset supertype to the new, more specific type
existingType.setSupertypeName(supertypeName);
+ if (aOutputMergedTypeNames != null) {
+ aOutputMergedTypeNames.add(typeName);
+ }
} else if (subsumes(supertypeName, existingSupertypeName, typeNameMap)) {
- // newly specified supertype subsumes old type,
- // this is OK and we don't need to do anything
+ // newly specified supertype subsumes old type, this is OK and we don't
+ // need to do anything except report this
+ if (aOutputMergedTypeNames != null) {
+ aOutputMergedTypeNames.add(typeName);
+ }
} else {
// error
throw new ResourceInitializationException(
@@ -1248,11 +1300,18 @@
}
}
- }
- // merge features
- FeatureDescription[] features = types[i].getFeatures();
- if (features != null) {
- mergeFeatures(existingType, types[i].getFeatures());
+ // merge features
+ int prevNumFeatures = existingType.getFeatures().length;
+ FeatureDescription[] features = types[i].getFeatures();
+ if (features != null) {
+ mergeFeatures(existingType, types[i].getFeatures());
+ // if a feature merge occurred, the number of features on the type will have
+ // changed. Report this by adding to the aOutputMergedTypeNames collection.
+ if (aOutputMergedTypeNames != null
+ && existingType.getFeatures().length != prevNumFeatures) {
+ aOutputMergedTypeNames.add(typeName);
+ }
+ }
}
}
}
@@ -1283,6 +1342,12 @@
/**
* Merges the Type Systems of each component within an aggregate Analysis Engine, producing a
* single combined Type System.
+ * <p>
+ * This version of this method does not report which types were merged. To obtain the names
+ * of any types whose definitions have been merged from multiple non-identical sources (that
+ * is, types declared more than once with different but compatible sets of features, or with
+ * different but compatible supertypes), use the overload of this method that takes an
+ * additional <code>aOutputMergedTypeNames</code> argument.
*
* @param aAggregateDescription
* an aggregate Analysis Engine description
@@ -1298,6 +1363,32 @@
public static TypeSystemDescription mergeDelegateAnalysisEngineTypeSystems(
AnalysisEngineDescription aAggregateDescription, ResourceManager aResourceManager)
throws ResourceInitializationException {
+ return mergeDelegateAnalysisEngineTypeSystems(aAggregateDescription, aResourceManager, null);
+ }
+
+ /**
+ * Merges the Type Systems of each component within an aggregate Analysis Engine, producing a
+ * single combined Type System.
+ *
+ * @param aAggregateDescription
+ * an aggregate Analysis Engine description
+ * @param aResourceManager
+ * ResourceManager instance used to resolve imports
+ * @param aOutputMergedTypeNames
+ * A Collection to which this method will add the names of any types whose definitions
+ * were merged from multiple non-identical sources. The same type name may be added more
+ * than once, so if you want to eliminate duplicates, pass a Set, not a List. You may
+ * pass null if you are not interested in this information.
+ *
+ * @return a new TypeSystemDescription that is the result of merging all of the delegate AE type
+ * systems together
+ *
+ * @throws ResourceInitializationException
+ * if an incompatibility exists or if an import could not be resolved
+ */
+ public static TypeSystemDescription mergeDelegateAnalysisEngineTypeSystems(
+ AnalysisEngineDescription aAggregateDescription, ResourceManager aResourceManager,
+ Collection aOutputMergedTypeNames) throws ResourceInitializationException {
// expand the aggregate AE description into the individual delegates
ArrayList l = new ArrayList();
l.add(aAggregateDescription);
@@ -1311,7 +1402,7 @@
if (md.getTypeSystem() != null)
typeSystems.add(md.getTypeSystem());
}
- return mergeTypeSystems(typeSystems, aResourceManager);
+ return mergeTypeSystems(typeSystems, aResourceManager, aOutputMergedTypeNames);
}
/**
@@ -1593,19 +1684,21 @@
}
/**
- * Gets a list of ProcessingResourceMetadata objects from a list containing either ResourceSpecifiers,
- * ProcessingResourceMetadata objects, or subparts of ProcessingResourceMetadata
- * objects (type sypstems, indexes, or type priorities). Subparts will be wrapped inside a
- * ProcessingResourceMetadata object. All objects will be cloned, so that further processing (such
- * as import resolution) does not affect the caller.
+ * Gets a list of ProcessingResourceMetadata objects from a list containing either
+ * ResourceSpecifiers, ProcessingResourceMetadata objects, or subparts of
+ * ProcessingResourceMetadata objects (type systems, indexes, or type priorities). Subparts will
+ * be wrapped inside a ProcessingResourceMetadata object. All objects will be cloned, so that
+ * further processing (such as import resolution) does not affect the caller.
* <p>
- * If you pass this method objects of type {@link AnalysisEngineDescription},
- * {@link CollectionReaderDescription}, {@link CasInitializerDescription}, or {@link CasConsumerDescription},
- * it will not instantiate the components. It will just extract the type system information from the
- * descriptor. For any other kind of {@link ResourceSpecifier}, it will call
- * {@link UIMAFramework#produceResource(org.apache.uima.resource.ResourceSpecifier, Map)}. For example,
- * if a {@link URISpecifier} is passed, a remote connection will be established and the service will be
- * queries for its metadata. An exception will be thrown if the connection can not be opened.
+ * If you pass this method objects of type {@link AnalysisEngineDescription},
+ * {@link CollectionReaderDescription}, {@link CasInitializerDescription}, or
+ * {@link CasConsumerDescription}, it will not instantiate the components. It will just extract
+ * the type system information from the descriptor. For any other kind of
+ * {@link ResourceSpecifier}, it will call
+ * {@link UIMAFramework#produceResource(org.apache.uima.resource.ResourceSpecifier, Map)}. For
+ * example, if a {@link URISpecifier} is passed, a remote connection will be established and the
+ * service will be queried for its metadata. An exception will be thrown if the connection cannot
+ * be opened.
*
* @param aComponentDescriptionsOrMetaData
* a collection of {@link ResourceSpecifier}, {@link ProcessingResourceMetaData},
@@ -1666,14 +1759,14 @@
md.setTypePriorities((TypePriorities) current);
mdList.add(md);
} else if (current instanceof ResourceSpecifier) {
- Resource resource = UIMAFramework.produceResource((ResourceSpecifier)current, Collections.EMPTY_MAP);
+ Resource resource = UIMAFramework.produceResource((ResourceSpecifier) current,
+ Collections.EMPTY_MAP);
ResourceMetaData metadata = resource.getMetaData();
if (metadata instanceof ProcessingResourceMetaData) {
mdList.add(metadata);
- }
+ }
resource.destroy();
- }
- else {
+ } else {
throw new ResourceInitializationException(
ResourceInitializationException.UNSUPPORTED_OBJECT_TYPE_IN_CREATE_CAS,
new Object[] { current.getClass().getName() });
Modified: incubator/uima/uimaj/trunk/uimaj-core/src/test/java/org/apache/uima/util/CasCreationUtilsTest.java
URL: http://svn.apache.org/viewvc/incubator/uima/uimaj/trunk/uimaj-core/src/test/java/org/apache/uima/util/CasCreationUtilsTest.java?view=diff&rev=495384&r1=495383&r2=495384
==============================================================================
--- incubator/uima/uimaj/trunk/uimaj-core/src/test/java/org/apache/uima/util/CasCreationUtilsTest.java (original)
+++ incubator/uima/uimaj/trunk/uimaj-core/src/test/java/org/apache/uima/util/CasCreationUtilsTest.java Thu Jan 11 13:32:05 2007
@@ -21,6 +21,8 @@
import java.io.File;
import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.Set;
import junit.framework.Assert;
import junit.framework.TestCase;
@@ -71,6 +73,7 @@
Assert.assertEquals(1, ts1desc.getType("Type1").getFeatures().length);
Assert.assertEquals(1, ts1desc.getType("Type2").getFeatures().length);
+ Assert.assertEquals(1, ts1desc.getType("Type3").getFeatures().length);
TypeSystemDescription ts2desc = UIMAFramework.getXMLParser().parseTypeSystemDescription(
new XMLInputSource(JUnitExtension.getFile("CasCreationUtilsTest/TypeSystem2.xml")));
@@ -80,10 +83,20 @@
ArrayList tsList = new ArrayList();
tsList.add(ts1desc);
tsList.add(ts2desc);
- TypeSystemDescription merged = CasCreationUtils.mergeTypeSystems(tsList);
+ Set typesWithMergedFeatures = new HashSet();
+ TypeSystemDescription merged = CasCreationUtils.mergeTypeSystems(tsList, UIMAFramework.newDefaultResourceManager(), typesWithMergedFeatures);
Assert.assertEquals(2, merged.getType("Type1").getFeatures().length);
Assert.assertEquals(2, merged.getType("Type2").getFeatures().length);
+ Assert.assertEquals(1, merged.getType("Type3").getFeatures().length);
+
+ assertEquals(2, typesWithMergedFeatures.size());
+ assertTrue(typesWithMergedFeatures.contains("Type1"));
+ assertTrue(typesWithMergedFeatures.contains("Type2"));
+
+ //make sure one-arg version doesn't fail
+ CasCreationUtils.mergeTypeSystems(tsList);
+
} catch (Exception e) {
JUnitExtension.handleException(e);
}
@@ -187,13 +200,15 @@
}
}
- public void testMergeDelegateAnalysisEngineMetaData() throws Exception {
+ public void testMergeDelegateAnalysisEngineTypeSystems() throws Exception {
try {
File descFile = JUnitExtension
.getFile("TextAnalysisEngineImplTest/AggregateTaeForMergeTest.xml");
AnalysisEngineDescription desc = UIMAFramework.getXMLParser().parseAnalysisEngineDescription(
new XMLInputSource(descFile));
- TypeSystemDescription typeSys = CasCreationUtils.mergeDelegateAnalysisEngineTypeSystems(desc);
+ Set mergedTypes = new HashSet();
+ TypeSystemDescription typeSys = CasCreationUtils.mergeDelegateAnalysisEngineTypeSystems(desc,
+ UIMAFramework.newDefaultResourceManager(), mergedTypes);
// test results of merge
Assert.assertEquals(8, typeSys.getTypes().length);
@@ -237,6 +252,15 @@
Assert.assertNotNull(type7);
Assert.assertEquals("uima.tcas.Annotation", type7.getSupertypeName());
Assert.assertEquals(1, type7.getFeatures().length);
+
+ //Place has merged features, Person has different supertype
+ assertEquals(2, mergedTypes.size());
+ assertTrue(mergedTypes.contains("Place"));
+ assertTrue(mergedTypes.contains("Person"));
+
+ //make sure one-arg version doesn't fail
+ CasCreationUtils.mergeDelegateAnalysisEngineTypeSystems(desc);
+
} catch (Exception e) {
JUnitExtension.handleException(e);
}