You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@uima.apache.org by al...@apache.org on 2007/01/11 22:32:05 UTC

svn commit: r495384 - in /incubator/uima/uimaj/trunk/uimaj-core/src: main/java/org/apache/uima/util/CasCreationUtils.java test/java/org/apache/uima/util/CasCreationUtilsTest.java

Author: alally
Date: Thu Jan 11 13:32:05 2007
New Revision: 495384

URL: http://svn.apache.org/viewvc?view=rev&rev=495384
Log:
Extended CasCreationUtils methods for type system merging to return list of types where feature-merge was required.
UIMA-182: https://issues.apache.org/jira/browse/UIMA-182

Modified:
    incubator/uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/util/CasCreationUtils.java
    incubator/uima/uimaj/trunk/uimaj-core/src/test/java/org/apache/uima/util/CasCreationUtilsTest.java

Modified: incubator/uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/util/CasCreationUtils.java
URL: http://svn.apache.org/viewvc/incubator/uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/util/CasCreationUtils.java?view=diff&rev=495384&r1=495383&r2=495384
==============================================================================
--- incubator/uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/util/CasCreationUtils.java (original)
+++ incubator/uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/util/CasCreationUtils.java Thu Jan 11 13:32:05 2007
@@ -165,13 +165,15 @@
    * Creates a new CAS instance for a collection of CAS Processors. This method correctly handles
    * aggregate as well as primitive analysis engines
    * <p>
-   * If you pass this method objects of type {@link AnalysisEngineDescription}, 
-   * {@link CollectionReaderDescription}, {@link CasInitializerDescription}, or {@link CasConsumerDescription},
-   * it will not instantiate the components.  It will just extract the type system information from the
-   * descriptor.  For any other kind of {@link ResourceSpecifier}, it will call
-   * {@link UIMAFramework#produceResource(org.apache.uima.resource.ResourceSpecifier, Map)}.  For example,
-   * if a {@link URISpecifier} is passed, a remote connection will be established and the service will be
-   * queries for its metadata.  An exception will be thrown if the connection can not be opened.
+   * If you pass this method objects of type {@link AnalysisEngineDescription},
+   * {@link CollectionReaderDescription}, {@link CasInitializerDescription}, or
+   * {@link CasConsumerDescription}, it will not instantiate the components. It will just extract
+   * the type system information from the descriptor. For any other kind of
+   * {@link ResourceSpecifier}, it will call
+   * {@link UIMAFramework#produceResource(org.apache.uima.resource.ResourceSpecifier, Map)}. For
+   * example, if a {@link URISpecifier} is passed, a remote connection will be established and the
+   * service will be queried for its metadata. An exception will be thrown if the connection can not
+   * be opened.
    * 
    * @param aComponentDescriptionsOrMetaData
    *          a collection of {@link ResourceSpecifier}, {@link ProcessingResourceMetaData},
@@ -193,13 +195,15 @@
    * Creates a new CAS instance for a collection of CAS Processors. This method correctly handles
    * aggregate as well as primitive analysis engines
    * <p>
-   * If you pass this method objects of type {@link AnalysisEngineDescription}, 
-   * {@link CollectionReaderDescription}, {@link CasInitializerDescription}, or {@link CasConsumerDescription},
-   * it will not instantiate the components.  It will just extract the type system information from the
-   * descriptor.  For any other kind of {@link ResourceSpecifier}, it will call
-   * {@link UIMAFramework#produceResource(org.apache.uima.resource.ResourceSpecifier, Map)}.  For example,
-   * if a {@link URISpecifier} is passed, a remote connection will be established and the service will be
-   * queries for its metadata.  An exception will be thrown if the connection can not be opened.
+   * If you pass this method objects of type {@link AnalysisEngineDescription},
+   * {@link CollectionReaderDescription}, {@link CasInitializerDescription}, or
+   * {@link CasConsumerDescription}, it will not instantiate the components. It will just extract
+   * the type system information from the descriptor. For any other kind of
+   * {@link ResourceSpecifier}, it will call
+   * {@link UIMAFramework#produceResource(org.apache.uima.resource.ResourceSpecifier, Map)}. For
+   * example, if a {@link URISpecifier} is passed, a remote connection will be established and the
+   * service will be queried for its metadata. An exception will be thrown if the connection can not
+   * be opened.
    * 
    * @param aComponentDescriptionsOrMetaData
    *          a collection of {@link ResourceSpecifier}, {@link ProcessingResourceMetaData},
@@ -223,14 +227,16 @@
   /**
    * Creates a new CAS instance for a collection of CAS Processors. This method correctly handles
    * aggregate as well as primitive analysis engines
-   *  <p>
-   * If you pass this method objects of type {@link AnalysisEngineDescription}, 
-   * {@link CollectionReaderDescription}, {@link CasInitializerDescription}, or {@link CasConsumerDescription},
-   * it will not instantiate the components.  It will just extract the type system information from the
-   * descriptor.  For any other kind of {@link ResourceSpecifier}, it will call
-   * {@link UIMAFramework#produceResource(org.apache.uima.resource.ResourceSpecifier, Map)}.  For example,
-   * if a {@link URISpecifier} is passed, a remote connection will be established and the service will be
-   * queries for its metadata.  An exception will be thrown if the connection can not be opened.
+   * <p>
+   * If you pass this method objects of type {@link AnalysisEngineDescription},
+   * {@link CollectionReaderDescription}, {@link CasInitializerDescription}, or
+   * {@link CasConsumerDescription}, it will not instantiate the components. It will just extract
+   * the type system information from the descriptor. For any other kind of
+   * {@link ResourceSpecifier}, it will call
+   * {@link UIMAFramework#produceResource(org.apache.uima.resource.ResourceSpecifier, Map)}. For
+   * example, if a {@link URISpecifier} is passed, a remote connection will be established and the
+   * service will be queried for its metadata. An exception will be thrown if the connection can not
+   * be opened.
    * 
    * @param aComponentDescriptionsOrMetaData
    *          a collection of {@link ResourceSpecifier}, {@link ProcessingResourceMetaData},
@@ -351,13 +357,15 @@
    * system. Using this method allows several CASes to all share the exact same type system object.
    * This method correctly handles aggregate as well as primitive analysis engines.
    * <p>
-   * If you pass this method objects of type {@link AnalysisEngineDescription}, 
-   * {@link CollectionReaderDescription}, {@link CasInitializerDescription}, or {@link CasConsumerDescription},
-   * it will not instantiate the components.  It will just extract the type system information from the
-   * descriptor.  For any other kind of {@link ResourceSpecifier}, it will call
-   * {@link UIMAFramework#produceResource(org.apache.uima.resource.ResourceSpecifier, Map)}.  For example,
-   * if a {@link URISpecifier} is passed, a remote connection will be established and the service will be
-   * queries for its metadata.  An exception will be thrown if the connection can not be opened.
+   * If you pass this method objects of type {@link AnalysisEngineDescription},
+   * {@link CollectionReaderDescription}, {@link CasInitializerDescription}, or
+   * {@link CasConsumerDescription}, it will not instantiate the components. It will just extract
+   * the type system information from the descriptor. For any other kind of
+   * {@link ResourceSpecifier}, it will call
+   * {@link UIMAFramework#produceResource(org.apache.uima.resource.ResourceSpecifier, Map)}. For
+   * example, if a {@link URISpecifier} is passed, a remote connection will be established and the
+   * service will be queried for its metadata. An exception will be thrown if the connection can not
+   * be opened.
    * 
    * @param aComponentDescriptionsOrMetaData
    *          a collection of {@link ResourceSpecifier}, {@link ProcessingResourceMetaData},
@@ -385,13 +393,15 @@
    * system. Using this method allows several CASes to all share the exact same type system object.
    * This method correctly handles aggregate as well as primitive analysis engines.
    * <p>
-   * If you pass this method objects of type {@link AnalysisEngineDescription}, 
-   * {@link CollectionReaderDescription}, {@link CasInitializerDescription}, or {@link CasConsumerDescription},
-   * it will not instantiate the components.  It will just extract the type system information from the
-   * descriptor.  For any other kind of {@link ResourceSpecifier}, it will call
-   * {@link UIMAFramework#produceResource(org.apache.uima.resource.ResourceSpecifier, Map)}.  For example,
-   * if a {@link URISpecifier} is passed, a remote connection will be established and the service will be
-   * queries for its metadata.  An exception will be thrown if the connection can not be opened.
+   * If you pass this method objects of type {@link AnalysisEngineDescription},
+   * {@link CollectionReaderDescription}, {@link CasInitializerDescription}, or
+   * {@link CasConsumerDescription}, it will not instantiate the components. It will just extract
+   * the type system information from the descriptor. For any other kind of
+   * {@link ResourceSpecifier}, it will call
+   * {@link UIMAFramework#produceResource(org.apache.uima.resource.ResourceSpecifier, Map)}. For
+   * example, if a {@link URISpecifier} is passed, a remote connection will be established and the
+   * service will be queried for its metadata. An exception will be thrown if the connection can not
+   * be opened.
    * 
    * @param aComponentDescriptionsOrMetaData
    *          a collection of {@link ResourceSpecifier}, {@link ProcessingResourceMetaData},
@@ -1197,6 +1207,38 @@
    */
   public static TypeSystemDescription mergeTypeSystems(Collection aTypeSystems,
           ResourceManager aResourceManager) throws ResourceInitializationException {
+    return mergeTypeSystems(aTypeSystems, aResourceManager, null);
+  }
+
+  /**
+   * Merges several TypeSystemDescriptions into one. Also resolves imports in the
+   * TypeSystemDescription objects.
+   * <p>
+   * This version of this method takes an argument <code>aOutputMergedTypeNames</code>, to which
+   * this method will add the names of any types whose definitions have been merged from multiple
+   * non-identical sources. That is, types that are declared more than once, with different (but
+   * compatible) sets of features in each declaration, or with different (but compatible)
+   * supertypes.
+   * 
+   * @param aTypeSystems
+   *          a collection of TypeSystems to be merged
+   * @param aResourceManager
+   *          Resource Manager to use to locate type systems imported by name
+   * @param aOutputMergedTypeNames
+   *          A Collection to which this method will add the names of any types whose definitions
+   *          were merged from multiple non-identical sources. The same type name may be added more
+   *          than once, so if you want to eliminate duplicates, pass a Set, not a List. You may
+   *          pass null if you are not interested in this information.
+   * 
+   * @return a new TypeSystemDescription that is the result of merging all of the type systems
+   *         together
+   * 
+   * @throws ResourceInitializationException
+   *           if an incompatibility exists or if an import could not be resolved
+   */
+  public static TypeSystemDescription mergeTypeSystems(Collection aTypeSystems,
+          ResourceManager aResourceManager, Collection aOutputMergedTypeNames)
+          throws ResourceInitializationException {
     // create the type system into which we are merging
     TypeSystemDescription result = UIMAFramework.getResourceSpecifierFactory()
             .createTypeSystemDescription();
@@ -1226,6 +1268,10 @@
               existingType.setAllowedValues(types[i].getAllowedValues());
               existingType.setSourceUrl(types[i].getSourceUrl());
               typeNameMap.put(types[i].getName(), existingType);
+              FeatureDescription[] features = types[i].getFeatures();
+              if (features != null) {
+                mergeFeatures(existingType, types[i].getFeatures());
+              }
             } else {
               // type already existed - check that supertypes are compatible
               String supertypeName = types[i].getSupertypeName();
@@ -1236,9 +1282,15 @@
                   // existing supertype subsumes newly specified supertype -
                   // reset supertype to the new, more specific type
                   existingType.setSupertypeName(supertypeName);
+                  if (aOutputMergedTypeNames != null) {
+                    aOutputMergedTypeNames.add(typeName);
+                  }
                 } else if (subsumes(supertypeName, existingSupertypeName, typeNameMap)) {
-                  // newly specified supertype subsumes old type,
-                  // this is OK and we don't need to do anything
+                  // newly specified supertype subsumes old type, this is OK and we don't
+                  // need to do anything except report this
+                  if (aOutputMergedTypeNames != null) {
+                    aOutputMergedTypeNames.add(typeName);
+                  }
                 } else {
                   // error
                   throw new ResourceInitializationException(
@@ -1248,11 +1300,18 @@
                 }
 
               }
-            }
-            // merge features
-            FeatureDescription[] features = types[i].getFeatures();
-            if (features != null) {
-              mergeFeatures(existingType, types[i].getFeatures());
+              // merge features
+              int prevNumFeatures = existingType.getFeatures().length;
+              FeatureDescription[] features = types[i].getFeatures();
+              if (features != null) {
+                mergeFeatures(existingType, types[i].getFeatures());
+                // if a feature merge occurred, the number of features on the type will have
+                // changed. Report this by adding to the aOutputMergedTypeNames collection.
+                if (aOutputMergedTypeNames != null
+                        && existingType.getFeatures().length != prevNumFeatures) {
+                  aOutputMergedTypeNames.add(typeName);
+                }
+              }
             }
           }
         }
@@ -1283,6 +1342,12 @@
   /**
    * Merges the Type Systems of each component within an aggregate Analysis Engine, producing a
    * single combined Type System.
+   * <p>
+   * This version of this method takes an argument <code>aOutputMergedTypeNames</code>, to which
+   * this method will add the names of any types whose definitions have been merged from multiple
+   * non-identical sources. That is, types that are declared more than once, with different (but
+   * compatible) sets of features in each declaration, or with different (but compatible)
+   * supertypes.
    * 
    * @param aAggregateDescription
    *          an aggregate Analysis Engine description
@@ -1298,6 +1363,32 @@
   public static TypeSystemDescription mergeDelegateAnalysisEngineTypeSystems(
           AnalysisEngineDescription aAggregateDescription, ResourceManager aResourceManager)
           throws ResourceInitializationException {
+    return mergeDelegateAnalysisEngineTypeSystems(aAggregateDescription, aResourceManager, null);
+  }
+
+  /**
+   * Merges the Type Systems of each component within an aggregate Analysis Engine, producing a
+   * single combined Type System.
+   * 
+   * @param aAggregateDescription
+   *          an aggregate Analysis Engine description
+   * @param aResourceManager
+   *          ResourceManager instance used to resolve imports
+   * @param aOutputMergedTypeNames
+   *          A Collection to which this method will add the names of any types whose definitions
+   *          were merged from multiple non-identical sources. The same type name may be added more
+   *          than once, so if you want to eliminate duplicates, pass a Set, not a List. You may
+   *          pass null if you are not interested in this information.
+   * 
+   * @return a new TypeSystemDescription that is the result of merging all of the delegate AE type
+   *         systems together
+   * 
+   * @throws ResourceInitializationException
+   *           if an incompatibility exists or if an import could not be resolved
+   */
+  public static TypeSystemDescription mergeDelegateAnalysisEngineTypeSystems(
+          AnalysisEngineDescription aAggregateDescription, ResourceManager aResourceManager,
+          Collection aOutputMergedTypeNames) throws ResourceInitializationException {
     // expand the aggregate AE description into the individual delegates
     ArrayList l = new ArrayList();
     l.add(aAggregateDescription);
@@ -1311,7 +1402,7 @@
       if (md.getTypeSystem() != null)
         typeSystems.add(md.getTypeSystem());
     }
-    return mergeTypeSystems(typeSystems, aResourceManager);
+    return mergeTypeSystems(typeSystems, aResourceManager, aOutputMergedTypeNames);
   }
 
   /**
@@ -1593,19 +1684,21 @@
   }
 
   /**
-   * Gets a list of ProcessingResourceMetadata objects from a list containing either ResourceSpecifiers,
-   * ProcessingResourceMetadata objects, or subparts of ProcessingResourceMetadata
-   * objects (type sypstems, indexes, or type priorities). Subparts will be wrapped inside a
-   * ProcessingResourceMetadata object. All objects will be cloned, so that further processing (such
-   * as import resolution) does not affect the caller.
+   * Gets a list of ProcessingResourceMetadata objects from a list containing either
+   * ResourceSpecifiers, ProcessingResourceMetadata objects, or subparts of
+   * ProcessingResourceMetadata objects (type systems, indexes, or type priorities). Subparts will
+   * be wrapped inside a ProcessingResourceMetadata object. All objects will be cloned, so that
+   * further processing (such as import resolution) does not affect the caller.
    * <p>
-   * If you pass this method objects of type {@link AnalysisEngineDescription}, 
-   * {@link CollectionReaderDescription}, {@link CasInitializerDescription}, or {@link CasConsumerDescription},
-   * it will not instantiate the components.  It will just extract the type system information from the
-   * descriptor.  For any other kind of {@link ResourceSpecifier}, it will call
-   * {@link UIMAFramework#produceResource(org.apache.uima.resource.ResourceSpecifier, Map)}.  For example,
-   * if a {@link URISpecifier} is passed, a remote connection will be established and the service will be
-   * queries for its metadata.  An exception will be thrown if the connection can not be opened.
+   * If you pass this method objects of type {@link AnalysisEngineDescription},
+   * {@link CollectionReaderDescription}, {@link CasInitializerDescription}, or
+   * {@link CasConsumerDescription}, it will not instantiate the components. It will just extract
+   * the type system information from the descriptor. For any other kind of
+   * {@link ResourceSpecifier}, it will call
+   * {@link UIMAFramework#produceResource(org.apache.uima.resource.ResourceSpecifier, Map)}. For
+   * example, if a {@link URISpecifier} is passed, a remote connection will be established and the
+   * service will be queried for its metadata. An exception will be thrown if the connection can not
+   * be opened.
    * 
    * @param aComponentDescriptionsOrMetaData
    *          a collection of {@link ResourceSpecifier}, {@link ProcessingResourceMetaData},
@@ -1666,14 +1759,14 @@
         md.setTypePriorities((TypePriorities) current);
         mdList.add(md);
       } else if (current instanceof ResourceSpecifier) {
-        Resource resource = UIMAFramework.produceResource((ResourceSpecifier)current, Collections.EMPTY_MAP);
+        Resource resource = UIMAFramework.produceResource((ResourceSpecifier) current,
+                Collections.EMPTY_MAP);
         ResourceMetaData metadata = resource.getMetaData();
         if (metadata instanceof ProcessingResourceMetaData) {
           mdList.add(metadata);
-        }      
+        }
         resource.destroy();
-      }
-      else {
+      } else {
         throw new ResourceInitializationException(
                 ResourceInitializationException.UNSUPPORTED_OBJECT_TYPE_IN_CREATE_CAS,
                 new Object[] { current.getClass().getName() });

Modified: incubator/uima/uimaj/trunk/uimaj-core/src/test/java/org/apache/uima/util/CasCreationUtilsTest.java
URL: http://svn.apache.org/viewvc/incubator/uima/uimaj/trunk/uimaj-core/src/test/java/org/apache/uima/util/CasCreationUtilsTest.java?view=diff&rev=495384&r1=495383&r2=495384
==============================================================================
--- incubator/uima/uimaj/trunk/uimaj-core/src/test/java/org/apache/uima/util/CasCreationUtilsTest.java (original)
+++ incubator/uima/uimaj/trunk/uimaj-core/src/test/java/org/apache/uima/util/CasCreationUtilsTest.java Thu Jan 11 13:32:05 2007
@@ -21,6 +21,8 @@
 
 import java.io.File;
 import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.Set;
 
 import junit.framework.Assert;
 import junit.framework.TestCase;
@@ -71,6 +73,7 @@
 
       Assert.assertEquals(1, ts1desc.getType("Type1").getFeatures().length);
       Assert.assertEquals(1, ts1desc.getType("Type2").getFeatures().length);
+      Assert.assertEquals(1, ts1desc.getType("Type3").getFeatures().length);
 
       TypeSystemDescription ts2desc = UIMAFramework.getXMLParser().parseTypeSystemDescription(
               new XMLInputSource(JUnitExtension.getFile("CasCreationUtilsTest/TypeSystem2.xml")));
@@ -80,10 +83,20 @@
       ArrayList tsList = new ArrayList();
       tsList.add(ts1desc);
       tsList.add(ts2desc);
-      TypeSystemDescription merged = CasCreationUtils.mergeTypeSystems(tsList);
+      Set typesWithMergedFeatures = new HashSet();
+      TypeSystemDescription merged = CasCreationUtils.mergeTypeSystems(tsList, UIMAFramework.newDefaultResourceManager(), typesWithMergedFeatures);
 
       Assert.assertEquals(2, merged.getType("Type1").getFeatures().length);
       Assert.assertEquals(2, merged.getType("Type2").getFeatures().length);
+      Assert.assertEquals(1, merged.getType("Type3").getFeatures().length);
+      
+      assertEquals(2, typesWithMergedFeatures.size());
+      assertTrue(typesWithMergedFeatures.contains("Type1"));
+      assertTrue(typesWithMergedFeatures.contains("Type2"));
+      
+      //make sure one-arg version doesn't fail
+      CasCreationUtils.mergeTypeSystems(tsList);
+      
     } catch (Exception e) {
       JUnitExtension.handleException(e);
     }
@@ -187,13 +200,15 @@
     }
   }
 
-  public void testMergeDelegateAnalysisEngineMetaData() throws Exception {
+  public void testMergeDelegateAnalysisEngineTypeSystems() throws Exception {
     try {
       File descFile = JUnitExtension
               .getFile("TextAnalysisEngineImplTest/AggregateTaeForMergeTest.xml");
       AnalysisEngineDescription desc = UIMAFramework.getXMLParser().parseAnalysisEngineDescription(
               new XMLInputSource(descFile));
-      TypeSystemDescription typeSys = CasCreationUtils.mergeDelegateAnalysisEngineTypeSystems(desc);
+      Set mergedTypes = new HashSet();
+      TypeSystemDescription typeSys = CasCreationUtils.mergeDelegateAnalysisEngineTypeSystems(desc, 
+              UIMAFramework.newDefaultResourceManager(), mergedTypes);
 
       // test results of merge
       Assert.assertEquals(8, typeSys.getTypes().length);
@@ -237,6 +252,15 @@
       Assert.assertNotNull(type7);
       Assert.assertEquals("uima.tcas.Annotation", type7.getSupertypeName());
       Assert.assertEquals(1, type7.getFeatures().length);
+      
+      //Place has merged features, Person has different supertype
+      assertEquals(2, mergedTypes.size());
+      assertTrue(mergedTypes.contains("Place"));
+      assertTrue(mergedTypes.contains("Person"));
+      
+      //make sure one-arg version doesn't fail
+      CasCreationUtils.mergeDelegateAnalysisEngineTypeSystems(desc);
+      
     } catch (Exception e) {
       JUnitExtension.handleException(e);
     }