You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@uima.apache.org by al...@apache.org on 2007/02/27 21:58:40 UTC

svn commit: r512404 [1/2] - in /incubator/uima/uimaj/trunk/uimaj-core/src: main/java/org/apache/uima/cas/impl/ main/java/org/apache/uima/internal/util/ test/java/org/apache/uima/cas/impl/ test/java/org/apache/uima/cas_data/impl/ test/resources/ExampleCas/

Author: alally
Date: Tue Feb 27 12:58:39 2007
New Revision: 512404

URL: http://svn.apache.org/viewvc?view=rev&rev=512404
Log:
XMI CAS Serializer/Deserializer updates for merging and 
out-of-typesystem data.
UIMA-325: http://issues.apache.org/jira/browse/UIMA-325
UIMA-326: http://issues.apache.org/jira/browse/UIMA-326

Added:
    incubator/uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/internal/util/XmlAttribute.java
    incubator/uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/internal/util/XmlElementName.java
    incubator/uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/internal/util/XmlElementNameAndContents.java
    incubator/uima/uimaj/trunk/uimaj-core/src/test/resources/ExampleCas/simpleCas.xmi
Modified:
    incubator/uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/cas/impl/ListUtils.java
    incubator/uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/cas/impl/XmiCasDeserializer.java
    incubator/uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/cas/impl/XmiCasSerializer.java
    incubator/uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/cas/impl/XmiSerializationSharedData.java
    incubator/uima/uimaj/trunk/uimaj-core/src/test/java/org/apache/uima/cas/impl/XmiCasDeserializerTest.java
    incubator/uima/uimaj/trunk/uimaj-core/src/test/java/org/apache/uima/cas_data/impl/XCasToCasDataSaxHandlerTest.java
    incubator/uima/uimaj/trunk/uimaj-core/src/test/resources/ExampleCas/partialTestTypeSystem.xml

Modified: incubator/uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/cas/impl/ListUtils.java
URL: http://svn.apache.org/viewvc/incubator/uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/cas/impl/ListUtils.java?view=diff&rev=512404&r1=512403&r2=512404
==============================================================================
--- incubator/uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/cas/impl/ListUtils.java (original)
+++ incubator/uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/cas/impl/ListUtils.java Tue Feb 27 12:58:39 2007
@@ -24,7 +24,9 @@
 import java.util.ListIterator;
 
 import org.apache.uima.cas.CAS;
+import org.apache.uima.cas.impl.XmiSerializationSharedData.OotsElementData;
 import org.apache.uima.internal.util.IntVector;
+import org.apache.uima.internal.util.XmlAttribute;
 import org.apache.uima.internal.util.rb_trees.IntRedBlackTree;
 import org.apache.uima.util.Level;
 import org.apache.uima.util.Logger;
@@ -255,15 +257,27 @@
     curNode = addr;
     for (int i = 0; i < length; i++) {
       int heapVal = cas.getHeapValue(curNode + cas.getFeatureOffset(fsHeadFeat));
-      if (sharedData != null) {
-        strArray[i] = sharedData.getXmiId(heapVal);
-      } else {
-        strArray[i] = Integer.toString(heapVal);
-      }
-      if (strArray[i] == null) {
-        // special NULL object with xmi:id=0 is used to represent
-        // a null in an FSArray
+      if (heapVal == 0) {
+        //null value in list.  Represent with "0".
         strArray[i] = "0";
+        // However, this may be null because the element was originally a reference to an 
+        // out-of-typesystem FS, so check the XmiSerializationSharedData
+        if (sharedData != null) {
+          OotsElementData oed = sharedData.getOutOfTypeSystemFeatures(curNode);
+          if (oed != null) {
+            assert oed.attributes.size() == 1; //only the head feature can possibly be here
+            XmlAttribute attr = (XmlAttribute)oed.attributes.get(0);
+            assert CAS.FEATURE_BASE_NAME_HEAD.equals(attr.name);
+            strArray[i] = attr.value;
+          }
+        }        
+      }
+      else {
+        if (sharedData != null) {
+          strArray[i] = heapVal == 0 ? null : sharedData.getXmiId(heapVal);
+        } else {
+          strArray[i] = Integer.toString(heapVal);
+        }
       }
       curNode = cas.getHeapValue(curNode + cas.getFeatureOffset(fsTailFeat));
     }

Modified: incubator/uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/cas/impl/XmiCasDeserializer.java
URL: http://svn.apache.org/viewvc/incubator/uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/cas/impl/XmiCasDeserializer.java?view=diff&rev=512404&r1=512403&r2=512404
==============================================================================
--- incubator/uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/cas/impl/XmiCasDeserializer.java (original)
+++ incubator/uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/cas/impl/XmiCasDeserializer.java Tue Feb 27 12:58:39 2007
@@ -43,10 +43,12 @@
 import org.apache.uima.cas.SofaFS;
 import org.apache.uima.cas.Type;
 import org.apache.uima.cas.TypeSystem;
+import org.apache.uima.cas.impl.XmiSerializationSharedData.OotsElementData;
 import org.apache.uima.internal.util.I18nUtil;
 import org.apache.uima.internal.util.IntVector;
-import org.apache.uima.internal.util.rb_trees.IntRedBlackTree;
-import org.apache.uima.internal.util.rb_trees.IntRedBlackTree.IntRBTIterator;
+import org.apache.uima.internal.util.XmlAttribute;
+import org.apache.uima.internal.util.XmlElementName;
+import org.apache.uima.internal.util.XmlElementNameAndContents;
 import org.xml.sax.Attributes;
 import org.xml.sax.ContentHandler;
 import org.xml.sax.InputSource;
@@ -78,11 +80,16 @@
 
     // Inside a feature element. We expect the feature value.
     private static final int FEAT_CONTENT_STATE = 3;
-
+    
     // Inside an element with the XMI namespace - indicating content that's
     // not part of the typesystem and should be ignored.
     private static final int IGNORING_XMI_ELEMENTS_STATE = 4;
 
+    // Inside a reference feature element (e.g. <feat href="#1"/>).
+    // We expect no content, just the end of the element.
+    private static final int REF_FEAT_STATE = 5;
+
+
     // End parser states.
     // ///////////////////////////////////////////////////////////////////////
 
@@ -98,11 +105,9 @@
     // The CAS we're filling.
     private CASImpl cas;
 
-    // Store FSs with ID in a search tree (keys are xmi ID, values are CAS address)
-    private IntRedBlackTree fsTree;
-
-    // Store IDless FSs in a vector
-    private IntVector idLess;
+    // Store address of every FS we've deserialized, since we need to go back
+    // and apply fix-ups afterwards.
+    private IntVector deserializedFsAddrs;
 
     // Store a separate vector of FSList nodes that were deserialized from multivalued properties.
     // These are special because their "head" feature needs remapping but their "tail" feature
@@ -169,21 +174,53 @@
     // with a non-namespace-enabled SAX parser.
     private HashMap nsPrefixToUriMap = new HashMap();
 
+    // container for data shared between the XmiCasSerializer and
+    // XmiCasDeserializer, to support things such as consistency of IDs across
+    // multiple serializations.  This is also where the map from xmi:id to
+    // FS address is stored.
     private XmiSerializationSharedData sharedData;
 
     // number of Sofas found so far
     private int nextSofaNum;
+    
+    //used for merging multiple XMI CASes into one CAS object.
+    private int mergePoint;
+    
+    //Current out-of-typesystem element, if any
+    private OotsElementData outOfTypeSystemElement = null;
 
+    /**
+     * Creates a SAX handler used for deserializing an XMI CAS.
+     * @param aCAS CAS to deserialize into
+     * @param lenient if true, unknown types/features are tolerated (recorded as
+     *   out-of-typesystem data).  If false, they result in an exception.
+     * @param sharedData data structure used to allow the XmiCasSerializer and 
+     *   XmiCasDeserializer to share information.
+     * @param mergePoint used to support merging multiple XMI CASes.  If the
+     *   mergePoint is negative, "normal" deserialization will be done,
+     *   meaning the target CAS will be reset and the entire XMI content will
+     *   be deserialized.  If the mergePoint is nonnegative (including 0), the 
+     *   target CAS will not be reset, and only Feature Structures whose
+     *   xmi:id is strictly greater than the mergePoint value will be
+     *   deserialized. 
+     */
     private XmiCasDeserializerHandler(CASImpl aCAS, boolean lenient,
-            XmiSerializationSharedData sharedData) {
+            XmiSerializationSharedData sharedData, int mergePoint) {
       super();
       this.cas = aCAS.getBaseCAS();
       this.lenient = lenient;
-      this.sharedData = sharedData;
-      // Reset the CAS. Necessary to get Sofas to work properly.
-      cas.resetNoQuestions();
-      this.fsTree = new IntRedBlackTree();
-      this.idLess = new IntVector();
+      this.sharedData = 
+        sharedData != null ? sharedData : new XmiSerializationSharedData();
+      this.mergePoint = mergePoint;
+      if (mergePoint < 0) {
+        //If not merging, reset the CAS. 
+        //Necessary to get Sofas to work properly.
+        cas.resetNoQuestions();
+        
+        // clear ID mappings stored in the SharedData (from previous deserializations)
+        this.sharedData.clearIdMap();
+      }
+      this.deserializedFsAddrs = new IntVector();
       this.fsListNodesFromMultivaluedProperties = new IntVector();
       this.buffer = new StringBuffer();
       this.indexRepositories = new ArrayList();
@@ -197,11 +234,6 @@
       this.nextSofaNum = 2;
       this.listUtils = new ListUtils(cas, UIMAFramework.getLogger(XmiCasDeserializer.class), null);
 
-      // clear ID mappings stored in the SharedData (from previous deserializations)
-      if (this.sharedData != null) {
-        this.sharedData.clearIdMap();
-      }
-
       // populate feature type table
       this.featureType = new int[cas.ts.getNumberOfFeatures() + 1];
       FeatureImpl feat;
@@ -268,6 +300,18 @@
             this.ignoreDepth++;
             return;
           }
+          // if we're doing merging, skip elements whose ID is <= mergePoint
+          if (this.mergePoint >= 0) {
+            String id = attrs.getValue(ID_ATTR_NAME);
+            if (id != null) {
+              int idInt = Integer.parseInt(id);
+              if (idInt <= this.mergePoint) {
+                this.state = IGNORING_XMI_ELEMENTS_STATE;
+                this.ignoreDepth++;
+                return;
+              }
+            }
+          }
           if (nameSpaceURI == null || nameSpaceURI.length() == 0) {
             // parser may not be namespace-enabled, so try to resolve NS ourselves
             int colonIndex = qualifiedName.indexOf(':');
@@ -285,16 +329,49 @@
             }
           }
 
-          String typeName = xmiElementName2uimaTypeName(nameSpaceURI, localName);
-
-          readFS(typeName, attrs);
+          readFS(nameSpaceURI, localName, qualifiedName, attrs);
 
           multiValuedFeatures.clear();
           state = FEAT_STATE;
           break;
         }
         case FEAT_STATE: {
-          state = FEAT_CONTENT_STATE;
+          //parsing a feature recorded as a child element
+          //check for an "href" feature, used for references
+          String href = attrs.getValue("href");
+          if (href != null && href.startsWith("#")) {           
+            //for out-of-typesystem objects, there's special handling here
+            //to keep track of the fact this was an href so we re-serialize
+            //correctly.
+            if (this.outOfTypeSystemElement != null) {
+              XmlElementName elemName = new XmlElementName(nameSpaceURI, localName, qualifiedName);
+              List ootsAttrs = new ArrayList();
+              ootsAttrs.add(new XmlAttribute("href", href));
+              XmlElementNameAndContents elemWithContents = new XmlElementNameAndContents(elemName, null, ootsAttrs);
+              this.outOfTypeSystemElement.childElements.add(elemWithContents);
+            }
+            else {
+              //In-typesystem FS, so we can forget this was an href and just add
+              //the integer value, which will be interpreted as a reference later.
+              //NOTE: this will end up causing it to be reserialized as an attribute
+              //rather than an element, but that is not in violation of the XMI spec.
+              ArrayList valueList = (ArrayList) this.multiValuedFeatures.get(qualifiedName);
+              if (valueList == null) {
+                valueList = new ArrayList();
+                this.multiValuedFeatures.put(qualifiedName, valueList);
+              }
+              valueList.add(href.substring(1));
+            }                         
+            state = REF_FEAT_STATE;
+          }
+          else {
+            //non-reference feature, expecting feature value as character content
+            state = FEAT_CONTENT_STATE;
+          }
+          break;
+        }
+        case IGNORING_XMI_ELEMENTS_STATE: {
+          ignoreDepth++;
           break;
         }
         default: {
@@ -305,7 +382,10 @@
     }
 
     // Create a new FS.
-    private void readFS(String typeName, Attributes attrs) throws SAXParseException {
+    private void readFS(String nameSpaceURI, String localName, String qualifiedName, 
+            Attributes attrs) throws SAXException {
+      String typeName = xmiElementName2uimaTypeName(nameSpaceURI, localName);
+      
       currentType = (TypeImpl) ts.getType(typeName);
       if (currentType == null) {
         // ignore NULL type
@@ -317,8 +397,12 @@
           processView(attrs.getValue("sofa"), attrs.getValue("members"));
           return;
         }
+        // type is not in our type system
         if (!lenient) {
           throw createException(XCASParsingException.UNKNOWN_TYPE, typeName);
+        } else {
+          addToOutOfTypeSystemData(
+              new XmlElementName(nameSpaceURI, localName, qualifiedName), attrs);                  
         }
         return;
       } else if (cas.isArrayType(currentType)) {
@@ -365,7 +449,7 @@
         if (sofa != null) {
           // translate sofa's xmi:id into its sofanum
           int sofaXmiId = Integer.parseInt(sofa);
-          int sofaAddr = fsTree.get(sofaXmiId);
+          int sofaAddr = getFsAddrForXmiId(sofaXmiId);
           sofaNum = cas.getFeatureValue(sofaAddr, sofaNumFeatCode);
         }
         FSIndexRepositoryImpl indexRep = (FSIndexRepositoryImpl) indexRepositories.get(sofaNum);
@@ -374,18 +458,24 @@
         // intermediate String[]?
         String[] members = parseArray(membersString);
         for (int i = 0; i < members.length; i++) {
+          int id = Integer.parseInt(members[i]);
+          //if merging, don't try to index anything below the merge point
+          if (id <= this.mergePoint) {
+            continue;
+          }
           // have to map each ID to its "real" address (TODO: optimize?)
-          int addr;
           try {
-            addr = fsTree.get(Integer.parseInt(members[i]));
+            int addr = getFsAddrForXmiId(id);
+            indexRep.addFS(addr);
           } catch (NoSuchElementException e) {
-            if (!lenient)
+            if (!lenient) {
               throw e;
-            // when running in lenient mode, we will have skipped FSs that
-            // are of unknown types. So ignore members of the View which are not found.
-            continue;
+            }
+            else {
+              //unknown view member may be an OutOfTypeSystem FS
+              this.sharedData.addOutOfTypeSystemViewMember(sofa, members[i]);
+            }
           }
-          indexRep.addFS(addr);
         }
       }
     }
@@ -397,7 +487,7 @@
      * @throws SAXParseException
      */
     private void readFS(final int addr, Attributes attrs) throws SAXParseException {
-      // Hang on address for setting content feature
+      // Hang on to address for handling features encoded as child elements
       this.currentAddr = addr;
       int id = -1;
       String attrName, attrValue;
@@ -433,7 +523,7 @@
           } else if (sofaTypeCode == typeCode && attrName.equals(CAS.FEATURE_BASE_NAME_SOFANUM)) {
             attrValue = Integer.toString(thisSofaNum);
           }
-          handleFeature(type, addr, attrName, attrValue, false);
+          handleFeature(type, addr, attrName, attrValue);
         }
       }
       if (sofaTypeCode == typeCode) {
@@ -451,13 +541,9 @@
         ((CASImpl) view).registerView(sofa);
         views.add(view);
       }
-      if (id < 0) {
-        idLess.add(addr);
-      } else {
-        fsTree.put(id, addr);
-        if (sharedData != null) {
-          sharedData.addIdMapping(addr, id);
-        }
+      deserializedFsAddrs.add(addr);
+      if (id > 0) {
+        sharedData.addIdMapping(addr, id);
       }
     }
 
@@ -467,25 +553,29 @@
       return ((val == null) || (val.length() == 0));
     }
 
-    private void handleFeature(final Type type, int addr, String featName, String featVal,
-            boolean aLenient) throws SAXParseException {
+    private void handleFeature(final Type type, int addr, String featName, String featVal) throws SAXParseException {
       final FeatureImpl feat = (FeatureImpl) type.getFeatureByBaseName(featName);
       if (feat == null) {
-        if (!aLenient) {
+        if (!this.lenient) {
           throw createException(XCASParsingException.UNKNOWN_FEATURE, featName);
         }
+        else {
+          sharedData.addOutOfTypeSystemAttribute(addr, featName, featVal);
+        }
         return;
       }
       handleFeature(addr, feat.getCode(), featVal);
     }
 
-    private void handleFeature(final Type type, int addr, String featName, List featVals,
-            boolean aLenient) throws SAXParseException {
+    private void handleFeature(final Type type, int addr, String featName, List featVals) throws SAXParseException {
       final FeatureImpl feat = (FeatureImpl) type.getFeatureByBaseName(featName);
       if (feat == null) {
-        if (!aLenient) {
+        if (!this.lenient) {
           throw createException(XCASParsingException.UNKNOWN_FEATURE, featName);
         }
+        else {
+          sharedData.addOutOfTypeSystemChildElements(addr, featName, featVals);
+        }
         return;
       }
       handleFeature(addr, feat.getCode(), featVals);
@@ -511,7 +601,7 @@
                 // special handling for "sofa" feature of annotation. Need to change
                 // it from a sofa reference into a sofa number
                 int sofaXmiId = Integer.parseInt(featVal);
-                int sofaAddr = fsTree.get(sofaXmiId);
+                int sofaAddr = getFsAddrForXmiId(sofaXmiId);
                 int sofaNum = cas.getFeatureValue(sofaAddr, sofaNumFeatCode);
                 cas.setFeatureValue(addr, featCode, sofaNum);
               } else {
@@ -751,13 +841,9 @@
         cas.setArrayValueFromString(casArray, i, stringVal);
       }
 
-      if (xmiId < 0) {
-        idLess.add(casArray);
-      } else {
-        fsTree.put(xmiId, casArray);
-        if (sharedData != null) {
-          sharedData.addIdMapping(casArray, xmiId);
-        }
+      deserializedFsAddrs.add(casArray);
+      if (xmiId > 0) {
+        sharedData.addIdMapping(casArray, xmiId);
       }
       return casArray;
     }
@@ -782,13 +868,9 @@
       }
 
       int arrayAddr = ((FeatureStructureImpl) fs).getAddress();
-      if (xmiId < 0) {
-        idLess.add(arrayAddr);
-      } else {
-        fsTree.put(xmiId, arrayAddr);
-        if (sharedData != null) {
-          sharedData.addIdMapping(arrayAddr, xmiId);
-        }
+      deserializedFsAddrs.add(arrayAddr);
+      if (xmiId > 0) {
+        sharedData.addIdMapping(arrayAddr, xmiId);
       }
       return arrayAddr;
     }
@@ -859,10 +941,26 @@
           this.state = FEAT_STATE;
           break;
         }
+        case REF_FEAT_STATE: {
+          this.state = FEAT_STATE;
+          break;
+        }
         case FEAT_STATE: {
           // end of FS. Process multi-valued features or array elements that were
           // encoded as subelements
-          if (currentType != null) {
+          if (this.outOfTypeSystemElement != null) {
+            if (!this.multiValuedFeatures.isEmpty()) {
+              Iterator iter = this.multiValuedFeatures.entrySet().iterator();
+              while (iter.hasNext()) {
+                Map.Entry entry = (Map.Entry) iter.next();
+                String featName = (String) entry.getKey();
+                List featVals = (List) entry.getValue();
+                addOutOfTypeSystemFeature(outOfTypeSystemElement, featName, featVals);
+              }
+            }
+            this.outOfTypeSystemElement = null;
+          }
+          else if (currentType != null) {
             if (cas.isArrayType(currentType) && !cas.isByteArrayType(currentType)) {
               // create the array now. elements may have been provided either as
               // attributes or child elements, but not both.
@@ -882,7 +980,7 @@
                 Map.Entry entry = (Map.Entry) iter.next();
                 String featName = (String) entry.getKey();
                 List featVals = (List) entry.getValue();
-                handleFeature(currentType, currentAddr, featName, featVals, false);
+                handleFeature(currentType, currentAddr, featName, featVals);
               }
             }
           }
@@ -905,24 +1003,9 @@
      * @see org.xml.sax.ContentHandler#endDocument()
      */
     public void endDocument() throws SAXException {
-      // time = System.currentTimeMillis() - time;
-      // System.out.println("Done reading xml data in " + new TimeSpan(time));
-      // System.out.println(
-      // "Resolving references for id data (" + fsTree.size() + ").");
-      // time = System.currentTimeMillis();
-
       // Resolve ID references, and add FSs to indexes
-      IntRBTIterator it = fsTree.iterator();
-      while (it.hasNext()) {
-        finalizeFS(it.next());
-      }
-      // time = System.currentTimeMillis() - time;
-      // System.out.println("Done in " + new TimeSpan(time));
-      // System.out.println(
-      // "Resolving references for non-id data (" + idLess.size() + ").");
-      // time = System.currentTimeMillis();
-      for (int i = 0; i < idLess.size(); i++) {
-        finalizeFS(idLess.get(i));
+      for (int i = 0; i < deserializedFsAddrs.size(); i++) {
+        finalizeFS(deserializedFsAddrs.get(i));
       }
       for (int i = 0; i < fsListNodesFromMultivaluedProperties.size(); i++) {
         remapFSListHeads(fsListNodesFromMultivaluedProperties.get(i));
@@ -961,12 +1044,18 @@
           if (featVal != CASImpl.NULL) {
             int fsValAddr = CASImpl.NULL;
             try {
-              fsValAddr = fsTree.get(featVal);
+              fsValAddr = getFsAddrForXmiId(featVal);
             } catch (NoSuchElementException e) {
-              if (!lenient)
+              if (!lenient) {
                 throw e;
-              // if running in lenient mode, we may not have deserialized the value of this
-              // feature because it was of unknown type. So set it to null.
+              }
+              else {
+                // we may not have deserialized the value of this feature because it 
+                // was of unknown type.  We set it to null, and record in the
+                // out-of-typesystem data.
+                this.sharedData.addOutOfTypeSystemAttribute(
+                        addr, feat.getShortName(), Integer.toString(featVal));
+              }
             }
             cas.setFeatureValue(addr, feats[i], fsValAddr);
           }
@@ -981,7 +1070,6 @@
      * 
      * @param i
      */
-
     private void remapFSListHeads(int addr) {
       final int type = cas.getHeapValue(addr);
       if (!listUtils.isFsListType(type))
@@ -994,12 +1082,15 @@
       if (featVal != CASImpl.NULL) {
         int fsValAddr = CASImpl.NULL;
         try {
-          fsValAddr = fsTree.get(featVal);
+          fsValAddr = getFsAddrForXmiId(featVal);
         } catch (NoSuchElementException e) {
-          if (!lenient)
+          if (!lenient) {
             throw e;
-          // if running in lenient mode, we may not have deserialized the value of this
-          // element because it was of unknown type. So we set the element to null.
+          }
+          else {
+            //this may be a reference to an out-of-typesystem FS
+            this.sharedData.addOutOfTypeSystemAttribute(addr, CAS.FEATURE_BASE_NAME_HEAD, Integer.toString(featVal));
+          }
         }
         cas.setFeatureValue(addr, headFeat, fsValAddr);
       }
@@ -1024,12 +1115,16 @@
         if (arrayVal != CASImpl.NULL) {
           int arrayValAddr = CASImpl.NULL;
           try {
-            arrayValAddr = fsTree.get(arrayVal);
+            arrayValAddr = getFsAddrForXmiId(arrayVal);
           } catch (NoSuchElementException e) {
-            if (!lenient)
+            if (!lenient) {
               throw e;
-            // if running in lenient mode, we may not have deserialized the value of this
-            // element because it was of unknown type. So we set the element to null.
+            }
+            else {  
+              // the array element may be out of typesystem.  In that case set it
+              // to null, but record the id so we can add it back on next serialization.
+              this.sharedData.addOutOfTypeSystemArrayElement(addr, i, arrayVal);
+            }
           }
           cas.setArrayValue(addr, i, arrayValAddr);
         }
@@ -1142,8 +1237,64 @@
       }
       return cas.ll_getTypeClass(type);
     }
-  }
+    
+    /**
+     * Gets the FS address into which the XMI element with the given ID
+     * was deserialized.  This method supports merging multiple XMI documents
+     * into a single CAS, by checking the XmiSerializationSharedData
+     * structure to get the address of elements that were skipped during this
+     * deserialization but were deserialized during a previous deserialization.
+     * 
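+     * @param xmiId the xmi:id of the element to look up
+     * @return the FS address the shared data maps to that xmi:id; NoSuchElementException is thrown if it is not positive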
+     */
+    private int getFsAddrForXmiId(int xmiId) {
+      int addr = sharedData.getFsAddrForXmiId(xmiId);
+      if (addr > 0)
+        return addr;
+      else
+        throw new java.util.NoSuchElementException();
+    }
+    
+    /**
+     * Adds a feature structure to the out-of-typesystem data.  Also sets the
+     * this.outOfTypeSystemElement field, which is referred to later if we have to
+     * handle features recorded as child elements.
+     */
+    private void addToOutOfTypeSystemData(XmlElementName xmlElementName, Attributes attrs)
+            throws XCASParsingException {
+      this.outOfTypeSystemElement = new OotsElementData();
+      this.outOfTypeSystemElement.elementName = xmlElementName;
+      String attrName, attrValue;
+      for (int i = 0; i < attrs.getLength(); i++) {
+        attrName = attrs.getQName(i);
+        attrValue = attrs.getValue(i);
+        if (attrName.equals(ID_ATTR_NAME)) {
+          this.outOfTypeSystemElement.xmiId = attrValue;
+        }
+        else {
+          this.outOfTypeSystemElement.attributes.add(
+                  new XmlAttribute(attrName, attrValue));
+        }
+      }
+      this.sharedData.addOutOfTypeSystemElement(this.outOfTypeSystemElement);
+    }    
 
+    /**
+     * Adds a feature to the out-of-typesystem features list.
+     * @param ootsElem object to which to add the feature
+     * @param featName name of feature
+     * @param featVals feature values, as a list of strings
+     */
+    private void addOutOfTypeSystemFeature(OotsElementData ootsElem, String featName, List featVals) {
+      Iterator iter = featVals.iterator();
+      XmlElementName elemName = new XmlElementName(null,featName,featName);
+      while (iter.hasNext()) {
+        ootsElem.childElements.add(new XmlElementNameAndContents(elemName, (String)iter.next()));
+      }
+    } 
+  }
+  
   private TypeSystemImpl ts;
 
   private Map xmiNamespaceToUimaNamespaceMap = new HashMap();
@@ -1191,7 +1342,7 @@
    * @return The <code>DefaultHandler</code> to pass to the SAX parser.
    */
   public DefaultHandler getXmiCasHandler(CAS cas, boolean lenient) {
-    return new XmiCasDeserializerHandler((CASImpl) cas, lenient, null);
+    return new XmiCasDeserializerHandler((CASImpl) cas, lenient, null, -1);
   }
 
   /**
@@ -1213,8 +1364,35 @@
    */
   public DefaultHandler getXmiCasHandler(CAS cas, boolean lenient,
           XmiSerializationSharedData sharedData) {
-    return new XmiCasDeserializerHandler((CASImpl) cas, lenient, sharedData);
+    return new XmiCasDeserializerHandler((CASImpl) cas, lenient, sharedData, -1);
   }
+  
+  /**
+   * Create a handler for deserializing a CAS from XMI, with support for merging the XMI
+   * content into a CAS that already holds data. Leniency is controlled by the
+   * <code>lenient</code> argument: when it is false, Types referenced by the XMI that are not
+   * in the Type System cause an Exception to be thrown; when it is true, they do not.
+   * 
+   * @param cas
+   *          This CAS will be used to hold the data deserialized from the XMI
+   * @param lenient
+   *          if true, unknown Types will be ignored. If false, unknown Types will cause an
+   *          exception. The default is false.
+   * @param sharedData
+   *          data structure used to allow the XmiCasSerializer and XmiCasDeserializer to share
+   *          information.
+   * @param mergePoint
+   *          used to support merging multiple XMI CASes. If the mergePoint is negative, "normal"
+   *          deserialization will be done, meaning the target CAS will be reset and the entire XMI
+   *          content will be deserialized. If the mergePoint is nonnegative (including 0), the
+   *          target CAS will not be reset, and only Feature Structures whose xmi:id is strictly
+   *          greater than the mergePoint value will be deserialized.
+   * @return The <code>DefaultHandler</code> to pass to the SAX parser.
+   */
+  public DefaultHandler getXmiCasHandler(CAS cas, boolean lenient,
+          XmiSerializationSharedData sharedData, int mergePoint) {
+    return new XmiCasDeserializerHandler((CASImpl) cas, lenient, sharedData, mergePoint);
+  }  
 
   /**
    * Deserializes a CAS from XMI.
@@ -1231,7 +1409,7 @@
    *           if an I/O failure occurs
    */
   public static void deserialize(InputStream aStream, CAS aCAS) throws SAXException, IOException {
-    XmiCasDeserializer.deserialize(aStream, aCAS, false);
+    XmiCasDeserializer.deserialize(aStream, aCAS, false, null, -1);
   }
 
   /**
@@ -1253,13 +1431,70 @@
    */
   public static void deserialize(InputStream aStream, CAS aCAS, boolean aLenient)
           throws SAXException, IOException {
+    deserialize(aStream, aCAS, aLenient, null, -1);
+  }
+
+  /**
+   * Deserializes a CAS from XMI.
+   * 
+   * @param aStream
+   *          input stream from which to read the XMI document
+   * @param aCAS
+   *          CAS into which to deserialize. This CAS must be set up with a type system that is
+   *          compatible with that in the XMI
+   * @param aLenient
+   *          if true, unknown Types will be ignored. If false, unknown Types will cause an
+   *          exception. The default is false.
+   * @param aSharedData
+   *          an optional container for data that is shared between the {@link XmiCasSerializer} and the 
+   *          {@link XmiCasDeserializer}.  See the JavaDocs for {@link XmiSerializationSharedData} for details.
+   * 
+   * @throws SAXException
+   *           if an XML Parsing error occurs
+   * @throws IOException
+   *           if an I/O failure occurs
+   */
+  public static void deserialize(InputStream aStream, CAS aCAS, boolean aLenient,
+          XmiSerializationSharedData aSharedData)
+          throws SAXException, IOException {
+    deserialize(aStream, aCAS, aLenient, aSharedData, -1);
+  }
+  
+  /**
+   * Deserializes a CAS from XMI.  This version of this method supports merging multiple XMI documents into a single CAS.
+   * 
+   * @param aStream
+   *          input stream from which to read the XMI document
+   * @param aCAS
+   *          CAS into which to deserialize. This CAS must be set up with a type system that is
+   *          compatible with that in the XMI
+   * @param aLenient
+   *          if true, unknown Types will be ignored. If false, unknown Types will cause an
+   *          exception. The default is false.
+   * @param aSharedData
+   *          a container for data that is shared between the {@link XmiCasSerializer} and the {@link XmiCasDeserializer}.
+   *          See the JavaDocs for {@link XmiSerializationSharedData} for details.
+   * @param aMergePoint
+   *          used to support merging multiple XMI CASes. If the mergePoint is negative, "normal"
+   *          deserialization will be done, meaning the target CAS will be reset and the entire XMI
+   *          content will be deserialized. If the mergePoint is nonnegative (including 0), the
+   *          target CAS will not be reset, and only Feature Structures whose xmi:id is strictly
+   *          greater than the mergePoint value will be deserialized. 
+   * @throws SAXException
+   *           if an XML Parsing error occurs
+   * @throws IOException
+   *           if an I/O failure occurs
+   */
+  public static void deserialize(InputStream aStream, CAS aCAS, boolean aLenient,
+          XmiSerializationSharedData aSharedData, int aMergePoint)
+          throws SAXException, IOException {
     XMLReader xmlReader = XMLReaderFactory.createXMLReader();
     XmiCasDeserializer deser = new XmiCasDeserializer(aCAS.getTypeSystem());
-    ContentHandler handler = deser.getXmiCasHandler(aCAS, aLenient);
+    ContentHandler handler = deser.getXmiCasHandler(aCAS, aLenient, aSharedData, aMergePoint);
     xmlReader.setContentHandler(handler);
     xmlReader.parse(new InputSource(aStream));
-  }
-
+  }  
+  
   /**
    * Converts an XMI element name to a UIMA-style dotted type name.
    * 

Modified: incubator/uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/cas/impl/XmiCasSerializer.java
URL: http://svn.apache.org/viewvc/incubator/uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/cas/impl/XmiCasSerializer.java?view=diff&rev=512404&r1=512403&r2=512404
==============================================================================
--- incubator/uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/cas/impl/XmiCasSerializer.java (original)
+++ incubator/uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/cas/impl/XmiCasSerializer.java Tue Feb 27 12:58:39 2007
@@ -37,8 +37,13 @@
 import org.apache.uima.cas.FSIndex;
 import org.apache.uima.cas.StringArrayFS;
 import org.apache.uima.cas.TypeSystem;
+import org.apache.uima.cas.impl.XmiSerializationSharedData.OotsElementData;
+import org.apache.uima.cas.impl.XmiSerializationSharedData.XmiArrayElement;
 import org.apache.uima.internal.util.IntStack;
 import org.apache.uima.internal.util.IntVector;
+import org.apache.uima.internal.util.XmlAttribute;
+import org.apache.uima.internal.util.XmlElementName;
+import org.apache.uima.internal.util.XmlElementNameAndContents;
 import org.apache.uima.internal.util.rb_trees.IntRedBlackTree;
 import org.apache.uima.util.Level;
 import org.apache.uima.util.Logger;
@@ -188,19 +193,24 @@
      * Starts serialization
      */
     private void serialize() throws IOException, SAXException {
-      // populate nsUriToPrefixMap and xmiTypeNames structures based on CAS type system
+      // populate nsUriToPrefixMap and xmiTypeNames structures based on CAS 
+      // type system, and out-of-typesystem data if any
       initTypeAndNamespaceMappings();
 
       int iElementCount = 1; // start at 1 to account for special NULL object
 
+      enqueueIncoming(); //make sure we enqueue every FS that was deserialized into this CAS
       enqueueIndexed();
       enqueueFeaturesOfIndexed();
       iElementCount += indexedFSs.size();
       iElementCount += queue.size();
 
       FSIndex sofaIndex = cas.getBaseCAS().indexRepository.getIndex(CAS.SOFA_INDEX_NAME);
-      iElementCount += (sofaIndex.size() + 1); // one View element per sofa, plus 1 for base
-
+      iElementCount += (sofaIndex.size()); // one View element per sofa
+      if (this.sharedData != null) {
+        iElementCount += this.sharedData.getOutOfTypeSystemElements().size();
+      }
+      
       workAttrs.clear();
       computeNamespaceDeclarationAttrs(workAttrs);
       workAttrs.addAttribute(XMI_NS_URI, XMI_VERSION_LOCAL_NAME, XMI_VERSION_QNAME, "CDATA",
@@ -210,6 +220,7 @@
       writeNullObject(); // encodes 1 element
       encodeIndexed(); // encodes indexedFSs.size() element
       encodeQueued(); // encodes queue.size() elements
+      serializeOutOfTypeSystemElements(); //encodes sharedData.getOutOfTypeSystemElements().size() elements
       writeViews(); // encodes cas.sofaCount + 1 elements
       endElement(XMI_TAG);
     }
@@ -237,15 +248,25 @@
       if (sofaXmiId != null && sofaXmiId.length() > 0) {
         addAttribute(workAttrs, "sofa", sofaXmiId);
       }
-      if (members.length > 0) {
-        StringBuffer membersString = new StringBuffer();
-        for (int i = 0; i < members.length; i++) {
-          String xmiId = getXmiId(members[i]);
-          if (xmiId != null) // to catch filtered FS
-          {
-            membersString.append(xmiId).append(' ');
+      StringBuffer membersString = new StringBuffer();
+      for (int i = 0; i < members.length; i++) {
+        String xmiId = getXmiId(members[i]);
+        if (xmiId != null) // to catch filtered FS
+        {
+          membersString.append(xmiId).append(' ');
+        }
+      }
+      //check for out-of-typesystem members
+      if (this.sharedData != null) {
+        List ootsMembers = this.sharedData.getOutOfTypeSystemViewMembers(sofaXmiId);
+        if (ootsMembers != null) {
+          Iterator iter = ootsMembers.iterator();
+          while (iter.hasNext()) {
+            membersString.append((String)iter.next()).append(' ');
           }
         }
+      }
+      if (membersString.length() > 0) {
         // remove trailing space before adding to attributes
         addAttribute(workAttrs, "members", membersString.substring(0, membersString.length() - 1));
       }
@@ -296,13 +317,29 @@
     }
 
     /**
+     * Enqueues all FS that are stored in the XmiSerializationSharedData's id map.
+     * This map is populated during the previous deserialization.  This method
+     * is used to make sure that all incoming FS are echoed in the next
+     * serialization.
+     */
+    private void enqueueIncoming() {
+      if (this.sharedData == null)
+        return;
+      
+      int[] fsAddrs = this.sharedData.getAllFsAddressesInIdMap();
+      for (int i = 0; i < fsAddrs.length; i++) {
+        enqueueIndexedFs(fsAddrs[i]);
+      }
+    }
+    
+    /**
      * Push the indexed FSs onto the queue.
      */
     private void enqueueIndexed() {
       FSIndexRepositoryImpl ir = (FSIndexRepositoryImpl) cas.getBaseCAS().getBaseIndexRepository();
       int[] fsarray = ir.getIndexedFSs();
       for (int k = 0; k < fsarray.length; k++) {
-        enqueueIndexedFs(fsarray[k], 0);
+        enqueueIndexedFs(fsarray[k]);
       }
 
       // FSIndex sofaIndex = cas.getBaseCAS().indexRepository.getIndex(CAS.SOFA_INDEX_NAME);
@@ -319,7 +356,7 @@
         if (loopIR != null) {
           fsarray = loopIR.getIndexedFSs();
           for (int k = 0; k < fsarray.length; k++) {
-            enqueueIndexedFs(fsarray[k], sofaNum);
+            enqueueIndexedFs(fsarray[k]);
           }
         }
       }
@@ -340,7 +377,7 @@
     /**
      * Enqueues an indexed FS. Does NOT enqueue features at this point.
      */
-    private void enqueueIndexedFs(int addr, int indexRep) {
+    private void enqueueIndexedFs(int addr) {
       if (isVisited(addr)) {
         return;
       }
@@ -729,7 +766,7 @@
           case TYPE_CLASS_FLOATLIST:
           case TYPE_CLASS_FSLIST: {
             // If the feature has multipleReferencesAllowed = true OR if we're already
-            // inside another list node (i.e. this is the "tail" feature).
+            // inside another list node (i.e. this is the "tail" feature), serialize as a normal FS.
             // Otherwise, serialize as a multi-valued property.
             if (cas.ts.getFeature(feats[i]).isMultipleReferencesAllowed() || insideListNode) {
               attrValue = getXmiId(featVal);
@@ -768,6 +805,21 @@
           addAttribute(attrs, featName, attrValue);
         }
       }
+      
+      //add out-of-typesystem features, if any
+      if (this.sharedData != null) {
+        OotsElementData oed = this.sharedData.getOutOfTypeSystemFeatures(addr);
+        if (oed != null) {
+          //attributes
+          Iterator attrIter = oed.attributes.iterator();
+          while (attrIter.hasNext()) {
+            XmlAttribute attr = (XmlAttribute)attrIter.next();
+            addAttribute(workAttrs, attr.name, attr.value);
+          }
+          //child elements
+          childElements.addAll(oed.childElements);
+        }
+      }
       return childElements;
     }
 
@@ -832,6 +884,9 @@
       String elemStr = null;
       if (arrayType == LowLevelCAS.TYPE_CLASS_FSARRAY) {
         int pos = cas.getArrayStartAddress(addr);
+        List ootsArrayElementsList = this.sharedData == null ? null : 
+                this.sharedData.getOutOfTypeSystemArrayElements(addr);
+        int ootsIndex = 0;
         for (int j = 0; j < size; j++) {
           int heapValue = cas.getHeapValue(pos++);
           elemStr = null;
@@ -842,6 +897,17 @@
             // special NULL object with xmi:id=0 is used to represent
             // a null in an FSArray
             elemStr = "0";
+            // However, this null array element might have been a reference to an 
+            //out-of-typesystem FS, so check the ootsArrayElementsList
+            if (ootsArrayElementsList != null) {
+              while (ootsIndex < ootsArrayElementsList.size()) {
+                XmiArrayElement arel =(XmiArrayElement)ootsArrayElementsList.get(ootsIndex++);
+                if (arel.index == j) {
+                  elemStr = arel.xmiId;
+                  break;
+                }                
+              }
+            }
           }
           if (buf.length() > 0) {
             buf.append(' ');
@@ -990,6 +1056,22 @@
       nsUriToPrefixMap.put(XMI_NS_URI, XMI_NS_PREFIX);
       xmiTypeNames = new XmlElementName[cas.ts.getLargestTypeCode() + 1];
 
+      //Add any namespace prefix mappings used by out of type system data.
+      //Need to do this before the in-typesystem namespaces so that the prefix
+      //used here are reserved and won't be reused for any in-typesystem namespaces.
+      if (this.sharedData != null) {
+        Iterator ootsIter = this.sharedData.getOutOfTypeSystemElements().iterator();
+        while (ootsIter.hasNext()) {
+          OotsElementData oed = (OotsElementData)ootsIter.next();
+          String nsUri = oed.elementName.nsUri;
+          String qname = oed.elementName.qName;
+          String localName = oed.elementName.localName;
+          String prefix = qname.substring(0, qname.indexOf(localName)-1);
+          nsUriToPrefixMap.put(nsUri, prefix);
+          nsPrefixesUsed.add(prefix);
+        }
+      }
+      
       Iterator it = cas.ts.getTypeIterator();
       while (it.hasNext()) {
         TypeImpl t = (TypeImpl) it.next();
@@ -1046,35 +1128,55 @@
 
       return new XmlElementName(nsUri, shortName, prefix + ':' + shortName);
     }
-  }
-
-  /**
-   * Inner class used to encapsulate the different pieces of information that make up the name of an
-   * XML element - namely, the Namespace URI, the local name, and the qname (qualified name).
-   */
-  static class XmlElementName {
-    XmlElementName(String nsUri, String localName, String qName) {
-      this.nsUri = nsUri;
-      this.localName = localName;
-      this.qName = qName;
-    }
-
-    String nsUri;
-
-    String localName;
-
-    String qName;
-  }
-
-  static class XmlElementNameAndContents {
-    XmlElementNameAndContents(XmlElementName name, String contents) {
-      this.name = name;
-      this.contents = contents;
-    }
-
-    XmlElementName name;
-
-    String contents;
+    
+    /**
+     * Serializes all of the out-of-typesystem elements that were recorded
+     * in the XmiSerializationSharedData during the last deserialization.
+     */
+    private void serializeOutOfTypeSystemElements() throws SAXException {
+      if (this.sharedData == null)
+        return;
+      Iterator it = this.sharedData.getOutOfTypeSystemElements().iterator();
+      while (it.hasNext()) {
+        OotsElementData oed = (OotsElementData)it.next();
+        workAttrs.clear();
+        // Add ID attribute
+        addAttribute(workAttrs, ID_ATTR_NAME, oed.xmiId);
+
+        // Add other attributes
+        Iterator attrIt = oed.attributes.iterator();
+        while (attrIt.hasNext()) {
+          XmlAttribute attr = (XmlAttribute) attrIt.next();
+          addAttribute(workAttrs, attr.name, attr.value);
+        }
+        
+        // serialize element
+        startElement(oed.elementName, workAttrs, oed.childElements.size());
+        
+        //serialize features encoded as child elements
+        Iterator childElemIt = oed.childElements.iterator();
+        while (childElemIt.hasNext()) {
+          XmlElementNameAndContents child = (XmlElementNameAndContents)childElemIt.next();
+          workAttrs.clear();
+          Iterator attrIter = child.attributes.iterator();
+          while (attrIter.hasNext()) {
+            XmlAttribute attr =(XmlAttribute)attrIter.next();
+            addAttribute(workAttrs, attr.name, attr.value);
+          }
+          
+          if (child.contents != null) {
+            startElement(child.name, workAttrs, 1);
+            addText(child.contents);
+          }
+          else {
+            startElement(child.name, workAttrs, 0);            
+          }
+          endElement(child.name);
+        }
+        
+        endElement(oed.elementName);
+      }
+    } 
   }
 
   public static final String XMI_NS_URI = "http://www.omg.org/XMI";
@@ -1139,6 +1241,7 @@
    *          the <code>serialize</code> method that contains types and features that are not in
    *          this typesystem, the serialization will not contain instances of those types or values
    *          for those features. So this can be used to filter the results of serialization.
+   *          A null value indicates that all types and features will be serialized.
    */
   public XmiCasSerializer(TypeSystem ts) {
     this(ts, (Map) null);
@@ -1258,9 +1361,7 @@
    *           if an I/O failure occurs
    */
   public static void serialize(CAS aCAS, OutputStream aStream) throws SAXException, IOException {
-    XmiCasSerializer xmiCasSerializer = new XmiCasSerializer(aCAS.getTypeSystem());
-    XMLSerializer sax2xml = new XMLSerializer(aStream, false);
-    xmiCasSerializer.serialize(aCAS, sax2xml.getContentHandler());
+    serialize(aCAS, null, aStream, false, null);
   }
 
   /**
@@ -1271,7 +1372,8 @@
    *          CAS to serialize.
    * @param aTargetTypeSystem
    *          type system to which the produced XMI will conform. Any types or features not in the
-   *          target type system will not be serialized.
+   *          target type system will not be serialized.  A null value indicates that all types and features
+   *          will be serialized.
    * @param aStream
    *          output stream to which to write the XMI document
    * 
@@ -1282,8 +1384,36 @@
    */
   public static void serialize(CAS aCAS, TypeSystem aTargetTypeSystem, OutputStream aStream)
           throws SAXException, IOException {
-    XmiCasSerializer xmiCasSerializer = new XmiCasSerializer(aTargetTypeSystem);
-    XMLSerializer sax2xml = new XMLSerializer(aStream, false);
-    xmiCasSerializer.serialize(aCAS, sax2xml.getContentHandler());
+    serialize(aCAS, aTargetTypeSystem, aStream, false, null);
   }
+  
+  /**
+   * Serializes a CAS to an XMI stream.  This version of this method allows many options to be configured.
+   * 
+   * @param aCAS
+   *          CAS to serialize.
+   * @param aTargetTypeSystem
+   *          type system to which the produced XMI will conform. Any types or features not in the
+   *          target type system will not be serialized.  A null value indicates that all types and features
+   *          will be serialized.
+   * @param aStream
+   *          output stream to which to write the XMI document
+   * @param aPrettyPrint
+   *          if true the XML output will be formatted with newlines and indenting.  If false it will be unformatted.
+   * @param aSharedData
+   *          an optional container for data that is shared between the {@link XmiCasSerializer} and the {@link XmiCasDeserializer}.
+   *          See the JavaDocs for {@link XmiSerializationSharedData} for details.
+   * 
+   * @throws SAXException
+   *           if a problem occurs during XMI serialization
+   * @throws IOException
+   *           if an I/O failure occurs
+   */
+  public static void serialize(CAS aCAS, TypeSystem aTargetTypeSystem, OutputStream aStream, boolean aPrettyPrint, 
+          XmiSerializationSharedData aSharedData)
+          throws SAXException, IOException {
+    XmiCasSerializer xmiCasSerializer = new XmiCasSerializer(aTargetTypeSystem);
+    XMLSerializer sax2xml = new XMLSerializer(aStream, aPrettyPrint);
+    xmiCasSerializer.serialize(aCAS, sax2xml.getContentHandler(), null, aSharedData);
+  }  
 }

Modified: incubator/uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/cas/impl/XmiSerializationSharedData.java
URL: http://svn.apache.org/viewvc/incubator/uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/cas/impl/XmiSerializationSharedData.java?view=diff&rev=512404&r1=512403&r2=512404
==============================================================================
--- incubator/uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/cas/impl/XmiSerializationSharedData.java (original)
+++ incubator/uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/cas/impl/XmiSerializationSharedData.java Tue Feb 27 12:58:39 2007
@@ -19,26 +19,82 @@
 
 package org.apache.uima.cas.impl;
 
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashMap;
 import java.util.HashSet;
 import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
 import java.util.Set;
 
+import org.apache.uima.internal.util.XmlAttribute;
+import org.apache.uima.internal.util.XmlElementName;
+import org.apache.uima.internal.util.XmlElementNameAndContents;
 import org.apache.uima.internal.util.rb_trees.RedBlackTree;
 
 /**
- * Holds information that is shared between the XmiCasSerializer and the XmiCasDeserializer. This
- * allows consistency of XMI IDs across serializations, and also provides the ability to filter out
- * some FSs during serialization (e.g. to send to a service) and then reintegrate those FSs during
- * the next deserialization.
+ * A container for data that is shared between the {@link XmiCasSerializer} and the {@link XmiCasDeserializer}.
+ * This has a number of uses:
+ * <ul>
+ *   <li>Consistency of xmi:id values across serializations.  If you pass an <code>XmiSerializationSharedData</code>
+ *       instance to the deserializer, the deserializer will store information about the incoming xmi:id values.  If you then pass
+ *       the same <code>XmiSerializationSharedData</code> object to the serializer when you attempt to serialize the same CAS,
+ *       all of the incoming FS will be serialized using the same xmi:id value that they had when they were deserialized.</li>
+ *   <li>Support for "out-of-typesystem data".  If you call the XMI deserializer with the <code>lenient</code> parameter set to true,
+ *       whenever it encounters an XMI element that doesn't correspond to a type in the type system, it will populate the
+ *       <code>XmiSerializationSharedData</code> with information about these elements.  If you then pass the same 
+ *       <code>XmiSerializationSharedData</code> object to the serializer when you attempt to serialize the same CAS, these
+ *       out-of-typesystem FS will be reserialized without loss of information.  References between in-typesystem and out-of-typesystem
+ *       FS (in either direction) are maintained as well.</li>
+ *   <li>After calling the XmiCasSerializer and passing an <code>XmiSerializationSharedData</code>, you can call the
+ *       {@link #getMaxXmiId()} method to get the maximum xmi:id value in the serialized CAS.  This feature, along with the consistency of
+ *       xmi:id values, allows merging multiple XMI documents into a single CAS.  See TODO.</li>
+ * </ul>
  * 
  */
 public class XmiSerializationSharedData {
   /**
-   * A map from FeatureStructure address to XMI ID. This is built during deserialization, then used
-   * by the next serialization to ensure consistent IDs.
+   * A map from FeatureStructure address to xmi:id. This is populated whenever
+   * an XMI element is serialized or deserialized.  It is used by the
+   * getXmiId() method, which ensures a consistent ID for each FS
+   * address across multiple serializations.
    */
   private RedBlackTree fsAddrToXmiIdMap = new RedBlackTree();
+  
+  /** 
+   * A map from xmi:id to FeatureStructure address.  This is populated whenever
+   * an XMI element is serialized or deserialized.  It is used by the
+   * getFsAddrForXmiId() method, necessary to support merging multiple XMI
+   * CASes into the same CAS object.
+   **/
+  private RedBlackTree xmiIdToFsAddrMap = new RedBlackTree();
+  
+  /**
+   * List of OotsElementData objects, each of which captures information about
+   * incoming XMI elements that did not correspond to any type in the type system.
+   */
+  private List ootsFs = new ArrayList();
+  
+  /**
+   * Map from the xmi:id (String) of a Sofa to a List of xmi:id's (Strings) for
+   * the out-of-typesystem FSs that are members of that Sofa's view.
+   */
+  private Map ootsViewMembers = new HashMap();
 
+  /** Map from Feature Structure address (Integer) to OotsElementData object, capturing information 
+   * about out-of-typesystem features that were part of an in-typesystem FS.  These include both
+   * features not defined in the typesystem and features that are references to out-of-typesystem
+   * elements.  This information needs to be included when the FS is subsequently serialized.
+   */
+  private Map ootsFeatures = new HashMap();
+  
+  /** Map from Feature Structure address (Integer) of an FSArray to a list of 
+   * {@link XmiArrayElement} objects, each of which holds an index and an xmi:id
+   * for an out-of-typesystem array element.
+   */
+  private Map ootsArrayElements = new HashMap();
+  
   /**
    * The maximum XMI ID used in the serialization. Used to generate unique IDs if needed.
    */
@@ -46,6 +102,7 @@
 
   void addIdMapping(int fsAddr, int xmiId) {
     fsAddrToXmiIdMap.put(fsAddr, Integer.toString(xmiId));
+    xmiIdToFsAddrMap.put(xmiId, new Integer(fsAddr));
     if (xmiId > maxXmiId)
       maxXmiId = xmiId;
   }
@@ -60,14 +117,178 @@
       // to be sure we get a unique Id, increment maxXmiId and use that
       String idStr = Integer.toString(++maxXmiId);
       fsAddrToXmiIdMap.put(fsAddr, idStr);
+      xmiIdToFsAddrMap.put(maxXmiId, new Integer(fsAddr));
       return idStr;
     }
   }
 
+  
+  /**
+   * Gets the maximum xmi:id that has been generated or read so far.
+   * @return the maximum xmi:id
+   */
+  public int getMaxXmiId() {
+    return maxXmiId;
+  }
+  
+  /**
+   * Gets the FS address that corresponds to the given xmi:id, in the most
+   * recent serialization or deserialization.
+   *   
+   * @param xmiId an xmi:id from the most recent XMI CAS that was serialized
+   *   or deserialized.
+   * @return the FS address of the FeatureStructure corresponding to that
+   *   xmi:id, -1 if none.
+   */
+  public int getFsAddrForXmiId(int xmiId) {
+    Integer addr = (Integer)xmiIdToFsAddrMap.get(xmiId);
+    return addr == null ? -1 : addr.intValue();
+  }
+  
+  /** 
+   * Clears the ID mapping information that was populated in
+   * previous serializations or deserializations.
+   * TODO: maybe a more general reset that resets other things?
+   */
   public void clearIdMap() {
     fsAddrToXmiIdMap.clear();
+    xmiIdToFsAddrMap.clear();
+    maxXmiId = 0;
+  }
+  
+  /**
+   * Records information about an XMI element that was not an instance of any type in the type system.
+   * @param elemData information about the out-of-typesystem XMI element
+   */
+  public void addOutOfTypeSystemElement(OotsElementData elemData) {
+    this.ootsFs.add(elemData);
+  }
+
+  /**
+   * Gets a List of {@link OotsElementData} objects, each of which describes an
+   * incoming XMI element that did not correspond to a Type in the TypeSystem.
+   * @return List of {@link OotsElementData} objects
+   */
+  public List getOutOfTypeSystemElements() {
+    return Collections.unmodifiableList(this.ootsFs);
+  }
+  
+  /**
+   * Records that an out-of-typesystem XMI element should be a member of the
+   * specified view.
+   * @param sofaXmiId xmi:id of a Sofa
+   * @param memberXmiId xmi:id of an out-of-typesystem element that should be
+   *   a member of the view for the given Sofa
+   */
+  public void addOutOfTypeSystemViewMember(String sofaXmiId, String memberXmiId) {
+    List membersList = (List)this.ootsViewMembers.get(sofaXmiId);
+    if (membersList == null) {
+      membersList = new ArrayList();
+      this.ootsViewMembers.put(sofaXmiId, membersList);
+    }
+    membersList.add(memberXmiId);
+  }
+  
+  /**
+   * Gets a List of xmi:id's (Strings) of all out-of-typesystem XMI elements
+   * that are members of the view with the given id.
+   * @param sofaXmiId xmi:id of a Sofa
+   * @return unmodifiable List of xmi:id's of members of the view for the given Sofa, or null if none.
+   */
+  public List getOutOfTypeSystemViewMembers(String sofaXmiId) {
+    List members = (List)this.ootsViewMembers.get(sofaXmiId);
+    return members == null ? null : Collections.unmodifiableList(members);
+  }
+  
+  /**
+   * Records an out-of-typesystem attribute that belongs to an in-typesystem FS.
+   * This will be added to the attributes when that FS is reserialized.
+   * @param addr CAS address of the FS 
+   * @param featName name of the feature
+   * @param featVal value of the feature, as a string
+   */
+  public void addOutOfTypeSystemAttribute(int addr, String featName, String featVal) {
+    Integer key = new Integer(addr);
+    OotsElementData oed = (OotsElementData)this.ootsFeatures.get(key);
+    if (oed == null) {
+      oed = new OotsElementData();
+      this.ootsFeatures.put(key, oed);
+    }
+    oed.attributes.add(new XmlAttribute(featName, featVal));
+  }  
+  
+  /**
+   * Records out-of-typesystem child elements that belong to an in-typesystem FS.
+   * These will be added to the child elements when that FS is reserialized.
+   * @param addr CAS address of the FS 
+   * @param featName name of the feature (element tag name)
+   * @param featVals values of the feature, as a List of strings
+   */
+  public void addOutOfTypeSystemChildElements(int addr, String featName, List featVals) {
+    Integer key = new Integer(addr);
+    OotsElementData oed = (OotsElementData)this.ootsFeatures.get(key);
+    if (oed == null) {
+      oed = new OotsElementData();
+      this.ootsFeatures.put(key, oed);
+    }
+    Iterator iter = featVals.iterator();
+    XmlElementName elemName = new XmlElementName(null,featName,featName);
+    while (iter.hasNext()) {
+      oed.childElements.add(new XmlElementNameAndContents(elemName, (String)iter.next()));
+    }
+  }  
+  
+  /**
+   * Gets information about out-of-typesystem features that belong to an
+   * in-typesystem FS.
+   * @param addr CAS address of the FS
+   * @return object containing information about out-of-typesystem features
+   *   (both attributes and child elements), or null if none were recorded for addr
+   */
+  public OotsElementData getOutOfTypeSystemFeatures(int addr) {
+    Integer key = new Integer(addr);
+    return (OotsElementData)this.ootsFeatures.get(key);
+  }
+  
+  /**
+   * Get all FS Addresses that have been added to the id map.
+   * @return an array containing all the FS addresses
+   */
+  public int[] getAllFsAddressesInIdMap() {
+    return fsAddrToXmiIdMap.keySet();
+  }  
+  
+  /**
+   * Gets information about out-of-typesystem array elements.
+   * @param addr the CAS address of an FSArray
+   * @return a List of {@link XmiArrayElement} objects, each of which
+   *   holds the index and xmi:id of an array element that is a
+   *   reference to an out-of-typesystem FS; null if none were recorded for addr.
+   */
+  public List getOutOfTypeSystemArrayElements(int addr) {
+    return (List)this.ootsArrayElements.get(new Integer(addr));
   }
+  
 
+  /**
+   * Records an out-of-typesystem array element in the XmiSerializationSharedData.
+   * @param addr CAS address of FSArray
+   * @param index index into array 
+   * @param xmiId xmi:id of the out-of-typesystem element that is the value at the given index
+   */
+  public void addOutOfTypeSystemArrayElement(int addr, int index, int xmiId) {
+    Integer key = new Integer(addr);
+    List list = (List)this.ootsArrayElements.get(key);
+    if (list == null) {
+      list = new ArrayList();
+      this.ootsArrayElements.put(key, list);
+    }
+    list.add(new XmiArrayElement(index, Integer.toString(xmiId)));
+  }
+  
+  /**
+   * For debugging purposes only.
+   */
   void checkForDups() {
     Set ids = new HashSet();
     Iterator iter = fsAddrToXmiIdMap.iterator();
@@ -79,6 +300,9 @@
     }
   }
 
+  /**
+   * For debugging purposes only.
+   */
   public String toString() {
     StringBuffer buf = new StringBuffer();
     int[] keys = fsAddrToXmiIdMap.keySet();
@@ -86,5 +310,47 @@
       buf.append(keys[i]).append(": ").append(fsAddrToXmiIdMap.get(keys[i])).append('\n');
     }
     return buf.toString();
+  }
+
+  /**
+   * Data structure holding all information about an XMI element
+   * containing an out-of-typesystem FS. Plain holder: package-visible fields, no methods.
+   */
+  static class OotsElementData {
+    /**
+     * xmi:id of the element, kept as a String.
+     */
+    String xmiId;
+
+    /**
+     * Name of the element, including XML namespace.
+     */
+    XmlElementName elementName;
+
+    /**
+     * List of XmlAttribute objects each holding name and value of an attribute. Starts out empty.
+     */
+    List attributes = new ArrayList();
+    
+    /**
+     * List of XmlElementNameAndContents objects each describing one of the
+     * child elements representing features of this out-of-typesystem element. Starts out empty.
+     */
+    List childElements = new ArrayList();
+  }
+  
+  /**
+   * Data structure holding the index and the xmi:id of an array or list element that
+   * is a reference to an out-of-typesystem FS.
+   */
+  static class XmiArrayElement {
+    int index; // position of the element within its array or list
+
+    String xmiId; // xmi:id of the out-of-typesystem FS the element refers to
+
+    XmiArrayElement(int index, String xmiId) {
+      this.index = index;
+      this.xmiId = xmiId;
+    }
   }
 }

Added: incubator/uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/internal/util/XmlAttribute.java
URL: http://svn.apache.org/viewvc/incubator/uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/internal/util/XmlAttribute.java?view=auto&rev=512404
==============================================================================
--- incubator/uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/internal/util/XmlAttribute.java (added)
+++ incubator/uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/internal/util/XmlAttribute.java Tue Feb 27 12:58:39 2007
@@ -0,0 +1,32 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.uima.internal.util;
+
+/**
+ * Data structure representing an XML attribute: a name/value pair held in public fields.
+ */
+public class XmlAttribute {
+  public String name; // attribute name, stored as passed to the constructor
+  public String value; // attribute value, stored as passed to the constructor
+  
+  public XmlAttribute(String name, String value) {
+    this.name = name;
+    this.value = value;
+  }
+}

Added: incubator/uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/internal/util/XmlElementName.java
URL: http://svn.apache.org/viewvc/incubator/uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/internal/util/XmlElementName.java?view=auto&rev=512404
==============================================================================
--- incubator/uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/internal/util/XmlElementName.java (added)
+++ incubator/uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/internal/util/XmlElementName.java Tue Feb 27 12:58:39 2007
@@ -0,0 +1,20 @@
+package org.apache.uima.internal.util;
+
+/**
+ * Data structure used to encapsulate the different pieces of information that
+ * make up the name of an XML element - namely, the Namespace URI, the local
+ * name, and the qname (qualified name). All three are stored exactly as passed to the constructor.
+ */
+public class XmlElementName {
+  public XmlElementName(String nsUri, String localName, String qName) {
+    this.nsUri = nsUri;
+    this.localName = localName;
+    this.qName = qName;
+  }
+
+  public String nsUri; // Namespace URI
+
+  public String localName; // local name
+
+  public String qName; // qualified name
+}
\ No newline at end of file

Added: incubator/uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/internal/util/XmlElementNameAndContents.java
URL: http://svn.apache.org/viewvc/incubator/uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/internal/util/XmlElementNameAndContents.java?view=auto&rev=512404
==============================================================================
--- incubator/uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/internal/util/XmlElementNameAndContents.java (added)
+++ incubator/uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/internal/util/XmlElementNameAndContents.java Tue Feb 27 12:58:39 2007
@@ -0,0 +1,47 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.uima.internal.util;
+
+import java.util.Collections;
+import java.util.List;
+
+/**
+ * Data structure used to encapsulate an XML element name (including Namespace URI,
+ * local name, and the qname) as well as its attributes and character content.
+ */
+public class XmlElementNameAndContents {
+  public XmlElementNameAndContents(XmlElementName name, String contents) {
+    this(name, contents, Collections.EMPTY_LIST); // no attributes; EMPTY_LIST is immutable, so this instance's attributes list cannot be added to
+  }
+
+  public XmlElementNameAndContents(XmlElementName name, String contents, List attributes) {
+    this.name = name;
+    this.contents = contents;
+    this.attributes = attributes; // the caller's list is kept as-is, not copied
+  }
+
+  public XmlElementName name;
+  
+  /**
+   * List of XmlAttribute objects each holding name and value of an attribute.
+   */
+  public List attributes;
+
+  public String contents; // character content of the element
+}
\ No newline at end of file