You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@uima.apache.org by al...@apache.org on 2007/02/27 21:58:40 UTC
svn commit: r512404 [1/2] - in /incubator/uima/uimaj/trunk/uimaj-core/src:
main/java/org/apache/uima/cas/impl/ main/java/org/apache/uima/internal/util/
test/java/org/apache/uima/cas/impl/
test/java/org/apache/uima/cas_data/impl/ test/resources/ExampleCas/
Author: alally
Date: Tue Feb 27 12:58:39 2007
New Revision: 512404
URL: http://svn.apache.org/viewvc?view=rev&rev=512404
Log:
XMI CAS Serializer/Deserializer updates for merging and
out-of-typesystem data.
UIMA-325: http://issues.apache.org/jira/browse/UIMA-325
UIMA-326: http://issues.apache.org/jira/browse/UIMA-326
Added:
incubator/uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/internal/util/XmlAttribute.java
incubator/uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/internal/util/XmlElementName.java
incubator/uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/internal/util/XmlElementNameAndContents.java
incubator/uima/uimaj/trunk/uimaj-core/src/test/resources/ExampleCas/simpleCas.xmi
Modified:
incubator/uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/cas/impl/ListUtils.java
incubator/uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/cas/impl/XmiCasDeserializer.java
incubator/uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/cas/impl/XmiCasSerializer.java
incubator/uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/cas/impl/XmiSerializationSharedData.java
incubator/uima/uimaj/trunk/uimaj-core/src/test/java/org/apache/uima/cas/impl/XmiCasDeserializerTest.java
incubator/uima/uimaj/trunk/uimaj-core/src/test/java/org/apache/uima/cas_data/impl/XCasToCasDataSaxHandlerTest.java
incubator/uima/uimaj/trunk/uimaj-core/src/test/resources/ExampleCas/partialTestTypeSystem.xml
Modified: incubator/uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/cas/impl/ListUtils.java
URL: http://svn.apache.org/viewvc/incubator/uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/cas/impl/ListUtils.java?view=diff&rev=512404&r1=512403&r2=512404
==============================================================================
--- incubator/uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/cas/impl/ListUtils.java (original)
+++ incubator/uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/cas/impl/ListUtils.java Tue Feb 27 12:58:39 2007
@@ -24,7 +24,9 @@
import java.util.ListIterator;
import org.apache.uima.cas.CAS;
+import org.apache.uima.cas.impl.XmiSerializationSharedData.OotsElementData;
import org.apache.uima.internal.util.IntVector;
+import org.apache.uima.internal.util.XmlAttribute;
import org.apache.uima.internal.util.rb_trees.IntRedBlackTree;
import org.apache.uima.util.Level;
import org.apache.uima.util.Logger;
@@ -255,15 +257,27 @@
curNode = addr;
for (int i = 0; i < length; i++) {
int heapVal = cas.getHeapValue(curNode + cas.getFeatureOffset(fsHeadFeat));
- if (sharedData != null) {
- strArray[i] = sharedData.getXmiId(heapVal);
- } else {
- strArray[i] = Integer.toString(heapVal);
- }
- if (strArray[i] == null) {
- // special NULL object with xmi:id=0 is used to represent
- // a null in an FSArray
+ if (heapVal == 0) {
+ //null value in list. Represent with "0".
strArray[i] = "0";
+ // However, this may be null because the element was originally a reference to an
+ // out-of-typesystem FS, so check the XmiSerializationSharedData
+ if (sharedData != null) {
+ OotsElementData oed = sharedData.getOutOfTypeSystemFeatures(curNode);
+ if (oed != null) {
+ assert oed.attributes.size() == 1; //only the head feature can possibly be here
+ XmlAttribute attr = (XmlAttribute)oed.attributes.get(0);
+ assert CAS.FEATURE_BASE_NAME_HEAD.equals(attr.name);
+ strArray[i] = attr.value;
+ }
+ }
+ }
+ else {
+ if (sharedData != null) {
+ strArray[i] = heapVal == 0 ? null : sharedData.getXmiId(heapVal);
+ } else {
+ strArray[i] = Integer.toString(heapVal);
+ }
}
curNode = cas.getHeapValue(curNode + cas.getFeatureOffset(fsTailFeat));
}
Modified: incubator/uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/cas/impl/XmiCasDeserializer.java
URL: http://svn.apache.org/viewvc/incubator/uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/cas/impl/XmiCasDeserializer.java?view=diff&rev=512404&r1=512403&r2=512404
==============================================================================
--- incubator/uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/cas/impl/XmiCasDeserializer.java (original)
+++ incubator/uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/cas/impl/XmiCasDeserializer.java Tue Feb 27 12:58:39 2007
@@ -43,10 +43,12 @@
import org.apache.uima.cas.SofaFS;
import org.apache.uima.cas.Type;
import org.apache.uima.cas.TypeSystem;
+import org.apache.uima.cas.impl.XmiSerializationSharedData.OotsElementData;
import org.apache.uima.internal.util.I18nUtil;
import org.apache.uima.internal.util.IntVector;
-import org.apache.uima.internal.util.rb_trees.IntRedBlackTree;
-import org.apache.uima.internal.util.rb_trees.IntRedBlackTree.IntRBTIterator;
+import org.apache.uima.internal.util.XmlAttribute;
+import org.apache.uima.internal.util.XmlElementName;
+import org.apache.uima.internal.util.XmlElementNameAndContents;
import org.xml.sax.Attributes;
import org.xml.sax.ContentHandler;
import org.xml.sax.InputSource;
@@ -78,11 +80,16 @@
// Inside a feature element. We expect the feature value.
private static final int FEAT_CONTENT_STATE = 3;
-
+
// Inside an element with the XMI namespace - indicating content that's
// not part of the typesystem and should be ignored.
private static final int IGNORING_XMI_ELEMENTS_STATE = 4;
+ // Inside a reference feature element (e.g. <feat href="#1"/>).
+ // We expect no content, just the end of the element.
+ private static final int REF_FEAT_STATE = 5;
+
+
// End parser states.
// ///////////////////////////////////////////////////////////////////////
@@ -98,11 +105,9 @@
// The CAS we're filling.
private CASImpl cas;
- // Store FSs with ID in a search tree (keys are xmi ID, values are CAS address)
- private IntRedBlackTree fsTree;
-
- // Store IDless FSs in a vector
- private IntVector idLess;
+ // Store address of every FS we've deserialized, since we need to go back
+ // and apply fix-ups afterwards.
+ private IntVector deserializedFsAddrs;
// Store a separate vector of FSList nodes that were deserialized from multivalued properties.
// These are special because their "head" feature needs remapping but their "tail" feature
@@ -169,21 +174,53 @@
// with a non-namespace-enabled SAX parser.
private HashMap nsPrefixToUriMap = new HashMap();
+ // container for data shared between the XmiCasSerializer and
+ // XmiCasDeserializer, to support things such as consistency of IDs across
+ // multiple serializations. This is also where the map from xmi:id to
+ // FS address is stored.
private XmiSerializationSharedData sharedData;
// number of Sofas found so far
private int nextSofaNum;
+
+ //used for merging multiple XMI CASes into one CAS object.
+ private int mergePoint;
+
+ //Current out-of-typesystem element, if any
+ private OotsElementData outOfTypeSystemElement = null;
+ /**
+ * Creates a SAX handler used for deserializing an XMI CAS.
+ * @param aCAS CAS to deserialize into
+ * @param lenient if true, unknown types/features are ignored (and
+ * recorded as out-of-typesystem data). If false, unknown types/features result in an exception.
+ * @param sharedData data structure used to allow the XmiCasSerializer and
+ * XmiCasDeserializer to share information.
+ * @param mergePoint used to support merging multiple XMI CASes. If the
+ * mergePoint is negative, "normal" deserialization will be done,
+ * meaning the target CAS will be reset and the entire XMI content will
+ * be deserialized. If the mergePoint is nonnegative (including 0), the
+ * target CAS will not be reset, and only Feature Structures whose
+ * xmi:id is strictly greater than the mergePoint value will be
+ * deserialized.
+ */
private XmiCasDeserializerHandler(CASImpl aCAS, boolean lenient,
- XmiSerializationSharedData sharedData) {
+ XmiSerializationSharedData sharedData, int mergePoint) {
super();
this.cas = aCAS.getBaseCAS();
this.lenient = lenient;
- this.sharedData = sharedData;
- // Reset the CAS. Necessary to get Sofas to work properly.
- cas.resetNoQuestions();
- this.fsTree = new IntRedBlackTree();
- this.idLess = new IntVector();
+ this.sharedData =
+ sharedData != null ? sharedData : new XmiSerializationSharedData();
+ this.mergePoint = mergePoint;
+ if (mergePoint < 0) {
+ //If not merging, reset the CAS.
+ //Necessary to get Sofas to work properly.
+ cas.resetNoQuestions();
+
+ // clear ID mappings stored in the SharedData (from previous deserializations)
+ this.sharedData.clearIdMap();
+ }
+ this.deserializedFsAddrs = new IntVector();
this.fsListNodesFromMultivaluedProperties = new IntVector();
this.buffer = new StringBuffer();
this.indexRepositories = new ArrayList();
@@ -197,11 +234,6 @@
this.nextSofaNum = 2;
this.listUtils = new ListUtils(cas, UIMAFramework.getLogger(XmiCasDeserializer.class), null);
- // clear ID mappings stored in the SharedData (from previous deserializations)
- if (this.sharedData != null) {
- this.sharedData.clearIdMap();
- }
-
// populate feature type table
this.featureType = new int[cas.ts.getNumberOfFeatures() + 1];
FeatureImpl feat;
@@ -268,6 +300,18 @@
this.ignoreDepth++;
return;
}
+ // if we're doing merging, skip elements whose ID is <= mergePoint
+ if (this.mergePoint >= 0) {
+ String id = attrs.getValue(ID_ATTR_NAME);
+ if (id != null) {
+ int idInt = Integer.parseInt(id);
+ if (idInt <= this.mergePoint) {
+ this.state = IGNORING_XMI_ELEMENTS_STATE;
+ this.ignoreDepth++;
+ return;
+ }
+ }
+ }
if (nameSpaceURI == null || nameSpaceURI.length() == 0) {
// parser may not be namespace-enabled, so try to resolve NS ourselves
int colonIndex = qualifiedName.indexOf(':');
@@ -285,16 +329,49 @@
}
}
- String typeName = xmiElementName2uimaTypeName(nameSpaceURI, localName);
-
- readFS(typeName, attrs);
+ readFS(nameSpaceURI, localName, qualifiedName, attrs);
multiValuedFeatures.clear();
state = FEAT_STATE;
break;
}
case FEAT_STATE: {
- state = FEAT_CONTENT_STATE;
+ //parsing a feature recorded as a child element
+ //check for an "href" feature, used for references
+ String href = attrs.getValue("href");
+ if (href != null && href.startsWith("#")) {
+ //for out-of-typesystem objects, there's special handling here
+ //to keep track of the fact this was an href so we re-serialize
+ //correctly.
+ if (this.outOfTypeSystemElement != null) {
+ XmlElementName elemName = new XmlElementName(nameSpaceURI, localName, qualifiedName);
+ List ootsAttrs = new ArrayList();
+ ootsAttrs.add(new XmlAttribute("href", href));
+ XmlElementNameAndContents elemWithContents = new XmlElementNameAndContents(elemName, null, ootsAttrs);
+ this.outOfTypeSystemElement.childElements.add(elemWithContents);
+ }
+ else {
+ //In-typesystem FS, so we can forget this was an href and just add
+ //the integer value, which will be interpreted as a reference later.
+ //NOTE: this will end up causing it to be reserialized as an attribute
+ //rather than an element, but that is not in violation of the XMI spec.
+ ArrayList valueList = (ArrayList) this.multiValuedFeatures.get(qualifiedName);
+ if (valueList == null) {
+ valueList = new ArrayList();
+ this.multiValuedFeatures.put(qualifiedName, valueList);
+ }
+ valueList.add(href.substring(1));
+ }
+ state = REF_FEAT_STATE;
+ }
+ else {
+ //non-reference feature, expecting feature value as character content
+ state = FEAT_CONTENT_STATE;
+ }
+ break;
+ }
+ case IGNORING_XMI_ELEMENTS_STATE: {
+ ignoreDepth++;
break;
}
default: {
@@ -305,7 +382,10 @@
}
// Create a new FS.
- private void readFS(String typeName, Attributes attrs) throws SAXParseException {
+ private void readFS(String nameSpaceURI, String localName, String qualifiedName,
+ Attributes attrs) throws SAXException {
+ String typeName = xmiElementName2uimaTypeName(nameSpaceURI, localName);
+
currentType = (TypeImpl) ts.getType(typeName);
if (currentType == null) {
// ignore NULL type
@@ -317,8 +397,12 @@
processView(attrs.getValue("sofa"), attrs.getValue("members"));
return;
}
+ // type is not in our type system
if (!lenient) {
throw createException(XCASParsingException.UNKNOWN_TYPE, typeName);
+ } else {
+ addToOutOfTypeSystemData(
+ new XmlElementName(nameSpaceURI, localName, qualifiedName), attrs);
}
return;
} else if (cas.isArrayType(currentType)) {
@@ -365,7 +449,7 @@
if (sofa != null) {
// translate sofa's xmi:id into its sofanum
int sofaXmiId = Integer.parseInt(sofa);
- int sofaAddr = fsTree.get(sofaXmiId);
+ int sofaAddr = getFsAddrForXmiId(sofaXmiId);
sofaNum = cas.getFeatureValue(sofaAddr, sofaNumFeatCode);
}
FSIndexRepositoryImpl indexRep = (FSIndexRepositoryImpl) indexRepositories.get(sofaNum);
@@ -374,18 +458,24 @@
// intermediate String[]?
String[] members = parseArray(membersString);
for (int i = 0; i < members.length; i++) {
+ int id = Integer.parseInt(members[i]);
+ //if merging, don't try to index anything below the merge point
+ if (id <= this.mergePoint) {
+ continue;
+ }
// have to map each ID to its "real" address (TODO: optimize?)
- int addr;
try {
- addr = fsTree.get(Integer.parseInt(members[i]));
+ int addr = getFsAddrForXmiId(id);
+ indexRep.addFS(addr);
} catch (NoSuchElementException e) {
- if (!lenient)
+ if (!lenient) {
throw e;
- // when running in lenient mode, we will have skipped FSs that
- // are of unknown types. So ignore members of the View which are not found.
- continue;
+ }
+ else {
+ //unknown view member may be an OutOfTypeSystem FS
+ this.sharedData.addOutOfTypeSystemViewMember(sofa, members[i]);
+ }
}
- indexRep.addFS(addr);
}
}
}
@@ -397,7 +487,7 @@
* @throws SAXParseException
*/
private void readFS(final int addr, Attributes attrs) throws SAXParseException {
- // Hang on address for setting content feature
+ // Hang on to address for handling features encoded as child elements
this.currentAddr = addr;
int id = -1;
String attrName, attrValue;
@@ -433,7 +523,7 @@
} else if (sofaTypeCode == typeCode && attrName.equals(CAS.FEATURE_BASE_NAME_SOFANUM)) {
attrValue = Integer.toString(thisSofaNum);
}
- handleFeature(type, addr, attrName, attrValue, false);
+ handleFeature(type, addr, attrName, attrValue);
}
}
if (sofaTypeCode == typeCode) {
@@ -451,13 +541,9 @@
((CASImpl) view).registerView(sofa);
views.add(view);
}
- if (id < 0) {
- idLess.add(addr);
- } else {
- fsTree.put(id, addr);
- if (sharedData != null) {
- sharedData.addIdMapping(addr, id);
- }
+ deserializedFsAddrs.add(addr);
+ if (id > 0) {
+ sharedData.addIdMapping(addr, id);
}
}
@@ -467,25 +553,29 @@
return ((val == null) || (val.length() == 0));
}
- private void handleFeature(final Type type, int addr, String featName, String featVal,
- boolean aLenient) throws SAXParseException {
+ private void handleFeature(final Type type, int addr, String featName, String featVal) throws SAXParseException {
final FeatureImpl feat = (FeatureImpl) type.getFeatureByBaseName(featName);
if (feat == null) {
- if (!aLenient) {
+ if (!this.lenient) {
throw createException(XCASParsingException.UNKNOWN_FEATURE, featName);
}
+ else {
+ sharedData.addOutOfTypeSystemAttribute(addr, featName, featVal);
+ }
return;
}
handleFeature(addr, feat.getCode(), featVal);
}
- private void handleFeature(final Type type, int addr, String featName, List featVals,
- boolean aLenient) throws SAXParseException {
+ private void handleFeature(final Type type, int addr, String featName, List featVals) throws SAXParseException {
final FeatureImpl feat = (FeatureImpl) type.getFeatureByBaseName(featName);
if (feat == null) {
- if (!aLenient) {
+ if (!this.lenient) {
throw createException(XCASParsingException.UNKNOWN_FEATURE, featName);
}
+ else {
+ sharedData.addOutOfTypeSystemChildElements(addr, featName, featVals);
+ }
return;
}
handleFeature(addr, feat.getCode(), featVals);
@@ -511,7 +601,7 @@
// special handling for "sofa" feature of annotation. Need to change
// it from a sofa reference into a sofa number
int sofaXmiId = Integer.parseInt(featVal);
- int sofaAddr = fsTree.get(sofaXmiId);
+ int sofaAddr = getFsAddrForXmiId(sofaXmiId);
int sofaNum = cas.getFeatureValue(sofaAddr, sofaNumFeatCode);
cas.setFeatureValue(addr, featCode, sofaNum);
} else {
@@ -751,13 +841,9 @@
cas.setArrayValueFromString(casArray, i, stringVal);
}
- if (xmiId < 0) {
- idLess.add(casArray);
- } else {
- fsTree.put(xmiId, casArray);
- if (sharedData != null) {
- sharedData.addIdMapping(casArray, xmiId);
- }
+ deserializedFsAddrs.add(casArray);
+ if (xmiId > 0) {
+ sharedData.addIdMapping(casArray, xmiId);
}
return casArray;
}
@@ -782,13 +868,9 @@
}
int arrayAddr = ((FeatureStructureImpl) fs).getAddress();
- if (xmiId < 0) {
- idLess.add(arrayAddr);
- } else {
- fsTree.put(xmiId, arrayAddr);
- if (sharedData != null) {
- sharedData.addIdMapping(arrayAddr, xmiId);
- }
+ deserializedFsAddrs.add(arrayAddr);
+ if (xmiId > 0) {
+ sharedData.addIdMapping(arrayAddr, xmiId);
}
return arrayAddr;
}
@@ -859,10 +941,26 @@
this.state = FEAT_STATE;
break;
}
+ case REF_FEAT_STATE: {
+ this.state = FEAT_STATE;
+ break;
+ }
case FEAT_STATE: {
// end of FS. Process multi-valued features or array elements that were
// encoded as subelements
- if (currentType != null) {
+ if (this.outOfTypeSystemElement != null) {
+ if (!this.multiValuedFeatures.isEmpty()) {
+ Iterator iter = this.multiValuedFeatures.entrySet().iterator();
+ while (iter.hasNext()) {
+ Map.Entry entry = (Map.Entry) iter.next();
+ String featName = (String) entry.getKey();
+ List featVals = (List) entry.getValue();
+ addOutOfTypeSystemFeature(outOfTypeSystemElement, featName, featVals);
+ }
+ }
+ this.outOfTypeSystemElement = null;
+ }
+ else if (currentType != null) {
if (cas.isArrayType(currentType) && !cas.isByteArrayType(currentType)) {
// create the array now. elements may have been provided either as
// attributes or child elements, but not both.
@@ -882,7 +980,7 @@
Map.Entry entry = (Map.Entry) iter.next();
String featName = (String) entry.getKey();
List featVals = (List) entry.getValue();
- handleFeature(currentType, currentAddr, featName, featVals, false);
+ handleFeature(currentType, currentAddr, featName, featVals);
}
}
}
@@ -905,24 +1003,9 @@
* @see org.xml.sax.ContentHandler#endDocument()
*/
public void endDocument() throws SAXException {
- // time = System.currentTimeMillis() - time;
- // System.out.println("Done reading xml data in " + new TimeSpan(time));
- // System.out.println(
- // "Resolving references for id data (" + fsTree.size() + ").");
- // time = System.currentTimeMillis();
-
// Resolve ID references, and add FSs to indexes
- IntRBTIterator it = fsTree.iterator();
- while (it.hasNext()) {
- finalizeFS(it.next());
- }
- // time = System.currentTimeMillis() - time;
- // System.out.println("Done in " + new TimeSpan(time));
- // System.out.println(
- // "Resolving references for non-id data (" + idLess.size() + ").");
- // time = System.currentTimeMillis();
- for (int i = 0; i < idLess.size(); i++) {
- finalizeFS(idLess.get(i));
+ for (int i = 0; i < deserializedFsAddrs.size(); i++) {
+ finalizeFS(deserializedFsAddrs.get(i));
}
for (int i = 0; i < fsListNodesFromMultivaluedProperties.size(); i++) {
remapFSListHeads(fsListNodesFromMultivaluedProperties.get(i));
@@ -961,12 +1044,18 @@
if (featVal != CASImpl.NULL) {
int fsValAddr = CASImpl.NULL;
try {
- fsValAddr = fsTree.get(featVal);
+ fsValAddr = getFsAddrForXmiId(featVal);
} catch (NoSuchElementException e) {
- if (!lenient)
+ if (!lenient) {
throw e;
- // if running in lenient mode, we may not have deserialized the value of this
- // feature because it was of unknown type. So set it to null.
+ }
+ else {
+ // we may not have deserialized the value of this feature because it
+ // was of unknown type. We set it to null, and record in the
+ // out-of-typesystem data.
+ this.sharedData.addOutOfTypeSystemAttribute(
+ addr, feat.getShortName(), Integer.toString(featVal));
+ }
}
cas.setFeatureValue(addr, feats[i], fsValAddr);
}
@@ -981,7 +1070,6 @@
*
* @param i
*/
-
private void remapFSListHeads(int addr) {
final int type = cas.getHeapValue(addr);
if (!listUtils.isFsListType(type))
@@ -994,12 +1082,15 @@
if (featVal != CASImpl.NULL) {
int fsValAddr = CASImpl.NULL;
try {
- fsValAddr = fsTree.get(featVal);
+ fsValAddr = getFsAddrForXmiId(featVal);
} catch (NoSuchElementException e) {
- if (!lenient)
+ if (!lenient) {
throw e;
- // if running in lenient mode, we may not have deserialized the value of this
- // element because it was of unknown type. So we set the element to null.
+ }
+ else {
+ //this may be a reference to an out-of-typesystem FS
+ this.sharedData.addOutOfTypeSystemAttribute(addr, CAS.FEATURE_BASE_NAME_HEAD, Integer.toString(featVal));
+ }
}
cas.setFeatureValue(addr, headFeat, fsValAddr);
}
@@ -1024,12 +1115,16 @@
if (arrayVal != CASImpl.NULL) {
int arrayValAddr = CASImpl.NULL;
try {
- arrayValAddr = fsTree.get(arrayVal);
+ arrayValAddr = getFsAddrForXmiId(arrayVal);
} catch (NoSuchElementException e) {
- if (!lenient)
+ if (!lenient) {
throw e;
- // if running in lenient mode, we may not have deserialized the value of this
- // element because it was of unknown type. So we set the element to null.
+ }
+ else {
+ // the array element may be out of typesystem. In that case set it
+ // to null, but record the id so we can add it back on next serialization.
+ this.sharedData.addOutOfTypeSystemArrayElement(addr, i, arrayVal);
+ }
}
cas.setArrayValue(addr, i, arrayValAddr);
}
@@ -1142,8 +1237,64 @@
}
return cas.ll_getTypeClass(type);
}
- }
+
+ /**
+ * Gets the FS address into which the XMI element with the given ID
+ * was deserialized. This method supports merging multiple XMI documents
+ * into a single CAS, by checking the XmiSerializationSharedData
+ * structure to get the address of elements that were skipped during this
+ * deserialization but were deserialized during a previous deserialization.
+ *
+ * @param xmiId
+ * @return
+ */
+ private int getFsAddrForXmiId(int xmiId) {
+ int addr = sharedData.getFsAddrForXmiId(xmiId);
+ if (addr > 0)
+ return addr;
+ else
+ throw new java.util.NoSuchElementException();
+ }
+
+ /**
+ * Adds a feature structure to the out-of-typesystem data. Also sets the
+ * this.outOfTypeSystemElement field, which is referred to later if we have to
+ * handle features recorded as child elements.
+ */
+ private void addToOutOfTypeSystemData(XmlElementName xmlElementName, Attributes attrs)
+ throws XCASParsingException {
+ this.outOfTypeSystemElement = new OotsElementData();
+ this.outOfTypeSystemElement.elementName = xmlElementName;
+ String attrName, attrValue;
+ for (int i = 0; i < attrs.getLength(); i++) {
+ attrName = attrs.getQName(i);
+ attrValue = attrs.getValue(i);
+ if (attrName.equals(ID_ATTR_NAME)) {
+ this.outOfTypeSystemElement.xmiId = attrValue;
+ }
+ else {
+ this.outOfTypeSystemElement.attributes.add(
+ new XmlAttribute(attrName, attrValue));
+ }
+ }
+ this.sharedData.addOutOfTypeSystemElement(this.outOfTypeSystemElement);
+ }
+ /**
+ * Adds a feature to the out-of-typesystem features list.
+ * @param ootsElem object to which to add the feature
+ * @param featName name of feature
+ * @param featVals feature values, as a list of strings
+ */
+ private void addOutOfTypeSystemFeature(OotsElementData ootsElem, String featName, List featVals) {
+ Iterator iter = featVals.iterator();
+ XmlElementName elemName = new XmlElementName(null,featName,featName);
+ while (iter.hasNext()) {
+ ootsElem.childElements.add(new XmlElementNameAndContents(elemName, (String)iter.next()));
+ }
+ }
+ }
+
private TypeSystemImpl ts;
private Map xmiNamespaceToUimaNamespaceMap = new HashMap();
@@ -1191,7 +1342,7 @@
* @return The <code>DefaultHandler</code> to pass to the SAX parser.
*/
public DefaultHandler getXmiCasHandler(CAS cas, boolean lenient) {
- return new XmiCasDeserializerHandler((CASImpl) cas, lenient, null);
+ return new XmiCasDeserializerHandler((CASImpl) cas, lenient, null, -1);
}
/**
@@ -1213,8 +1364,35 @@
*/
public DefaultHandler getXmiCasHandler(CAS cas, boolean lenient,
XmiSerializationSharedData sharedData) {
- return new XmiCasDeserializerHandler((CASImpl) cas, lenient, sharedData);
+ return new XmiCasDeserializerHandler((CASImpl) cas, lenient, sharedData, -1);
}
+
+ /**
+ * Create a default handler for deserializing a CAS from XMI. By default this is not lenient,
+ * meaning that if the XMI references Types that are not in the Type System, an Exception will be
+ * thrown. Use {@link XmiCasDeserializer#getXmiCasHandler(CAS,boolean)} to turn on lenient mode
+ * and ignore any unknown types.
+ *
+ * @param cas
+ * This CAS will be used to hold the data deserialized from the XMI
+ * @param lenient
+ * if true, unknown Types will be ignored. If false, unknown Types will cause an
+ * exception. The default is false.
+ * @param sharedData
+ * data structure used to allow the XmiCasSerializer and XmiCasDeserializer to share
+ * information.
+ * @param mergePoint
+ * used to support merging multiple XMI CASes. If the mergePoint is negative, "normal"
+ * deserialization will be done, meaning the target CAS will be reset and the entire XMI
+ * content will be deserialized. If the mergePoint is nonnegative (including 0), the
+ * target CAS will not be reset, and only Feature Structures whose xmi:id is strictly
+ * greater than the mergePoint value will be deserialized.
+ * @return The <code>DefaultHandler</code> to pass to the SAX parser.
+ */
+ public DefaultHandler getXmiCasHandler(CAS cas, boolean lenient,
+ XmiSerializationSharedData sharedData, int mergePoint) {
+ return new XmiCasDeserializerHandler((CASImpl) cas, lenient, sharedData, mergePoint);
+ }
/**
* Deserializes a CAS from XMI.
@@ -1231,7 +1409,7 @@
* if an I/O failure occurs
*/
public static void deserialize(InputStream aStream, CAS aCAS) throws SAXException, IOException {
- XmiCasDeserializer.deserialize(aStream, aCAS, false);
+ XmiCasDeserializer.deserialize(aStream, aCAS, false, null, -1);
}
/**
@@ -1253,13 +1431,70 @@
*/
public static void deserialize(InputStream aStream, CAS aCAS, boolean aLenient)
throws SAXException, IOException {
+ deserialize(aStream, aCAS, aLenient, null, -1);
+ }
+
+ /**
+ * Deserializes a CAS from XMI.
+ *
+ * @param aStream
+ * input stream from which to read the XMI document
+ * @param aCAS
+ * CAS into which to deserialize. This CAS must be set up with a type system that is
+ * compatible with that in the XMI
+ * @param aLenient
+ * if true, unknown Types will be ignored. If false, unknown Types will cause an
+ * exception. The default is false.
+ * @param aSharedData
+ * an optional container for data that is shared between the {@link XmiCasSerializer} and the
+ * {@link XmiCasDeserializer}. See the JavaDocs for {@link XmiSerializationSharedData} for details.
+ *
+ * @throws SAXException
+ * if an XML Parsing error occurs
+ * @throws IOException
+ * if an I/O failure occurs
+ */
+ public static void deserialize(InputStream aStream, CAS aCAS, boolean aLenient,
+ XmiSerializationSharedData aSharedData)
+ throws SAXException, IOException {
+ deserialize(aStream, aCAS, aLenient, aSharedData, -1);
+ }
+
+ /**
+ * Deserializes a CAS from XMI. This version of this method supports merging multiple XMI documents into a single CAS.
+ *
+ * @param aStream
+ * input stream from which to read the XMI document
+ * @param aCAS
+ * CAS into which to deserialize. This CAS must be set up with a type system that is
+ * compatible with that in the XMI
+ * @param aLenient
+ * if true, unknown Types will be ignored. If false, unknown Types will cause an
+ * exception. The default is false.
+ * @param aSharedData
+ * a container for data that is shared between the {@link XmiCasSerializer} and the {@link XmiCasDeserializer}.
+ * See the JavaDocs for {@link XmiSerializationSharedData} for details.
+ * @param aMergePoint
+ * used to support merging multiple XMI CASes. If the mergePoint is negative, "normal"
+ * deserialization will be done, meaning the target CAS will be reset and the entire XMI
+ * content will be deserialized. If the mergePoint is nonnegative (including 0), the
+ * target CAS will not be reset, and only Feature Structures whose xmi:id is strictly
+ * greater than the mergePoint value will be deserialized.
+ * @throws SAXException
+ * if an XML Parsing error occurs
+ * @throws IOException
+ * if an I/O failure occurs
+ */
+ public static void deserialize(InputStream aStream, CAS aCAS, boolean aLenient,
+ XmiSerializationSharedData aSharedData, int aMergePoint)
+ throws SAXException, IOException {
XMLReader xmlReader = XMLReaderFactory.createXMLReader();
XmiCasDeserializer deser = new XmiCasDeserializer(aCAS.getTypeSystem());
- ContentHandler handler = deser.getXmiCasHandler(aCAS, aLenient);
+ ContentHandler handler = deser.getXmiCasHandler(aCAS, aLenient, aSharedData, aMergePoint);
xmlReader.setContentHandler(handler);
xmlReader.parse(new InputSource(aStream));
- }
-
+ }
+
/**
* Converts an XMI element name to a UIMA-style dotted type name.
*
Modified: incubator/uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/cas/impl/XmiCasSerializer.java
URL: http://svn.apache.org/viewvc/incubator/uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/cas/impl/XmiCasSerializer.java?view=diff&rev=512404&r1=512403&r2=512404
==============================================================================
--- incubator/uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/cas/impl/XmiCasSerializer.java (original)
+++ incubator/uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/cas/impl/XmiCasSerializer.java Tue Feb 27 12:58:39 2007
@@ -37,8 +37,13 @@
import org.apache.uima.cas.FSIndex;
import org.apache.uima.cas.StringArrayFS;
import org.apache.uima.cas.TypeSystem;
+import org.apache.uima.cas.impl.XmiSerializationSharedData.OotsElementData;
+import org.apache.uima.cas.impl.XmiSerializationSharedData.XmiArrayElement;
import org.apache.uima.internal.util.IntStack;
import org.apache.uima.internal.util.IntVector;
+import org.apache.uima.internal.util.XmlAttribute;
+import org.apache.uima.internal.util.XmlElementName;
+import org.apache.uima.internal.util.XmlElementNameAndContents;
import org.apache.uima.internal.util.rb_trees.IntRedBlackTree;
import org.apache.uima.util.Level;
import org.apache.uima.util.Logger;
@@ -188,19 +193,24 @@
* Starts serialization
*/
private void serialize() throws IOException, SAXException {
- // populate nsUriToPrefixMap and xmiTypeNames structures based on CAS type system
+ // populate nsUriToPrefixMap and xmiTypeNames structures based on CAS
+ // type system, and out of typesystem data if any
initTypeAndNamespaceMappings();
int iElementCount = 1; // start at 1 to account for special NULL object
+ enqueueIncoming(); //make sure we enqueue every FS that was deserialized into this CAS
enqueueIndexed();
enqueueFeaturesOfIndexed();
iElementCount += indexedFSs.size();
iElementCount += queue.size();
FSIndex sofaIndex = cas.getBaseCAS().indexRepository.getIndex(CAS.SOFA_INDEX_NAME);
- iElementCount += (sofaIndex.size() + 1); // one View element per sofa, plus 1 for base
-
+ iElementCount += (sofaIndex.size()); // one View element per sofa
+ if (this.sharedData != null) {
+ iElementCount += this.sharedData.getOutOfTypeSystemElements().size();
+ }
+
workAttrs.clear();
computeNamespaceDeclarationAttrs(workAttrs);
workAttrs.addAttribute(XMI_NS_URI, XMI_VERSION_LOCAL_NAME, XMI_VERSION_QNAME, "CDATA",
@@ -210,6 +220,7 @@
writeNullObject(); // encodes 1 element
encodeIndexed(); // encodes indexedFSs.size() element
encodeQueued(); // encodes queue.size() elements
+ serializeOutOfTypeSystemElements(); //encodes sharedData.getOutOfTypeSystemElements().size() elements
writeViews(); // encodes cas.sofaCount + 1 elements
endElement(XMI_TAG);
}
@@ -237,15 +248,25 @@
if (sofaXmiId != null && sofaXmiId.length() > 0) {
addAttribute(workAttrs, "sofa", sofaXmiId);
}
- if (members.length > 0) {
- StringBuffer membersString = new StringBuffer();
- for (int i = 0; i < members.length; i++) {
- String xmiId = getXmiId(members[i]);
- if (xmiId != null) // to catch filtered FS
- {
- membersString.append(xmiId).append(' ');
+ StringBuffer membersString = new StringBuffer();
+ for (int i = 0; i < members.length; i++) {
+ String xmiId = getXmiId(members[i]);
+ if (xmiId != null) // to catch filtered FS
+ {
+ membersString.append(xmiId).append(' ');
+ }
+ }
+ //check for out-of-typesystem members
+ if (this.sharedData != null) {
+ List ootsMembers = this.sharedData.getOutOfTypeSystemViewMembers(sofaXmiId);
+ if (ootsMembers != null) {
+ Iterator iter = ootsMembers.iterator();
+ while (iter.hasNext()) {
+ membersString.append((String)iter.next()).append(' ');
}
}
+ }
+ if (membersString.length() > 0) {
// remove trailing space before adding to attributes
addAttribute(workAttrs, "members", membersString.substring(0, membersString.length() - 1));
}
@@ -296,13 +317,29 @@
}
/**
+ * Enqueues all FS that are stored in the XmiSerializationSharedData's id map.
+ * This map is populated during the previous deserialization. This method
+ * is used to make sure that all incoming FS are echoed in the next
+ * serialization.
+ */
+ private void enqueueIncoming() {
+ if (this.sharedData == null)
+ return;
+
+ int[] fsAddrs = this.sharedData.getAllFsAddressesInIdMap();
+ for (int i = 0; i < fsAddrs.length; i++) {
+ enqueueIndexedFs(fsAddrs[i]);
+ }
+ }
+
+ /**
* Push the indexed FSs onto the queue.
*/
private void enqueueIndexed() {
FSIndexRepositoryImpl ir = (FSIndexRepositoryImpl) cas.getBaseCAS().getBaseIndexRepository();
int[] fsarray = ir.getIndexedFSs();
for (int k = 0; k < fsarray.length; k++) {
- enqueueIndexedFs(fsarray[k], 0);
+ enqueueIndexedFs(fsarray[k]);
}
// FSIndex sofaIndex = cas.getBaseCAS().indexRepository.getIndex(CAS.SOFA_INDEX_NAME);
@@ -319,7 +356,7 @@
if (loopIR != null) {
fsarray = loopIR.getIndexedFSs();
for (int k = 0; k < fsarray.length; k++) {
- enqueueIndexedFs(fsarray[k], sofaNum);
+ enqueueIndexedFs(fsarray[k]);
}
}
}
@@ -340,7 +377,7 @@
/**
* Enqueues an indexed FS. Does NOT enqueue features at this point.
*/
- private void enqueueIndexedFs(int addr, int indexRep) {
+ private void enqueueIndexedFs(int addr) {
if (isVisited(addr)) {
return;
}
@@ -729,7 +766,7 @@
case TYPE_CLASS_FLOATLIST:
case TYPE_CLASS_FSLIST: {
// If the feature has multipleReferencesAllowed = true OR if we're already
- // inside another list node (i.e. this is the "tail" feature).
+ // inside another list node (i.e. this is the "tail" feature), serialize as a normal FS.
// Otherwise, serialize as a multi-valued property.
if (cas.ts.getFeature(feats[i]).isMultipleReferencesAllowed() || insideListNode) {
attrValue = getXmiId(featVal);
@@ -768,6 +805,21 @@
addAttribute(attrs, featName, attrValue);
}
}
+
+ //add out-of-typesystem features, if any
+ if (this.sharedData != null) {
+ OotsElementData oed = this.sharedData.getOutOfTypeSystemFeatures(addr);
+ if (oed != null) {
+ //attributes
+ Iterator attrIter = oed.attributes.iterator();
+ while (attrIter.hasNext()) {
+ XmlAttribute attr = (XmlAttribute)attrIter.next();
+ addAttribute(workAttrs, attr.name, attr.value);
+ }
+ //child elements
+ childElements.addAll(oed.childElements);
+ }
+ }
return childElements;
}
@@ -832,6 +884,9 @@
String elemStr = null;
if (arrayType == LowLevelCAS.TYPE_CLASS_FSARRAY) {
int pos = cas.getArrayStartAddress(addr);
+ List ootsArrayElementsList = this.sharedData == null ? null :
+ this.sharedData.getOutOfTypeSystemArrayElements(addr);
+ int ootsIndex = 0;
for (int j = 0; j < size; j++) {
int heapValue = cas.getHeapValue(pos++);
elemStr = null;
@@ -842,6 +897,17 @@
// special NULL object with xmi:id=0 is used to represent
// a null in an FSArray
elemStr = "0";
+ // However, this null array element might have been a reference to an
+ //out-of-typesystem FS, so check the ootsArrayElementsList
+ if (ootsArrayElementsList != null) {
+ while (ootsIndex < ootsArrayElementsList.size()) {
+ XmiArrayElement arel =(XmiArrayElement)ootsArrayElementsList.get(ootsIndex++);
+ if (arel.index == j) {
+ elemStr = arel.xmiId;
+ break;
+ }
+ }
+ }
}
if (buf.length() > 0) {
buf.append(' ');
@@ -990,6 +1056,22 @@
nsUriToPrefixMap.put(XMI_NS_URI, XMI_NS_PREFIX);
xmiTypeNames = new XmlElementName[cas.ts.getLargestTypeCode() + 1];
+ //Add any namespace prefix mappings used by out of type system data.
+ //Need to do this before the in-typesystem namespaces so that the prefixes
+ //used here are reserved and won't be reused for any in-typesystem namespaces.
+ if (this.sharedData != null) {
+ Iterator ootsIter = this.sharedData.getOutOfTypeSystemElements().iterator();
+ while (ootsIter.hasNext()) {
+ OotsElementData oed = (OotsElementData)ootsIter.next();
+ String nsUri = oed.elementName.nsUri;
+ String qname = oed.elementName.qName;
+ String localName = oed.elementName.localName;
+ String prefix = qname.substring(0, qname.indexOf(localName)-1);
+ nsUriToPrefixMap.put(nsUri, prefix);
+ nsPrefixesUsed.add(prefix);
+ }
+ }
+
Iterator it = cas.ts.getTypeIterator();
while (it.hasNext()) {
TypeImpl t = (TypeImpl) it.next();
@@ -1046,35 +1128,55 @@
return new XmlElementName(nsUri, shortName, prefix + ':' + shortName);
}
- }
-
- /**
- * Inner class used to encapsulate the different pieces of information that make up the name of an
- * XML element - namely, the Namespace URI, the local name, and the qname (qualified name).
- */
- static class XmlElementName {
- XmlElementName(String nsUri, String localName, String qName) {
- this.nsUri = nsUri;
- this.localName = localName;
- this.qName = qName;
- }
-
- String nsUri;
-
- String localName;
-
- String qName;
- }
-
- static class XmlElementNameAndContents {
- XmlElementNameAndContents(XmlElementName name, String contents) {
- this.name = name;
- this.contents = contents;
- }
-
- XmlElementName name;
-
- String contents;
+
+ /**
+ * Serializes all of the out-of-typesystem elements that were recorded
+ * in the XmiSerializationSharedData during the last deserialization.
+ */
+ private void serializeOutOfTypeSystemElements() throws SAXException {
+ if (this.sharedData == null)
+ return;
+ Iterator it = this.sharedData.getOutOfTypeSystemElements().iterator();
+ while (it.hasNext()) {
+ OotsElementData oed = (OotsElementData)it.next();
+ workAttrs.clear();
+ // Add ID attribute
+ addAttribute(workAttrs, ID_ATTR_NAME, oed.xmiId);
+
+ // Add other attributes
+ Iterator attrIt = oed.attributes.iterator();
+ while (attrIt.hasNext()) {
+ XmlAttribute attr = (XmlAttribute) attrIt.next();
+ addAttribute(workAttrs, attr.name, attr.value);
+ }
+
+ // serialize element
+ startElement(oed.elementName, workAttrs, oed.childElements.size());
+
+ //serialize features encoded as child elements
+ Iterator childElemIt = oed.childElements.iterator();
+ while (childElemIt.hasNext()) {
+ XmlElementNameAndContents child = (XmlElementNameAndContents)childElemIt.next();
+ workAttrs.clear();
+ Iterator attrIter = child.attributes.iterator();
+ while (attrIter.hasNext()) {
+ XmlAttribute attr =(XmlAttribute)attrIter.next();
+ addAttribute(workAttrs, attr.name, attr.value);
+ }
+
+ if (child.contents != null) {
+ startElement(child.name, workAttrs, 1);
+ addText(child.contents);
+ }
+ else {
+ startElement(child.name, workAttrs, 0);
+ }
+ endElement(child.name);
+ }
+
+ endElement(oed.elementName);
+ }
+ }
}
public static final String XMI_NS_URI = "http://www.omg.org/XMI";
@@ -1139,6 +1241,7 @@
* the <code>serialize</code> method that contains types and features that are not in
* this typesystem, the serialization will not contain instances of those types or values
* for those features. So this can be used to filter the results of serialization.
+ * A null value indicates that all types and features will be serialized.
*/
public XmiCasSerializer(TypeSystem ts) {
this(ts, (Map) null);
@@ -1258,9 +1361,7 @@
* if an I/O failure occurs
*/
public static void serialize(CAS aCAS, OutputStream aStream) throws SAXException, IOException {
- XmiCasSerializer xmiCasSerializer = new XmiCasSerializer(aCAS.getTypeSystem());
- XMLSerializer sax2xml = new XMLSerializer(aStream, false);
- xmiCasSerializer.serialize(aCAS, sax2xml.getContentHandler());
+ serialize(aCAS, null, aStream, false, null);
}
/**
@@ -1271,7 +1372,8 @@
* CAS to serialize.
* @param aTargetTypeSystem
* type system to which the produced XMI will conform. Any types or features not in the
- * target type system will not be serialized.
+ * target type system will not be serialized. A null value indicates that all types and features
+ * will be serialized.
* @param aStream
* output stream to which to write the XMI document
*
@@ -1282,8 +1384,36 @@
*/
public static void serialize(CAS aCAS, TypeSystem aTargetTypeSystem, OutputStream aStream)
throws SAXException, IOException {
- XmiCasSerializer xmiCasSerializer = new XmiCasSerializer(aTargetTypeSystem);
- XMLSerializer sax2xml = new XMLSerializer(aStream, false);
- xmiCasSerializer.serialize(aCAS, sax2xml.getContentHandler());
+ serialize(aCAS, aTargetTypeSystem, aStream, false, null);
}
+
+ /**
+ * Serializes a CAS to an XMI stream. This version of this method allows many options to be configured.
+ *
+ * @param aCAS
+ * CAS to serialize.
+ * @param aTargetTypeSystem
+ * type system to which the produced XMI will conform. Any types or features not in the
+ * target type system will not be serialized. A null value indicates that all types and features
+ * will be serialized.
+ * @param aStream
+ * output stream to which to write the XMI document
+ * @param aPrettyPrint
+ * if true the XML output will be formatted with newlines and indenting. If false it will be unformatted.
+ * @param aSharedData
+ * an optional container for data that is shared between the {@link XmiCasSerializer} and the {@link XmiCasDeserializer}.
+ * See the JavaDocs for {@link XmiSerializationSharedData} for details.
+ *
+ * @throws SAXException
+ * if a problem occurs during XMI serialization
+ * @throws IOException
+ * if an I/O failure occurs
+ */
+ public static void serialize(CAS aCAS, TypeSystem aTargetTypeSystem, OutputStream aStream, boolean aPrettyPrint,
+ XmiSerializationSharedData aSharedData)
+ throws SAXException, IOException {
+ XmiCasSerializer xmiCasSerializer = new XmiCasSerializer(aTargetTypeSystem);
+ XMLSerializer sax2xml = new XMLSerializer(aStream, aPrettyPrint);
+ xmiCasSerializer.serialize(aCAS, sax2xml.getContentHandler(), null, aSharedData);
+ }
}
Modified: incubator/uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/cas/impl/XmiSerializationSharedData.java
URL: http://svn.apache.org/viewvc/incubator/uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/cas/impl/XmiSerializationSharedData.java?view=diff&rev=512404&r1=512403&r2=512404
==============================================================================
--- incubator/uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/cas/impl/XmiSerializationSharedData.java (original)
+++ incubator/uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/cas/impl/XmiSerializationSharedData.java Tue Feb 27 12:58:39 2007
@@ -19,26 +19,82 @@
package org.apache.uima.cas.impl;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
import java.util.Set;
+import org.apache.uima.internal.util.XmlAttribute;
+import org.apache.uima.internal.util.XmlElementName;
+import org.apache.uima.internal.util.XmlElementNameAndContents;
import org.apache.uima.internal.util.rb_trees.RedBlackTree;
/**
- * Holds information that is shared between the XmiCasSerializer and the XmiCasDeserializer. This
- * allows consistency of XMI IDs across serializations, and also provides the ability to filter out
- * some FSs during serialization (e.g. to send to a service) and then reintegrate those FSs during
- * the next deserialization.
+ * A container for data that is shared between the {@link XmiCasSerializer} and the {@link XmiCasDeserializer}.
+ * This has a number of uses:
+ * <ul>
+ * <li>Consistency of xmi:id values across serializations. If you pass an <code>XmiSerializationSharedData</code>
+ * instance to the deserializer, the deserializer will store information about the incoming xmi:id values. If you then pass
+ * the same <code>XmiSerializationSharedData</code> object to the serializer when you attempt to serialize the same CAS,
+ * all of the incoming FS will be serialized using the same xmi:id value that they had when they were deserialized.</li>
+ * <li>Support for "out-of-typesystem data". If you call the XMI deserializer with the <code>lenient</code> parameter set to true,
+ * whenever it encounters an XMI element that doesn't correspond to a type in the type system, it will populate the
+ * <code>XmiSerializationSharedData</code> with information about these elements. If you then pass the same
+ * <code>XmiSerializationSharedData</code> object to the serializer when you attempt to serialize the same CAS, these
+ * out-of-typesystem FS will be reserialized without loss of information. References between in-typesystem and out-of-typesystem
+ * FS (in either direction) are maintained as well.</li>
+ * <li>After calling the XmiCasSerializer and passing an <code>XmiSerializationSharedData</code>, you can call the
+ * {@link #getMaxXmiId()} method to get the maximum xmi:id value in the serialized CAS. This feature, along with the consistency of
+ * xmi:id values, allows merging multiple XMI documents into a single CAS. See TODO.</li>
+ * </ul>
*
*/
public class XmiSerializationSharedData {
/**
- * A map from FeatureStructure address to XMI ID. This is built during deserialization, then used
- * by the next serialization to ensure consistent IDs.
+ * A map from FeatureStructure address to xmi:id. This is populated whenever
+ * an XMI element is serialized or deserialized. It is used by the
+ * getXmiId() method, which is done to ensure a consistent ID for each FS
+ * address across multiple serializations.
*/
private RedBlackTree fsAddrToXmiIdMap = new RedBlackTree();
+
+ /**
+ * A map from xmi:id to FeatureStructure address. This is populated whenever
+ * an XMI element is serialized or deserialized. It is used by the
+ * getFsAddrForXmiId() method, necessary to support merging multiple XMI
+ * CASes into the same CAS object.
+ **/
+ private RedBlackTree xmiIdToFsAddrMap = new RedBlackTree();
+
+ /**
+ * List of OotsElementData objects, each of which captures information about
+ * incoming XMI elements that did not correspond to any type in the type system.
+ */
+ private List ootsFs = new ArrayList();
+
+ /**
+ * Map from the xmi:id (String) of a Sofa to a List of xmi:id's (Strings) for
+ * the out-of-typesystem FSs that are members of that Sofa's view.
+ */
+ private Map ootsViewMembers = new HashMap();
+ /** Map from Feature Structure address (Integer) to OotsElementData object, capturing information
+ * about out-of-typesystem features that were part of an in-typesystem FS. These include both
+ * features not defined in the typesystem and features that are references to out-of-typesystem
+ * elements. This information needs to be included when the FS is subsequently serialized.
+ */
+ private Map ootsFeatures = new HashMap();
+
+ /** Map from Feature Structure address (Integer) of an FSArray to a list of
+ * {@link XmiArrayElement} objects, each of which holds an index and an xmi:id
+ * for an out-of-typesystem array element.
+ */
+ private Map ootsArrayElements = new HashMap();
+
/**
* The maximum XMI ID used in the serialization. Used to generate unique IDs if needed.
*/
@@ -46,6 +102,7 @@
void addIdMapping(int fsAddr, int xmiId) {
fsAddrToXmiIdMap.put(fsAddr, Integer.toString(xmiId));
+ xmiIdToFsAddrMap.put(xmiId, new Integer(fsAddr));
if (xmiId > maxXmiId)
maxXmiId = xmiId;
}
@@ -60,14 +117,178 @@
// to be sure we get a unique Id, increment maxXmiId and use that
String idStr = Integer.toString(++maxXmiId);
fsAddrToXmiIdMap.put(fsAddr, idStr);
+ xmiIdToFsAddrMap.put(maxXmiId, new Integer(fsAddr));
return idStr;
}
}
+
+ /**
+ * Gets the maximum xmi:id that has been generated or read so far.
+ * @return the maximum xmi:id
+ */
+ public int getMaxXmiId() {
+ return maxXmiId;
+ }
+
+ /**
+ * Gets the FS address that corresponds to the given xmi:id, in the most
+ * recent serialization or deserialization.
+ *
+ * @param xmiId an xmi:id from the most recent XMI CAS that was serialized
+ * or deserialized.
+ * @return the FS address of the FeatureStructure corresponding to that
+ * xmi:id, -1 if none.
+ */
+ public int getFsAddrForXmiId(int xmiId) {
+ Integer addr = (Integer)xmiIdToFsAddrMap.get(xmiId);
+ return addr == null ? -1 : addr.intValue();
+ }
+
+ /**
+ * Clears the ID mapping information that was populated in
+ * previous serializations or deserializations.
+ * TODO: maybe a more general reset that resets other things?
+ */
public void clearIdMap() {
fsAddrToXmiIdMap.clear();
+ xmiIdToFsAddrMap.clear();
+ maxXmiId = 0;
+ }
+
+ /**
+ * Records information about an XMI element that was not an instance of any type in the type system.
+ * @param elemData information about the out-of-typesystem XMI element
+ */
+ public void addOutOfTypeSystemElement(OotsElementData elemData) {
+ this.ootsFs.add(elemData);
+ }
+
+ /**
+ * Gets a List of {@link OotsElementData} objects, each of which describes an
+ * incoming XMI element that did not correspond to a Type in the TypeSystem.
+ * @return List of {@link OotsElementData} objects
+ */
+ public List getOutOfTypeSystemElements() {
+ return Collections.unmodifiableList(this.ootsFs);
+ }
+
+ /**
+ * Records that an out-of-typesystem XMI element should be a member of the
+ * specified view.
+ * @param sofaXmiId xmi:id of a Sofa
+ * @param memberXmiId xmi:id of an out-of-typesystem element that should be
+ * a member of the view for the given Sofa
+ */
+ public void addOutOfTypeSystemViewMember(String sofaXmiId, String memberXmiId) {
+ List membersList = (List)this.ootsViewMembers.get(sofaXmiId);
+ if (membersList == null) {
+ membersList = new ArrayList();
+ this.ootsViewMembers.put(sofaXmiId, membersList);
+ }
+ membersList.add(memberXmiId);
+ }
+
+ /**
+ * Gets a List of xmi:id's (Strings) of all out-of-typesystem XMI elements
+ * that are members of the view with the given id.
+ * @param sofaXmiId xmi:id of a Sofa
+ * @return List of xmi:id's of members of the view for the given Sofa.
+ */
+ public List getOutOfTypeSystemViewMembers(String sofaXmiId) {
+ List members = (List)this.ootsViewMembers.get(sofaXmiId);
+ return members == null ? null : Collections.unmodifiableList(members);
+ }
+
+ /**
+ * Records an out-of-typesystem attribute that belongs to an in-typesystem FS.
+ * This will be added to the attributes when that FS is reserialized.
+ * @param addr CAS address of the FS
+ * @param featName name of the feature
+ * @param featVal value of the feature, as a string
+ */
+ public void addOutOfTypeSystemAttribute(int addr, String featName, String featVal) {
+ Integer key = new Integer(addr);
+ OotsElementData oed = (OotsElementData)this.ootsFeatures.get(key);
+ if (oed == null) {
+ oed = new OotsElementData();
+ this.ootsFeatures.put(key, oed);
+ }
+ oed.attributes.add(new XmlAttribute(featName, featVal));
+ }
+
+ /**
+ * Records out-of-typesystem child elements that belong to an in-typesystem FS.
+ * These will be added to the child elements when that FS is reserialized.
+ * @param addr CAS address of the FS
+ * @param featName name of the feature (element tag name)
+ * @param featVals values of the feature, as a List of strings
+ */
+ public void addOutOfTypeSystemChildElements(int addr, String featName, List featVals) {
+ Integer key = new Integer(addr);
+ OotsElementData oed = (OotsElementData)this.ootsFeatures.get(key);
+ if (oed == null) {
+ oed = new OotsElementData();
+ this.ootsFeatures.put(key, oed);
+ }
+ Iterator iter = featVals.iterator();
+ XmlElementName elemName = new XmlElementName(null,featName,featName);
+ while (iter.hasNext()) {
+ oed.childElements.add(new XmlElementNameAndContents(elemName, (String)iter.next()));
+ }
+ }
+
+ /**
+ * Gets information about out-of-typesystem features that belong to an
+ * in-typesystem FS.
+ * @param addr CAS address of the FS
+ * @return object containing information about out-of-typesystem features
+ * (both attributes and child elements)
+ */
+ public OotsElementData getOutOfTypeSystemFeatures(int addr) {
+ Integer key = new Integer(addr);
+ return (OotsElementData)this.ootsFeatures.get(key);
+ }
+
+ /**
+ * Get all FS Addresses that have been added to the id map.
+ * @return an array containing all the FS addresses
+ */
+ public int[] getAllFsAddressesInIdMap() {
+ return fsAddrToXmiIdMap.keySet();
+ }
+
+ /**
+ * Gets information about out-of-typesystem array elements.
+ * @param addr the CAS address of an FSArray
+ * @return a List of {@link XmiArrayElement} objects, each of which
+ * holds the index and xmi:id of an array element that is a
+ * reference to an out-of-typesystem FS.
+ */
+ public List getOutOfTypeSystemArrayElements(int addr) {
+ return (List)this.ootsArrayElements.get(new Integer(addr));
}
+
+ /**
+ * Records an out-of-typesystem array element in the XmiSerializationSharedData.
+ * @param addr CAS address of FSArray
+ * @param index index into array
+ * @param xmiId xmi:id of the out-of-typesystem element that is the value at the given index
+ */
+ public void addOutOfTypeSystemArrayElement(int addr, int index, int xmiId) {
+ Integer key = new Integer(addr);
+ List list = (List)this.ootsArrayElements.get(key);
+ if (list == null) {
+ list = new ArrayList();
+ this.ootsArrayElements.put(key, list);
+ }
+ list.add(new XmiArrayElement(index, Integer.toString(xmiId)));
+ }
+
+ /**
+ * For debugging purposes only.
+ */
void checkForDups() {
Set ids = new HashSet();
Iterator iter = fsAddrToXmiIdMap.iterator();
@@ -79,6 +300,9 @@
}
}
+ /**
+ * For debugging purposes only.
+ */
public String toString() {
StringBuffer buf = new StringBuffer();
int[] keys = fsAddrToXmiIdMap.keySet();
@@ -86,5 +310,47 @@
buf.append(keys[i]).append(": ").append(fsAddrToXmiIdMap.get(keys[i])).append('\n');
}
return buf.toString();
+ }
+
+ /**
+ * Data structure holding all information about an XMI element
+ * containing an out-of-typesystem FS.
+ */
+ static class OotsElementData {
+ /**
+ * xmi:id of the element
+ */
+ String xmiId;
+
+ /**
+ * Name of the element, including XML namespace.
+ */
+ XmlElementName elementName;
+
+ /**
+ * List of XmlAttribute objects each holding name and value of an attribute.
+ */
+ List attributes = new ArrayList();
+
+ /**
+ * List of XmlElementNameAndContents objects each describing one of the
+ * child elements representing features of this out-of-typesystem element.
+ */
+ List childElements = new ArrayList();
+ }
+
+ /**
+ * Data structure holding the index and the xmi:id of an array or list element that
+ * is a reference to an out-of-typesystem FS.
+ */
+ static class XmiArrayElement {
+ int index;
+
+ String xmiId;
+
+ XmiArrayElement(int index, String xmiId) {
+ this.index = index;
+ this.xmiId = xmiId;
+ }
}
}
Added: incubator/uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/internal/util/XmlAttribute.java
URL: http://svn.apache.org/viewvc/incubator/uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/internal/util/XmlAttribute.java?view=auto&rev=512404
==============================================================================
--- incubator/uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/internal/util/XmlAttribute.java (added)
+++ incubator/uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/internal/util/XmlAttribute.java Tue Feb 27 12:58:39 2007
@@ -0,0 +1,32 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.uima.internal.util;
+
+/**
+ * Data structure representing an XML attribute.
+ */
+public class XmlAttribute {
+ public String name;
+ public String value;
+
+ public XmlAttribute(String name, String value) {
+ this.name = name;
+ this.value = value;
+ }
+}
Added: incubator/uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/internal/util/XmlElementName.java
URL: http://svn.apache.org/viewvc/incubator/uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/internal/util/XmlElementName.java?view=auto&rev=512404
==============================================================================
--- incubator/uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/internal/util/XmlElementName.java (added)
+++ incubator/uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/internal/util/XmlElementName.java Tue Feb 27 12:58:39 2007
@@ -0,0 +1,20 @@
+package org.apache.uima.internal.util;
+
+/**
+ * Data structure used to encapsulate the different pieces of information that
+ * make up the name of an XML element - namely, the Namespace URI, the local
+ * name, and the qname (qualified name).
+ */
+public class XmlElementName {
+ public XmlElementName(String nsUri, String localName, String qName) {
+ this.nsUri = nsUri;
+ this.localName = localName;
+ this.qName = qName;
+ }
+
+ public String nsUri;
+
+ public String localName;
+
+ public String qName;
+}
\ No newline at end of file
Added: incubator/uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/internal/util/XmlElementNameAndContents.java
URL: http://svn.apache.org/viewvc/incubator/uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/internal/util/XmlElementNameAndContents.java?view=auto&rev=512404
==============================================================================
--- incubator/uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/internal/util/XmlElementNameAndContents.java (added)
+++ incubator/uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/internal/util/XmlElementNameAndContents.java Tue Feb 27 12:58:39 2007
@@ -0,0 +1,47 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.uima.internal.util;
+
+import java.util.Collections;
+import java.util.List;
+
+/**
+ * Data structure used to encapsulate an XML element name (including Namespace URI,
+ * local name, and the qname) as well as its attributes and character content.
+ */
+public class XmlElementNameAndContents {
+ public XmlElementNameAndContents(XmlElementName name, String contents) {
+ this(name, contents, Collections.EMPTY_LIST);
+ }
+
+ public XmlElementNameAndContents(XmlElementName name, String contents, List attributes) {
+ this.name = name;
+ this.contents = contents;
+ this.attributes = attributes;
+ }
+
+ public XmlElementName name;
+
+ /**
+ * List of XmlAttribute objects each holding name and value of an attribute.
+ */
+ public List attributes;
+
+ public String contents;
+}
\ No newline at end of file