You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@uima.apache.org by sc...@apache.org on 2012/01/20 20:33:08 UTC
svn commit: r1234089 - in
/uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima:
resource/metadata/impl/MetaDataObject_impl.java util/XMLParser.java
util/XMLSerializer.java util/impl/SaxDeserializer_impl.java
util/impl/XMLParser_impl.java
Author: schor
Date: Fri Jan 20 19:33:07 2012
New Revision: 1234089
URL: http://svn.apache.org/viewvc?rev=1234089&view=rev
Log:
[UIMA-239] support parsing UIMA descriptors in a mode that preserves comments and formatting whitespace.
Modified:
uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/resource/metadata/impl/MetaDataObject_impl.java
uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/util/XMLParser.java
uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/util/XMLSerializer.java
uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/util/impl/SaxDeserializer_impl.java
uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/util/impl/XMLParser_impl.java
Modified: uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/resource/metadata/impl/MetaDataObject_impl.java
URL: http://svn.apache.org/viewvc/uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/resource/metadata/impl/MetaDataObject_impl.java?rev=1234089&r1=1234088&r2=1234089&view=diff
==============================================================================
--- uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/resource/metadata/impl/MetaDataObject_impl.java (original)
+++ uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/resource/metadata/impl/MetaDataObject_impl.java Fri Jan 20 19:33:07 2012
@@ -49,20 +49,25 @@ import org.apache.uima.util.InvalidXMLEx
import org.apache.uima.util.NameClassPair;
import org.apache.uima.util.XMLParser;
import org.apache.uima.util.XMLSerializer;
+import org.apache.uima.util.XMLSerializer.CharacterValidatingContentHandler;
import org.apache.uima.util.XMLizable;
+import org.w3c.dom.CharacterData;
import org.w3c.dom.Comment;
+import org.w3c.dom.DOMException;
+import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
+import org.w3c.dom.Text;
import org.xml.sax.Attributes;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;
+import org.xml.sax.ext.LexicalHandler;
import org.xml.sax.helpers.AttributesImpl;
/**
* Abstract base class for all MetaDataObjects in the reference implementation. Provides basic
- * support for getting and setting property values given their names, by storing all attribute
- * values in a HashMap keyed on attribute name.
+ * support for getting and setting property values given their names, using bean introspection and reflection.
* <p>
* Also provides the ability to write objects to XML and build objects from their DOM
* representation, as required to implement the {@link XMLizable} interface, which is a
@@ -92,6 +97,13 @@ public abstract class MetaDataObject_imp
private transient URL mSourceUrl;
+ // This is only used if we are capturing comments and ignorable whitespace in the XML
+ private transient Node infoset = null; // by default, set to null
+
+ /**
+ * Sets the DOM node associated with this object. The node is consulted when writing XML
+ * so that comments and whitespace found around it can be output; when it is null no
+ * comment or whitespace output is attempted.
+ *
+ * @param infoset the DOM node to associate with this object, or null
+ */
+ public void setInfoset(Node infoset) {
+ this.infoset = infoset;
+ }
+
/**
* Creates a new <code>MetaDataObject_impl</code> with null attribute values
*/
@@ -549,11 +561,7 @@ public abstract class MetaDataObject_imp
* a Writer to which the XML string will be written
*/
public void toXML(Writer aWriter) throws SAXException, IOException {
- XMLSerializer sax2xml = new XMLSerializer(aWriter);
- ContentHandler contentHandler = sax2xml.getContentHandler();
- contentHandler.startDocument();
- toXML(sax2xml.getContentHandler(), true);
- contentHandler.endDocument();
+ toXML(new XMLSerializer(aWriter));
}
/**
@@ -563,11 +571,14 @@ public abstract class MetaDataObject_imp
* an OutputStream to which the XML string will be written
*/
public void toXML(OutputStream aOutputStream) throws SAXException, IOException {
- XMLSerializer sax2xml = new XMLSerializer(aOutputStream);
+ toXML(new XMLSerializer(aOutputStream));
+ }
+
+ private void toXML(XMLSerializer sax2xml) throws SAXException, IOException {
ContentHandler contentHandler = sax2xml.getContentHandler();
contentHandler.startDocument();
toXML(sax2xml.getContentHandler(), true);
- contentHandler.endDocument();
+ contentHandler.endDocument();
}
/**
@@ -583,7 +594,7 @@ public abstract class MetaDataObject_imp
public void toXML(ContentHandler aContentHandler, boolean aWriteDefaultNamespaceAttribute)
throws SAXException {
XmlizationInfo inf = getXmlizationInfo();
-
+
// write the element's start tag
// get attributes (can be provided by subclasses)
AttributesImpl attrs = getXMLAttributes();
@@ -595,16 +606,22 @@ public abstract class MetaDataObject_imp
}
// start element
- aContentHandler.startElement(inf.namespace, inf.elementTagName, inf.elementTagName, attrs);
-
- // write child elements
- for (int i = 0; i < inf.propertyInfo.length; i++) {
- PropertyXmlInfo propInf = inf.propertyInfo[i];
- writePropertyAsElement(propInf, inf.namespace, aContentHandler);
+ outputStartElement(aContentHandler, infoset, inf.namespace, inf.elementTagName, inf.elementTagName, attrs);
+ // write child elements
+
+ CharacterValidatingContentHandler cc = (CharacterValidatingContentHandler) aContentHandler;
+ cc.lastOutputNodeAddLevel();
+ try {
+ for (int i = 0; i < inf.propertyInfo.length; i++) {
+ PropertyXmlInfo propInf = inf.propertyInfo[i];
+ writePropertyAsElement(propInf, inf.namespace, aContentHandler);
+ }
+ } finally {
+ cc.lastOutputNodeClearLevel();
}
-
+
// end element
- aContentHandler.endElement(inf.namespace, inf.elementTagName, inf.elementTagName);
+ outputEndElement(aContentHandler, infoset, inf.namespace, inf.elementTagName, inf.elementTagName);
}
/**
@@ -666,10 +683,9 @@ public abstract class MetaDataObject_imp
return;
// if XML element name was supplied, write a tag
- if (aPropInfo.xmlElementName != null) {
- aContentHandler.startElement(aNamespace, aPropInfo.xmlElementName, aPropInfo.xmlElementName,
+ Node elementNode = findMatchingSubElement(aContentHandler, aPropInfo.xmlElementName);
+ outputStartElement(aContentHandler, elementNode, aNamespace, aPropInfo.xmlElementName, aPropInfo.xmlElementName,
EMPTY_ATTRIBUTES);
- }
// get class of property
Class propClass = getAttributeClass(aPropInfo.propertyName);
@@ -701,9 +717,7 @@ public abstract class MetaDataObject_imp
}
// if XML element name was supplied, end the element that we started
- if (aPropInfo.xmlElementName != null) {
- aContentHandler.endElement(aNamespace, aPropInfo.xmlElementName, aPropInfo.xmlElementName);
- }
+ outputEndElement(aContentHandler, elementNode, aNamespace, aPropInfo.xmlElementName, aPropInfo.xmlElementName);
}
/**
@@ -729,47 +743,51 @@ public abstract class MetaDataObject_imp
throws SAXException {
// if aPropClass is generic Object, reader won't know whether to expect
// an array, so we tell it be writing an "array" element here.
+ Node arraySubElement = findMatchingSubElement(aContentHandler, "array");
if (aPropClass == Object.class) {
- aContentHandler.startElement(aNamespace, "array", "array", EMPTY_ATTRIBUTES);
+ outputStartElement(aContentHandler, arraySubElement, aNamespace, "array", "array", EMPTY_ATTRIBUTES);
}
// iterate through elements of the array (at this point we don't allow
// nested arrays here
int len = ((Object[]) aValue).length;
- for (int i = 0; i < len; i++) {
- Object curElem = Array.get(aValue, i);
-
- // if a particular array element tag has been specified, write it
- if (aArrayElementTagName != null) {
- aContentHandler.startElement(aNamespace, aArrayElementTagName, aArrayElementTagName,
- EMPTY_ATTRIBUTES);
- }
-
- // if attribute's value is an XMLizable object, call its toXML method
- if (curElem instanceof XMLizable) {
- ((XMLizable) curElem).toXML(aContentHandler);
- }
- // else, attempt to write it as a primitive
- else {
- if (aArrayElementTagName == null) {
- // need to include the type, e.g. <string>
- XMLUtils.writePrimitiveValue(curElem, aContentHandler);
- } else {
- // don't include the type - just write the value
- String valStr = curElem.toString();
- aContentHandler.characters(valStr.toCharArray(), 0, valStr.length());
+ CharacterValidatingContentHandler cc = (CharacterValidatingContentHandler) aContentHandler;
+ cc.lastOutputNodeAddLevel();
+ try {
+ for (int i = 0; i < len; i++) {
+ Object curElem = Array.get(aValue, i);
+ Node matchingArrayElement = findMatchingSubElement(aContentHandler, aArrayElementTagName);
+
+ // if a particular array element tag has been specified, write it
+ outputStartElement(aContentHandler, matchingArrayElement, aNamespace, aArrayElementTagName, aArrayElementTagName,
+ EMPTY_ATTRIBUTES);
+
+ // if attribute's value is an XMLizable object, call its toXML method
+ if (curElem instanceof XMLizable) {
+ ((XMLizable) curElem).toXML(aContentHandler);
+ }
+ // else, attempt to write it as a primitive
+ else {
+ if (aArrayElementTagName == null) {
+ // need to include the type, e.g. <string>
+ XMLUtils.writePrimitiveValue(curElem, aContentHandler);
+ } else {
+ // don't include the type - just write the value
+ String valStr = curElem.toString();
+ aContentHandler.characters(valStr.toCharArray(), 0, valStr.length());
+ }
}
+
+ // if we started an element, end it
+ outputEndElement(aContentHandler, matchingArrayElement, aNamespace, aArrayElementTagName, aArrayElementTagName);
}
-
- // if we started an element, end it
- if (aArrayElementTagName != null) {
- aContentHandler.endElement(aNamespace, aArrayElementTagName, aArrayElementTagName);
- }
+ } finally {
+ cc.lastOutputNodeClearLevel();
}
// if we started an "Array" element, end it
if (aPropClass == Object.class) {
- aContentHandler.endElement(aNamespace, "array", "array");
+ outputEndElement(aContentHandler, arraySubElement, aNamespace, "array", "array");
}
}
@@ -797,55 +815,54 @@ public abstract class MetaDataObject_imp
String aKeyXmlAttribute, String aValueTagName, boolean aOmitIfNull, String aNamespace,
ContentHandler aContentHandler) throws SAXException {
// get map
- Map theMap = (Map) getAttributeValue(aPropName);
-
+ @SuppressWarnings("unchecked")
+ Map<String, Object> theMap = (Map<String, Object>) getAttributeValue(aPropName);
+ Node matchingNode = findMatchingSubElement(aContentHandler, aXmlElementName);
+
// if map is empty handle appropriately
if (theMap == null || theMap.isEmpty()) {
if (!aOmitIfNull && aXmlElementName != null) {
- aContentHandler
- .startElement(aNamespace, aXmlElementName, aXmlElementName, EMPTY_ATTRIBUTES);
- aContentHandler.endElement(aNamespace, aXmlElementName, aXmlElementName);
+ outputStartElement(aContentHandler, matchingNode, aNamespace, aXmlElementName, aXmlElementName, EMPTY_ATTRIBUTES);
+ outputEndElement(aContentHandler, matchingNode, aNamespace, aXmlElementName, aXmlElementName);
}
} else {
// write start tag for attribute if desired
- if (aXmlElementName != null) {
- aContentHandler
- .startElement(aNamespace, aXmlElementName, aXmlElementName, EMPTY_ATTRIBUTES);
- }
-
- // iterate over entries in the Map
- Set entries = theMap.entrySet();
- Iterator i = entries.iterator();
- while (i.hasNext()) {
- Map.Entry curEntry = (Map.Entry) i.next();
- String key = (String) curEntry.getKey();
+ outputStartElement(aContentHandler, matchingNode, aNamespace, aXmlElementName, aXmlElementName, EMPTY_ATTRIBUTES);
- // write a tag for the value, with a "key" attribute
- AttributesImpl attrs = new AttributesImpl();
- attrs.addAttribute("", aKeyXmlAttribute, aKeyXmlAttribute, null, key); // are these nulls
- // OK?
- aContentHandler.startElement(aNamespace, aValueTagName, aValueTagName, attrs);
-
- // write the value (must be XMLizable or an array of XMLizable)
- Object val = curEntry.getValue();
- if (val.getClass().isArray()) {
- Object[] arr = (Object[]) val;
- for (int j = 0; j < arr.length; j++) {
- XMLizable elem = (XMLizable) arr[j];
- elem.toXML(aContentHandler);
+ CharacterValidatingContentHandler cc = (CharacterValidatingContentHandler) aContentHandler;
+ cc.lastOutputNodeAddLevel();
+ try {
+ // iterate over entries in the Map
+ for (Map.Entry<String, Object> curEntry : theMap.entrySet()) {
+ String key = curEntry.getKey();
+
+ // write a tag for the value, with a "key" attribute
+ AttributesImpl attrs = new AttributesImpl();
+ attrs.addAttribute("", aKeyXmlAttribute, aKeyXmlAttribute, null, key); // are these nulls OK?
+ Node innerMatchingNode = findMatchingSubElement(aContentHandler, aValueTagName);
+ outputStartElement(aContentHandler, innerMatchingNode, aNamespace, aValueTagName, aValueTagName, attrs);
+
+ // write the value (must be XMLizable or an array of XMLizable)
+ Object val = curEntry.getValue();
+ if (val.getClass().isArray()) {
+ Object[] arr = (Object[]) val;
+ for (int j = 0; j < arr.length; j++) {
+ XMLizable elem = (XMLizable) arr[j];
+ elem.toXML(aContentHandler);
+ }
+ } else {
+ ((XMLizable) val).toXML(aContentHandler);
}
- } else {
- ((XMLizable) val).toXML(aContentHandler);
+
+ // write end tag for the value
+ outputEndElement(aContentHandler, innerMatchingNode, aNamespace, aValueTagName, aValueTagName);
}
-
- // write end tag for the value
- aContentHandler.endElement(aNamespace, aValueTagName, aValueTagName);
+ } finally {
+ cc.lastOutputNodeClearLevel();
}
// if we wrote start tag for attribute, now write end tag
- if (aXmlElementName != null) {
- aContentHandler.endElement(aNamespace, aXmlElementName, aXmlElementName);
- }
+ outputEndElement(aContentHandler, matchingNode, aNamespace, aXmlElementName, aXmlElementName);
}
}
@@ -889,6 +906,10 @@ public abstract class MetaDataObject_imp
throw new InvalidXMLException(InvalidXMLException.INVALID_ELEMENT_TYPE, new Object[] {
getXmlizationInfo().elementTagName, aElement.getTagName() });
+ if (aOptions.preserveComments) {
+ infoset = aElement;
+ }
+
// get child elements, each of which represents a property
List<String> foundProperties = new ArrayList<String>();
NodeList childNodes = aElement.getChildNodes();
@@ -921,9 +942,6 @@ public abstract class MetaDataObject_imp
readUnknownPropertyValueFromXMLElement(curElem, aParser, aOptions, foundProperties);
}
}
- } else if (curNode instanceof Comment) {
- Comment curElem = (Comment) curNode;
- String comment = curElem.getData();
}
}
}
@@ -1285,4 +1303,287 @@ public abstract class MetaDataObject_imp
return pd;
}
+
+ /**
+ * Heuristics for comment and whitespace processing
+ *
+ * Example:
+ * <!-- at top -->
+ * <a> <!-- same line -->
+ * <b/>
+ * <d> <!-- cmt --> <e/> </d>
+ * <c/> <!-- same line -->
+ * <!-- unusual case, following final one at a level -->
+ * </a> <!-- same line -->
+ * <!-- at bottom -->
+ *
+ * Each element has 2 calls:
+ * startElement, endElement
+ * Surround these with:
+ * maybeOutputCommentsBefore
+ * maybeOutputCommentsAfter
+ *
+ * Detect top level (by fact that parent is null), and for top level:
+ * collect all above -> output before startelement
+ * collect all below -> output after endelement
+ *
+ * For normal element node, "start":
+ * --> output before element
+ * collect all prev siblings up to first newline (assume before that, the comment goes with the node having children)
+ * if no nl assume comments go with previous element, and skip here
+ * (stop looking if get null for getPreviousSibling())
+ * (stop looking if get other than comment or ignorable whitespace)
+ * (ignorable whitespace not always distinguishable from text that is whitespace?)
+ * --> output after element:
+ * if element children: eg: <start> <!-- cmt -->
+ * collect all up to and including first nl before first child
+ * (stop at first Element node; if no nl, then the source had multiple elements on one line:
+ * associate the comments and whitespace with the previous element, and output them)
+ *
+ * if no element children: - means it's written <xxx/> or <xxx></xxx> or <xxx> something </xxx>
+ * output nothing - after comments will be done following endElement call
+ *
+ * For normal element node, "end":
+ * --> output before element
+ * if element children:
+ * collect all after last child Element; skip all up to first nl (assume before that, the comment goes with last child node)
+ * if no nl (e.g. </lastChild> <!-- cmt --> </elementBeingEnded> )
+ * assume comments go with previous element, and skip here
+ * (stop looking if get null for getNextSibling())
+ * (stop looking if get Element)
+ *
+ * if no element children - output nothing
+ * --> output after element
+ * if this element has no successor sibling elements
+ * collect all up to the null
+ * else
+ * collect all up to and including first nl from getNextSibling().
+ * (stop at first Element)
+ *
+ * For implied element nodes (no Java model object corresponding)
+ * We have only the "parent" node, and the element name. Try to do matching on the element name
+ * In this case, we always are working with the children in the Dom infoset; we have a last-outputted reference
+ * Scan from last-outputted, to find element match, and then use that element as the "root".
+ *
+ */
+
+ /**
+ * CoIw = Comment or IgnorableWhitespace
+ *
+ */
+
+ private void maybeOutputCoIwBeforeStart(ContentHandler contentHandler, Node node) throws SAXException {
+ // Outputs the comments / whitespace that go in front of node's start tag.
+ // A null node means there is no DOM node to take comments from: output nothing.
+ if (null == node) {
+ return;
+ }
+ if (node.getParentNode() instanceof Document) {
+
+ // Special handling for top node:
+ // The SAX parser doesn't do callbacks for whitespace that come before the top node.
+ // It does do callbacks for comments, though.
+
+ // For this case, we do (one time) insert of "nl" as follows:
+ // 1 nl before top element
+ // 1 nl before each preceding comment
+ // Net effect of the code below: one newline, then each comment that precedes
+ // the top element, each followed by a newline.
+
+ outputNL(contentHandler);
+
+ for (Node c = node.getParentNode().getFirstChild(); c != node; c = c.getNextSibling()) {
+ if (c instanceof Comment) {
+ outputCoIw(contentHandler, c);
+ outputNL(contentHandler);
+ }
+ }
+ return;
+ }
+ // Not the top node: output the preceding siblings selected by getFirstPrevCoIw,
+ // stopping just before node itself (nothing is output if it returns node).
+ for (Node p = getFirstPrevCoIw(node); p != node; p = p.getNextSibling()) {
+ outputCoIw(contentHandler, p);
+ }
+ }
+
+ private void maybeOutputCoIwAfterStart(ContentHandler contentHandler, Node node) throws SAXException {
+   // Nothing to do without a DOM node, or for a node that has no element children
+   if (node == null) {
+     return;
+   }
+   if (!hasElementChildNode(node)) {
+     return;
+   }
+
+   // Output the leading comments / whitespace, up to and including the first one holding a newline
+   Node child = node.getFirstChild();
+   while (isCoIw(child)) {
+     outputCoIw(contentHandler, child);
+     if (hasNewline(child)) {
+       break;
+     }
+     child = child.getNextSibling();
+   }
+ }
+
+ private void maybeOutputCoIwBeforeEnd(ContentHandler contentHandler, Node node) throws SAXException {
+ // Outputs the comments / whitespace that go in front of node's end tag.
+ // Nothing is output without a DOM node, or for a node that has no element children.
+ if (null == node || (!hasElementChildNode(node))) {
+ return;
+ }
+ Node n = node.getLastChild();
+ Node np = null;
+ boolean newlineFound = false;
+ // Walk backwards from the last child until an Element is reached;
+ // np ends up at the earliest node visited, i.e. the first node after the last child Element.
+ for (Node p = n; p != null && !(p instanceof Element); p = p.getPreviousSibling()) {
+ if (hasNewline(p)) {
+ newlineFound = true;
+ }
+ np = p;
+ }
+ // No newline in that trailing run: output nothing here.
+ if (!newlineFound) {
+ return;
+ }
+ // Skip forward past the first node containing a newline, then output every remaining sibling.
+ for (Node o = skipUpToFirstAfterNL(np); o != null; o = o.getNextSibling()) {
+ outputCoIw(contentHandler, o);
+ }
+ }
+
+ private void maybeOutputCoIwAfterEnd(ContentHandler contentHandler, Node node) throws SAXException {
+   if (node == null) {
+     return;
+   }
+   // Output the following comments / whitespace, up to and including the first one holding a newline
+   Node following = node.getNextSibling();
+   while (isCoIw(following)) {
+     outputCoIw(contentHandler, following);
+     if (hasNewline(following)) {
+       return;
+     }
+     following = following.getNextSibling();
+   }
+ }
+
+ /**
+ * Scan the child nodes, starting just after the last output node (or at the first child if there is none), looking for a matching element.
+ * Side effect if found - set lastoutput node to the found one.
+ * @param contentHandler
+ * @param elementName
+ * @return null (if no match) or matching node
+ */
+ private Node findMatchingSubElement(ContentHandler contentHandler, String elementName) {
+   if (infoset == null || elementName == null) {
+     return null;
+   }
+   CharacterValidatingContentHandler handler = (CharacterValidatingContentHandler) contentHandler;
+
+   // Resume after the node matched previously at this level, or start at the first child
+   Node previous = handler.getLastOutputNode();
+   Node candidate;
+   if (previous == null) {
+     candidate = infoset.getFirstChild();
+   } else {
+     candidate = previous.getNextSibling();
+   }
+
+   while (candidate != null) {
+     boolean matches = (candidate instanceof Element)
+         && elementName.equals(((Element) candidate).getTagName());
+     if (matches) {
+       // remember the match so the next search continues after it
+       handler.setLastOutputNode(candidate);
+       return candidate;
+     }
+     candidate = candidate.getNextSibling();
+   }
+   return null;
+ }
+
+ /**
+ * Scan backwards from argument node, continuing until get something other than
+ * comment or ignorable whitespace.
+ * Return the first node after a nl
+ * If no nl found, return original node
+ *
+ * NOTE: never called with original == the top node
+ * @param original - guaranteed non-null
+ * @return the node following the first newline-containing node, or original if no newline was found
+ */
+ private Node getFirstPrevCoIw(Node original) {
+   Node earliest = original; // earliest comment / whitespace sibling seen so far
+   boolean sawNewline = false;
+   Node cursor = original.getPreviousSibling();
+   while (isCoIw(cursor)) {
+     sawNewline |= hasNewline(cursor);
+     earliest = cursor;
+     cursor = cursor.getPreviousSibling();
+   }
+   // Without a newline there is nothing to hand back except the original node
+   return sawNewline ? skipUpToFirstAfterNL(earliest) : original;
+ }
+
+ /**
+ * Skip nodes going forwards until find one with a nl, then return the one following
+ * @param n must not be null, and there must be a NL in the siblings
+ * @return node following the one with a new line
+ */
+ private Node skipUpToFirstAfterNL(Node n) {
+   Node current = n;
+   for (; !hasNewline(current); current = current.getNextSibling()) {
+     // advance until the node that contains the newline is reached
+   }
+   return current.getNextSibling();
+ }
+
+ /**
+ * Tests whether a node's character data contains a newline.
+ * Comments never count as containing a newline. Null, and nodes that are not
+ * CharacterData (for example a processing instruction met while scanning siblings),
+ * also yield false instead of failing on the cast.
+ *
+ * @param n the node to test, may be null
+ * @return true only for a non-comment CharacterData node whose data contains '\n'
+ */
+ private boolean hasNewline(Node n) {
+   if ((n instanceof Comment) || !(n instanceof CharacterData)) {
+     return false;
+   }
+   return ((CharacterData) n).getData().indexOf('\n') >= 0;
+ }
+
+ private boolean hasElementChildNode(Node n) {
+   // true as soon as any direct child turns out to be an Element
+   Node child = n.getFirstChild();
+   while (child != null) {
+     if (child instanceof Element) {
+       return true;
+     }
+     child = child.getNextSibling();
+   }
+   return false;
+ }
+
+ private void outputCoIw(ContentHandler contentHandler, Node p) throws DOMException, SAXException {
+   if (!(p instanceof Comment)) {
+     // anything that is not a comment is written as plain character content
+     char[] text = p.getTextContent().toCharArray();
+     contentHandler.characters(text, 0, text.length);
+     return;
+   }
+   // comments go through the handler's LexicalHandler side
+   Comment comment = (Comment) p;
+   LexicalHandler lexicalHandler = (LexicalHandler) contentHandler;
+   lexicalHandler.comment(comment.getData().toCharArray(), 0, comment.getLength());
+ }
+
+ private boolean isCoIw(Node n) {
+   // CoIw = a Comment node, or a Text node holding only whitespace
+   if (n == null) {
+     return false;
+   }
+   if (n instanceof Comment) {
+     return true;
+   }
+   return isWhitespaceText(n);
+ }
+
+ private boolean isWhitespaceText(Node n) {
+   if (n instanceof Text) {
+     // every character must be whitespace; an empty Text node therefore qualifies
+     for (char ch : ((Text) n).getData().toCharArray()) {
+       if (!Character.isWhitespace(ch)) {
+         return false;
+       }
+     }
+     return true;
+   }
+   return false;
+ }
+
+ private void outputStartElement(ContentHandler aContentHandler,
+     Node node,
+     String aNamespace,
+     String localname,
+     String qname,
+     Attributes attributes) throws SAXException {
+   // a null local name means no tag is to be written at all
+   if (localname != null) {
+     maybeOutputCoIwBeforeStart(aContentHandler, node);
+     aContentHandler.startElement(aNamespace, localname, qname, attributes);
+     maybeOutputCoIwAfterStart(aContentHandler, node);
+   }
+ }
+
+ private void outputEndElement(ContentHandler aContentHandler,
+     Node node,
+     String aNamespace,
+     String localname,
+     String qname) throws SAXException {
+   // a null local name means no tag was started, so there is none to end
+   if (localname != null) {
+     maybeOutputCoIwBeforeEnd(aContentHandler, node);
+     aContentHandler.endElement(aNamespace, localname, qname);
+     maybeOutputCoIwAfterEnd(aContentHandler, node);
+   }
+ }
+
+ /** Single newline character, shared by every call to outputNL. */
+ private static final char[] nlca = { '\n' };
+
+ /** Writes one newline to the handler as character content. */
+ private void outputNL(ContentHandler contentHandler) throws SAXException {
+   contentHandler.characters(nlca, 0, nlca.length);
+ }
+
}
Modified: uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/util/XMLParser.java
URL: http://svn.apache.org/viewvc/uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/util/XMLParser.java?rev=1234089&r1=1234088&r2=1234089&view=diff
==============================================================================
--- uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/util/XMLParser.java (original)
+++ uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/util/XMLParser.java Fri Jan 20 19:33:07 2012
@@ -815,13 +815,18 @@ public interface XMLParser {
* @deprecated XInclude is no longer supported
*/
@Deprecated
- public boolean expandXIncludes;
+ public boolean expandXIncludes;
/**
* Whether to expand <envVarRef>VARNAME</envVarRef> elements by substituting the
* value of the System proprery VARNAME.
*/
public boolean expandEnvVarRefs;
+
+ /**
+ * Whether to preserve comments and ignorable whitespace
+ */
+ public boolean preserveComments = false;
/**
* Creates a new ParsingOptions object.
@@ -834,7 +839,7 @@ public interface XMLParser {
* @deprecated XInclude is no longer supported
*/
@Deprecated
- public ParsingOptions(boolean aExpandXIncludes, boolean aExpandEnvVarRefs) {
+ public ParsingOptions(boolean aExpandXIncludes, boolean aExpandEnvVarRefs) {
expandXIncludes = aExpandXIncludes;
expandEnvVarRefs = aExpandEnvVarRefs;
}
Modified: uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/util/XMLSerializer.java
URL: http://svn.apache.org/viewvc/uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/util/XMLSerializer.java?rev=1234089&r1=1234088&r2=1234089&view=diff
==============================================================================
--- uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/util/XMLSerializer.java (original)
+++ uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/util/XMLSerializer.java Fri Jan 20 19:33:07 2012
@@ -21,6 +21,8 @@ package org.apache.uima.util;
import java.io.OutputStream;
import java.io.Writer;
+import java.util.ArrayList;
+import java.util.List;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Result;
@@ -41,6 +43,7 @@ import org.xml.sax.ContentHandler;
import org.xml.sax.Locator;
import org.xml.sax.SAXException;
import org.xml.sax.SAXParseException;
+import org.xml.sax.ext.LexicalHandler;
/**
* Utility class that generates XML output from SAX events or DOM nodes.
@@ -55,7 +58,7 @@ public class XMLSerializer {
private OutputStream mOutputStream;
private Writer mWriter;
-
+
public XMLSerializer() {
this(true);
}
@@ -78,6 +81,10 @@ public class XMLSerializer {
}
}
+ public void setIndent(boolean yes) {
+ mTransformer.setOutputProperty(OutputKeys.INDENT, yes ? "yes" : "no");
+ }
+
public XMLSerializer(OutputStream aOutputStream) {
this();
setOutputStream(aOutputStream);
@@ -156,10 +163,28 @@ public class XMLSerializer {
}
}
- static class CharacterValidatingContentHandler implements ContentHandler {
+ public static class CharacterValidatingContentHandler implements ContentHandler, LexicalHandler {
ContentHandler mHandler;
boolean mXml11;
+ private List<Node> mLastOutputNode = new ArrayList<Node>(); // the last output node for repeated subelement nodes
+
+ public void lastOutputNodeAddLevel() {
+ mLastOutputNode.add(null);
+ }
+
+ public void setLastOutputNode(Node n) {
+ mLastOutputNode.set(mLastOutputNode.size() -1, n);
+ }
+
+ public Node getLastOutputNode() {
+ return mLastOutputNode.get(mLastOutputNode.size() -1);
+ }
+
+ public void lastOutputNodeClearLevel() {
+ mLastOutputNode.remove(mLastOutputNode.size() -1);
+ }
+
CharacterValidatingContentHandler(boolean xml11, ContentHandler serializerHandler) {
mHandler = serializerHandler;
mXml11 = xml11;
@@ -264,6 +289,17 @@ public class XMLSerializer {
" character: " + ch[index]
+ ", 0x" + Integer.toHexString(ch[index]), null);
}
- }
+ }
+
+ public void comment(char[] ch, int start, int length) throws SAXException {
+ ((LexicalHandler)mHandler).comment(ch, start, length);
+ }
+
+ public void endCDATA() throws SAXException {}
+ public void endDTD() throws SAXException {}
+ public void endEntity(String arg0) throws SAXException {}
+ public void startCDATA() throws SAXException {}
+ public void startDTD(String arg0, String arg1, String arg2) throws SAXException {}
+ public void startEntity(String arg0) throws SAXException {}
}
}
Modified: uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/util/impl/SaxDeserializer_impl.java
URL: http://svn.apache.org/viewvc/uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/util/impl/SaxDeserializer_impl.java?rev=1234089&r1=1234088&r2=1234089&view=diff
==============================================================================
--- uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/util/impl/SaxDeserializer_impl.java (original)
+++ uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/util/impl/SaxDeserializer_impl.java Fri Jan 20 19:33:07 2012
@@ -112,7 +112,17 @@ public class SaxDeserializer_impl implem
public XMLizable getObject() throws InvalidXMLException {
// COMMENT NODEs may be present, and getDocumentElement would skip it...
Node rootDomNode = ((Document) mDOMResult.getNode()).getDocumentElement();
-
+// NodeList children = mDOMResult.getNode().getChildNodes();
+// for (int i = 0; i < children.getLength(); i ++) {
+// System.out.format("Child: %s", children.item(i).getNodeName());
+// if (children.item(i) instanceof Text) {
+// String s = children.item(i).getTextContent();
+// for (int j = 0; j < s.length(); j++) {
+// System.out.format(" %d", s.codePointAt(j));
+// }
+// }
+// System.out.print("\n");
+// }
// build the object
XMLizable result = mUimaXmlParser.buildObject((Element) rootDomNode, mOptions);
@@ -122,12 +132,12 @@ public class SaxDeserializer_impl implem
return result;
}
-
+
/**
* @see org.xml.sax.ContentHandler#characters(char[], int, int)
*/
public void characters(char[] ch, int start, int length) throws SAXException {
- // System.out.println("SaxDeserializer_impl::characters");
+// System.out.format("SaxDeserializer_impl::characters: %s%n", new String(ch, start, length));
mTransformerHandler.characters(ch, start, length);
}
@@ -160,7 +170,10 @@ public class SaxDeserializer_impl implem
* @see org.xml.sax.ContentHandler#ignorableWhitespace(char[], int, int)
*/
public void ignorableWhitespace(char[] ch, int start, int length) throws SAXException {
- mTransformerHandler.ignorableWhitespace(ch, start, length);
+// System.out.format("SaxDeserializer_impl::ignorableWS: %s%n", new String(ch, start, length));
+ if (mOptions.preserveComments) {
+ mTransformerHandler.ignorableWhitespace(ch, start, length);
+ }
}
/**
Modified: uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/util/impl/XMLParser_impl.java
URL: http://svn.apache.org/viewvc/uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/util/impl/XMLParser_impl.java?rev=1234089&r1=1234088&r2=1234089&view=diff
==============================================================================
--- uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/util/impl/XMLParser_impl.java (original)
+++ uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/util/impl/XMLParser_impl.java Fri Jan 20 19:33:07 2012
@@ -137,7 +137,7 @@ public class XMLParser_impl implements X
// Turn on namespace support
factory.setNamespaceAware(true);
- SAXParser parser = factory.newSAXParser(); // in the future, if performance issue, can save this , and reuse with reset()
+ SAXParser parser = factory.newSAXParser(); // unless multi-threaded, in the future, if performance issue, can save this , and reuse with reset()
XMLReader reader = parser.getXMLReader();
reader.setFeature("http://xml.org/sax/features/namespace-prefixes", true);
@@ -173,7 +173,9 @@ public class XMLParser_impl implements X
// Parse with SaxDeserializer
SaxDeserializer deser = new SaxDeserializer_impl(this, aOptions);
reader.setContentHandler(deser);
- reader.setProperty ("http://xml.org/sax/properties/lexical-handler", deser);
+ if (aOptions.preserveComments) {
+ reader.setProperty ("http://xml.org/sax/properties/lexical-handler", deser);
+ }
reader.parse(input);
// if there was an exception, throw it
@@ -287,12 +289,21 @@ public class XMLParser_impl implements X
UIMA_IllegalStateException.COULD_NOT_INSTANTIATE_XMLIZABLE, new Object[] { cls
.getName() }, e);
}
-
- // construct the XMLizable object from the XML element
- object.buildFromXMLElement(aElement, this, aOptions);
+
+ callBuildFromXMLElement(aElement, object, aOptions);
return object;
}
+
+ private void callBuildFromXMLElement(Element aElement, XMLizable object, ParsingOptions aOptions)
+     throws InvalidXMLException {
+   // When comments are being preserved, give MetaDataObject_impl instances their source element first
+   boolean captureInfoset = aOptions.preserveComments && (object instanceof MetaDataObject_impl);
+   if (captureInfoset) {
+     MetaDataObject_impl metaData = (MetaDataObject_impl) object;
+     metaData.setInfoset(aElement);
+   }
+
+   // construct the XMLizable object from the XML element
+   object.buildFromXMLElement(aElement, this, aOptions);
+ }
/*
* (non-Javadoc)
@@ -326,8 +337,7 @@ public class XMLParser_impl implements X
}
// construct the XMLizable object from the XML element
- object.buildFromXMLElement(aElement, this, aOptions);
-
+ callBuildFromXMLElement(aElement, object, aOptions);
return object;
}