You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@uima.apache.org by tw...@apache.org on 2007/05/28 16:45:26 UTC
svn commit: r542240 - in /incubator/uima/uimaj/trunk/uimaj-core/src:
main/java/org/apache/uima/cas/impl/XmiCasSerializer.java
main/java/org/apache/uima/internal/util/XMLUtils.java
test/java/org/apache/uima/cas/impl/XmiCasSerializerTest.java
Author: twgoetz
Date: Mon May 28 07:45:25 2007
New Revision: 542240
URL: http://svn.apache.org/viewvc?view=rev&rev=542240
Log:
Jira UIMA-387: modified XMI serializer to throw an exception when
trying to serialize invalid XML 1.0 characters. Test case added.
http://issues.apache.org/jira/browse/UIMA-387
Added:
incubator/uima/uimaj/trunk/uimaj-core/src/test/java/org/apache/uima/cas/impl/XmiCasSerializerTest.java
Modified:
incubator/uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/cas/impl/XmiCasSerializer.java
incubator/uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/internal/util/XMLUtils.java
Modified: incubator/uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/cas/impl/XmiCasSerializer.java
URL: http://svn.apache.org/viewvc/incubator/uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/cas/impl/XmiCasSerializer.java?view=diff&rev=542240&r1=542239&r2=542240
==============================================================================
--- incubator/uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/cas/impl/XmiCasSerializer.java (original)
+++ incubator/uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/cas/impl/XmiCasSerializer.java Mon May 28 07:45:25 2007
@@ -41,6 +41,7 @@
import org.apache.uima.cas.impl.XmiSerializationSharedData.XmiArrayElement;
import org.apache.uima.internal.util.IntStack;
import org.apache.uima.internal.util.IntVector;
+import org.apache.uima.internal.util.XMLUtils;
import org.apache.uima.internal.util.XmlAttribute;
import org.apache.uima.internal.util.XmlElementName;
import org.apache.uima.internal.util.XmlElementNameAndContents;
@@ -800,6 +801,7 @@
break;
}
}
+ checkXml10String(attrValue);
if (attrValue != null && featName != null) {
addAttribute(attrs, featName, attrValue);
}
@@ -822,10 +824,19 @@
return childElements;
}
+ private final void checkXml10String(String s) throws SAXParseException {
+ final int index = XMLUtils.checkForNonXmlCharacters(s);
+ if (index >= 0) {
+ throw new SAXParseException("Trying to serialize non-XML 1.0 character: " + s.charAt(index)
+ + ", 0x" + Integer.toHexString(s.charAt(index)), null);
+ }
+ }
+
private void addText(String text) throws SAXException {
+ checkXml10String(text);
ch.characters(text.toCharArray(), 0, text.length());
}
-
+
private void addAttribute(AttributesImpl attrs, String attrName, String attrValue) {
attrs.addAttribute(null, null, attrName, cdataType, attrValue);
}
Modified: incubator/uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/internal/util/XMLUtils.java
URL: http://svn.apache.org/viewvc/incubator/uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/internal/util/XMLUtils.java?view=diff&rev=542240&r1=542239&r2=542240
==============================================================================
--- incubator/uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/internal/util/XMLUtils.java (original)
+++ incubator/uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/internal/util/XMLUtils.java Mon May 28 07:45:25 2007
@@ -23,7 +23,6 @@
import java.io.Writer;
import java.lang.reflect.Constructor;
-import org.apache.uima.util.InvalidXMLException;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
@@ -76,26 +75,26 @@
aResultBuf.append("&#").append((int) c).append(';');
} else {
switch (c) {
- case '<':
- aResultBuf.append("&lt;");
- break;
- case '>':
- aResultBuf.append("&gt;");
- break;
- case '&':
- aResultBuf.append("&amp;");
- break;
- case '"':
- aResultBuf.append("&quot;");
- break;
- case '\n':
- aResultBuf.append(aNewlinesToSpaces ? " " : "\n");
- break;
- case '\r':
- aResultBuf.append(aNewlinesToSpaces ? " " : "\r");
- break;
- default:
- aResultBuf.append(c);
+ case '<':
+ aResultBuf.append("&lt;");
+ break;
+ case '>':
+ aResultBuf.append("&gt;");
+ break;
+ case '&':
+ aResultBuf.append("&amp;");
+ break;
+ case '"':
+ aResultBuf.append("&quot;");
+ break;
+ case '\n':
+ aResultBuf.append(aNewlinesToSpaces ? " " : "\n");
+ break;
+ case '\r':
+ aResultBuf.append(aNewlinesToSpaces ? " " : "\r");
+ break;
+ default:
+ aResultBuf.append(c);
}
}
}
@@ -119,7 +118,7 @@
* if an I/O failure occurs when writing to <code>aWriter</code>
*/
public static void writeNormalizedString(String aStr, Writer aWriter, boolean aNewlinesToSpaces)
- throws IOException {
+ throws IOException {
if (aStr == null)
return;
@@ -127,26 +126,26 @@
for (int i = 0; i < len; i++) {
char c = aStr.charAt(i);
switch (c) {
- case '<':
- aWriter.write("&lt;");
- break;
- case '>':
- aWriter.write("&gt;");
- break;
- case '&':
- aWriter.write("&amp;");
- break;
- case '"':
- aWriter.write("&quot;");
- break;
- case '\n':
- aWriter.write(aNewlinesToSpaces ? " " : "\n");
- break;
- case '\r':
- aWriter.write(aNewlinesToSpaces ? " " : "\r");
- break;
- default:
- aWriter.write(c);
+ case '<':
+ aWriter.write("&lt;");
+ break;
+ case '>':
+ aWriter.write("&gt;");
+ break;
+ case '&':
+ aWriter.write("&amp;");
+ break;
+ case '"':
+ aWriter.write("&quot;");
+ break;
+ case '\n':
+ aWriter.write(aNewlinesToSpaces ? " " : "\n");
+ break;
+ case '\r':
+ aWriter.write(aNewlinesToSpaces ? " " : "\r");
+ break;
+ default:
+ aWriter.write(c);
}
}
}
@@ -207,7 +206,7 @@
* if the ContentHandler throws an exception
*/
public static void writePrimitiveValue(Object aObj, ContentHandler aContentHandler)
- throws SAXException {
+ throws SAXException {
final Attributes EMPTY_ATTRIBUTES = new AttributesImpl();
String className = aObj.getClass().getName();
@@ -278,7 +277,7 @@
* @return the value that was read, <code>null</code> if a primitive value could not be
* constructed from the element
*/
- public static Object readPrimitiveValue(Element aElem) throws InvalidXMLException {
+ public static Object readPrimitiveValue(Element aElem) {
// the element's tag name is the lowercase name of the class, minus the
// package name
String tagName = aElem.getTagName();
@@ -374,4 +373,46 @@
return buf.toString().trim();
}
+
+ /**
+ * Check the input string for non-XML 1.0 characters. If non-XML characters are found, return the
+ * position of first offending character. Else, return <code>-1</code>.
+ *
+ * @param s
+ * Input string
+ * @return The position of the first invalid XML character encountered. <code>-1</code> if no
+ * invalid XML characters found.
+ */
+ public static final int checkForNonXmlCharacters(String s) {
+ if (s == null) {
+ return -1;
+ }
+ for (int i = 0; i < s.length(); i++) {
+ if (!isValidXml10Char(s.charAt(i))) {
+ return i;
+ }
+ }
+ return -1;
+ }
+
+ // Check if the character we're looking at is a valid XML 1.0 character. From the XML 1.0 spec:
+ //
+ // Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF] /* any Unicode
+ // character, excluding the surrogate blocks, FFFE, and FFFF. */
+ //
+ // And from the UTF-16 spec:
+ //
+ // Characters with values between 0x10000 and 0x10FFFF are
+ // represented by a 16-bit integer with a value between 0xD800 and
+ // 0xDBFF (within the so-called high-half zone or high surrogate
+ // area) followed by a 16-bit integer with a value between 0xDC00 and
+ // 0xDFFF (within the so-called low-half zone or low surrogate area).
+ //
+ // So it actually looks as if the surrogate case can be handled correctly by just looking at
+ // individual Java chars.
+ private static final boolean isValidXml10Char(char c) {
+ return ((c == 0x9) || (c == 0xA) || (c == 0xD) || ((c >= 0x20) && (c <= 0xD7FF)) ||
+ ((c >= 0xE000) && (c <= 0xFFFD)));
+ }
+
}
Added: incubator/uima/uimaj/trunk/uimaj-core/src/test/java/org/apache/uima/cas/impl/XmiCasSerializerTest.java
URL: http://svn.apache.org/viewvc/incubator/uima/uimaj/trunk/uimaj-core/src/test/java/org/apache/uima/cas/impl/XmiCasSerializerTest.java?view=auto&rev=542240
==============================================================================
--- incubator/uima/uimaj/trunk/uimaj-core/src/test/java/org/apache/uima/cas/impl/XmiCasSerializerTest.java (added)
+++ incubator/uima/uimaj/trunk/uimaj-core/src/test/java/org/apache/uima/cas/impl/XmiCasSerializerTest.java Mon May 28 07:45:25 2007
@@ -0,0 +1,116 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.uima.cas.impl;
+
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.OutputStream;
+
+import junit.framework.TestCase;
+
+import org.apache.uima.UIMAFramework;
+import org.apache.uima.cas.CAS;
+import org.apache.uima.resource.metadata.TypeSystemDescription;
+import org.apache.uima.test.junit_extension.JUnitExtension;
+import org.apache.uima.util.CasCreationUtils;
+import org.apache.uima.util.XMLInputSource;
+import org.apache.uima.util.XMLSerializer;
+import org.xml.sax.SAXParseException;
+
+/**
+ * Test case for XMI serialization, in particular, invalid XML 1.0 characters. Other aspects of XMI
+ * serialization are tested elsewhere.
+ */
+public class XmiCasSerializerTest extends TestCase {
+
+ private TypeSystemDescription typeSystemDesc = null;
+
+ private File outputFile = null;
+
+ /**
+ * @param arg0
+ */
+ public XmiCasSerializerTest(String arg0) {
+ super(arg0);
+ }
+
+ /*
+ * (non-Javadoc)
+ *
+ * @see junit.framework.TestCase#setUp()
+ */
+ protected void setUp() throws Exception {
+ File typeSystemFile = JUnitExtension.getFile("ExampleCas/testTypeSystem.xml");
+ // Temp output file, deleted on exit.
+ this.outputFile = new File(JUnitExtension.getFile("ExampleCas"),
+ "xmiSerializerInvalidCharsTestOutput.xmi");
+ System.out.println(this.outputFile.getAbsolutePath());
+ this.typeSystemDesc = UIMAFramework.getXMLParser().parseTypeSystemDescription(
+ new XMLInputSource(typeSystemFile));
+ }
+
+ public void testInvalidCharsInDocumentText() throws Exception {
+ CAS cas = CasCreationUtils.createCas(this.typeSystemDesc, null, null);
+ char badChar = 0x1A;
+ cas.setDocumentText("Text with bad char: " + badChar);
+ OutputStream out = new FileOutputStream(this.outputFile);
+ XMLSerializer xmlSerializer = new XMLSerializer(out);
+ XmiCasSerializer xmiCasSerializer = new XmiCasSerializer(cas.getTypeSystem());
+ boolean caughtException = false;
+ try {
+ xmiCasSerializer.serialize(cas, xmlSerializer.getContentHandler());
+ } catch (SAXParseException e) {
+ caughtException = true;
+ }
+ out.close();
+ assertTrue("XMI serialization of document text with bad XML 1.0 char should throw exception",
+ caughtException);
+ }
+
+ public void testInvalidCharsInFeatureValue() throws Exception {
+ CAS cas = CasCreationUtils.createCas(this.typeSystemDesc, null, null);
+ char badChar = 0x1A;
+ cas.setDocumentLanguage("a" + badChar);
+ OutputStream out = new FileOutputStream(this.outputFile);
+ XMLSerializer xmlSerializer = new XMLSerializer(out);
+ XmiCasSerializer xmiCasSerializer = new XmiCasSerializer(cas.getTypeSystem());
+ boolean caughtException = false;
+ try {
+ xmiCasSerializer.serialize(cas, xmlSerializer.getContentHandler());
+ } catch (SAXParseException e) {
+ caughtException = true;
+ }
+ out.close();
+ assertTrue("XMI serialization of document text with bad XML 1.0 char should throw exception",
+ caughtException);
+ }
+
+ /*
+ * (non-Javadoc)
+ *
+ * @see junit.framework.TestCase#tearDown()
+ */
+ protected void tearDown() throws Exception {
+ if ((this.outputFile != null) && this.outputFile.exists()) {
+ this.outputFile.delete();
+ }
+ }
+
+}