You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@xerces.apache.org by le...@locus.apache.org on 2000/10/31 02:38:20 UTC
cvs commit: xml-xerces/java/src/org/apache/xerces/impl XMLDTDScanner.java XMLDocumentScanner.java XMLScanner.java
lehors 00/10/30 17:38:19
Modified: java/src/org/apache/xerces/util Tag: xerces_j_2 XMLChar.java
java/src/org/apache/xerces/impl Tag: xerces_j_2
XMLDTDScanner.java XMLDocumentScanner.java
XMLScanner.java
Log:
added support for supplemental characters and surrogates
Revision Changes Path
No revision
No revision
1.1.2.9 +48 -1 xml-xerces/java/src/org/apache/xerces/util/Attic/XMLChar.java
Index: XMLChar.java
===================================================================
RCS file: /home/cvs/xml-xerces/java/src/org/apache/xerces/util/Attic/XMLChar.java,v
retrieving revision 1.1.2.8
retrieving revision 1.1.2.9
diff -u -r1.1.2.8 -r1.1.2.9
--- XMLChar.java 2000/10/28 01:14:02 1.1.2.8
+++ XMLChar.java 2000/10/31 01:38:15 1.1.2.9
@@ -76,8 +76,9 @@
* @author Stubs generated by DesignDoc on Wed Jun 07 11:58:44 PDT 2000
* @author Andy Clark, IBM
* @author Eric Ye, IBM
+ * @author Arnaud Le Hors, IBM
*
- * @version $Id: XMLChar.java,v 1.1.2.8 2000/10/28 01:14:02 andyc Exp $
+ * @version $Id: XMLChar.java,v 1.1.2.9 2000/10/31 01:38:15 lehors Exp $
*/
public class XMLChar {
@@ -362,6 +363,52 @@
//
// Public static methods
//
+
+ /**
+ * Returns true if the specified character is a supplemental character.
+ *
+ * @param c The character to check.
+ */
+ public static boolean isSupplemental(int c) {
+ return (c >= 0x10000 && c <= 0x10FFFF);
+ }
+
+ /**
+ * Returns the high surrogate of a supplemental character
+ *
+ * @param c The supplemental character to "split".
+ */
+ public static char highSurrogate(int c) {
+ return (char) (((c - 0x00010000) >> 10) + 0xd800);
+ }
+
+ /**
+ * Returns the low surrogate of a supplemental character
+ *
+ * @param c The supplemental character to "split".
+ */
+ public static char lowSurrogate(int c) {
+ return (char) (((c - 0x00010000) & 0x3ff) + 0xdc00);
+ }
+
+ /**
+ * Returns whether the given character is a high surrogate
+ *
+ * @param c The character to check.
+ */
+ public static boolean isHighSurrogate(int c) {
+ return (0xd800 <= c && c <= 0xdbff);
+ }
+
+ /**
+ * Returns whether the given character is a low surrogate
+ *
+ * @param c The character to check.
+ */
+ public static boolean isLowSurrogate(int c) {
+ return (0xdc00 <= c && c <= 0xdfff);
+ }
+
/**
* Returns true if the specified character is valid. This method
No revision
No revision
1.1.2.56 +6 -4 xml-xerces/java/src/org/apache/xerces/impl/Attic/XMLDTDScanner.java
Index: XMLDTDScanner.java
===================================================================
RCS file: /home/cvs/xml-xerces/java/src/org/apache/xerces/impl/Attic/XMLDTDScanner.java,v
retrieving revision 1.1.2.55
retrieving revision 1.1.2.56
diff -u -r1.1.2.55 -r1.1.2.56
--- XMLDTDScanner.java 2000/10/30 22:26:43 1.1.2.55
+++ XMLDTDScanner.java 2000/10/31 01:38:17 1.1.2.56
@@ -87,7 +87,7 @@
* @author Andy Clark, IBM
* @author Glenn Marcy, IBM
*
- * @version $Id: XMLDTDScanner.java,v 1.1.2.55 2000/10/30 22:26:43 lehors Exp $
+ * @version $Id: XMLDTDScanner.java,v 1.1.2.56 2000/10/31 01:38:17 lehors Exp $
*/
public class XMLDTDScanner
extends XMLScanner
@@ -1674,8 +1674,7 @@
fStringBuffer2.append(fString);
if (fEntityScanner.skipChar('&')) {
if (fEntityScanner.skipChar('#')) {
- char c = (char) scanCharReferenceValue();
- fStringBuffer2.append(c);
+ scanCharReferenceValue(fStringBuffer2);
}
else {
fStringBuffer2.append('&');
@@ -1708,7 +1707,10 @@
}
else {
int c = fEntityScanner.peekChar();
- if (XMLChar.isInvalid(c)) {
+ if (XMLChar.isHighSurrogate(c)) {
+ scanSurrogates(fStringBuffer2);
+ }
+ else if (XMLChar.isInvalid(c)) {
fErrorReporter.reportError(XMLMessageFormatter.XML_DOMAIN,
"InvalidCharInLiteral",
new Object[] { Integer.toHexString(c) },
1.1.2.54 +41 -23 xml-xerces/java/src/org/apache/xerces/impl/Attic/XMLDocumentScanner.java
Index: XMLDocumentScanner.java
===================================================================
RCS file: /home/cvs/xml-xerces/java/src/org/apache/xerces/impl/Attic/XMLDocumentScanner.java,v
retrieving revision 1.1.2.53
retrieving revision 1.1.2.54
diff -u -r1.1.2.53 -r1.1.2.54
--- XMLDocumentScanner.java 2000/10/28 01:13:59 1.1.2.53
+++ XMLDocumentScanner.java 2000/10/31 01:38:17 1.1.2.54
@@ -100,8 +100,9 @@
* @author Glenn Marcy, IBM
* @author Stubs generated by DesignDoc on Mon Sep 11 11:10:57 PDT 2000
* @author Andy Clark, IBM
+ * @author Arnaud Le Hors, IBM
*
- * @version $Id: XMLDocumentScanner.java,v 1.1.2.53 2000/10/28 01:13:59 andyc Exp $
+ * @version $Id: XMLDocumentScanner.java,v 1.1.2.54 2000/10/31 01:38:17 lehors Exp $
*/
public class XMLDocumentScanner
extends XMLScanner
@@ -269,6 +270,9 @@
/** Single character array. */
private final char[] fSingleChar = new char[1];
+ /** String buffer. */
+ private XMLStringBuffer fStringBuffer2 = new XMLStringBuffer();
+
/** External entity. */
private XMLEntityManager.ExternalEntity fExternalEntity = new XMLEntityManager.ExternalEntity();
@@ -965,9 +969,9 @@
if (DEBUG_ATTR_ENTITIES) {
System.out.println("*** set attribute offset: "+fAttributeOffset);
}
- fStringBuffer.clear();
+ fStringBuffer2.clear();
do {
- fStringBuffer.append(fString);
+ fStringBuffer2.append(fString);
fAttributeOffset += fString.length;
if (DEBUG_ATTR_ENTITIES) {
System.out.println("*** increment attribute offset: "+fAttributeOffset);
@@ -975,9 +979,8 @@
if (c == '&') {
fEntityScanner.skipChar('&');
if (fEntityScanner.skipChar('#')) {
- int cv = scanCharReferenceValue();
- if (cv != -1) {
- fStringBuffer.append((char)cv);
+ int ch = scanCharReferenceValue(fStringBuffer2);
+ if (ch != -1) {
fAttributeOffset++;
if (DEBUG_ATTR_ENTITIES) {
System.out.println("*** increment attribute offset: "+fAttributeOffset);
@@ -997,35 +1000,35 @@
null, XMLErrorReporter.SEVERITY_FATAL_ERROR);
}
if (entityName == fAmpSymbol) {
- fStringBuffer.append('&');
+ fStringBuffer2.append('&');
fAttributeOffset++;
if (DEBUG_ATTR_ENTITIES) {
System.out.println("*** increment attribute offset: "+fAttributeOffset);
}
}
else if (entityName == fAposSymbol) {
- fStringBuffer.append('\'');
+ fStringBuffer2.append('\'');
fAttributeOffset++;
if (DEBUG_ATTR_ENTITIES) {
System.out.println("*** increment attribute offset: "+fAttributeOffset);
}
}
else if (entityName == fLtSymbol) {
- fStringBuffer.append('<');
+ fStringBuffer2.append('<');
fAttributeOffset++;
if (DEBUG_ATTR_ENTITIES) {
System.out.println("*** increment attribute offset: "+fAttributeOffset);
}
}
else if (entityName == fGtSymbol) {
- fStringBuffer.append('>');
+ fStringBuffer2.append('>');
fAttributeOffset++;
if (DEBUG_ATTR_ENTITIES) {
System.out.println("*** increment attribute offset: "+fAttributeOffset);
}
}
else if (entityName == fQuotSymbol) {
- fStringBuffer.append('"');
+ fStringBuffer2.append('"');
fAttributeOffset++;
if (DEBUG_ATTR_ENTITIES) {
System.out.println("*** increment attribute offset: "+fAttributeOffset);
@@ -1058,7 +1061,10 @@
XMLErrorReporter.SEVERITY_FATAL_ERROR);
}
else if (c == '%') {
- fStringBuffer.append((char)fEntityScanner.scanChar());
+ fStringBuffer2.append((char)fEntityScanner.scanChar());
+ }
+ else if (c != -1 && XMLChar.isHighSurrogate(c)) {
+ scanSurrogates(fStringBuffer2);
}
else if (c != -1 && XMLChar.isInvalid(c)) {
fErrorReporter.reportError(XMLMessageFormatter.XML_DOMAIN,
@@ -1072,13 +1078,13 @@
if (c != quote || entityDepth == fEntityDepth) {
break;
}
- fStringBuffer.append(fString);
- fStringBuffer.append((char)fEntityScanner.scanChar());
+ fStringBuffer2.append(fString);
+ fStringBuffer2.append((char)fEntityScanner.scanChar());
}
} while (c != quote);
fAttributeOffset += fString.length;
- fStringBuffer.append(fString);
- value = fStringBuffer;
+ fStringBuffer2.append(fString);
+ value = fStringBuffer2;
int attrEntityCount = fAttributeEntityStack.size();
if (DEBUG_ATTR_ENTITIES) {
System.out.println("*** add remaining attribute entities: "+attrEntityCount);
@@ -1145,6 +1151,7 @@
} // scanContent():int
+
/**
* Scans a CDATA section.
* <p>
@@ -1267,14 +1274,14 @@
*/
protected void scanCharReference()
throws IOException, SAXException {
-
- int value = scanCharReferenceValue();
- // call handler
- if (fDocumentHandler != null) {
- fSingleChar[0] = (char)value;
- fString.setValues(fSingleChar, 0, 1);
- fDocumentHandler.characters(fString);
+ fStringBuffer2.clear();
+ int ch = scanCharReferenceValue(fStringBuffer2);
+ if (ch != -1) {
+ // call handler
+ if (fDocumentHandler != null) {
+ fDocumentHandler.characters(fStringBuffer2);
+ }
}
} // scanCharReference()
@@ -1940,6 +1947,17 @@
fEntityScanner.scanChar();
setScannerState(SCANNER_STATE_REFERENCE);
break;
+ }
+ else if (c != -1 && XMLChar.isHighSurrogate(c)) {
+ // special case: we have surrogates
+ fStringBuffer.clear();
+ if (scanSurrogates(fStringBuffer)) {
+
+ // call handler
+ if (fDocumentHandler != null) {
+ fDocumentHandler.characters(fStringBuffer);
+ }
+ }
}
else if (c != -1 && XMLChar.isInvalid(c)) {
fErrorReporter.reportError(XMLMessageFormatter.XML_DOMAIN,
1.1.2.22 +49 -3 xml-xerces/java/src/org/apache/xerces/impl/Attic/XMLScanner.java
Index: XMLScanner.java
===================================================================
RCS file: /home/cvs/xml-xerces/java/src/org/apache/xerces/impl/Attic/XMLScanner.java,v
retrieving revision 1.1.2.21
retrieving revision 1.1.2.22
diff -u -r1.1.2.21 -r1.1.2.22
--- XMLScanner.java 2000/10/31 00:35:47 1.1.2.21
+++ XMLScanner.java 2000/10/31 01:38:18 1.1.2.22
@@ -92,7 +92,7 @@
* @author Andy Clark, IBM
* @author Arnaud Le Hors, IBM
*
- * @version $Id: XMLScanner.java,v 1.1.2.21 2000/10/31 00:35:47 andyc Exp $
+ * @version $Id: XMLScanner.java,v 1.1.2.22 2000/10/31 01:38:18 lehors Exp $
*/
public abstract class XMLScanner
implements XMLComponent {
@@ -555,7 +555,9 @@
/**
- * Scans a character reference.
+ * Scans a character reference and appends the corresponding chars to the
+ * specified buffer.
+ *
* <p>
* <pre>
* [66] CharRef ::= '&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';'
@@ -564,9 +566,11 @@
* <strong>Note:</strong> This method uses fStringBuffer, anything in it
* at the time of calling is lost.
*
+ * @param buf the character buffer to append chars to
+ *
* @return the character value
*/
- protected int scanCharReferenceValue()
+ protected int scanCharReferenceValue(XMLStringBuffer buf)
throws IOException, SAXException {
// scan hexadecimal value
@@ -626,7 +630,49 @@
Integer.toString(value, 16) },
XMLErrorReporter.SEVERITY_FATAL_ERROR);
}
+
+ // append corresponding chars to the given buffer
+ if (!XMLChar.isSupplemental(value)) {
+ buf.append((char) value);
+ }
+ else {
+ // character is supplemental, split it into surrogate chars
+ buf.append(XMLChar.highSurrogate(value));
+ buf.append(XMLChar.lowSurrogate(value));
+ }
+
return value;
}
+
+
+ /**
+ * Scans surrogates and appends them to the specified buffer.
+ * <p>
+ * <strong>Note:</strong> This assumes the current char has already been
+ * identified as a high surrogate.
+ *
+ * @return True if it succeeded.
+ */
+ protected boolean scanSurrogates(XMLStringBuffer buf)
+ throws IOException, SAXException {
+
+ int high = fEntityScanner.scanChar();
+ int low = fEntityScanner.peekChar();
+ if (!XMLChar.isLowSurrogate(low)) {
+ fErrorReporter.reportError(XMLMessageFormatter.XML_DOMAIN,
+ "InvalidCharInContent",
+ new Object[] {Integer.toString(high, 16)},
+ XMLErrorReporter.SEVERITY_FATAL_ERROR);
+ return false;
+ }
+ fEntityScanner.scanChar();
+
+ // fill in the buffer
+ buf.append((char)high);
+ buf.append((char)low);
+
+ return true;
+
+ } // scanSurrogates():boolean
} // class XMLScanner