You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@xerces.apache.org by le...@locus.apache.org on 2000/11/09 03:19:11 UTC
cvs commit: xml-xerces/java/src/org/apache/xerces/impl XMLDocumentScanner.java XMLEntityManager.java XMLScanner.java
lehors 00/11/08 18:19:11
Modified: java/src/org/apache/xerces/impl Tag: xerces_j_2
XMLDocumentScanner.java XMLEntityManager.java
XMLScanner.java
Log:
newline normalization must only be performed when parsing _external_
entities.
This change relies on isExternal() to determine that, which means isExternal()
must return true for the document entity itself, whether or not we have
a systemID. I am not sure this is already the case for a document read
from an InputStream...
The scanners are modified to handle a possible \r character, which they did
not expect before.
Revision Changes Path
No revision
No revision
1.1.2.71 +14 -4 xml-xerces/java/src/org/apache/xerces/impl/Attic/XMLDocumentScanner.java
Index: XMLDocumentScanner.java
===================================================================
RCS file: /home/cvs/xml-xerces/java/src/org/apache/xerces/impl/Attic/XMLDocumentScanner.java,v
retrieving revision 1.1.2.70
retrieving revision 1.1.2.71
diff -u -r1.1.2.70 -r1.1.2.71
--- XMLDocumentScanner.java 2000/11/08 01:28:30 1.1.2.70
+++ XMLDocumentScanner.java 2000/11/09 02:19:06 1.1.2.71
@@ -102,7 +102,7 @@
* @author Andy Clark, IBM
* @author Arnaud Le Hors, IBM
*
- * @version $Id: XMLDocumentScanner.java,v 1.1.2.70 2000/11/08 01:28:30 lehors Exp $
+ * @version $Id: XMLDocumentScanner.java,v 1.1.2.71 2000/11/09 02:19:06 lehors Exp $
*/
public class XMLDocumentScanner
extends XMLScanner
@@ -795,9 +795,19 @@
*/
protected int scanContent() throws IOException, SAXException {
- int c = fEntityScanner.scanContent(fString);
- if (fDocumentHandler != null && fString.length > 0) {
- fDocumentHandler.characters(fString);
+ XMLString content = fString;
+ int c = fEntityScanner.scanContent(content);
+ if (c == '\r') {
+ // happens when there is the character reference &#13;
+ fEntityScanner.scanChar();
+ fStringBuffer.clear();
+ fStringBuffer.append(fString);
+ fStringBuffer.append((char)c);
+ content = fStringBuffer;
+ c = -1;
+ }
+ if (fDocumentHandler != null && content.length > 0) {
+ fDocumentHandler.characters(content);
}
if (c == ']' && fString.length == 0) {
1.1.2.60 +49 -22 xml-xerces/java/src/org/apache/xerces/impl/Attic/XMLEntityManager.java
Index: XMLEntityManager.java
===================================================================
RCS file: /home/cvs/xml-xerces/java/src/org/apache/xerces/impl/Attic/XMLEntityManager.java,v
retrieving revision 1.1.2.59
retrieving revision 1.1.2.60
diff -u -r1.1.2.59 -r1.1.2.60
--- XMLEntityManager.java 2000/11/06 19:24:39 1.1.2.59
+++ XMLEntityManager.java 2000/11/09 02:19:06 1.1.2.60
@@ -115,7 +115,7 @@
* @author Stubs generated by DesignDoc on Mon Sep 18 18:23:16 PDT 2000
* @author Andy Clark, IBM
*
- * @version $Id: XMLEntityManager.java,v 1.1.2.59 2000/11/06 19:24:39 lehors Exp $
+ * @version $Id: XMLEntityManager.java,v 1.1.2.60 2000/11/09 02:19:06 lehors Exp $
*/
public class XMLEntityManager
implements XMLComponent {
@@ -551,7 +551,7 @@
// resolve external entity
XMLInputSource xmlInputSource = null;
- if (entity.isExternal()) {
+ if (external) {
ExternalEntity externalEntity = (ExternalEntity)entity;
String publicId = externalEntity.publicId;
String systemId = externalEntity.systemId;
@@ -1598,9 +1598,19 @@
if (DEBUG_BUFFER) {
System.out.print(")peekChar: ");
print();
- System.out.println(" -> '"+(c!='\r'?(char)c:'\n')+"'");
+ if (fCurrentEntity.isExternal()) {
+ System.out.println(" -> '"+(c!='\r'?(char)c:'\n')+"'");
+ }
+ else {
+ System.out.println(" -> '"+(char)c+"'");
+ }
+ }
+ if (fCurrentEntity.isExternal()) {
+ return c != '\r' ? c : '\n';
}
- return c != '\r' ? c : '\n';
+ else {
+ return c;
+ }
} // peekChar():int
@@ -1628,21 +1638,24 @@
// scan character
int c = fCurrentEntity.ch[fCurrentEntity.position++];
- if (c == '\r' || c == '\n') {
+ boolean external = false;
+ if (c == '\n' ||
+ (c == '\r' && (external = fCurrentEntity.isExternal()))) {
fCurrentEntity.lineNumber++;
fCurrentEntity.columnNumber = 1;
if (fCurrentEntity.position == fCurrentEntity.count) {
fCurrentEntity.ch[0] = (char)c;
load(1, false);
}
- if (c == '\r') {
+ if (c == '\r' && external) {
if (fCurrentEntity.ch[fCurrentEntity.position++] != '\n') {
fCurrentEntity.position--;
}
c = '\n';
}
else {
- if (fCurrentEntity.ch[fCurrentEntity.position] == '\r') {
+ if (fCurrentEntity.ch[fCurrentEntity.position] == '\r'
+ && fCurrentEntity.isExternal()) {
fCurrentEntity.position++;
}
}
@@ -1948,7 +1961,8 @@
int offset = fCurrentEntity.position;
int c = fCurrentEntity.ch[offset];
int newlines = 0;
- if (c == '\r' || c == '\n') {
+ boolean external = fCurrentEntity.isExternal();
+ if (c == '\n' || (c == '\r' && external)) {
if (DEBUG_BUFFER) {
System.out.print("[newline, "+offset+", "+fCurrentEntity.position+": ");
print();
@@ -1956,7 +1970,7 @@
}
do {
c = fCurrentEntity.ch[fCurrentEntity.position++];
- if (c == '\r') {
+ if (c == '\r' && external) {
newlines++;
fCurrentEntity.lineNumber++;
fCurrentEntity.columnNumber = 1;
@@ -1983,7 +1997,8 @@
break;
}
}
- if (fCurrentEntity.ch[fCurrentEntity.position] == '\r') {
+ if (fCurrentEntity.ch[fCurrentEntity.position] == '\r'
+ && external) {
fCurrentEntity.position++;
offset++;
}
@@ -2028,10 +2043,13 @@
// return next character
if (fCurrentEntity.position != fCurrentEntity.count) {
c = fCurrentEntity.ch[fCurrentEntity.position];
- if (c == '\r' || c == '\n') {
+ if (c == '\r' && external) {
c = '\n';
}
}
+ else {
+ c = -1;
+ }
if (DEBUG_BUFFER) {
System.out.print(")scanContent: ");
print();
@@ -2093,7 +2111,8 @@
int offset = fCurrentEntity.position;
int c = fCurrentEntity.ch[offset];
int newlines = 0;
- if (c == '\r' || c == '\n') {
+ boolean external = fCurrentEntity.isExternal();
+ if (c == '\n' || (c == '\r' && external)) {
if (DEBUG_BUFFER) {
System.out.print("[newline, "+offset+", "+fCurrentEntity.position+": ");
print();
@@ -2101,7 +2120,7 @@
}
do {
c = fCurrentEntity.ch[fCurrentEntity.position++];
- if (c == '\r') {
+ if (c == '\r' && external) {
newlines++;
fCurrentEntity.lineNumber++;
fCurrentEntity.columnNumber = 1;
@@ -2128,7 +2147,8 @@
break;
}
}
- if (fCurrentEntity.ch[fCurrentEntity.position] == '\r') {
+ if (fCurrentEntity.ch[fCurrentEntity.position] == '\r'
+ && external) {
fCurrentEntity.position++;
offset++;
}
@@ -2162,7 +2182,7 @@
while (fCurrentEntity.position < fCurrentEntity.count) {
c = fCurrentEntity.ch[fCurrentEntity.position++];
if ((c == quote &&
- (!fCurrentEntity.literal || fCurrentEntity.isExternal()))
+ (!fCurrentEntity.literal || external))
|| c == '%' || !XMLChar.isContent(c)) {
fCurrentEntity.position--;
break;
@@ -2182,6 +2202,9 @@
c = -1;
}
}
+ else {
+ c = -1;
+ }
if (DEBUG_BUFFER) {
System.out.print(")scanLiteral, '"+(char)quote+"': ");
print();
@@ -2250,7 +2273,8 @@
int offset = fCurrentEntity.position;
int c = fCurrentEntity.ch[offset];
int newlines = 0;
- if (c == '\r' || c == '\n') {
+ boolean external = fCurrentEntity.isExternal();
+ if (c == '\n' || (c == '\r' && external)) {
if (DEBUG_BUFFER) {
System.out.print("[newline, "+offset+", "+fCurrentEntity.position+": ");
print();
@@ -2258,7 +2282,7 @@
}
do {
c = fCurrentEntity.ch[fCurrentEntity.position++];
- if (c == '\r') {
+ if (c == '\r' && external) {
newlines++;
fCurrentEntity.lineNumber++;
fCurrentEntity.columnNumber = 1;
@@ -2290,7 +2314,8 @@
}
}
/***/
- if (fCurrentEntity.ch[fCurrentEntity.position] == '\r') {
+ if (fCurrentEntity.ch[fCurrentEntity.position] == '\r'
+ && external) {
fCurrentEntity.position++;
offset++;
}
@@ -2343,7 +2368,7 @@
break;
}
}
- else if (c == '\r' || c == '\n') {
+ else if (c == '\n' || (external && c == '\r')) {
fCurrentEntity.position--;
break;
}
@@ -2449,22 +2474,24 @@
// skip spaces
int c = fCurrentEntity.ch[fCurrentEntity.position];
if (XMLChar.isSpace(c)) {
+ boolean external = fCurrentEntity.isExternal();
do {
// handle newlines
- if (c == '\r' || c == '\n') {
+ if (c == '\n' || (external && c == '\r')) {
fCurrentEntity.lineNumber++;
fCurrentEntity.columnNumber = 1;
if (fCurrentEntity.position == fCurrentEntity.count - 1) {
fCurrentEntity.ch[0] = (char)c;
load(1, true);
}
- if (c == '\r') {
+ if (c == '\r' && external) {
if (fCurrentEntity.ch[++fCurrentEntity.position] != '\n') {
fCurrentEntity.position--;
}
}
else {
- if (fCurrentEntity.ch[fCurrentEntity.position + 1] == '\r') {
+ if (fCurrentEntity.ch[fCurrentEntity.position + 1] == '\r'
+ && external) {
fCurrentEntity.position++;
}
}
1.1.2.33 +6 -1 xml-xerces/java/src/org/apache/xerces/impl/Attic/XMLScanner.java
Index: XMLScanner.java
===================================================================
RCS file: /home/cvs/xml-xerces/java/src/org/apache/xerces/impl/Attic/XMLScanner.java,v
retrieving revision 1.1.2.32
retrieving revision 1.1.2.33
diff -u -r1.1.2.32 -r1.1.2.33
--- XMLScanner.java 2000/11/06 20:10:03 1.1.2.32
+++ XMLScanner.java 2000/11/09 02:19:07 1.1.2.33
@@ -93,7 +93,7 @@
* @author Andy Clark, IBM
* @author Arnaud Le Hors, IBM
*
- * @version $Id: XMLScanner.java,v 1.1.2.32 2000/11/06 20:10:03 lehors Exp $
+ * @version $Id: XMLScanner.java,v 1.1.2.33 2000/11/09 02:19:07 lehors Exp $
*/
public abstract class XMLScanner
implements XMLComponent {
@@ -702,6 +702,11 @@
}
else if (c == '%') {
fStringBuffer2.append((char)fEntityScanner.scanChar());
+ }
+ else if (c == '\r') {
+ // this happens when we have the character reference &#13;
+ fEntityScanner.scanChar();
+ fStringBuffer2.append(' '); // normalize to #x20
}
else if (c != -1 && XMLChar.isHighSurrogate(c)) {
scanSurrogates(fStringBuffer2);