You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@xerces.apache.org by le...@locus.apache.org on 2000/11/09 03:19:11 UTC

cvs commit: xml-xerces/java/src/org/apache/xerces/impl XMLDocumentScanner.java XMLEntityManager.java XMLScanner.java

lehors      00/11/08 18:19:11

  Modified:    java/src/org/apache/xerces/impl Tag: xerces_j_2
                        XMLDocumentScanner.java XMLEntityManager.java
                        XMLScanner.java
  Log:
  newline normalization must only be performed when parsing _external_
  entities.
  This change relies on isExternal() to figure this out, which means that
  isExternal() has to return true for the document entity itself, whether or
  not we have a systemID. Not sure this is already the case for a document
  read from an InputStream...
  Scanners are modified to handle the possible \r character that they did not
  expect before.
  
  Revision  Changes    Path
  No                   revision
  
  
  No                   revision
  
  
  1.1.2.71  +14 -4     xml-xerces/java/src/org/apache/xerces/impl/Attic/XMLDocumentScanner.java
  
  Index: XMLDocumentScanner.java
  ===================================================================
  RCS file: /home/cvs/xml-xerces/java/src/org/apache/xerces/impl/Attic/XMLDocumentScanner.java,v
  retrieving revision 1.1.2.70
  retrieving revision 1.1.2.71
  diff -u -r1.1.2.70 -r1.1.2.71
  --- XMLDocumentScanner.java	2000/11/08 01:28:30	1.1.2.70
  +++ XMLDocumentScanner.java	2000/11/09 02:19:06	1.1.2.71
  @@ -102,7 +102,7 @@
    * @author Andy Clark, IBM
    * @author Arnaud  Le Hors, IBM
    *
  - * @version $Id: XMLDocumentScanner.java,v 1.1.2.70 2000/11/08 01:28:30 lehors Exp $
  + * @version $Id: XMLDocumentScanner.java,v 1.1.2.71 2000/11/09 02:19:06 lehors Exp $
    */
   public class XMLDocumentScanner
       extends XMLScanner
  @@ -795,9 +795,19 @@
        */
       protected int scanContent() throws IOException, SAXException {
   
  -        int c = fEntityScanner.scanContent(fString);
  -        if (fDocumentHandler != null && fString.length > 0) {
  -            fDocumentHandler.characters(fString);
  +        XMLString content = fString;
  +        int c = fEntityScanner.scanContent(content);
  +        if (c == '\r') {
  +            // happens when there is the character reference &#13;
  +            fEntityScanner.scanChar();
  +            fStringBuffer.clear();
  +            fStringBuffer.append(fString);
  +            fStringBuffer.append((char)c);
  +            content = fStringBuffer;
  +            c = -1;
  +        }
  +        if (fDocumentHandler != null && content.length > 0) {
  +            fDocumentHandler.characters(content);
           }
   
           if (c == ']' && fString.length == 0) {
  
  
  
  1.1.2.60  +49 -22    xml-xerces/java/src/org/apache/xerces/impl/Attic/XMLEntityManager.java
  
  Index: XMLEntityManager.java
  ===================================================================
  RCS file: /home/cvs/xml-xerces/java/src/org/apache/xerces/impl/Attic/XMLEntityManager.java,v
  retrieving revision 1.1.2.59
  retrieving revision 1.1.2.60
  diff -u -r1.1.2.59 -r1.1.2.60
  --- XMLEntityManager.java	2000/11/06 19:24:39	1.1.2.59
  +++ XMLEntityManager.java	2000/11/09 02:19:06	1.1.2.60
  @@ -115,7 +115,7 @@
    * @author Stubs generated by DesignDoc on Mon Sep 18 18:23:16 PDT 2000
    * @author Andy Clark, IBM
    *
  - * @version $Id: XMLEntityManager.java,v 1.1.2.59 2000/11/06 19:24:39 lehors Exp $
  + * @version $Id: XMLEntityManager.java,v 1.1.2.60 2000/11/09 02:19:06 lehors Exp $
    */
   public class XMLEntityManager
       implements XMLComponent {
  @@ -551,7 +551,7 @@
   
           // resolve external entity
           XMLInputSource xmlInputSource = null;
  -        if (entity.isExternal()) {
  +        if (external) {
               ExternalEntity externalEntity = (ExternalEntity)entity;
               String publicId = externalEntity.publicId;
               String systemId = externalEntity.systemId;
  @@ -1598,9 +1598,19 @@
               if (DEBUG_BUFFER) {
                   System.out.print(")peekChar: ");
                   print();
  -                System.out.println(" -> '"+(c!='\r'?(char)c:'\n')+"'");
  +                if (fCurrentEntity.isExternal()) {
  +                    System.out.println(" -> '"+(c!='\r'?(char)c:'\n')+"'");
  +                }
  +                else {
  +                    System.out.println(" -> '"+(char)c+"'");
  +                }
  +            }
  +            if (fCurrentEntity.isExternal()) {
  +                return c != '\r' ? c : '\n';
               }
  -            return c != '\r' ? c : '\n';
  +            else {
  +                return c;
  +            }
   
           } // peekChar():int
       
  @@ -1628,21 +1638,24 @@
   
               // scan character
               int c = fCurrentEntity.ch[fCurrentEntity.position++];
  -            if (c == '\r' || c == '\n') {
  +            boolean external = false;
  +            if (c == '\n' ||
  +                (c == '\r' && (external = fCurrentEntity.isExternal()))) {
                   fCurrentEntity.lineNumber++;
                   fCurrentEntity.columnNumber = 1;
                   if (fCurrentEntity.position == fCurrentEntity.count) {
                       fCurrentEntity.ch[0] = (char)c;
                       load(1, false);
                   }
  -                if (c == '\r') {
  +                if (c == '\r' && external) {
                       if (fCurrentEntity.ch[fCurrentEntity.position++] != '\n') {
                           fCurrentEntity.position--;
                       }
                       c = '\n';
                   }
                   else {
  -                    if (fCurrentEntity.ch[fCurrentEntity.position] == '\r') {
  +                    if (fCurrentEntity.ch[fCurrentEntity.position] == '\r'
  +                        && fCurrentEntity.isExternal()) {
                           fCurrentEntity.position++;
                       }
                   }
  @@ -1948,7 +1961,8 @@
               int offset = fCurrentEntity.position;
               int c = fCurrentEntity.ch[offset];
               int newlines = 0;
  -            if (c == '\r' || c == '\n') {
  +            boolean external = fCurrentEntity.isExternal();
  +            if (c == '\n' || (c == '\r' && external)) {
                   if (DEBUG_BUFFER) {
                       System.out.print("[newline, "+offset+", "+fCurrentEntity.position+": ");
                       print();
  @@ -1956,7 +1970,7 @@
                   }
                   do {
                       c = fCurrentEntity.ch[fCurrentEntity.position++];
  -                    if (c == '\r') {
  +                    if (c == '\r' && external) {
                           newlines++;
                           fCurrentEntity.lineNumber++;
                           fCurrentEntity.columnNumber = 1;
  @@ -1983,7 +1997,8 @@
                                   break;
                               }
                           }
  -                        if (fCurrentEntity.ch[fCurrentEntity.position] == '\r') {
  +                        if (fCurrentEntity.ch[fCurrentEntity.position] == '\r'
  +                            && external) {
                               fCurrentEntity.position++;
                               offset++;
                           }
  @@ -2028,10 +2043,13 @@
               // return next character
               if (fCurrentEntity.position != fCurrentEntity.count) {
                   c = fCurrentEntity.ch[fCurrentEntity.position];
  -                if (c == '\r' || c == '\n') {
  +                if (c == '\r' && external) {
                       c = '\n';
                   }
               }
  +            else {
  +                c = -1;
  +            }
               if (DEBUG_BUFFER) {
                   System.out.print(")scanContent: ");
                   print();
  @@ -2093,7 +2111,8 @@
               int offset = fCurrentEntity.position;
               int c = fCurrentEntity.ch[offset];
               int newlines = 0;
  -            if (c == '\r' || c == '\n') {
  +            boolean external = fCurrentEntity.isExternal();
  +            if (c == '\n' || (c == '\r' && external)) {
                   if (DEBUG_BUFFER) {
                       System.out.print("[newline, "+offset+", "+fCurrentEntity.position+": ");
                       print();
  @@ -2101,7 +2120,7 @@
                   }
                   do {
                       c = fCurrentEntity.ch[fCurrentEntity.position++];
  -                    if (c == '\r') {
  +                    if (c == '\r' && external) {
                           newlines++;
                           fCurrentEntity.lineNumber++;
                           fCurrentEntity.columnNumber = 1;
  @@ -2128,7 +2147,8 @@
                                   break;
                               }
                           }
  -                        if (fCurrentEntity.ch[fCurrentEntity.position] == '\r') {
  +                        if (fCurrentEntity.ch[fCurrentEntity.position] == '\r'
  +                            && external) {
                               fCurrentEntity.position++;
                               offset++;
                           }
  @@ -2162,7 +2182,7 @@
               while (fCurrentEntity.position < fCurrentEntity.count) {
                   c = fCurrentEntity.ch[fCurrentEntity.position++];
                   if ((c == quote &&
  -                     (!fCurrentEntity.literal || fCurrentEntity.isExternal()))
  +                     (!fCurrentEntity.literal || external))
                       || c == '%' || !XMLChar.isContent(c)) {
                       fCurrentEntity.position--;
                       break;
  @@ -2182,6 +2202,9 @@
                       c = -1;
                   }
               }
  +            else {
  +                c = -1;
  +            }
               if (DEBUG_BUFFER) {
                   System.out.print(")scanLiteral, '"+(char)quote+"': ");
                   print();
  @@ -2250,7 +2273,8 @@
               int offset = fCurrentEntity.position;
               int c = fCurrentEntity.ch[offset];
               int newlines = 0;
  -            if (c == '\r' || c == '\n') {
  +            boolean external = fCurrentEntity.isExternal();
  +            if (c == '\n' || (c == '\r' && external)) {
                   if (DEBUG_BUFFER) {
                       System.out.print("[newline, "+offset+", "+fCurrentEntity.position+": ");
                       print();
  @@ -2258,7 +2282,7 @@
                   }
                   do {
                       c = fCurrentEntity.ch[fCurrentEntity.position++];
  -                    if (c == '\r') {
  +                    if (c == '\r' && external) {
                           newlines++;
                           fCurrentEntity.lineNumber++;
                           fCurrentEntity.columnNumber = 1;
  @@ -2290,7 +2314,8 @@
                               }
                           }
                           /***/
  -                        if (fCurrentEntity.ch[fCurrentEntity.position] == '\r') {
  +                        if (fCurrentEntity.ch[fCurrentEntity.position] == '\r'
  +                            && external) {
                               fCurrentEntity.position++;
                               offset++;
                           }
  @@ -2343,7 +2368,7 @@
                           break;
                       }
                   }
  -                else if (c == '\r' || c == '\n') {
  +                else if (c == '\n' || (external && c == '\r')) {
                       fCurrentEntity.position--;
                       break;
                   }
  @@ -2449,22 +2474,24 @@
               // skip spaces
               int c = fCurrentEntity.ch[fCurrentEntity.position];
               if (XMLChar.isSpace(c)) {
  +                boolean external = fCurrentEntity.isExternal();
                   do {
                       // handle newlines
  -                    if (c == '\r' || c == '\n') {
  +                    if (c == '\n' || (external && c == '\r')) {
                           fCurrentEntity.lineNumber++;
                           fCurrentEntity.columnNumber = 1;
                           if (fCurrentEntity.position == fCurrentEntity.count - 1) {
                               fCurrentEntity.ch[0] = (char)c;
                               load(1, true);
                           }
  -                        if (c == '\r') {
  +                        if (c == '\r' && external) {
                               if (fCurrentEntity.ch[++fCurrentEntity.position] != '\n') {
                                   fCurrentEntity.position--;
                               }
                           }
                           else {
  -                            if (fCurrentEntity.ch[fCurrentEntity.position + 1] == '\r') {
  +                            if (fCurrentEntity.ch[fCurrentEntity.position + 1] == '\r'
  +                                && external) {
                                   fCurrentEntity.position++;
                               }
                           }
  
  
  
  1.1.2.33  +6 -1      xml-xerces/java/src/org/apache/xerces/impl/Attic/XMLScanner.java
  
  Index: XMLScanner.java
  ===================================================================
  RCS file: /home/cvs/xml-xerces/java/src/org/apache/xerces/impl/Attic/XMLScanner.java,v
  retrieving revision 1.1.2.32
  retrieving revision 1.1.2.33
  diff -u -r1.1.2.32 -r1.1.2.33
  --- XMLScanner.java	2000/11/06 20:10:03	1.1.2.32
  +++ XMLScanner.java	2000/11/09 02:19:07	1.1.2.33
  @@ -93,7 +93,7 @@
    * @author Andy Clark, IBM
    * @author Arnaud  Le Hors, IBM
    *
  - * @version $Id: XMLScanner.java,v 1.1.2.32 2000/11/06 20:10:03 lehors Exp $
  + * @version $Id: XMLScanner.java,v 1.1.2.33 2000/11/09 02:19:07 lehors Exp $
    */
   public abstract class XMLScanner 
       implements XMLComponent {
  @@ -702,6 +702,11 @@
                   }
                   else if (c == '%') {
                       fStringBuffer2.append((char)fEntityScanner.scanChar());
  +                }
  +                else if (c == '\r') {
  +                    // this happens when we have the character reference &#13;
  +                    fEntityScanner.scanChar();
  +                    fStringBuffer2.append(' '); // normalize to #x20
                   }
                   else if (c != -1 && XMLChar.isHighSurrogate(c)) {
                       scanSurrogates(fStringBuffer2);