You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@xerces.apache.org by ne...@apache.org on 2002/07/31 16:08:13 UTC
cvs commit: xml-xerces/java/src/org/apache/xerces/parsers XML11Configuration.java

neilg       2002/07/31 07:08:12

  Modified:    java/src/org/apache/xerces/util XMLChar.java
  Added:       java/src/org/apache/xerces/impl XML11EntityManager.java
                        XML11DocumentScannerImpl.java
                        XML11DTDScannerImpl.java
               java/src/org/apache/xerces/parsers XML11Configuration.java
  Log:
  Implemented a test platform on which XML 1.1 can be implemented.
  
  Here, I have only implemented XML 1.1 whitespace handling and support for the new version number.  But, using this architecture, it should be possible to implement the rest of the new features without disturbing the current XML 1.0 default.
  
  The implementation is based on a new configuration (XML11Configuration) which extends StandardParserConfiguration by creating a new XMLEntityManager, XMLDocumentScanner and XMLDTDScanner components which implement XML 1.1 functionality.  This is not the most efficient approach from the XML 1.1 perspective, but it allows the current code to remain (almost completely) unchanged--particularly advantageous given the performance challenges of some XML 1.1 features.  Still to do will be to properly extend XMLChar to take into account things like the new XML 1.1 definition of Name/NCName.
  
  Revision  Changes    Path
  1.9       +12 -1     xml-xerces/java/src/org/apache/xerces/util/XMLChar.java
  
  Index: XMLChar.java
  ===================================================================
  RCS file: /home/cvs/xml-xerces/java/src/org/apache/xerces/util/XMLChar.java,v
  retrieving revision 1.8
  retrieving revision 1.9
  diff -u -r1.8 -r1.9
  --- XMLChar.java	8 Jul 2002 16:22:56 -0000	1.8
  +++ XMLChar.java	31 Jul 2002 14:08:12 -0000	1.9
  @@ -489,6 +489,17 @@
       } // isSpace(int):boolean
   
       /**
  +     * Returns true if the specified character is a space character
  +     * as amended in the XML 1.1 specification.
  +     *
  +     * @param c The character to check.
  +     */
  +    public static boolean isXML11Space(int c) {
  +        return (c < 0x10000 && (CHARS[c] & MASK_SPACE) != 0) || // CHARS table lookup, only attempted for c below 0x10000
  +            c == 0x85 || c == 0x2028; // additionally accepts U+0085 and U+2028
  +    } // isXML11Space(int):boolean
  +
  +    /**
        * Returns true if the specified character is a valid name start
        * character as defined by production [5] in the XML 1.0
        * specification.
  
  
  
  1.1                  xml-xerces/java/src/org/apache/xerces/impl/XML11EntityManager.java
  
  Index: XML11EntityManager.java
  ===================================================================
  /*
   * The Apache Software License, Version 1.1
   *
   *
   * Copyright (c) 1999-2002 The Apache Software Foundation.
   * All rights reserved.
   *
   * Redistribution and use in source and binary forms, with or without
   * modification, are permitted provided that the following conditions
   * are met:
   *
   * 1. Redistributions of source code must retain the above copyright
   *    notice, this list of conditions and the following disclaimer.
   *
   * 2. Redistributions in binary form must reproduce the above copyright
   *    notice, this list of conditions and the following disclaimer in
   *    the documentation and/or other materials provided with the
   *    distribution.
   *
   * 3. The end-user documentation included with the redistribution,
   *    if any, must include the following acknowledgment:
   *       "This product includes software developed by the
   *        Apache Software Foundation (http://www.apache.org/)."
   *    Alternately, this acknowledgment may appear in the software itself,
   *    if and wherever such third-party acknowledgments normally appear.
   *
   * 4. The names "Xerces" and "Apache Software Foundation" must
   *    not be used to endorse or promote products derived from this
   *    software without prior written permission. For written
   *    permission, please contact apache@apache.org.
   *
   * 5. Products derived from this software may not be called "Apache",
   *    nor may "Apache" appear in their name, without prior written
   *    permission of the Apache Software Foundation.
   *
   * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
   * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
   * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
   * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
   * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
   * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
   * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
   * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
   * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
   * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
   * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   * SUCH DAMAGE.
   * ====================================================================
   *
   * This software consists of voluntary contributions made by many
   * individuals on behalf of the Apache Software Foundation and was
   * originally based on software copyright (c) 1999, International
   * Business Machines, Inc., http://www.apache.org.  For more
   * information on the Apache Software Foundation, please see
   * <http://www.apache.org/>.
   */
  
  package org.apache.xerces.impl;
  
  import java.io.EOFException;
  import java.io.FileInputStream;
  import java.io.FilterReader;
  import java.io.InputStream;
  import java.io.InputStreamReader;
  import java.io.IOException;
  import java.io.Reader;
  import java.io.StringReader;
  import java.net.URL;
  import java.util.Hashtable;
  import java.util.Stack;
  import java.util.Vector;
  
  import org.apache.xerces.impl.XMLErrorReporter;
  import org.apache.xerces.impl.io.ASCIIReader;
  import org.apache.xerces.impl.io.UCSReader;
  import org.apache.xerces.impl.io.UTF8Reader;
  import org.apache.xerces.impl.msg.XMLMessageFormatter;
  import org.apache.xerces.impl.validation.ValidationManager;
  
  import org.apache.xerces.util.EncodingMap;
  import org.apache.xerces.util.SymbolTable;
  import org.apache.xerces.util.URI;
  import org.apache.xerces.util.XMLChar;
  import org.apache.xerces.util.XMLResourceIdentifierImpl;
  
  import org.apache.xerces.xni.QName;
  import org.apache.xerces.xni.XMLResourceIdentifier;
  import org.apache.xerces.xni.XMLString;
  import org.apache.xerces.xni.XNIException;
  import org.apache.xerces.xni.parser.XMLComponent;
  import org.apache.xerces.xni.parser.XMLComponentManager;
  import org.apache.xerces.xni.parser.XMLConfigurationException;
  import org.apache.xerces.xni.parser.XMLEntityResolver;
  import org.apache.xerces.xni.parser.XMLInputSource;
  
  /**
   * The entity manager handles the registration of general and parameter
   * entities; resolves entities; and starts entities. The entity manager
   * is a central component in a standard parser configuration and this
   * class works directly with the entity scanner to manage the underlying
   * xni.
   * <p>
   * This component requires the following features and properties from the
   * component manager that uses it:
   * <ul>
   *  <li>http://xml.org/sax/features/validation</li>
   *  <li>http://xml.org/sax/features/external-general-entities</li>
   *  <li>http://xml.org/sax/features/external-parameter-entities</li>
   *  <li>http://apache.org/xml/features/allow-java-encodings</li>
   *  <li>http://apache.org/xml/properties/internal/symbol-table</li>
   *  <li>http://apache.org/xml/properties/internal/error-reporter</li>
   *  <li>http://apache.org/xml/properties/internal/entity-resolver</li>
   * </ul>
   *
   *
   * @author Andy Clark, IBM
   * @author Arnaud  Le Hors, IBM
   *
   * @version $Id: XML11EntityManager.java,v 1.1 2002/07/31 14:08:12 neilg Exp $
   */
  public class XML11EntityManager
      extends XMLEntityManager {
  
      //
      // Constructors
      //
  
      /** Default constructor. */
      public XML11EntityManager() {
          // delegate to the one-argument constructor with no entity manager supplied
          this(null);
      } // <init>()
  
      /**
       * Constructs an entity manager that shares the specified entity
       * declarations during each parse.
       * <p>
       * <strong>REVISIT:</strong> We might want to think about the "right"
       * way to expose the list of declared entities. For now, the knowledge
       * how to access the entity declarations is implicit.
       */
      public XML11EntityManager(XMLEntityManager entityManager) {
  
          // pass the (possibly null) entity manager straight to the
          // XMLEntityManager constructor; nothing else is initialized here.
          // NOTE(review): presumably the superclass constructor obtains its
          // scanner through createEntityScanner() -- confirm in XMLEntityManager.
          super(entityManager);
  
      } // <init>(XMLEntityManager)
  
      //
      // Protected methods
      //
  
      protected XMLEntityScanner createEntityScanner() {
          // supply the XML 1.1 aware scanner declared as an inner class of this manager
          return new XML11EntityScanner();
      } // createEntityScanner():  XMLEntityScanner
      // Classes
  
      /**
       * Implements the entity scanner methods.
       *
       * @author Andy Clark, IBM
       */
      protected class XML11EntityScanner
          extends XMLEntityManager.EntityScanner {
  
          //
          // Constructors
          //
  
          /** Default constructor. */
          public XML11EntityScanner() {
              // no state of its own to set up; only the superclass constructor runs
              super();
          } // <init>()
  
          //
          // XMLEntityScanner methods
          //
  
          /**
           * Returns the next character on the input.
           * <p>
           * <strong>Note:</strong> The character is <em>not</em> consumed.
           *
           * @throws IOException  Thrown if i/o error occurs.
           * @throws EOFException Thrown on end of file.
           */
          public int peekChar() throws IOException {
  
              // refill the buffer once everything in it has been consumed
              if (fCurrentEntity.count == fCurrentEntity.position) {
                  load(0, true);
              }
  
              // look at the pending character without advancing the position
              final int next = fCurrentEntity.ch[fCurrentEntity.position];
  
              // characters of internal entities are handed back untouched
              if (!fCurrentEntity.isExternal()) {
                  return next;
              }
  
              // in external entities CR, U+0085 and U+2028 are reported as '\n'
              switch (next) {
                  case '\r':
                  case 0x85:
                  case 0x2028:
                      return '\n';
                  default:
                      return next;
              }
  
          } // peekChar():int
  
          /**
           * Returns the next character on the input.
           * <p>
           * <strong>Note:</strong> The character is consumed.
           *
           * @throws IOException  Thrown if i/o error occurs.
           * @throws EOFException Thrown on end of file.
           */
          public int scanChar() throws IOException {
  
              // load more characters, if needed
              if (fCurrentEntity.position == fCurrentEntity.count) {
                  load(0, true);
              }
  
              // scan character
              int c = fCurrentEntity.ch[fCurrentEntity.position++];
              boolean external = false;
              // '\n' always ends a line; CR, U+0085 and U+2028 do so only in
              // external entities (isExternal() is consulted only for those three)
              if (c == '\n' ||
                  ((c == '\r' || c == 0x85 || c == 0x2028) && (external = fCurrentEntity.isExternal()))) {
                  fCurrentEntity.lineNumber++;
                  fCurrentEntity.columnNumber = 1;
                  // the line end was the last buffered character: keep it in
                  // slot 0 and read more input behind it so the following
                  // character can be inspected
                  if (fCurrentEntity.position == fCurrentEntity.count) {
                      fCurrentEntity.ch[0] = (char)c;
                      load(1, false);
                  }
                  // CR or U+0085 directly followed by '\n' is consumed as one
                  // line end.
                  // NOTE(review): the XML 1.1 Recommendation pairs CR with LF or
                  // U+0085, not U+0085 with LF -- confirm which draft this targets.
                  if ((c == '\r' || c == 0x85) && external) {
                      if (fCurrentEntity.ch[fCurrentEntity.position++] != '\n') {
                          fCurrentEntity.position--;
                      }
                  }
                  // every accepted line-end form, including a lone U+2028, is
                  // reported as '\n' so that the value agrees with what
                  // peekChar() announced for the same character
                  c = '\n';
              }
  
              // return character that was scanned
              fCurrentEntity.columnNumber++;
              return c;
  
          } // scanChar():int
  
          /**
           * Scans a range of parsed character data, setting the fields of the
           * XMLString structure, appropriately.
           * <p>
           * <strong>Note:</strong> The characters are consumed.
           * <p>
           * <strong>Note:</strong> This method does not guarantee to return
           * the longest run of parsed character data. This method may return
           * before markup due to reaching the end of the input buffer or any
           * other reason.
           * <p>
           * <strong>Note:</strong> The fields contained in the XMLString
           * structure are not guaranteed to remain valid upon subsequent calls
           * to the entity scanner. Therefore, the caller is responsible for
           * immediately using the returned character data or making a copy of
           * the character data.
           *
           * @param content The content structure to fill.
           *
           * @return Returns the next character on the input, if known. This
           *         value may be -1 but this does <em>not</em> designate
           *         end of file.
           *
           * @throws IOException  Thrown if i/o error occurs.
           * @throws EOFException Thrown on end of file.
           */
          public int scanContent(XMLString content) throws IOException {
  
              // load more characters, if needed
              if (fCurrentEntity.position == fCurrentEntity.count) {
                  load(0, true);
              }
              else if (fCurrentEntity.position == fCurrentEntity.count - 1) {
                  // a single character is left: move it to the front of the
                  // buffer and read more input behind it
                  fCurrentEntity.ch[0] = fCurrentEntity.ch[fCurrentEntity.count - 1];
                  load(1, false);
                  fCurrentEntity.position = 0;
              }
  
              // normalize newlines
              int offset = fCurrentEntity.position;
              int c = fCurrentEntity.ch[offset];
              int newlines = 0;
              boolean external = fCurrentEntity.isExternal();
              // '\n' is always a line end; CR, U+0085 and U+2028 only count in
              // external entities
              if (c == '\n' || ((c == '\r' || c == 0x85 || c == 0x2028) && external)) {
                  do {
                      c = fCurrentEntity.ch[fCurrentEntity.position++];
                      if ((c == '\r' || c == 0x85) && external) {
                          newlines++;
                          fCurrentEntity.lineNumber++;
                          fCurrentEntity.columnNumber = 1;
                          // buffer exhausted: restart the run at the front of
                          // the buffer, leaving room for the line ends seen so far
                          if (fCurrentEntity.position == fCurrentEntity.count) {
                              offset = 0;
                              fCurrentEntity.position = newlines;
                              if (load(newlines, false)) {
                                  break;
                              }
                          }
                          // a directly following '\n' belongs to the same line end
                          if (fCurrentEntity.ch[fCurrentEntity.position] == '\n') {
                              fCurrentEntity.position++;
                              offset++;
                          }
                          /*** NEWLINE NORMALIZATION ***/
                          else {
                              newlines++;
                          }
                      }
                      // U+2028 is a line end only in external entities, exactly
                      // as in the entry test above; in an internal entity it
                      // ends the newline run and is left untouched
                      else if (c == '\n' || (c == 0x2028 && external)) {
                          newlines++;
                          fCurrentEntity.lineNumber++;
                          fCurrentEntity.columnNumber = 1;
                          if (fCurrentEntity.position == fCurrentEntity.count) {
                              offset = 0;
                              fCurrentEntity.position = newlines;
                              if (load(newlines, false)) {
                                  break;
                              }
                          }
                      }
                      else {
                          fCurrentEntity.position--;
                          break;
                      }
                  } while (fCurrentEntity.position < fCurrentEntity.count - 1);
                  // rewrite the consumed line-end characters as '\n'
                  for (int i = offset; i < fCurrentEntity.position; i++) {
                      fCurrentEntity.ch[i] = '\n';
                  }
                  int length = fCurrentEntity.position - offset;
                  // only one character remains: hand back the newlines alone
                  if (fCurrentEntity.position == fCurrentEntity.count - 1) {
                      content.setValues(fCurrentEntity.ch, offset, length);
                      return -1;
                  }
              }
  
              // inner loop, scanning for content
              while (fCurrentEntity.position < fCurrentEntity.count) {
                  c = fCurrentEntity.ch[fCurrentEntity.position++];
                  // U+0085 and U+2028 end the run only in external entities, so
                  // that the next call normalizes them through the block above;
                  // in internal entities they stay part of the content. Every
                  // other character ends the run when XMLChar.isContent rejects it.
                  boolean endOfRun = (c == 0x85 || c == 0x2028)
                      ? external
                      : !XMLChar.isContent(c);
                  if (endOfRun) {
                      fCurrentEntity.position--;
                      break;
                  }
              }
              int length = fCurrentEntity.position - offset;
              fCurrentEntity.columnNumber += length - newlines;
              content.setValues(fCurrentEntity.ch, offset, length);
  
              // return next character
              if (fCurrentEntity.position != fCurrentEntity.count) {
                  c = fCurrentEntity.ch[fCurrentEntity.position];
                  // REVISIT: Does this need to be updated to fix the
                  //          #x0D ^#x0A newline normalization problem? -Ac
                  if ((c == '\r' || c == 0x85 || c == 0x2028) && external) {
                      c = '\n';
                  }
              }
              else {
                  // buffer used up: the next character is not known yet
                  c = -1;
              }
              return c;
  
          } // scanContent(XMLString):int
  
          /**
           * Scans a range of attribute value data, setting the fields of the
           * XMLString structure, appropriately.
           * <p>
           * <strong>Note:</strong> The characters are consumed.
           * <p>
           * <strong>Note:</strong> This method does not guarantee to return
           * the longest run of attribute value data. This method may return
           * before the quote character due to reaching the end of the input
           * buffer or any other reason.
           * <p>
           * <strong>Note:</strong> The fields contained in the XMLString
           * structure are not guaranteed to remain valid upon subsequent calls
           * to the entity scanner. Therefore, the caller is responsible for
           * immediately using the returned character data or making a copy of
           * the character data.
           *
           * @param quote   The quote character that signifies the end of the
           *                attribute value data.
           * @param content The content structure to fill.
           *
           * @return Returns the next character on the input, if known. This
           *         value may be -1 but this does <em>not</em> designate
           *         end of file.
           *
           * @throws IOException  Thrown if i/o error occurs.
           * @throws EOFException Thrown on end of file.
           */
          public int scanLiteral(int quote, XMLString content)
              throws IOException {
  
              // load more characters, if needed
              if (fCurrentEntity.position == fCurrentEntity.count) {
                  load(0, true);
              }
              else if (fCurrentEntity.position == fCurrentEntity.count - 1) {
                  // a single character is left: move it to the front of the
                  // buffer and read more input behind it
                  fCurrentEntity.ch[0] = fCurrentEntity.ch[fCurrentEntity.count - 1];
                  load(1, false);
                  fCurrentEntity.position = 0;
              }
  
              // normalize newlines
              int offset = fCurrentEntity.position;
              int c = fCurrentEntity.ch[offset];
              int newlines = 0;
              boolean external = fCurrentEntity.isExternal();
              // '\n' is always a line end; CR, U+0085 and U+2028 only count in
              // external entities
              if (c == '\n' || ((c == '\r' || c == 0x85 || c == 0x2028) && external)) {
                  do {
                      c = fCurrentEntity.ch[fCurrentEntity.position++];
                      if ((c == '\r' || c == 0x85) && external) {
                          newlines++;
                          fCurrentEntity.lineNumber++;
                          fCurrentEntity.columnNumber = 1;
                          // buffer exhausted: restart the run at the front of
                          // the buffer, leaving room for the line ends seen so far
                          if (fCurrentEntity.position == fCurrentEntity.count) {
                              offset = 0;
                              fCurrentEntity.position = newlines;
                              if (load(newlines, false)) {
                                  break;
                              }
                          }
                          // a directly following '\n' belongs to the same line end
                          if (fCurrentEntity.ch[fCurrentEntity.position] == '\n') {
                              fCurrentEntity.position++;
                              offset++;
                          }
                          /*** NEWLINE NORMALIZATION ***/
                          else {
                              newlines++;
                          }
                      }
                      // U+2028 is a line end only in external entities, exactly
                      // as in the entry test above; in an internal entity it
                      // ends the newline run and is left untouched
                      else if (c == '\n' || (c == 0x2028 && external)) {
                          newlines++;
                          fCurrentEntity.lineNumber++;
                          fCurrentEntity.columnNumber = 1;
                          if (fCurrentEntity.position == fCurrentEntity.count) {
                              offset = 0;
                              fCurrentEntity.position = newlines;
                              if (load(newlines, false)) {
                                  break;
                              }
                          }
                      }
                      else {
                          fCurrentEntity.position--;
                          break;
                      }
                  } while (fCurrentEntity.position < fCurrentEntity.count - 1);
                  // rewrite the consumed line-end characters as '\n'
                  for (int i = offset; i < fCurrentEntity.position; i++) {
                      fCurrentEntity.ch[i] = '\n';
                  }
                  int length = fCurrentEntity.position - offset;
                  // only one character remains: hand back the newlines alone
                  if (fCurrentEntity.position == fCurrentEntity.count - 1) {
                      content.setValues(fCurrentEntity.ch, offset, length);
                      return -1;
                  }
              }
  
              // scan literal value
              while (fCurrentEntity.position < fCurrentEntity.count) {
                  c = fCurrentEntity.ch[fCurrentEntity.position++];
                  // U+0085 and U+2028 end the run only in external entities, so
                  // that the next call normalizes them through the block above;
                  // in internal entities they stay part of the value. Every
                  // other character ends the run when XMLChar.isContent rejects it.
                  boolean endOfRun = (c == 0x85 || c == 0x2028)
                      ? external
                      : !XMLChar.isContent(c);
                  if ((c == quote &&
                       (!fCurrentEntity.literal || external))
                      || c == '%' || endOfRun) {
                      fCurrentEntity.position--;
                      break;
                  }
              }
              int length = fCurrentEntity.position - offset;
              fCurrentEntity.columnNumber += length - newlines;
              content.setValues(fCurrentEntity.ch, offset, length);
  
              // return next character
              if (fCurrentEntity.position != fCurrentEntity.count) {
                  c = fCurrentEntity.ch[fCurrentEntity.position];
                  // NOTE: We don't want to accidentally signal the
                  //       end of the literal if we're expanding an
                  //       entity appearing in the literal. -Ac
                  if (c == quote && fCurrentEntity.literal) {
                      c = -1;
                  }
              }
              else {
                  // buffer used up: the next character is not known yet
                  c = -1;
              }
              return c;
  
          } // scanLiteral(int,XMLString):int
  
          /**
           * Scans a range of character data up to the specified delimiter,
           * setting the fields of the XMLString structure, appropriately.
           * <p>
           * <strong>Note:</strong> The characters are consumed.
           * <p>
           * <strong>Note:</strong> This assumes that the internal buffer is
           * at least the same size, or bigger, than the length of the delimiter
           * and that the delimiter contains at least one character.
           * <p>
           * <strong>Note:</strong> This method does not guarantee to return
           * the longest run of character data. This method may return before
           * the delimiter due to reaching the end of the input buffer or any
           * other reason.
           * <p>
           * <strong>Note:</strong> The fields contained in the XMLString
           * structure are not guaranteed to remain valid upon subsequent calls
           * to the entity scanner. Therefore, the caller is responsible for
           * immediately using the returned character data or making a copy of
           * the character data.
           *
           * @param delimiter The string that signifies the end of the character
           *                  data to be scanned.
           * @param data      The data structure to fill.
           *
           * @return Returns true if there is more data to scan, false otherwise.
           *
           * @throws IOException  Thrown if i/o error occurs.
           * @throws EOFException Thrown on end of file.
           */
          public boolean scanData(String delimiter, XMLString data)
              throws IOException {
  
              // load more characters, if needed
              int delimLen = delimiter.length();
              char charAt0 = delimiter.charAt(0);
              //int limit = fCurrentEntity.count - delimLen + 1;
  
              if (fCurrentEntity.position == fCurrentEntity.count) {
                  load(0, true);
              }
              else if (fCurrentEntity.position >= fCurrentEntity.count - delimLen) {
                  // no more than delimLen characters are left: shift them to
                  // the front of the buffer and read more input behind them
                  System.arraycopy(fCurrentEntity.ch, fCurrentEntity.position,
                                   fCurrentEntity.ch, 0, fCurrentEntity.count - fCurrentEntity.position);
                  load(fCurrentEntity.count - fCurrentEntity.position, false);
                  fCurrentEntity.position = 0;
              }
  
              // normalize newlines
              int offset = fCurrentEntity.position;
              int c = fCurrentEntity.ch[offset];
              int newlines = 0;
              boolean external = fCurrentEntity.isExternal();
              // '\n' is always a line end; CR, U+0085 and U+2028 only count in
              // external entities
              if (c == '\n' || ((c == '\r' || c == 0x85 || c == 0x2028) && external)) {
                  do {
                      c = fCurrentEntity.ch[fCurrentEntity.position++];
                      if ((c == '\r' || c == 0x85) && external) {
                          newlines++;
                          fCurrentEntity.lineNumber++;
                          fCurrentEntity.columnNumber = 1;
                          // buffer exhausted: restart the run at the front of
                          // the buffer, leaving room for the line ends seen so far
                          if (fCurrentEntity.position == fCurrentEntity.count) {
                              offset = 0;
                              fCurrentEntity.position = newlines;
                              if (load(newlines, false)) {
                                  break;
                              }
                          }
                          // a directly following '\n' belongs to the same line end
                          if (fCurrentEntity.ch[fCurrentEntity.position] == '\n') {
                              fCurrentEntity.position++;
                              offset++;
                          }
                          /*** NEWLINE NORMALIZATION ***/
                          else {
                              newlines++;
                          }
                      }
                      // U+2028 is a line end only in external entities, matching
                      // both the entry test above and the delimiter loop below;
                      // in an internal entity it ends the newline run untouched
                      else if (c == '\n' || (c == 0x2028 && external)) {
                          newlines++;
                          fCurrentEntity.lineNumber++;
                          fCurrentEntity.columnNumber = 1;
                          if (fCurrentEntity.position == fCurrentEntity.count) {
                              offset = 0;
                              fCurrentEntity.position = newlines;
                              // NOTE(review): count is reset here but not in the
                              // CR/U+0085 branch above -- confirm that the
                              // difference is intended.
                              fCurrentEntity.count = newlines;
                              if (load(newlines, false)) {
                                  break;
                              }
                          }
                      }
                      else {
                          fCurrentEntity.position--;
                          break;
                      }
                  } while (fCurrentEntity.position < fCurrentEntity.count - 1);
                  // rewrite the consumed line-end characters as '\n'
                  for (int i = offset; i < fCurrentEntity.position; i++) {
                      fCurrentEntity.ch[i] = '\n';
                  }
                  int length = fCurrentEntity.position - offset;
                  // only one character remains: hand back the newlines alone
                  // and report that more data follows
                  if (fCurrentEntity.position == fCurrentEntity.count - 1) {
                      data.setValues(fCurrentEntity.ch, offset, length);
                      return true;
                  }
              }
  
              // iterate over buffer looking for delimiter
              boolean done = false;
              OUTER: while (fCurrentEntity.position < fCurrentEntity.count) {
                  c = fCurrentEntity.ch[fCurrentEntity.position++];
                  if (c == charAt0) {
                      // looks like we just hit the delimiter
                      int delimOffset = fCurrentEntity.position - 1;
                      for (int i = 1; i < delimLen; i++) {
                          // buffer ends inside a possible delimiter: back up to
                          // its first character and stop scanning for now
                          if (fCurrentEntity.position == fCurrentEntity.count) {
                              fCurrentEntity.position -= i;
                              break OUTER;
                          }
                          c = fCurrentEntity.ch[fCurrentEntity.position++];
                          if (delimiter.charAt(i) != c) {
                              fCurrentEntity.position--;
                              break;
                          }
                      }
                      // the whole delimiter matched
                      if (fCurrentEntity.position == delimOffset + delimLen) {
                          done = true;
                          break;
                      }
                  }
                  // stop in front of a line end so the next call normalizes it
                  else if (c == '\n' || (external && (c == '\r' || c == 0x85 || c == 0x2028))) {
                      fCurrentEntity.position--;
                      break;
                  }
                  // stop in front of a character XMLChar reports as invalid
                  else if (XMLChar.isInvalid(c)) {
                      fCurrentEntity.position--;
                      break;
                  }
              }
              int length = fCurrentEntity.position - offset;
              fCurrentEntity.columnNumber += length - newlines;
              // the delimiter itself is consumed but not part of the data
              if (done) {
                  length -= delimLen;
              }
              data.setValues(fCurrentEntity.ch, offset, length);
  
              // return true if string was skipped
              return !done;
  
          } // scanData(String,XMLString)
  
          /**
           * Skips a character appearing immediately on the input.
           * <p>
           * <strong>Note:</strong> The character is consumed only if it matches
           * the specified character.
           *
           * @param c The character to skip.
           *
           * @return Returns true if the character was skipped.
           *
           * @throws IOException  Thrown if i/o error occurs.
           * @throws EOFException Thrown on end of file.
           */
          public boolean skipChar(int c) throws IOException {
  
              // load more characters, if needed
              if (fCurrentEntity.position == fCurrentEntity.count) {
                  load(0, true);
              }
  
              // skip character
              int cc = fCurrentEntity.ch[fCurrentEntity.position];
              if (cc == c) {
                  fCurrentEntity.position++;
                  if (c == '\n') {
                      fCurrentEntity.lineNumber++;
                      fCurrentEntity.columnNumber = 1;
                  }
                  else {
                      fCurrentEntity.columnNumber++;
                  }
                  return true;
              }
              // a requested '\n' also matches U+2028, but only in an external
              // entity -- the same condition peekChar() uses when it reports
              // this character as '\n'
              else if (c == '\n' && cc == 0x2028 && fCurrentEntity.isExternal()) {
                  fCurrentEntity.position++;
                  fCurrentEntity.lineNumber++;
                  fCurrentEntity.columnNumber = 1;
                  return true;
              }
              else if (c == '\n' && (cc == '\r' || cc == 0x85 ) && fCurrentEntity.isExternal()) {
                  // handle newlines
                  // consume the CR/U+0085 first; only then can the buffer be
                  // exhausted, in which case the character is kept in slot 0
                  // and more input is read behind it (same order as scanChar())
                  fCurrentEntity.position++;
                  if (fCurrentEntity.position == fCurrentEntity.count) {
                      fCurrentEntity.ch[0] = (char)cc;
                      load(1, false);
                  }
                  // a directly following '\n' belongs to the same line end; the
                  // bounds test avoids reading past the loaded characters when
                  // no further input was available
                  if (fCurrentEntity.position < fCurrentEntity.count &&
                      fCurrentEntity.ch[fCurrentEntity.position] == '\n') {
                      fCurrentEntity.position++;
                  }
                  fCurrentEntity.lineNumber++;
                  fCurrentEntity.columnNumber = 1;
                  return true;
              }
  
              // character was not skipped
              return false;
  
          } // skipChar(int):boolean
  
          /**
           * Skips XML 1.1 space characters appearing immediately on the input.
           * <p>
           * <strong>Note:</strong> The characters are consumed only if they are
           * space characters, as decided by <code>XMLChar.isXML11Space</code>.
           * <p>
           * Line and column numbers of the current entity are kept up to date
           * while skipping: <code>'\n'</code> always starts a new line, and for
           * external entities so do <code>'\r'</code>, <code>0x85</code> and
           * <code>0x2028</code>. In an external entity, a <code>'\n'</code>
           * directly following <code>'\r'</code> or <code>0x85</code> is
           * consumed as part of the same line break.
           *
           * @return Returns true if at least one space character was skipped.
           *
           * @throws IOException  Thrown if i/o error occurs.
           * @throws EOFException Thrown on end of file.
           *
           * @see org.apache.xerces.util.XMLChar#isXML11Space
           */
          public boolean skipSpaces() throws IOException {

              // load more characters, if needed
              if (fCurrentEntity.position == fCurrentEntity.count) {
                  load(0, true);
              }

              // skip spaces
              int c = fCurrentEntity.ch[fCurrentEntity.position];
              if (XMLChar.isXML11Space(c)) {
                  // sampled once, before the loop; the loop may call load(..., true)
                  boolean external = fCurrentEntity.isExternal();
                  do {
                      // set when the reload below reports a change of entity
                      boolean entityChanged = false;
                      // handle newlines
                      if (c == '\n' || (external && (c == '\r' || c == 0x85 || c == 0x2028))) {
                          fCurrentEntity.lineNumber++;
                          fCurrentEntity.columnNumber = 1;
                          // the line break is the last loaded character: keep it
                          // in slot 0 and load more input behind it so that the
                          // character after it can be inspected
                          if (fCurrentEntity.position == fCurrentEntity.count - 1) {
                              fCurrentEntity.ch[0] = (char)c;
                              entityChanged = load(1, true);
                              if (!entityChanged)
                                  // the load moved the position to 1; point it
                                  // back at the preserved character when the
                                  // entity did not change
                                  fCurrentEntity.position = 0;
                          }
                          if ((c == '\r' || c == 0x85) && external) {
                              // REVISIT: Does this need to be updated to fix the
                              //          #x0D ^#x0A newline normalization problem? -Ac
                              // peek at the following character; step back again
                              // unless it is a '\n' belonging to this line break
                              if (fCurrentEntity.ch[++fCurrentEntity.position] != '\n') {
                                  fCurrentEntity.position--;
                              }
                          }
                      }
                      else {
                          fCurrentEntity.columnNumber++;
                      }
                      // move past the character just handled, unless the entity
                      // changed during the reload above
                      if (!entityChanged)
                          fCurrentEntity.position++;
                      // load more characters, if needed
                      if (fCurrentEntity.position == fCurrentEntity.count) {
                          load(0, true);
                      }
                  } while (XMLChar.isXML11Space(c = fCurrentEntity.ch[fCurrentEntity.position]));
                  return true;
              }

              // no spaces were found
              return false;

          } // skipSpaces():boolean
  
          /**
           * Skips the specified string appearing immediately on the input.
           * <p>
           * <strong>Note:</strong> The characters are consumed only if the
           * input matches the complete string; on a mismatch the read position
           * is moved back by the number of characters examined. On success the
           * column number is advanced by the length of the string; the line
           * number is not touched.
           *
           * @param s The string to skip.
           *
           * @return Returns true if the string was skipped.
           *
           * @throws IOException  Thrown if i/o error occurs.
           * @throws EOFException Thrown on end of file.
           */
          public boolean skipString(String s) throws IOException {

              // load more characters, if needed
              if (fCurrentEntity.position == fCurrentEntity.count) {
                  load(0, true);
              }

              // compare one character at a time; "examined" counts how many
              // characters of s have been read from the input so far
              final int expected = s.length();
              int examined = 0;
              while (examined < expected) {
                  final char actual = fCurrentEntity.ch[fCurrentEntity.position++];
                  examined++;
                  if (actual != s.charAt(examined - 1)) {
                      // mismatch: un-read everything examined so far
                      fCurrentEntity.position -= examined;
                      return false;
                  }
                  final boolean bufferExhausted =
                      fCurrentEntity.position == fCurrentEntity.count;
                  if (examined < expected && bufferExhausted) {
                      // keep the characters matched so far at the front of the
                      // buffer and load more input behind them
                      System.arraycopy(fCurrentEntity.ch, fCurrentEntity.count - examined,
                                       fCurrentEntity.ch, 0, examined);
                      // REVISIT: Can a string to be skipped cross an
                      //          entity boundary? -Ac
                      if (load(examined, false)) {
                          fCurrentEntity.position -= examined;
                          return false;
                      }
                  }
              }
              fCurrentEntity.columnNumber += expected;
              return true;

          } // skipString(String):boolean
  
      } // class XML11EntityScanner
  
  } // class XMLEntityManager
  
  
  
  1.1                  xml-xerces/java/src/org/apache/xerces/impl/XML11DocumentScannerImpl.java
  
  Index: XML11DocumentScannerImpl.java
  ===================================================================
  /*
   * The Apache Software License, Version 1.1
   *
   *
   * Copyright (c) 1999-2002 The Apache Software Foundation.
   * All rights reserved.
   *
   * Redistribution and use in source and binary forms, with or without
   * modification, are permitted provided that the following conditions
   * are met:
   *
   * 1. Redistributions of source code must retain the above copyright
   *    notice, this list of conditions and the following disclaimer.
   *
   * 2. Redistributions in binary form must reproduce the above copyright
   *    notice, this list of conditions and the following disclaimer in
   *    the documentation and/or other materials provided with the
   *    distribution.
   *
   * 3. The end-user documentation included with the redistribution,
   *    if any, must include the following acknowledgment:
   *       "This product includes software developed by the
   *        Apache Software Foundation (http://www.apache.org/)."
   *    Alternately, this acknowledgment may appear in the software itself,
   *    if and wherever such third-party acknowledgments normally appear.
   *
   * 4. The names "Xerces" and "Apache Software Foundation" must
   *    not be used to endorse or promote products derived from this
   *    software without prior written permission. For written
   *    permission, please contact apache@apache.org.
   *
   * 5. Products derived from this software may not be called "Apache",
   *    nor may "Apache" appear in their name, without prior written
   *    permission of the Apache Software Foundation.
   *
   * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
   * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
   * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
   * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
   * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
   * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
   * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
   * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
   * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
   * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
   * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   * SUCH DAMAGE.
   * ====================================================================
   *
   * This software consists of voluntary contributions made by many
   * individuals on behalf of the Apache Software Foundation and was
   * originally based on software copyright (c) 1999, International
   * Business Machines, Inc., http://www.apache.org.  For more
   * information on the Apache Software Foundation, please see
   * <http://www.apache.org/>.
   */
  
  package org.apache.xerces.impl;
  
  import java.io.EOFException;
  import java.io.IOException;
  import java.util.Stack;
  
  import org.apache.xerces.impl.XMLEntityManager;
  import org.apache.xerces.impl.XMLEntityScanner;
  import org.apache.xerces.impl.XMLErrorReporter;
  import org.apache.xerces.impl.msg.XMLMessageFormatter;
  import org.apache.xerces.impl.validation.ValidationManager;
  
  import org.apache.xerces.util.XMLAttributesImpl;
  import org.apache.xerces.util.XMLStringBuffer;
  import org.apache.xerces.util.XMLResourceIdentifierImpl;
  import org.apache.xerces.util.SymbolTable;
  import org.apache.xerces.util.XMLChar;
  
  import org.apache.xerces.xni.QName;
  import org.apache.xerces.xni.XMLAttributes;
  import org.apache.xerces.xni.XMLDocumentHandler;
  import org.apache.xerces.xni.XMLResourceIdentifier;
  import org.apache.xerces.xni.XMLString;
  import org.apache.xerces.xni.XNIException;
  import org.apache.xerces.xni.parser.XMLComponent;
  import org.apache.xerces.xni.parser.XMLComponentManager;
  import org.apache.xerces.xni.parser.XMLConfigurationException;
  import org.apache.xerces.xni.parser.XMLDocumentScanner;
  import org.apache.xerces.xni.parser.XMLDTDScanner;
  import org.apache.xerces.xni.parser.XMLInputSource;
  
  /**
   * This class is responsible for scanning XML document structure
   * and content. The scanner acts as the source for the document
   * information which is communicated to the document handler.
   * <p>
   * This component requires the following features and properties from the
   * component manager that uses it:
   * <ul>
   *  <li>http://xml.org/sax/features/namespaces</li>
   *  <li>http://xml.org/sax/features/validation</li>
   *  <li>http://apache.org/xml/features/nonvalidating/load-external-dtd</li>
   *  <li>http://apache.org/xml/features/scanner/notify-char-refs</li>
   *  <li>http://apache.org/xml/features/scanner/notify-builtin-refs</li>
   *  <li>http://apache.org/xml/properties/internal/symbol-table</li>
   *  <li>http://apache.org/xml/properties/internal/error-reporter</li>
   *  <li>http://apache.org/xml/properties/internal/entity-manager</li>
   *  <li>http://apache.org/xml/properties/internal/dtd-scanner</li>
   * </ul>
   *
   * @author Glenn Marcy, IBM
   * @author Andy Clark, IBM
   * @author Arnaud  Le Hors, IBM
   * @author Eric Ye, IBM
   *
   * @version $Id: XML11DocumentScannerImpl.java,v 1.1 2002/07/31 14:08:12 neilg Exp $
   */
  public class XML11DocumentScannerImpl
      extends XMLDocumentScannerImpl {
  
  
      /**
       * Scratch array of 3 strings.
       * NOTE(review): not referenced by scanContent, scanXMLDeclOrTextDecl or
       * scanAttributeValue -- confirm it is still needed in this class.
       */
      private String[] fStrings = new String[3];
  
      /**
       * Scratch string: receives the text returned by the entity scanner in
       * scanContent and the pseudo-attribute value in scanXMLDeclOrTextDecl.
       */
      private XMLString fString = new XMLString();
  
      /**
       * Scratch string buffers. fStringBuffer collects content and the
       * normalized attribute value, fStringBuffer2 collects the non-normalized
       * attribute value, and fStringBuffer3 receives surrogate pairs while an
       * attribute value is scanned.
       */
      private XMLStringBuffer fStringBuffer = new XMLStringBuffer();
      private XMLStringBuffer fStringBuffer2 = new XMLStringBuffer();
      private XMLStringBuffer fStringBuffer3 = new XMLStringBuffer();
  
      //
      // Constructors
      //
  
      /** Default constructor; simply invokes the superclass constructor. */
      public XML11DocumentScannerImpl() {
          super();
      } // <init>()
  
      //
      // overridden methods
      //
  
      // XMLDocumentFragmentImpl methods
  
      /**
       * Scans element content and passes it to the document handler.
       * <p>
       * A <code>'\r'</code>, <code>0x85</code> or <code>0x2028</code> returned
       * by the entity scanner is consumed and reported as part of the content.
       * A <code>']'</code> at the very start of the content is consumed here
       * together with any directly following <code>']'</code> characters, so
       * that a <code>"]]&gt;"</code> sequence in content can be reported as an
       * error.
       *
       * @return Returns the next character on the stream, or -1 if that
       *         character was already consumed by this method.
       */
      protected int scanContent() throws IOException, XNIException {

          XMLString text = fString;
          int next = fEntityScanner.scanContent(text);

          final boolean lineBreakChar = next == '\r' || next == 0x85 || next == 0x2028;
          if (lineBreakChar) {
              // happens when there is the character reference &#13;
              // but scanContent doesn't do entity expansions...
              // is this *really* necessary???  - NG
              fEntityScanner.scanChar();
              fStringBuffer.clear();
              fStringBuffer.append(fString);
              fStringBuffer.append((char)next);
              text = fStringBuffer;
              next = -1;
          }
          if (fDocumentHandler != null && text.length > 0) {
              fDocumentHandler.characters(text, null);
          }

          // only a ']' that starts the content needs the handling below
          if (next != ']' || fString.length != 0) {
              return next;
          }

          fStringBuffer.clear();
          fStringBuffer.append((char)fEntityScanner.scanChar());
          // remember where we are in case we get an endEntity before we
          // could flush the buffer out - this happens when we're parsing an
          // entity which ends with a ]
          fInScanContent = true;
          //
          // We work on a single character basis to handle cases such as:
          // ']]]>' which we might otherwise miss.
          //
          boolean sawAnotherBracket = false;
          while (fEntityScanner.skipChar(']')) {
              fStringBuffer.append(']');
              sawAnotherBracket = true;
          }
          if (sawAnotherBracket && fEntityScanner.skipChar('>')) {
              reportFatalError("CDEndInContent", null);
          }
          if (fDocumentHandler != null && fStringBuffer.length != 0) {
              fDocumentHandler.characters(fStringBuffer, null);
          }
          fInScanContent = false;
          return -1;

      } // scanContent():int
  
      /**
       * Scans an XML or text declaration.
       * <p>
       * <pre>
       * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
       * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
       * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' |  "'" EncName "'" )
       * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
       * [32] SDDecl ::= S 'standalone' Eq (("'" ('yes' | 'no') "'")
       *                 | ('"' ('yes' | 'no') '"'))
       *
       * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
       * </pre>
       * <p>This was overridden to allow for version="1.1": the version
       * pseudo-attribute is accepted when its value is either "1.0" or "1.1";
       * any other value is reported as "VersionNotSupported".</p>
       * <p>The pseudo-attributes must appear in the order version, encoding,
       * standalone. Violations are reported through
       * <code>reportFatalError</code>.</p>
       *
       * @param scanningTextDecl True if a text declaration is to
       *                         be scanned instead of an XML
       *                         declaration.
       * @param pseudoAttributeValues An array of size 3 to return the version,
       *                         encoding and standalone pseudo attribute values
       *                         (in that order). An entry is null when the
       *                         corresponding pseudo-attribute was not found.
       *
       * <strong>Note:</strong> This method uses fString, anything in it
       * at the time of calling is lost.
       */
      protected void scanXMLDeclOrTextDecl(boolean scanningTextDecl,
                                           String[] pseudoAttributeValues) 
          throws IOException, XNIException {

          // pseudo-attribute values
          String version = null;
          String encoding = null;
          String standalone = null;

          // scan pseudo-attributes; the state names the pseudo-attribute
          // that may legally come next
          final int STATE_VERSION = 0;
          final int STATE_ENCODING = 1;
          final int STATE_STANDALONE = 2;
          final int STATE_DONE = 3;
          int state = STATE_VERSION;

          boolean dataFoundForTarget = false;
          boolean sawSpace = fEntityScanner.skipSpaces();
          while (fEntityScanner.peekChar() != '?') {
              dataFoundForTarget = true;
              // the name is compared by identity below; presumably it is a
              // symbol from the symbol table -- verify against scanPseudoAttribute
              String name = scanPseudoAttribute(scanningTextDecl, fString);
              switch (state) {
                  case STATE_VERSION: {
                      if (name == fVersionSymbol) {
                          if (!sawSpace) {
                              reportFatalError(scanningTextDecl
                                         ? "SpaceRequiredBeforeVersionInTextDecl"
                                         : "SpaceRequiredBeforeVersionInXMLDecl",
                                               null);
                          }
                          version = fString.toString();
                          state = STATE_ENCODING;
                          // reject only versions that are neither 1.0 nor 1.1;
                          // the two tests must be joined with &&, since with ||
                          // the condition holds for every possible value
                          if (!version.equals("1.0") && !version.equals("1.1")) {
                              // REVISIT: XML REC says we should throw an error in such cases.
                              // some may object the throwing of fatalError.
                              reportFatalError("VersionNotSupported", 
                                               new Object[]{version});
                          }
                      }
                      else if (name == fEncodingSymbol) {
                          // only a text declaration may omit the version
                          if (!scanningTextDecl) {
                              reportFatalError("VersionInfoRequired", null);
                          }
                          if (!sawSpace) {
                              reportFatalError(scanningTextDecl
                                        ? "SpaceRequiredBeforeEncodingInTextDecl"
                                        : "SpaceRequiredBeforeEncodingInXMLDecl",
                                               null);
                          }
                          encoding = fString.toString();
                          state = scanningTextDecl ? STATE_DONE : STATE_STANDALONE;
                      }
                      else {
                          if (scanningTextDecl) {
                              reportFatalError("EncodingDeclRequired", null);
                          }
                          else {
                              reportFatalError("VersionInfoRequired", null);
                          }
                      }
                      break;
                  }
                  case STATE_ENCODING: {
                      if (name == fEncodingSymbol) {
                          if (!sawSpace) {
                              reportFatalError(scanningTextDecl
                                        ? "SpaceRequiredBeforeEncodingInTextDecl"
                                        : "SpaceRequiredBeforeEncodingInXMLDecl",
                                               null);
                          }
                          encoding = fString.toString();
                          state = scanningTextDecl ? STATE_DONE : STATE_STANDALONE;
                      }
                      else if (!scanningTextDecl && name == fStandaloneSymbol) {
                          // an XML declaration may skip the encoding
                          if (!sawSpace) {
                              reportFatalError("SpaceRequiredBeforeStandalone",
                                               null);
                          }
                          standalone = fString.toString();
                          state = STATE_DONE;
                          if (!standalone.equals("yes") && !standalone.equals("no")) {
                              reportFatalError("SDDeclInvalid", null);
                          }
                      }
                      else {
                          reportFatalError("EncodingDeclRequired", null);
                      }
                      break;
                  }
                  case STATE_STANDALONE: {
                      if (name == fStandaloneSymbol) {
                          if (!sawSpace) {
                              reportFatalError("SpaceRequiredBeforeStandalone",
                                               null);
                          }
                          standalone = fString.toString();
                          state = STATE_DONE;
                          if (!standalone.equals("yes") && !standalone.equals("no")) {
                              reportFatalError("SDDeclInvalid", null);
                          }
                      }
                      else {
                          reportFatalError("EncodingDeclRequired", null);
                      }
                      break;
                  }
                  default: {
                      // STATE_DONE: nothing further is allowed
                      reportFatalError("NoMorePseudoAttributes", null);
                  }
              }
              sawSpace = fEntityScanner.skipSpaces();
          }
          // REVISIT: should we remove this error reporting?
          if (scanningTextDecl && state != STATE_DONE) {
              reportFatalError("MorePseudoAttributes", null);
          }
          
          // If there is no data in the xml or text decl then we fail to report error 
          // for version or encoding info above.
          if (scanningTextDecl) {
              if (!dataFoundForTarget && encoding == null) {
                  reportFatalError("EncodingDeclRequired", null);
              }
          }
          else {
              if (!dataFoundForTarget && version == null) {
                  reportFatalError("VersionInfoRequired", null);
              }
          }

          // end: the declaration must be closed by "?>"
          if (!fEntityScanner.skipChar('?')) {
              reportFatalError("XMLDeclUnterminated", null);
          }
          if (!fEntityScanner.skipChar('>')) {
              reportFatalError("XMLDeclUnterminated", null);
          }
          
          // fill in return array
          pseudoAttributeValues[0] = version;
          pseudoAttributeValues[1] = encoding;
          pseudoAttributeValues[2] = standalone;

      } // scanXMLDeclOrTextDecl(boolean)
  
      /**
       * Scans an attribute value and normalizes whitespace converting all
       * whitespace characters to space characters.
       * 
       * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' | "'" ([^<&'] | Reference)* "'"
       *
       * The value is read in runs with the entity scanner's scanLiteral. Each
       * time a run stops at something other than the closing quote, that
       * character (a reference, '<', '%', ']', a line break, a surrogate or
       * an invalid character) is handled individually before the next run is
       * scanned. Two results are built side by side: the normalized value and
       * the non-normalized value; text is added to the latter only while the
       * entity depth equals the depth at which the attribute started.
       *
       * @param value The XMLString to fill in with the value.
       * @param nonNormalizedValue The XMLString to fill in with the 
       *                           non-normalized value.
       * @param atName The name of the attribute being parsed (for error msgs).
       * @param attributes The attributes list for the scanned attribute.
       * @param attrIndex The index of the attribute to use from the list.
       * @param checkEntities true if undeclared entities should be reported as VC violation,  
       *                      false if undeclared entities should be reported as WFC violation.
       *
       * <strong>Note:</strong> This method uses fStringBuffer, fStringBuffer2
       * and fStringBuffer3, anything in them at the time of calling is lost.
       **/
      protected void scanAttributeValue(XMLString value, 
                                        XMLString nonNormalizedValue,
                                        String atName,
                                        XMLAttributes attributes, int attrIndex,
                                        boolean checkEntities)
          throws IOException, XNIException
      {
          // quote
          int quote = fEntityScanner.peekChar();
          if (quote != '\'' && quote != '"') {
              reportFatalError("OpenQuoteExpected", new Object[]{atName});
          }

          fEntityScanner.scanChar();
          // depth at which the attribute value starts; compared with
          // fEntityDepth below to tell whether scanning is currently inside
          // an entity started from within this value
          int entityDepth = fEntityDepth;

          // first run of the value, up to the quote or a character that
          // needs individual handling
          int c = fEntityScanner.scanLiteral(quote, value);
          if (DEBUG_ATTR_NORMALIZATION) {
              System.out.println("** scanLiteral -> \""
                                 + value.toString() + "\"");
          }
          // record the run in the non-normalized value before normalizing it
          fStringBuffer2.clear();
          fStringBuffer2.append(value);
          normalizeWhitespace(value);
          if (DEBUG_ATTR_NORMALIZATION) {
              System.out.println("** normalizeWhitespace -> \""
                                 + value.toString() + "\"");
          }
          if (c != quote) {
              // the value did not end with the first run: collect the
              // normalized value piece by piece in fStringBuffer
              fScanningAttribute = true;
              fStringBuffer.clear();
              do {
                  fStringBuffer.append(value);
                  if (DEBUG_ATTR_NORMALIZATION) {
                      System.out.println("** value2: \""
                                         + fStringBuffer.toString() + "\"");
                  }
                  if (c == '&') {
                      // a character or entity reference
                      fEntityScanner.skipChar('&');
                      if (entityDepth == fEntityDepth) {
                          fStringBuffer2.append('&');
                      }
                      if (fEntityScanner.skipChar('#')) {
                          if (entityDepth == fEntityDepth) {
                              fStringBuffer2.append('#');
                          }
                          // character reference; both buffers are handed to
                          // the helper, which presumably appends to them
                          int ch = scanCharReferenceValue(fStringBuffer, fStringBuffer2);
                          if (ch != -1) {
                              if (DEBUG_ATTR_NORMALIZATION) {
                                  System.out.println("** value3: \""
                                                     + fStringBuffer.toString()
                                                     + "\"");
                              }
                          }
                      }
                      else {
                          String entityName = fEntityScanner.scanName();
                          if (entityName == null) {
                              reportFatalError("NameRequiredInReference", null);
                          }
                          else if (entityDepth == fEntityDepth) {
                              fStringBuffer2.append(entityName);
                          }
                          if (!fEntityScanner.skipChar(';')) {
                              reportFatalError("SemicolonRequiredInReference",
                                               new Object []{entityName});
                          }
                          else if (entityDepth == fEntityDepth) {
                              fStringBuffer2.append(';');
                          }
                          // the five built-in entities are replaced directly
                          // in the normalized value
                          if (entityName == fAmpSymbol) {
                              fStringBuffer.append('&');
                              if (DEBUG_ATTR_NORMALIZATION) {
                                  System.out.println("** value5: \""
                                                     + fStringBuffer.toString()
                                                     + "\"");
                              }
                          }
                          else if (entityName == fAposSymbol) {
                              fStringBuffer.append('\'');
                              if (DEBUG_ATTR_NORMALIZATION) {
                                  System.out.println("** value7: \""
                                                     + fStringBuffer.toString()
                                                     + "\"");
                              }
                          }
                          else if (entityName == fLtSymbol) {
                              fStringBuffer.append('<');
                              if (DEBUG_ATTR_NORMALIZATION) {
                                  System.out.println("** value9: \""
                                                     + fStringBuffer.toString()
                                                     + "\"");
                              }
                          }
                          else if (entityName == fGtSymbol) {
                              fStringBuffer.append('>');
                              if (DEBUG_ATTR_NORMALIZATION) {
                                  System.out.println("** valueB: \""
                                                     + fStringBuffer.toString()
                                                     + "\"");
                              }
                          }
                          else if (entityName == fQuotSymbol) {
                              fStringBuffer.append('"');
                              if (DEBUG_ATTR_NORMALIZATION) {
                                  System.out.println("** valueD: \""
                                                     + fStringBuffer.toString()
                                                     + "\"");
                              }
                          }
                          else {
                              // any other entity: external entities are not
                              // allowed in attribute values
                              if (fEntityManager.isExternalEntity(entityName)) {
                                  reportFatalError("ReferenceToExternalEntity",
                                                   new Object[] { entityName });
                              }
                              else {
                                  if (!fEntityManager.isDeclaredEntity(entityName)) {
                                      //WFC & VC: Entity Declared
                                      if (checkEntities) {
                                          if (fValidation) {
                                              fErrorReporter.reportError(XMLMessageFormatter.XML_DOMAIN,
                                                                         "EntityNotDeclared",
                                                                         new Object[]{entityName},
                                                                         XMLErrorReporter.SEVERITY_ERROR);
                                          }
                                      }
                                      else {
                                          reportFatalError("EntityNotDeclared",
                                                           new Object[]{entityName});
                                      }
                                  }
                                  // scanning continues inside the entity
                                  fEntityManager.startEntity(entityName, true);
                              }
                          }
                      }
                  }
                  else if (c == '<') {
                      // reported as an error; the character is kept only in
                      // the non-normalized value
                      reportFatalError("LessthanInAttValue",
                                       new Object[] { null, atName });
                      fEntityScanner.scanChar();
                      if (entityDepth == fEntityDepth) {
                          fStringBuffer2.append((char)c);
                      }
                  }
                  else if (c == '%' || c == ']') {
                      // ordinary characters here: copy them through
                      fEntityScanner.scanChar();
                      fStringBuffer.append((char)c);
                      if (entityDepth == fEntityDepth) {
                          fStringBuffer2.append((char)c);
                      }
                      if (DEBUG_ATTR_NORMALIZATION) {
                          System.out.println("** valueF: \""
                                             + fStringBuffer.toString() + "\"");
                      }
                  }
                  // note that none of these characters should ever get through
                  // XML11EntityScanner.  Not sure why
                  // this check was originally necessary.  - NG
                  else if (c == '\n' || c == '\r' || c == 0x85 || c == 0x2028) {
                      // a line break becomes a space in the normalized value
                      // and a '\n' in the non-normalized value
                      fEntityScanner.scanChar();
                      fStringBuffer.append(' ');
                      if (entityDepth == fEntityDepth) {
                          fStringBuffer2.append('\n');
                      }
                  }
                  else if (c != -1 && XMLChar.isHighSurrogate(c)) {
                      // surrogate pair: scanned into fStringBuffer3 first
                      if (scanSurrogates(fStringBuffer3)) {
                          fStringBuffer.append(fStringBuffer3);
                          if (entityDepth == fEntityDepth) {
                              fStringBuffer2.append(fStringBuffer3);
                          }
                          if (DEBUG_ATTR_NORMALIZATION) {
                              System.out.println("** valueI: \""
                                                 + fStringBuffer.toString()
                                                 + "\"");
                          }
                      }
                  }
                  else if (c != -1 && XMLChar.isInvalid(c)) {
                      // reported as an error; the character is kept only in
                      // the non-normalized value
                      reportFatalError("InvalidCharInAttValue",
                                       new Object[] {Integer.toString(c, 16)});
                      fEntityScanner.scanChar();
                      if (entityDepth == fEntityDepth) {
                          fStringBuffer2.append((char)c);
                      }
                  }
                  // next run of the value
                  c = fEntityScanner.scanLiteral(quote, value);
                  if (entityDepth == fEntityDepth) {
                      fStringBuffer2.append(value);
                  }
                  normalizeWhitespace(value);
              // a quote character only ends the value at the entity depth
              // at which the value started
              } while (c != quote || entityDepth != fEntityDepth);
              fStringBuffer.append(value);
              if (DEBUG_ATTR_NORMALIZATION) {
                  System.out.println("** valueN: \""
                                     + fStringBuffer.toString() + "\"");
              }
              value.setValues(fStringBuffer);
              fScanningAttribute = false;
          }
          nonNormalizedValue.setValues(fStringBuffer2);

          // quote
          int cquote = fEntityScanner.scanChar();
          if (cquote != quote) {
              reportFatalError("CloseQuoteExpected", new Object[]{atName});
          }
      } // scanAttributeValue()
  
      //
      // XMLScanner methods
      //
      // NOTE:  this is a carbon copy of the code in XML11DTDScannerImpl;
      // we need to override these methods in both places.  
      // this needs to be refactored!!!  - NG
      /**
       * Scans a public ID literal.
       *
       * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
       * [13] PubidChar::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%]
       *
       * In addition to #x20, #xD and #xA, this version also treats #x85 and
       * #x2028 as white space while scanning the literal.
       *
       * The returned string is normalized according to the following rule,
       * from http://www.w3.org/TR/REC-xml#dt-pubid:
       *
       * Before a match is attempted, all strings of white space in the public
       * identifier must be normalized to single space characters (#x20), and
       * leading and trailing white space must be removed.
       *
       * An empty literal, or one made up only of white space, yields an empty
       * (zero length) string.
       *
       * @param literal The string to fill in with the public ID literal.  It is
       *                only filled in when the closing quote is reached; it is
       *                left untouched when the opening quote is missing or the
       *                end of input is hit first.
       * @return True on success; false if the opening quote is missing, the
       *         literal is unterminated, or it contains a character that is
       *         not a PubidChar (a fatal error is reported in each case).
       *
       * @throws IOException  Thrown by the underlying entity scanner.
       * @throws XNIException Thrown when a fatal error is reported.
       *
       * <strong>Note:</strong> This method uses fStringBuffer, anything in it at
       * the time of calling is lost.
       */
      protected boolean scanPubidLiteral(XMLString literal)
          throws IOException, XNIException
      {
          int quote = fEntityScanner.scanChar();
          if (quote != '\'' && quote != '"') {
              reportFatalError("QuoteRequiredInPublicID", null);
              return false;
          }

          fStringBuffer.clear();
          // start in "skipping" mode so that leading whitespace is dropped
          boolean skipSpace = true;
          boolean dataok = true;
          while (true) {
              int c = fEntityScanner.scanChar();
              if (c == ' ' || c == '\n' || c == '\r' || c == 0x85 || c == 0x2028) {
                  if (!skipSpace) {
                      // take the first whitespace as a space and skip the others
                      fStringBuffer.append(' ');
                      skipSpace = true;
                  }
              }
              else if (c == quote) {
                  // skipSpace is also true when nothing has been appended yet
                  // (empty or all-whitespace literal).  Only trim when a
                  // trailing space is really there, otherwise the length
                  // would drop to -1.
                  if (skipSpace && fStringBuffer.length > 0) {
                      fStringBuffer.length--;
                  }
                  literal.setValues(fStringBuffer);
                  break;
              }
              else if (XMLChar.isPubid(c)) {
                  fStringBuffer.append((char)c);
                  skipSpace = false;
              }
              else if (c == -1) {
                  reportFatalError("PublicIDUnterminated", null);
                  return false;
              }
              else {
                  // keep scanning up to the closing quote so that parsing can
                  // resume after the literal, but remember the failure
                  dataok = false;
                  reportFatalError("InvalidCharInPublicID",
                                   new Object[]{Integer.toHexString(c)});
              }
          }
          return dataok;
      }
  
      /**
       * Rewrites, in place, every character of the given string for which
       * XMLChar.isXML11Space answers true into a plain space character.
       * Only the range [offset, offset + length) of the backing array is
       * visited; the offset and length themselves are left unchanged.
       *
       * @param value The string whose whitespace characters are replaced.
       */
      protected void normalizeWhitespace(XMLString value) {
          final char[] chars = value.ch;
          final int limit = value.offset + value.length;
          int pos = value.offset;
          while (pos < limit) {
              if (XMLChar.isXML11Space(chars[pos])) {
                  chars[pos] = ' ';
              }
              pos++;
          }
      }
  
  } // class XML11DocumentScannerImpl
  
  
  
  1.1                  xml-xerces/java/src/org/apache/xerces/impl/XML11DTDScannerImpl.java
  
  Index: XML11DTDScannerImpl.java
  ===================================================================
  /*
   * The Apache Software License, Version 1.1
   *
   *
   * Copyright (c) 1999-2002 The Apache Software Foundation.  
   * All rights reserved.
   *
   * Redistribution and use in source and binary forms, with or without
   * modification, are permitted provided that the following conditions
   * are met:
   *
   * 1. Redistributions of source code must retain the above copyright
   *    notice, this list of conditions and the following disclaimer. 
   *
   * 2. Redistributions in binary form must reproduce the above copyright
   *    notice, this list of conditions and the following disclaimer in
   *    the documentation and/or other materials provided with the
   *    distribution.
   *
   * 3. The end-user documentation included with the redistribution,
   *    if any, must include the following acknowledgment:  
   *       "This product includes software developed by the
   *        Apache Software Foundation (http://www.apache.org/)."
   *    Alternately, this acknowledgment may appear in the software itself,
   *    if and wherever such third-party acknowledgments normally appear.
   *
   * 4. The names "Xerces" and "Apache Software Foundation" must
   *    not be used to endorse or promote products derived from this
   *    software without prior written permission. For written 
   *    permission, please contact apache@apache.org.
   *
   * 5. Products derived from this software may not be called "Apache",
   *    nor may "Apache" appear in their name, without prior written
   *    permission of the Apache Software Foundation.
   *
   * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
   * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
   * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
   * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
   * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
   * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
   * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
   * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
   * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
   * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
   * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   * SUCH DAMAGE.
   * ====================================================================
   *
   * This software consists of voluntary contributions made by many
   * individuals on behalf of the Apache Software Foundation and was
   * originally based on software copyright (c) 1999, International
   * Business Machines, Inc., http://www.apache.org.  For more
   * information on the Apache Software Foundation, please see
   * <http://www.apache.org/>.
   */
  
  package org.apache.xerces.impl;
  
  import java.io.IOException;
  import java.io.EOFException;
  
  import org.apache.xerces.impl.Constants;
  import org.apache.xerces.impl.XMLEntityManager;
  import org.apache.xerces.impl.XMLErrorReporter;
  import org.apache.xerces.impl.msg.XMLMessageFormatter;
  
  import org.apache.xerces.util.AugmentationsImpl;
  import org.apache.xerces.util.XMLAttributesImpl;
  import org.apache.xerces.util.XMLChar;
  import org.apache.xerces.util.XMLStringBuffer;
  import org.apache.xerces.util.SymbolTable;
  
  import org.apache.xerces.xni.XMLDTDContentModelHandler;
  import org.apache.xerces.xni.XMLDTDHandler;
  import org.apache.xerces.xni.XMLResourceIdentifier;
  import org.apache.xerces.xni.XMLString;
  import org.apache.xerces.xni.XNIException;
  import org.apache.xerces.xni.parser.XMLComponent;
  import org.apache.xerces.xni.parser.XMLComponentManager;
  import org.apache.xerces.xni.parser.XMLConfigurationException;
  import org.apache.xerces.xni.parser.XMLDTDScanner;
  import org.apache.xerces.xni.parser.XMLInputSource;
  
  /**
   * This class is responsible for scanning the declarations found
   * in the internal and external subsets of a DTD in an XML document.
   * The scanner acts as the sources for the DTD information which is 
   * communicated to the DTD handlers.
   * <p>
   * It extends XMLDTDScannerImpl and overrides public ID literal scanning
   * and whitespace normalization so that the characters #x85 and #x2028
   * are handled as white space as well.
   * <p>
   * This component requires the following features and properties from the
   * component manager that uses it:
   * <ul>
   *  <li>http://xml.org/sax/features/validation</li>
   *  <li>http://apache.org/xml/features/scanner/notify-char-refs</li>
   *  <li>http://apache.org/xml/properties/internal/symbol-table</li>
   *  <li>http://apache.org/xml/properties/internal/error-reporter</li>
   *  <li>http://apache.org/xml/properties/internal/entity-manager</li>
   * </ul>
   *
   * @author Arnaud  Le Hors, IBM
   * @author Andy Clark, IBM
   * @author Glenn Marcy, IBM
   * @author Eric Ye, IBM
   *
   * @version $Id: XML11DTDScannerImpl.java,v 1.1 2002/07/31 14:08:12 neilg Exp $
   */
  public class XML11DTDScannerImpl
      extends XMLDTDScannerImpl {

      /** Scratch buffer used to build the normalized public ID literal. */
      private XMLStringBuffer fStringBuffer = new XMLStringBuffer();

      //
      // Constructors
      //

      /** Default constructor. */
      public XML11DTDScannerImpl() {super();} // <init>()

      /** Constructor for the use of non-XMLComponentManagers. */
      public XML11DTDScannerImpl(SymbolTable symbolTable,
                  XMLErrorReporter errorReporter, XMLEntityManager entityManager) {
          super(symbolTable, errorReporter, entityManager);
      }

      //
      // XMLDTDScanner methods
      //

      //
      // XMLScanner methods
      //
      // NOTE:  this is a carbon copy of the code in XML11DocumentScannerImpl;
      // we need to override these methods in both places.  Ah for
      // multiple inheritance...
      // This needs to be refactored!!!  - NG
      /**
       * Scans a public ID literal.
       *
       * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
       * [13] PubidChar::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%]
       *
       * In addition to #x20, #xD and #xA, this version also treats #x85 and
       * #x2028 as white space while scanning the literal.
       *
       * The returned string is normalized according to the following rule,
       * from http://www.w3.org/TR/REC-xml#dt-pubid:
       *
       * Before a match is attempted, all strings of white space in the public
       * identifier must be normalized to single space characters (#x20), and
       * leading and trailing white space must be removed.
       *
       * An empty literal, or one made up only of white space, yields an empty
       * (zero length) string.
       *
       * @param literal The string to fill in with the public ID literal.  It is
       *                only filled in when the closing quote is reached; it is
       *                left untouched when the opening quote is missing or the
       *                end of input is hit first.
       * @return True on success; false if the opening quote is missing, the
       *         literal is unterminated, or it contains a character that is
       *         not a PubidChar (a fatal error is reported in each case).
       *
       * @throws IOException  Thrown by the underlying entity scanner.
       * @throws XNIException Thrown when a fatal error is reported.
       *
       * <strong>Note:</strong> This method uses fStringBuffer, anything in it at
       * the time of calling is lost.
       */
      protected boolean scanPubidLiteral(XMLString literal)
          throws IOException, XNIException
      {
          int quote = fEntityScanner.scanChar();
          if (quote != '\'' && quote != '"') {
              reportFatalError("QuoteRequiredInPublicID", null);
              return false;
          }

          fStringBuffer.clear();
          // start in "skipping" mode so that leading whitespace is dropped
          boolean skipSpace = true;
          boolean dataok = true;
          while (true) {
              int c = fEntityScanner.scanChar();
              if (c == ' ' || c == '\n' || c == '\r' || c == 0x85 || c == 0x2028) {
                  if (!skipSpace) {
                      // take the first whitespace as a space and skip the others
                      fStringBuffer.append(' ');
                      skipSpace = true;
                  }
              }
              else if (c == quote) {
                  // skipSpace is also true when nothing has been appended yet
                  // (empty or all-whitespace literal).  Only trim when a
                  // trailing space is really there, otherwise the length
                  // would drop to -1.
                  if (skipSpace && fStringBuffer.length > 0) {
                      fStringBuffer.length--;
                  }
                  literal.setValues(fStringBuffer);
                  break;
              }
              else if (XMLChar.isPubid(c)) {
                  fStringBuffer.append((char)c);
                  skipSpace = false;
              }
              else if (c == -1) {
                  reportFatalError("PublicIDUnterminated", null);
                  return false;
              }
              else {
                  // keep scanning up to the closing quote so that parsing can
                  // resume after the literal, but remember the failure
                  dataok = false;
                  reportFatalError("InvalidCharInPublicID",
                                   new Object[]{Integer.toHexString(c)});
              }
          }
          return dataok;
      }

      /**
       * Normalize whitespace in an XMLString converting all whitespace
       * characters (as reported by XMLChar.isXML11Space) to space characters.
       * The replacement is done in place; the offset and length of the
       * string are not changed.
       *
       * @param value The string whose whitespace characters are replaced.
       */
      protected void normalizeWhitespace(XMLString value) {
          int end = value.offset + value.length;
          for (int i = value.offset; i < end; i++) {
              int c = value.ch[i];
              if (XMLChar.isXML11Space(c)) {
                  value.ch[i] = ' ';
              }
          }
      }

  } // class XML11DTDScannerImpl
  
  
  
  1.1                  xml-xerces/java/src/org/apache/xerces/parsers/XML11Configuration.java
  
  Index: XML11Configuration.java
  ===================================================================
  /*
   * The Apache Software License, Version 1.1
   *
   *
   * Copyright (c) 2001, 2002 The Apache Software Foundation.  
   * All rights reserved.
   *
   * Redistribution and use in source and binary forms, with or without
   * modification, are permitted provided that the following conditions
   * are met:
   *
   * 1. Redistributions of source code must retain the above copyright
   *    notice, this list of conditions and the following disclaimer. 
   *
   * 2. Redistributions in binary form must reproduce the above copyright
   *    notice, this list of conditions and the following disclaimer in
   *    the documentation and/or other materials provided with the
   *    distribution.
   *
   * 3. The end-user documentation included with the redistribution,
   *    if any, must include the following acknowledgment:  
   *       "This product includes software developed by the
   *        Apache Software Foundation (http://www.apache.org/)."
   *    Alternately, this acknowledgment may appear in the software itself,
   *    if and wherever such third-party acknowledgments normally appear.
   *
   * 4. The names "Xerces" and "Apache Software Foundation" must
   *    not be used to endorse or promote products derived from this
   *    software without prior written permission. For written 
   *    permission, please contact apache@apache.org.
   *
   * 5. Products derived from this software may not be called "Apache",
   *    nor may "Apache" appear in their name, without prior written
   *    permission of the Apache Software Foundation.
   *
   * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
   * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
   * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
   * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
   * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
   * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
   * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
   * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
   * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
   * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
   * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   * SUCH DAMAGE.
   * ====================================================================
   *
   * This software consists of voluntary contributions made by many
   * individuals on behalf of the Apache Software Foundation and was
   * originally based on software copyright (c) 1999, International
   * Business Machines, Inc., http://www.apache.org.  For more
   * information on the Apache Software Foundation, please see
   * <http://www.apache.org/>.
   */
  
  package org.apache.xerces.parsers;
  
  import java.io.IOException;
  
  import org.apache.xerces.impl.Constants;
  import org.apache.xerces.impl.XML11DocumentScannerImpl;
  import org.apache.xerces.impl.XML11DTDScannerImpl;
  import org.apache.xerces.impl.XMLEntityManager;
  import org.apache.xerces.impl.XML11EntityManager;
  import org.apache.xerces.xni.grammars.XMLGrammarPool;
  
  import org.apache.xerces.util.SymbolTable;
  import org.apache.xerces.xni.XNIException;
  import org.apache.xerces.xni.parser.XMLComponent;
  import org.apache.xerces.xni.parser.XMLComponentManager;
  import org.apache.xerces.xni.parser.XMLConfigurationException;
  import org.apache.xerces.xni.parser.XMLDocumentScanner;
  import org.apache.xerces.xni.parser.XMLDTDScanner;
  
  /**
   * Parser configuration for XML 1.1 documents.
   * It is a StandardParserConfiguration whose entity manager, document
   * scanner and DTD scanner factories hand out the XML 1.1 flavoured
   * implementations (XML11EntityManager, XML11DocumentScannerImpl and
   * XML11DTDScannerImpl).
   *
   * @author Neil Graham, IBM
   *
   * @version $Id: XML11Configuration.java,v 1.1 2002/07/31 14:08:12 neilg Exp $
   */
  public class XML11Configuration
      extends StandardParserConfiguration {

      //
      // Constants
      //

      //
      // Constructors
      //

      /**
       * Default constructor: no symbol table, grammar pool or parent
       * settings are supplied.
       */
      public XML11Configuration() {
          this((SymbolTable) null);
      } // <init>()

      /** 
       * Constructs a parser configuration using the specified symbol table;
       * no grammar pool or parent settings are supplied.
       *
       * @param symbolTable The symbol table to use.
       */
      public XML11Configuration(SymbolTable symbolTable) {
          this(symbolTable, (XMLGrammarPool) null);
      } // <init>(SymbolTable)

      /**
       * Constructs a parser configuration using the specified symbol table and
       * grammar pool; no parent settings are supplied.
       * <p>
       * <strong>REVISIT:</strong> 
       * Grammar pool will be updated when the new validation engine is
       * implemented.
       *
       * @param symbolTable The symbol table to use.
       * @param grammarPool The grammar pool to use.
       */
      public XML11Configuration(SymbolTable symbolTable,
                                XMLGrammarPool grammarPool) {
          this(symbolTable, grammarPool, (XMLComponentManager) null);
      } // <init>(SymbolTable,XMLGrammarPool)

      /**
       * Constructs a parser configuration using the specified symbol table,
       * grammar pool, and parent settings.  All three arguments are passed
       * straight through to the StandardParserConfiguration constructor.
       * <p>
       * <strong>REVISIT:</strong> 
       * Grammar pool will be updated when the new validation engine is
       * implemented.
       *
       * @param symbolTable    The symbol table to use.
       * @param grammarPool    The grammar pool to use.
       * @param parentSettings The parent settings.
       */
      public XML11Configuration(SymbolTable symbolTable,
                                XMLGrammarPool grammarPool,
                                XMLComponentManager parentSettings) {
          super(symbolTable, grammarPool, parentSettings);
      } // <init>(SymbolTable,XMLGrammarPool,XMLComponentManager)

      //
      // Protected methods
      //

      // factory methods

      /**
       * Creates the entity manager.
       *
       * @return A new XML11EntityManager.
       */
      protected XMLEntityManager createEntityManager() {
          return new XML11EntityManager();
      } // createEntityManager():XMLEntityManager

      /**
       * Creates the document scanner.
       *
       * @return A new XML11DocumentScannerImpl.
       */
      protected XMLDocumentScanner createDocumentScanner() {
          return new XML11DocumentScannerImpl();
      } // createDocumentScanner():XMLDocumentScanner

      /**
       * Creates the DTD scanner.
       *
       * @return A new XML11DTDScannerImpl.
       */
      protected XMLDTDScanner createDTDScanner() {
          return new XML11DTDScannerImpl();
      } // createDTDScanner():XMLDTDScanner

  } // class XML11Configuration
  
  
  

---------------------------------------------------------------------
To unsubscribe, e-mail: xerces-cvs-unsubscribe@xml.apache.org
For additional commands, e-mail: xerces-cvs-help@xml.apache.org