You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@xerces.apache.org by kn...@apache.org on 2001/05/03 20:43:14 UTC

cvs commit: xml-xerces/c/src/util PlatformUtils.cpp PlatformUtils.hpp XMLUniDefs.hpp

knoaman     01/05/03 11:43:13

  Modified:    c/src/internal XMLReader.cpp XMLReader.hpp
               c/src/util PlatformUtils.cpp PlatformUtils.hpp
                        XMLUniDefs.hpp
  Log:
  Added new option to the parsers so that the NEL (0x85) char can be treated as a newline character.
  
  Revision  Changes    Path
  1.25      +66 -21    xml-xerces/c/src/internal/XMLReader.cpp
  
  Index: XMLReader.cpp
  ===================================================================
  RCS file: /home/cvs/xml-xerces/c/src/internal/XMLReader.cpp,v
  retrieving revision 1.24
  retrieving revision 1.25
  diff -u -r1.24 -r1.25
  --- XMLReader.cpp	2001/02/14 14:44:11	1.24
  +++ XMLReader.cpp	2001/05/03 18:42:46	1.25
  @@ -55,7 +55,7 @@
    */
   
   /*
  - * $Id: XMLReader.cpp,v 1.24 2001/02/14 14:44:11 knoaman Exp $
  + * $Id: XMLReader.cpp,v 1.25 2001/05/03 18:42:46 knoaman Exp $
    */
   
   // ---------------------------------------------------------------------------
  @@ -70,7 +70,6 @@
   #include <util/UTFDataFormatException.hpp>
   #include <util/XMLEBCDICTranscoder.hpp>
   #include <util/XMLString.hpp>
  -#include <util/XMLUniDefs.hpp>
   #include <util/XMLUni.hpp>
   #include <sax/InputSource.hpp>
   #include <framework/XMLBuffer.hpp>
  @@ -80,7 +79,10 @@
   #include <string.h>
   
   
  -
  +// ---------------------------------------------------------------------------
  +//  XMLReader: static data initialization
  +// ---------------------------------------------------------------------------
  +bool XMLReader::fNEL = false;
   
   // ---------------------------------------------------------------------------
   //  XMLReader: Public, static methods
  @@ -88,11 +90,17 @@
   bool XMLReader::isFirstNameChar(const XMLCh toCheck)
   {
       static const XMLByte ourMask = gBaseCharMask | gLetterCharMask;
  -    if ((fgCharCharsTable[toCheck] & ourMask) != 0)
  +    XMLCh toCheckCh = toCheck;
  +
  +    if (fNEL && (toCheckCh == chNEL)) {
  +        toCheckCh = chLF;
  +    }
  +
  +    if ((fgCharCharsTable[toCheckCh] & ourMask) != 0)
           return true;
   
       // Check the two special case name start chars
  -    if ((toCheck == chUnderscore) || (toCheck == chColon))
  +    if ((toCheckCh == chUnderscore) || (toCheckCh == chColon))
           return true;
   
       return false;
  @@ -110,6 +118,12 @@
       const XMLCh* endPtr = toCheck + count;
       while (curCh < endPtr)
       {
  +        if (fNEL && (fgCharCharsTable[*curCh] == chNEL)) {
  +
  +            *curCh++;
  +            continue;
  +        }
  +
           if (!(fgCharCharsTable[*curCh++] & gWhitespaceCharMask))
               return false;
       }
  @@ -128,6 +142,10 @@
       const XMLCh* endPtr = toCheck + count;
       while (curCh < endPtr)
       {
  +		if (fNEL && (fgCharCharsTable[*curCh] == chNEL)) {
  +            return true;
  +        }
  +
           if (fgCharCharsTable[*curCh++] & gWhitespaceCharMask)
               return true;
       }
  @@ -147,6 +165,13 @@
   }
   
   
  +void XMLReader::enableNELWS() {
  +
  +    if (!fNEL) {
  +        fNEL = true;
  +    }
  +}
  +
   // ---------------------------------------------------------------------------
   //  XMLReader: Constructors and Destructor
   // ---------------------------------------------------------------------------
  @@ -596,14 +621,16 @@
               //
               if (fCharIndex < fCharsAvail)
               {
  -                if (fCharBuf[fCharIndex] == chLF)
  +                if (fCharBuf[fCharIndex] == chLF
  +                    || (fNEL && (fCharBuf[fCharIndex] == chNEL)))
                       fCharIndex++;
               }
                else
               {
                   if (refreshCharBuffer())
                   {
  -                    if (fCharBuf[fCharIndex] == chLF)
  +                    if (fCharBuf[fCharIndex] == chLF 
  +                        || (fNEL && (fCharBuf[fCharIndex] == chNEL)))
                           fCharIndex++;
                   }
               }
  @@ -616,8 +643,10 @@
           fCurCol = 1;
           fCurLine++;
       }
  -     else if (chGotten == chLF)
  +     else if (chGotten == chLF
  +              || (fNEL && (chGotten == chNEL)))
       {
  +        chGotten = chLF;
           fCurLine++;
           fCurCol = 1;
       }
  @@ -682,22 +711,26 @@
                       {
                           if (fCharIndex < fCharsAvail)
                           {
  -                            if (fCharBuf[fCharIndex] == chLF)
  +                            if (fCharBuf[fCharIndex] == chLF
  +                                || (fNEL && (fCharBuf[fCharIndex] == chNEL)))
                                   fCharIndex++;
                           }
                            else
                           {
                               if (refreshCharBuffer())
                               {
  -                                if (fCharBuf[fCharIndex] == chLF)
  +                                if (fCharBuf[fCharIndex] == chLF
  +                                    || (fNEL && (fCharBuf[fCharIndex] == chNEL)))
                                       fCharIndex++;
                               }
                           }
                           curCh = chLF;
                       }
                   }
  -                 else if (curCh == chLF)
  +                 else if (curCh == chLF
  +                          || (fNEL && (curCh == chNEL)))
                   {
  +                    curCh = chLF;
                       fCurCol = 1;
                       fCurLine++;
                   }
  @@ -765,22 +798,26 @@
                       {
                           if (fCharIndex < fCharsAvail)
                           {
  -                            if (fCharBuf[fCharIndex] == chLF)
  +                            if (fCharBuf[fCharIndex] == chLF
  +                                || (fNEL && (fCharBuf[fCharIndex] == chNEL)))
                                   fCharIndex++;
                           }
                            else
                           {
                               if (refreshCharBuffer())
                               {
  -                                if (fCharBuf[fCharIndex] == chLF)
  +                                if (fCharBuf[fCharIndex] == chLF
  +                                    || (fNEL && (fCharBuf[fCharIndex] == chNEL)))
                                       fCharIndex++;
                               }
                           }
                           curCh = chLF;
                       }
                   }
  -                 else if (curCh == chLF)
  +                 else if (curCh == chLF
  +                          || (fNEL && (curCh == chNEL)))
                   {
  +                    curCh = chLF;
                       fCurCol = 1;
                       fCurLine++;
                   }
  @@ -839,7 +876,8 @@
       //  normal char get method in regards to newline normalization, though
       //  its not as complicated as the actual character getting method's.
       //
  -    if ((fSource == Source_External) && (chGotten == chCR))
  +    if ((fSource == Source_External) 
  +        && (chGotten == chCR || (fNEL && (chGotten == chNEL))))
           chGotten = chLF;
   
       return true;
  @@ -912,22 +950,26 @@
                       {
                           if (fCharIndex < fCharsAvail)
                           {
  -                            if (fCharBuf[fCharIndex] == chLF)
  +                            if (fCharBuf[fCharIndex] == chLF
  +                                || (fNEL && (fCharBuf[fCharIndex] == chNEL)))
                                   fCharIndex++;
                           }
                            else
                           {
                               if (refreshCharBuffer())
                               {
  -                                if (fCharBuf[fCharIndex] == chLF)
  +                                if (fCharBuf[fCharIndex] == chLF
  +                                    || (fNEL && (fCharBuf[fCharIndex] == chNEL)))
                                       fCharIndex++;
                               }
                           }
                           curCh = chLF;
                       }
                   }
  -                 else if (curCh == chLF)
  +                 else if (curCh == chLF
  +                          || (fNEL && (curCh == chNEL)))
                   {
  +                    curCh = chLF;
                       fCurCol = 1;
                       fCurLine++;
                   }
  @@ -1015,20 +1057,23 @@
               {
                   if (fCharIndex < fCharsAvail)
                   {
  -                    if (fCharBuf[fCharIndex] == chLF)
  +                    if (fCharBuf[fCharIndex] == chLF
  +                        || (fNEL && (fCharBuf[fCharIndex] == chNEL)))
                           fCharIndex++;
                   }
                    else
                   {
                       if (refreshCharBuffer())
                       {
  -                        if (fCharBuf[fCharIndex] == chLF)
  +                        if (fCharBuf[fCharIndex] == chLF
  +                            || (fNEL && (fCharBuf[fCharIndex] == chNEL)))
                               fCharIndex++;
                       }
                   }
               }
           }
  -         else if (curCh == chLF)
  +         else if (curCh == chLF
  +                  || (fNEL && (curCh == chNEL)))
           {
               fCurLine++;
               fCurCol = 1;
  
  
  
  1.15      +48 -10    xml-xerces/c/src/internal/XMLReader.hpp
  
  Index: XMLReader.hpp
  ===================================================================
  RCS file: /home/cvs/xml-xerces/c/src/internal/XMLReader.hpp,v
  retrieving revision 1.14
  retrieving revision 1.15
  diff -u -r1.14 -r1.15
  --- XMLReader.hpp	2001/01/25 19:16:58	1.14
  +++ XMLReader.hpp	2001/05/03 18:42:51	1.15
  @@ -56,6 +56,9 @@
   
   /*
    * $Log: XMLReader.hpp,v $
  + * Revision 1.15  2001/05/03 18:42:51  knoaman
  + * Added new option to the parsers so that the NEL (0x85) char can be treated as a newline character.
  + *
    * Revision 1.14  2001/01/25 19:16:58  tng
    * const should be used instead of static const.  Fixed by Khaled Noaman.
    *
  @@ -118,6 +121,7 @@
   #include <util/XMLUniDefs.hpp>
   #include <framework/XMLRecognizer.hpp>
   #include <framework/XMLBuffer.hpp>
  +#include <util/XMLUniDefs.hpp>
   
   class InputSource;
   class BinInputStream;
  @@ -138,7 +142,6 @@
   const XMLByte   gWhitespaceCharMask         = 0x80;
   
   
  -
   // ---------------------------------------------------------------------------
   //  Instances of this class are used to manage the content of entities. The
   //  scanner maintains a stack of these, one for each entity (this means entity
  @@ -151,6 +154,12 @@
   //  methods.
   //
   //  This is NOT to be derived from.
  +//
  +//  Note: We have added support for handling the 390 NEL character as whitespace.
  +//  Since the option is turned on at run time and fgCharCharsTable is const, we
  +//  cannot modify the entry for NEL (0x85) in fgCharCharsTable. As a result,
  +//  every time we look up NEL in fgCharCharsTable while the NEL option is
  +//  turned on, we use the entry for chLF in fgCharCharsTable instead.
   // ---------------------------------------------------------------------------
   class XMLPARSER_EXPORT XMLReader
   {
  @@ -204,6 +213,11 @@
       static bool isXMLChar(const XMLCh toCheck);
       static bool isWhitespace(const XMLCh toCheck);
   
  +    /**
  +      * Return the value of the fNEL flag.
  +      */
  +    static bool isNELRecognized();
  +
   
       // -----------------------------------------------------------------------
       //  Constructors and Destructor
  @@ -320,6 +334,11 @@
       );
   
   
  +    /**
  +      * Method to enable NEL char to be treated as white space char.
  +      */
  +    static void enableNELWS();
  +
       // -----------------------------------------------------------------------
       //  Private helper methods
       // -----------------------------------------------------------------------
  @@ -517,8 +536,15 @@
       //      The character characteristics table. Bits in each byte, represent
       //      the characteristics of each character. It is generated via some
       //      code and then hard coded into the cpp file for speed.
  +    //
  +    //  fNEL
  +    //      Flag that represents whether NEL whitespace recognition is enabled
  +    //      or disabled
       // -----------------------------------------------------------------------
       static const XMLByte    fgCharCharsTable[0x10000];
  +    static bool             fNEL;
  +
  +    friend class XMLPlatformUtils;
   };
   
   
  @@ -527,17 +553,19 @@
   // ---------------------------------------------------------------------------
   inline bool XMLReader::isBaseChar(const XMLCh toCheck)
   {
  -    return (fgCharCharsTable[toCheck] & gBaseCharMask) != 0;
  +    return ((fgCharCharsTable[toCheck] & gBaseCharMask) != 0);
   }
   
   inline bool XMLReader::isNameChar(const XMLCh toCheck)
   {
  -    return (fgCharCharsTable[toCheck] & gNameCharMask) != 0;
  +    return ((fgCharCharsTable[toCheck] & gNameCharMask) != 0);
   }
   
   inline bool XMLReader::isPlainContentChar(const XMLCh toCheck)
   {
  -    return ((fgCharCharsTable[toCheck] & gPlainContentCharMask) != 0);
  +    return (fNEL && (toCheck == chNEL))
  +        ? ((fgCharCharsTable[chLF] & gPlainContentCharMask) != 0)
  +        : ((fgCharCharsTable[toCheck] & gPlainContentCharMask) != 0);
   }
   
   
  @@ -548,7 +576,9 @@
   
   inline bool XMLReader::isSpecialStartTagChar(const XMLCh toCheck)
   {
  -    return ((fgCharCharsTable[toCheck] & gSpecialStartTagCharMask) != 0);
  +    return (fNEL && (toCheck == chNEL))
  +        ? ((fgCharCharsTable[chLF] & gSpecialStartTagCharMask) != 0)
  +        : ((fgCharCharsTable[toCheck] & gSpecialStartTagCharMask) != 0);
   }
   
   inline bool XMLReader::isXMLChar(const XMLCh toCheck)
  @@ -564,10 +594,11 @@
   
   inline bool XMLReader::isWhitespace(const XMLCh toCheck)
   {
  -    return ((fgCharCharsTable[toCheck] & gWhitespaceCharMask) != 0);
  +    return (fNEL && (toCheck == chNEL))
  +        ? ((fgCharCharsTable[chLF] & gWhitespaceCharMask) != 0)
  +        : ((fgCharCharsTable[toCheck] & gWhitespaceCharMask) != 0);
   }
   
  -
   // ---------------------------------------------------------------------------
   //  XMLReader: Buffer management methods
   // ---------------------------------------------------------------------------
  @@ -635,7 +666,10 @@
       return fType;
   }
   
  +inline bool XMLReader::isNELRecognized() {
   
  +    return fNEL;
  +}
   
   // ---------------------------------------------------------------------------
   //  XMLReader: Setter methods
  @@ -750,14 +784,16 @@
               //
               if (fCharIndex < fCharsAvail)
               {
  -                if (fCharBuf[fCharIndex] == chLF)
  +                if (fCharBuf[fCharIndex] == chLF
  +                    || (fNEL && (fCharBuf[fCharIndex] == chNEL)))
                       fCharIndex++;
               }
                else
               {
                   if (refreshCharBuffer())
                   {
  -                    if (fCharBuf[fCharIndex] == chLF)
  +                    if (fCharBuf[fCharIndex] == chLF
  +                        || (fNEL && (fCharBuf[fCharIndex] == chNEL)))
                           fCharIndex++;
                   }
               }
  @@ -770,8 +806,10 @@
           fCurCol = 1;
           fCurLine++;
       }
  -     else if (chGotten == chLF)
  +     else if (chGotten == chLF
  +              || (fNEL && (chGotten == chNEL)))
       {
  +        chGotten = chLF;
           fCurLine++;
           fCurCol = 1;
       }
  
  
  
  1.12      +37 -0     xml-xerces/c/src/util/PlatformUtils.cpp
  
  Index: PlatformUtils.cpp
  ===================================================================
  RCS file: /home/cvs/xml-xerces/c/src/util/PlatformUtils.cpp,v
  retrieving revision 1.11
  retrieving revision 1.12
  diff -u -r1.11 -r1.12
  --- PlatformUtils.cpp	2001/02/07 17:46:34	1.11
  +++ PlatformUtils.cpp	2001/05/03 18:43:01	1.12
  @@ -56,6 +56,9 @@
   
   /*
    * $Log: PlatformUtils.cpp,v $
  + * Revision 1.12  2001/05/03 18:43:01  knoaman
  + * Added new option to the parsers so that the NEL (0x85) char can be treated as a newline character.
  + *
    * Revision 1.11  2001/02/07 17:46:34  billsch
    * Rearranged statements in Initialize() so that platformInit() is called
    * before an XMLMutex is created.
  @@ -119,6 +122,8 @@
   #include <util/XMLString.hpp>
   #include <util/XMLNetAccessor.hpp>
   #include <util/XMLUni.hpp>
  +#include <internal/XMLReader.hpp>
  +#include <util/RuntimeException.hpp>
   
   
   // ---------------------------------------------------------------------------
  @@ -306,3 +311,34 @@
       delete gLazyData;
       gLazyData = 0;
   }
  +
  +// ---------------------------------------------------------------------------
  +//  XMLPlatformUtils: NEL Character Handling
  +// ---------------------------------------------------------------------------
  +void XMLPlatformUtils::recognizeNEL(bool state) {
  +
  +    //Make sure initialize has been called
  +    if (gInitFlag == 0) {
  +        return;
  +    }
  +
  +    if (state) {
  +
  +        if (!XMLReader::isNELRecognized()) {
  +            XMLReader::enableNELWS();
  +        }
  +    }
  +    else {
  +
  +        if (XMLReader::isNELRecognized()) {
  +            ThrowXML(RuntimeException, XMLExcepts::NEL_RepeatedCalls);
  +        }
  +    }
  +}
  +
  +
  +bool XMLPlatformUtils::isNELRecognized() {
  +
  +    return XMLReader::isNELRecognized();
  +}
  +
  
  
  
  1.10      +20 -1     xml-xerces/c/src/util/PlatformUtils.hpp
  
  Index: PlatformUtils.hpp
  ===================================================================
  RCS file: /home/cvs/xml-xerces/c/src/util/PlatformUtils.hpp,v
  retrieving revision 1.9
  retrieving revision 1.10
  diff -u -r1.9 -r1.10
  --- PlatformUtils.hpp	2000/05/04 02:43:45	1.9
  +++ PlatformUtils.hpp	2001/05/03 18:43:03	1.10
  @@ -55,7 +55,7 @@
    */
   
   /*
  - * $Id: PlatformUtils.hpp,v 1.9 2000/05/04 02:43:45 aruna1 Exp $
  + * $Id: PlatformUtils.hpp,v 1.10 2001/05/03 18:43:03 knoaman Exp $
    */
   
   
  @@ -515,6 +515,23 @@
   
       //@}
   
  +    /** @name NEL Character Handling  */
  +    //@{
  +	/**
  +      * This function enables the recognition of the NEL char as a whitespace
  +      * char, which is disabled by default.
  +      * It should be called only once per process. Once recognition is enabled,
  +      * a subsequent call that tries to disable it will throw an exception.
  +      *
  +      * Note: Turning this option on will make the parser non-compliant.
  +      */
  +    static void recognizeNEL(bool state);
  +
  +    /**
  +      * Return the value of the XMLReader fNEL flag.
  +      */
  +    static bool isNELRecognized();
  +    //@}
   
       //
       //  For internal use only.
  @@ -576,6 +593,8 @@
         * parser or utilities services!
         */
       static void platformTerm();
  +
  +    //@}
   };
   
   
  
  
  
  1.4       +2 -1      xml-xerces/c/src/util/XMLUniDefs.hpp
  
  Index: XMLUniDefs.hpp
  ===================================================================
  RCS file: /home/cvs/xml-xerces/c/src/util/XMLUniDefs.hpp,v
  retrieving revision 1.3
  retrieving revision 1.4
  diff -u -r1.3 -r1.4
  --- XMLUniDefs.hpp	2001/03/02 20:52:47	1.3
  +++ XMLUniDefs.hpp	2001/05/03 18:43:06	1.4
  @@ -55,7 +55,7 @@
    */
   
   /*
  - * $Id: XMLUniDefs.hpp,v 1.3 2001/03/02 20:52:47 knoaman Exp $
  + * $Id: XMLUniDefs.hpp,v 1.4 2001/05/03 18:43:06 knoaman Exp $
    */
   #if !defined(XMLUNIDEFS_HPP)
   #define XMLUNIDEFS_HPP
  @@ -94,6 +94,7 @@
   const XMLCh chEqual                 = 0x3D;
   const XMLCh chForwardSlash          = 0x2F;
   const XMLCh chGrave                 = 0x60;
  +const XMLCh chNEL                   = 0x85;
   const XMLCh chOpenAngle             = 0x3C;
   const XMLCh chOpenCurly             = 0x7B;
   const XMLCh chOpenParen             = 0x28;
  
  
  

---------------------------------------------------------------------
To unsubscribe, e-mail: xerces-cvs-unsubscribe@xml.apache.org
For additional commands, e-mail: xerces-cvs-help@xml.apache.org