You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@xerces.apache.org by tn...@apache.org on 2003/01/06 20:43:18 UTC
cvs commit: xml-xerces/c/src/xercesc/util XMLUri.hpp XMLURL.cpp XMLURL.hpp

tng         2003/01/06 11:43:18

  Modified:    c/src/xercesc/util XMLUri.hpp XMLURL.cpp XMLURL.hpp
  Log:
  New feature StandardUriConformant to force strict standard uri conformance.
  
  Revision  Changes    Path
  1.7       +14 -11    xml-xerces/c/src/xercesc/util/XMLUri.hpp
  
  Index: XMLUri.hpp
  ===================================================================
  RCS file: /home/cvs/xml-xerces/c/src/xercesc/util/XMLUri.hpp,v
  retrieving revision 1.6
  retrieving revision 1.7
  diff -u -r1.6 -r1.7
  --- XMLUri.hpp	21 Nov 2002 15:42:39 -0000	1.6
  +++ XMLUri.hpp	6 Jan 2003 19:43:18 -0000	1.7
  @@ -57,6 +57,9 @@
   /*
    * $Id$
    * $Log$
  + * Revision 1.7  2003/01/06 19:43:18  tng
  + * New feature StandardUriConformant to force strict standard uri conformance.
  + *
    * Revision 1.6  2002/11/21 15:42:39  gareth
    * Implemented copy constructor and operator =. Patch by Jennifer Schachter.
    *
  @@ -317,6 +320,16 @@
       //  Miscellaneous methods
       // -----------------------------------------------------------------------
   
  +    /**
  +     * Determine whether a given string contains only URI characters (also
  +     * called "uric" in RFC 2396). uric consists of all reserved
  +     * characters, unreserved characters and escaped characters.
  +     *
  +     * @return true if the string is comprised of uric, false otherwise
  +     */
  +    static bool isURIString(const XMLCh* const uric);
  +
  +
   private:
   
       static const XMLCh RESERVED_CHARACTERS[];
  @@ -360,16 +373,6 @@
        * @return true if the scheme is conformant, false otherwise
        */
       static void isConformantUserInfo(const XMLCh* const userInfo);
  -
  -    /**
  -     * Determine whether a given string contains only URI characters (also
  -     * called "uric" in RFC 2396). uric consist of all reserved
  -     * characters, unreserved characters and escaped characters.
  -     *
  -     * @return true if the string is comprised of uric, false otherwise
  -     */
  -    static bool isURIString(const XMLCh* const uric);
  -
       /**
        * Determine whether a string is syntactically capable of representing
        * a valid IPv4 address or the domain name of a network host.
  
  
  
  1.5       +34 -151   xml-xerces/c/src/xercesc/util/XMLURL.cpp
  
  Index: XMLURL.cpp
  ===================================================================
  RCS file: /home/cvs/xml-xerces/c/src/xercesc/util/XMLURL.cpp,v
  retrieving revision 1.4
  retrieving revision 1.5
  diff -u -r1.4 -r1.5
  --- XMLURL.cpp	6 Dec 2002 17:05:29 -0000	1.4
  +++ XMLURL.cpp	6 Jan 2003 19:43:18 -0000	1.5
  @@ -72,6 +72,7 @@
   #include <xercesc/util/XMLString.hpp>
   #include <xercesc/util/XMLUniDefs.hpp>
   #include <xercesc/util/XMLUni.hpp>
  +#include <xercesc/util/XMLUri.hpp>
   
   XERCES_CPP_NAMESPACE_BEGIN
   
  @@ -191,6 +192,7 @@
       , fQuery(0)
       , fUser(0)
       , fURLText(0)
  +    , fHasInvalidChar(false)
   {
   }
   
  @@ -206,6 +208,7 @@
       , fQuery(0)
       , fUser(0)
       , fURLText(0)
  +    , fHasInvalidChar(false)
   {
   	try
   	{
  @@ -230,6 +233,7 @@
       , fQuery(0)
       , fUser(0)
       , fURLText(0)
  +    , fHasInvalidChar(false)
   {
       XMLCh* tmpRel = XMLString::transcode(relativeURL);
       ArrayJanitor<XMLCh> janRel(tmpRel);
  @@ -256,6 +260,7 @@
       , fQuery(0)
       , fUser(0)
       , fURLText(0)
  +    , fHasInvalidChar(false)
   {
   	try
   	{
  @@ -280,6 +285,7 @@
       , fQuery(0)
       , fUser(0)
       , fURLText(0)
  +    , fHasInvalidChar(false)
   {
       XMLCh* tmpRel = XMLString::transcode(relativeURL);
       ArrayJanitor<XMLCh> janRel(tmpRel);
  @@ -306,6 +312,7 @@
       , fQuery(0)
       , fUser(0)
       , fURLText(0)
  +    , fHasInvalidChar(false)
   {
   	try
   	{
  @@ -329,6 +336,7 @@
       , fQuery(0)
       , fUser(0)
       , fURLText(0)
  +    , fHasInvalidChar(false)
   {
       XMLCh* tmpText = XMLString::transcode(urlText);
       ArrayJanitor<XMLCh> janRel(tmpText);
  @@ -354,6 +362,7 @@
       , fQuery(XMLString::replicate(toCopy.fQuery))
       , fUser(XMLString::replicate(toCopy.fUser))
       , fURLText(XMLString::replicate(toCopy.fURLText))
  +    , fHasInvalidChar(toCopy.fHasInvalidChar)
   {
   }
   
  @@ -503,6 +512,11 @@
   }
   
   
  +bool XMLURL::hasInvalidChar() const {
  +    return fHasInvalidChar;
  +}
  +
  +
   BinInputStream* XMLURL::makeNewStream() const
   {
       //
  @@ -514,38 +528,10 @@
       {
           if (!fHost || !XMLString::compareIString(fHost, XMLUni::fgLocalHostString))
           {
  -            //
  -            //  We have to play a little trick here. If its really a Windows
  -            //  style fully qualified path, we have to toss the leading /
  -            //  character.
  -            //
  +
               XMLCh* realPath = XMLString::replicate(fPath);
               ArrayJanitor<XMLCh> basePathName(realPath);
   
  -            if (*fPath == chForwardSlash)
  -            {
  -                if (XMLString::stringLen(fPath) > 3)
  -                {
  -                    if (*(fPath + 2) == chColon)
  -                    {
  -                        const XMLCh chDrive = *(fPath + 1);
  -                        if (((chDrive >= chLatin_A) && (chDrive <= chLatin_Z))
  -                        ||  ((chDrive >= chLatin_a) && (chDrive <= chLatin_z)))
  -                        {
  -                            realPath = fPath + 1;
  -                        }
  -                    }
  -
  -                    // Similarly for UNC paths
  -                    if ( *(fPath + 1) == *(fPath + 2) &&
  -                         (*(fPath + 1) == chForwardSlash ||
  -                          *(fPath + 1) == chBackSlash) )
  -                    {
  -                        realPath = fPath + 1;
  -                    }
  -                }
  -            }
  -
               //
               // Need to manually replace any character reference %xx first
               // HTTP protocol will be done automatically by the netaccessor
  @@ -835,8 +821,11 @@
       }
   
       // Its a relative path, so weave them together.
  -    if (baseURL.fPath)
  -        weavePaths(baseURL.fPath);
  +    if (baseURL.fPath) {
  +        XMLCh* temp = XMLPlatformUtils::weavePaths(baseURL.fPath, fPath);
  +        delete [] fPath;
  +        fPath = temp;
  +    }
   
       // If we had any original path, then we are done
       if (hadPath)
  @@ -860,6 +849,12 @@
       if (!*urlText)
           ThrowXML(MalformedURLException, XMLExcepts::URL_NoProtocolPresent);
   
  +    // Before we start, check whether urlText contains only valid URI characters
  +    if (!XMLUri::isURIString(urlText))
  +        fHasInvalidChar = true;
  +    else
  +        fHasInvalidChar = false;
  +
       //
       //  The first thing we will do is to check for a file name, so that
       //  we don't waste time thinking its a URL. If its in the form x:\
  @@ -988,17 +983,17 @@
       }
       else
       {
  -	    //
  -	    // http protocol requires two forward slashes
  -	    // we didn't get them, so throw an exception
  -	    //
  -	if (fProtocol == HTTP) {
  -                ThrowXML
  +        //
  +        // http protocol requires two forward slashes
  +        // we didn't get them, so throw an exception
  +        //
  +        if (fProtocol == HTTP) {
  +            ThrowXML
                   (
                       MalformedURLException
                       , XMLExcepts::URL_ExpectingTwoSlashes
                   );
  -	}
  +        }
       }
   
       //
  @@ -1135,118 +1130,6 @@
       }
   }
   
  -
  -void XMLURL::weavePaths(const XMLCh* const basePart)
  -{
  -    // Watch for stupid stuff
  -    if (!basePart)
  -        return;
  -    if (!*basePart)
  -        return;
  -
  -    //
  -    //  Ok, lets start at the end of the base path and work backwards and
  -    //  our path part and work forwards. For each leading . we see, we just
  -    //  eat it. For each leading .. we see, we eat it and throw away one
  -    //  level in the source URL.
  -    //
  -    //  If the last character in the base part is a forward slash, back
  -    //  up one first before we look for the last slash.
  -    //
  -    const XMLCh* basePtr = basePart + (XMLString::stringLen(basePart) - 1);
  -    if (*basePtr == chForwardSlash)
  -        basePtr--;
  -
  -    while ((basePtr >= basePart)
  -    &&     ((*basePtr != chForwardSlash) && (*basePtr != chBackSlash)))
  -    {
  -        basePtr--;
  -    }
  -
  -    if (basePtr < basePart)
  -        return;
  -
  -    // Create a buffer as large as both parts
  -    XMLCh* tmpBuf = new XMLCh[XMLString::stringLen(fPath)
  -                              + XMLString::stringLen(basePart)
  -                              + 2];
  -    //
  -    //  If we have no path part, then copy the base part up to the
  -    //  base pointer
  -    //
  -    if (!fPath)
  -    {
  -        XMLCh* bufPtr = tmpBuf;
  -        const XMLCh* tmpPtr = basePart;
  -        while (tmpPtr <= basePtr)
  -            *bufPtr++ = *tmpPtr++;
  -        *bufPtr = 0;
  -
  -        fPath = tmpBuf;
  -        return;
  -    }
  -
  -    // After this, make sure the buffer gets handled if we exit early
  -    ArrayJanitor<XMLCh> janBuf(tmpBuf);
  -
  -    //
  -    //  We have some path part, so we need to check to see if we ahve to
  -    //  weave any of the parts together.
  -    //
  -    XMLCh* pathPtr = fPath;
  -    while (true)
  -    {
  -        // If it does not start with some period, then we are done
  -        if (*pathPtr != chPeriod)
  -            break;
  -
  -        unsigned int periodCount = 1;
  -        pathPtr++;
  -        if (*pathPtr == chPeriod)
  -        {
  -            pathPtr++;
  -            periodCount++;
  -        }
  -
  -        // Has to be followed by a / or \ or the null to mean anything
  -        if ((*pathPtr != chForwardSlash) && (*pathPtr != chBackSlash)
  -        &&  *pathPtr)
  -        {
  -            break;
  -        }
  -        if (*pathPtr)
  -            pathPtr++;
  -
  -        // If its one period, just eat it, else move backwards in the base
  -        if (periodCount == 2)
  -        {
  -            basePtr--;
  -            while ((basePtr >= basePart)
  -            &&     ((*basePtr != chForwardSlash) && (*basePtr != chBackSlash)))
  -            {
  -                basePtr--;
  -            }
  -
  -            // There are not enough levels to handle all the .. parts
  -            if (basePtr < basePart)
  -                ThrowXML(MalformedURLException, XMLExcepts::URL_BaseUnderflow);
  -        }
  -    }
  -
  -    // Copy the base part up to the base pointer
  -    XMLCh* bufPtr = tmpBuf;
  -    const XMLCh* tmpPtr = basePart;
  -    while (tmpPtr <= basePtr)
  -        *bufPtr++ = *tmpPtr++;
  -
  -    // And then copy on the rest of our path
  -    XMLString::copyString(bufPtr, pathPtr);
  -
  -    // Now delete our path and make the new buffer our path
  -    delete [] fPath;
  -    janBuf.orphan();
  -    fPath = tmpBuf;
  -}
   
   XERCES_CPP_NAMESPACE_END
   
  
  
  
  1.4       +7 -2      xml-xerces/c/src/xercesc/util/XMLURL.hpp
  
  Index: XMLURL.hpp
  ===================================================================
  RCS file: /home/cvs/xml-xerces/c/src/xercesc/util/XMLURL.hpp,v
  retrieving revision 1.3
  retrieving revision 1.4
  diff -u -r1.3 -r1.4
  --- XMLURL.hpp	4 Nov 2002 15:22:05 -0000	1.3
  +++ XMLURL.hpp	6 Jan 2003 19:43:18 -0000	1.4
  @@ -177,6 +177,7 @@
       //  Miscellaneous methods
       // -----------------------------------------------------------------------
       bool isRelative() const;
  +    bool hasInvalidChar() const;
       BinInputStream* makeNewStream() const;
       void makeRelativeTo(const XMLCh* const baseURLText);
       void makeRelativeTo(const XMLURL& baseURL);
  @@ -193,7 +194,6 @@
       (
           const   XMLCh* const    urlText
       );
  -    void weavePaths(const XMLCh* const basePart);
   
   
       // -----------------------------------------------------------------------
  @@ -231,6 +231,10 @@
       //      This is a copy of the URL text, after it has been taken apart,
       //      made relative if needed, canonicalized, and then put back
       //      together. Its only created upon demand.
  +    //
  +    //  fHasInvalidChar
  +    //      Indicates whether the URL text contains characters that are
  +    //      invalid per RFC 2396.
       // -----------------------------------------------------------------------
       XMLCh*          fFragment;
       XMLCh*          fHost;
  @@ -241,6 +245,7 @@
       XMLCh*          fQuery;
       XMLCh*          fUser;
       XMLCh*          fURLText;
  +    bool            fHasInvalidChar;
   };
   
   
  
  
  

---------------------------------------------------------------------
To unsubscribe, e-mail: xerces-cvs-unsubscribe@xml.apache.org
For additional commands, e-mail: xerces-cvs-help@xml.apache.org