You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@xerces.apache.org by tn...@apache.org on 2003/01/06 20:43:18 UTC
cvs commit: xml-xerces/c/src/xercesc/util XMLUri.hpp XMLURL.cpp XMLURL.hpp
tng 2003/01/06 11:43:18
Modified: c/src/xercesc/util XMLUri.hpp XMLURL.cpp XMLURL.hpp
Log:
New feature StandardUriConformant to force strict standard uri conformance.
Revision Changes Path
1.7 +14 -11 xml-xerces/c/src/xercesc/util/XMLUri.hpp
Index: XMLUri.hpp
===================================================================
RCS file: /home/cvs/xml-xerces/c/src/xercesc/util/XMLUri.hpp,v
retrieving revision 1.6
retrieving revision 1.7
diff -u -r1.6 -r1.7
--- XMLUri.hpp 21 Nov 2002 15:42:39 -0000 1.6
+++ XMLUri.hpp 6 Jan 2003 19:43:18 -0000 1.7
@@ -57,6 +57,9 @@
/*
* $Id$
* $Log$
+ * Revision 1.7 2003/01/06 19:43:18 tng
+ * New feature StandardUriConformant to force strict standard uri conformance.
+ *
* Revision 1.6 2002/11/21 15:42:39 gareth
* Implemented copy constructor and operator =. Patch by Jennifer Schachter.
*
@@ -317,6 +320,16 @@
// Miscellaneous methods
// -----------------------------------------------------------------------
+ /**
+ * Determine whether a given string contains only URI characters (also
+ * called "uric" in RFC 2396). uric consist of all reserved
+ * characters, unreserved characters and escaped characters.
+ *
+ * @return true if the string is comprised of uric, false otherwise
+ */
+ static bool isURIString(const XMLCh* const uric);
+
+
private:
static const XMLCh RESERVED_CHARACTERS[];
@@ -360,16 +373,6 @@
* @return true if the scheme is conformant, false otherwise
*/
static void isConformantUserInfo(const XMLCh* const userInfo);
-
- /**
- * Determine whether a given string contains only URI characters (also
- * called "uric" in RFC 2396). uric consist of all reserved
- * characters, unreserved characters and escaped characters.
- *
- * @return true if the string is comprised of uric, false otherwise
- */
- static bool isURIString(const XMLCh* const uric);
-
/**
* Determine whether a string is syntactically capable of representing
* a valid IPv4 address or the domain name of a network host.
1.5 +34 -151 xml-xerces/c/src/xercesc/util/XMLURL.cpp
Index: XMLURL.cpp
===================================================================
RCS file: /home/cvs/xml-xerces/c/src/xercesc/util/XMLURL.cpp,v
retrieving revision 1.4
retrieving revision 1.5
diff -u -r1.4 -r1.5
--- XMLURL.cpp 6 Dec 2002 17:05:29 -0000 1.4
+++ XMLURL.cpp 6 Jan 2003 19:43:18 -0000 1.5
@@ -72,6 +72,7 @@
#include <xercesc/util/XMLString.hpp>
#include <xercesc/util/XMLUniDefs.hpp>
#include <xercesc/util/XMLUni.hpp>
+#include <xercesc/util/XMLUri.hpp>
XERCES_CPP_NAMESPACE_BEGIN
@@ -191,6 +192,7 @@
, fQuery(0)
, fUser(0)
, fURLText(0)
+ , fHasInvalidChar(false)
{
}
@@ -206,6 +208,7 @@
, fQuery(0)
, fUser(0)
, fURLText(0)
+ , fHasInvalidChar(false)
{
try
{
@@ -230,6 +233,7 @@
, fQuery(0)
, fUser(0)
, fURLText(0)
+ , fHasInvalidChar(false)
{
XMLCh* tmpRel = XMLString::transcode(relativeURL);
ArrayJanitor<XMLCh> janRel(tmpRel);
@@ -256,6 +260,7 @@
, fQuery(0)
, fUser(0)
, fURLText(0)
+ , fHasInvalidChar(false)
{
try
{
@@ -280,6 +285,7 @@
, fQuery(0)
, fUser(0)
, fURLText(0)
+ , fHasInvalidChar(false)
{
XMLCh* tmpRel = XMLString::transcode(relativeURL);
ArrayJanitor<XMLCh> janRel(tmpRel);
@@ -306,6 +312,7 @@
, fQuery(0)
, fUser(0)
, fURLText(0)
+ , fHasInvalidChar(false)
{
try
{
@@ -329,6 +336,7 @@
, fQuery(0)
, fUser(0)
, fURLText(0)
+ , fHasInvalidChar(false)
{
XMLCh* tmpText = XMLString::transcode(urlText);
ArrayJanitor<XMLCh> janRel(tmpText);
@@ -354,6 +362,7 @@
, fQuery(XMLString::replicate(toCopy.fQuery))
, fUser(XMLString::replicate(toCopy.fUser))
, fURLText(XMLString::replicate(toCopy.fURLText))
+ , fHasInvalidChar(toCopy.fHasInvalidChar)
{
}
@@ -503,6 +512,11 @@
}
+bool XMLURL::hasInvalidChar() const {
+ return fHasInvalidChar;
+}
+
+
BinInputStream* XMLURL::makeNewStream() const
{
//
@@ -514,38 +528,10 @@
{
if (!fHost || !XMLString::compareIString(fHost, XMLUni::fgLocalHostString))
{
- //
- // We have to play a little trick here. If it's really a Windows
- // style fully qualified path, we have to toss the leading /
- // character.
- //
+
XMLCh* realPath = XMLString::replicate(fPath);
ArrayJanitor<XMLCh> basePathName(realPath);
- if (*fPath == chForwardSlash)
- {
- if (XMLString::stringLen(fPath) > 3)
- {
- if (*(fPath + 2) == chColon)
- {
- const XMLCh chDrive = *(fPath + 1);
- if (((chDrive >= chLatin_A) && (chDrive <= chLatin_Z))
- || ((chDrive >= chLatin_a) && (chDrive <= chLatin_z)))
- {
- realPath = fPath + 1;
- }
- }
-
- // Similarly for UNC paths
- if ( *(fPath + 1) == *(fPath + 2) &&
- (*(fPath + 1) == chForwardSlash ||
- *(fPath + 1) == chBackSlash) )
- {
- realPath = fPath + 1;
- }
- }
- }
-
//
// Need to manually replace any character reference %xx first
// HTTP protocol will be done automatically by the netaccessor
@@ -835,8 +821,11 @@
}
// It's a relative path, so weave them together.
- if (baseURL.fPath)
- weavePaths(baseURL.fPath);
+ if (baseURL.fPath) {
+ XMLCh* temp = XMLPlatformUtils::weavePaths(baseURL.fPath, fPath);
+ delete [] fPath;
+ fPath = temp;
+ }
// If we had any original path, then we are done
if (hadPath)
@@ -860,6 +849,12 @@
if (!*urlText)
ThrowXML(MalformedURLException, XMLExcepts::URL_NoProtocolPresent);
+ // Before we start, check if this urlText contains valid uri characters
+ if (!XMLUri::isURIString(urlText))
+ fHasInvalidChar = true;
+ else
+ fHasInvalidChar = false;
+
//
// The first thing we will do is to check for a file name, so that
// we don't waste time thinking it's a URL. If it's in the form x:\
@@ -988,17 +983,17 @@
}
else
{
- //
- // http protocol requires two forward slashes
- // we didn't get them, so throw an exception
- //
- if (fProtocol == HTTP) {
- ThrowXML
+ //
+ // http protocol requires two forward slashes
+ // we didn't get them, so throw an exception
+ //
+ if (fProtocol == HTTP) {
+ ThrowXML
(
MalformedURLException
, XMLExcepts::URL_ExpectingTwoSlashes
);
- }
+ }
}
//
@@ -1135,118 +1130,6 @@
}
}
-
-void XMLURL::weavePaths(const XMLCh* const basePart)
-{
- // Watch for stupid stuff
- if (!basePart)
- return;
- if (!*basePart)
- return;
-
- //
- // Ok, lets start at the end of the base path and work backwards and
- // our path part and work forwards. For each leading . we see, we just
- // eat it. For each leading .. we see, we eat it and throw away one
- // level in the source URL.
- //
- // If the last character in the base part is a forward slash, back
- // up one first before we look for the last slash.
- //
- const XMLCh* basePtr = basePart + (XMLString::stringLen(basePart) - 1);
- if (*basePtr == chForwardSlash)
- basePtr--;
-
- while ((basePtr >= basePart)
- && ((*basePtr != chForwardSlash) && (*basePtr != chBackSlash)))
- {
- basePtr--;
- }
-
- if (basePtr < basePart)
- return;
-
- // Create a buffer as large as both parts
- XMLCh* tmpBuf = new XMLCh[XMLString::stringLen(fPath)
- + XMLString::stringLen(basePart)
- + 2];
- //
- // If we have no path part, then copy the base part up to the
- // base pointer
- //
- if (!fPath)
- {
- XMLCh* bufPtr = tmpBuf;
- const XMLCh* tmpPtr = basePart;
- while (tmpPtr <= basePtr)
- *bufPtr++ = *tmpPtr++;
- *bufPtr = 0;
-
- fPath = tmpBuf;
- return;
- }
-
- // After this, make sure the buffer gets handled if we exit early
- ArrayJanitor<XMLCh> janBuf(tmpBuf);
-
- //
- // We have some path part, so we need to check to see if we have to
- // weave any of the parts together.
- //
- XMLCh* pathPtr = fPath;
- while (true)
- {
- // If it does not start with some period, then we are done
- if (*pathPtr != chPeriod)
- break;
-
- unsigned int periodCount = 1;
- pathPtr++;
- if (*pathPtr == chPeriod)
- {
- pathPtr++;
- periodCount++;
- }
-
- // Has to be followed by a / or \ or the null to mean anything
- if ((*pathPtr != chForwardSlash) && (*pathPtr != chBackSlash)
- && *pathPtr)
- {
- break;
- }
- if (*pathPtr)
- pathPtr++;
-
- // If its one period, just eat it, else move backwards in the base
- if (periodCount == 2)
- {
- basePtr--;
- while ((basePtr >= basePart)
- && ((*basePtr != chForwardSlash) && (*basePtr != chBackSlash)))
- {
- basePtr--;
- }
-
- // There are not enough levels to handle all the .. parts
- if (basePtr < basePart)
- ThrowXML(MalformedURLException, XMLExcepts::URL_BaseUnderflow);
- }
- }
-
- // Copy the base part up to the base pointer
- XMLCh* bufPtr = tmpBuf;
- const XMLCh* tmpPtr = basePart;
- while (tmpPtr <= basePtr)
- *bufPtr++ = *tmpPtr++;
-
- // And then copy on the rest of our path
- XMLString::copyString(bufPtr, pathPtr);
-
- // Now delete our path and make the new buffer our path
- delete [] fPath;
- janBuf.orphan();
- fPath = tmpBuf;
-}
XERCES_CPP_NAMESPACE_END
1.4 +7 -2 xml-xerces/c/src/xercesc/util/XMLURL.hpp
Index: XMLURL.hpp
===================================================================
RCS file: /home/cvs/xml-xerces/c/src/xercesc/util/XMLURL.hpp,v
retrieving revision 1.3
retrieving revision 1.4
diff -u -r1.3 -r1.4
--- XMLURL.hpp 4 Nov 2002 15:22:05 -0000 1.3
+++ XMLURL.hpp 6 Jan 2003 19:43:18 -0000 1.4
@@ -177,6 +177,7 @@
// Miscellaneous methods
// -----------------------------------------------------------------------
bool isRelative() const;
+ bool hasInvalidChar() const;
BinInputStream* makeNewStream() const;
void makeRelativeTo(const XMLCh* const baseURLText);
void makeRelativeTo(const XMLURL& baseURL);
@@ -193,7 +194,6 @@
(
const XMLCh* const urlText
);
- void weavePaths(const XMLCh* const basePart);
// -----------------------------------------------------------------------
@@ -231,6 +231,10 @@
// This is a copy of the URL text, after it has been taken apart,
// made relative if needed, canonicalized, and then put back
// together. It's only created upon demand.
+ //
+ // fHasInvalidChar
+ // This indicates if the URL Text contains invalid characters as per
+ // RFC 2396 standard.
// -----------------------------------------------------------------------
XMLCh* fFragment;
XMLCh* fHost;
@@ -241,6 +245,7 @@
XMLCh* fQuery;
XMLCh* fUser;
XMLCh* fURLText;
+ bool fHasInvalidChar;
};
---------------------------------------------------------------------
To unsubscribe, e-mail: xerces-cvs-unsubscribe@xml.apache.org
For additional commands, e-mail: xerces-cvs-help@xml.apache.org