You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@xerces.apache.org by am...@apache.org on 2008/01/04 19:25:55 UTC

svn commit: r608953 - in /xerces/c/branches/xerces-2/src/xercesc/validators/datatype: AnyURIDatatypeValidator.cpp AnyURIDatatypeValidator.hpp

Author: amassari
Date: Fri Jan  4 10:25:55 2008
New Revision: 608953

URL: http://svn.apache.org/viewvc?rev=608953&view=rev
Log:
- If the URI contains several non-ASCII characters, the optimistic allocation (len*3) could run out of space (as each non-ASCII char would be replaced by at least 2 UTF-8 bytes, and each of them by 3 characters) -> use an XMLBuffer that can grow past the initial allocation
- the fixed-size local buffers passed to sprintf were 2 bytes long, but sprintf writes the two hex digits plus the NUL terminator, so 3 bytes are needed
- the converted UTF-8 string was only partially escaped, as the loop used the length of the original string instead of the length of the UTF-8 string
- each UTF-8 byte was assumed to be a signed byte, but XMLByte is unsigned; so the b<0 test was never true (and why add 256 to the signed value to make it positive? the new number would be different) and the next test was accessing a static array of 128 items with indexes between 0 and 255

Modified:
    xerces/c/branches/xerces-2/src/xercesc/validators/datatype/AnyURIDatatypeValidator.cpp
    xerces/c/branches/xerces-2/src/xercesc/validators/datatype/AnyURIDatatypeValidator.hpp

Modified: xerces/c/branches/xerces-2/src/xercesc/validators/datatype/AnyURIDatatypeValidator.cpp
URL: http://svn.apache.org/viewvc/xerces/c/branches/xerces-2/src/xercesc/validators/datatype/AnyURIDatatypeValidator.cpp?rev=608953&r1=608952&r2=608953&view=diff
==============================================================================
--- xerces/c/branches/xerces-2/src/xercesc/validators/datatype/AnyURIDatatypeValidator.cpp (original)
+++ xerces/c/branches/xerces-2/src/xercesc/validators/datatype/AnyURIDatatypeValidator.cpp Fri Jan  4 10:25:55 2008
@@ -25,6 +25,7 @@
 #include <stdio.h>
 #include <xercesc/util/OutOfMemoryException.hpp>
 #include <xercesc/util/XMLUTF8Transcoder.hpp>
+#include <xercesc/framework/XMLBuffer.hpp>
 #include <xercesc/validators/datatype/AnyURIDatatypeValidator.hpp>
 #include <xercesc/validators/datatype/InvalidDatatypeFacetException.hpp>
 #include <xercesc/validators/datatype/InvalidDatatypeValueException.hpp>
@@ -82,10 +83,9 @@
         if (len)
         {          
             // Encode special characters using XLink 5.4 algorithm
-            XMLCh* encoded = (XMLCh*)manager->allocate((len*3+1) * sizeof(XMLCh));
-            ArrayJanitor<XMLCh> encodedJan(encoded, manager);
+			XMLBuffer encoded((len*3)+1, manager);
             encode(content, len, encoded, manager);
-            validURI = XMLUri::isValidURI(true, encoded);            
+            validURI = XMLUri::isValidURI(true, encoded.getRawBuffer(), true);            
         }
     }
     catch(const OutOfMemoryException&)
@@ -113,7 +113,7 @@
  * special ASCII characters: 0x00~0x1F, 0x7F, ' ', '<', '>', etc.
  * and non-ASCII characters (whose value >= 128).
  ***/
-void AnyURIDatatypeValidator::encode(const XMLCh* const content, const unsigned int len, XMLCh* encoded, MemoryManager* const manager)
+void AnyURIDatatypeValidator::encode(const XMLCh* const content, const unsigned int len, XMLBuffer& encoded, MemoryManager* const manager)
 {
     static const bool needEscapeMap[] = {
         true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , true , /* 0x00 to 0x0F need escape */
@@ -139,15 +139,15 @@
 
         if (needEscapeMap[ch])
         {
-            char tempStr[2] = "\0";
+            char tempStr[3] = "\0";
             sprintf(tempStr, "%02X", ch);
-            encoded[bufferIndex++] = '%';
-            encoded[bufferIndex++] = (XMLCh)tempStr[0];
-            encoded[bufferIndex++] = (XMLCh)tempStr[1];
+            encoded.append('%');
+            encoded.append((XMLCh)tempStr[0]);
+            encoded.append((XMLCh)tempStr[1]);
         }
         else
         {
-            encoded[bufferIndex++] = (XMLCh)ch;
+            encoded.append((XMLCh)ch);
         }
     }
 
@@ -160,38 +160,27 @@
         unsigned int charsEaten;
 
         XMLUTF8Transcoder transcoder(XMLUni::fgUTF8EncodingString, remContentLen*4+1, manager);
-        transcoder.transcodeTo(remContent, remContentLen, UTF8Byte, remContentLen*4, charsEaten, XMLTranscoder::UnRep_RepChar);
+        unsigned int utf8Len = transcoder.transcodeTo(remContent, remContentLen, UTF8Byte, remContentLen*4, charsEaten, XMLTranscoder::UnRep_RepChar);
         assert(charsEaten == remContentLen);
 
         unsigned int j;
-        for (j = 0; j < remContentLen; j++) {
+        for (j = 0; j < utf8Len; j++) {
             XMLByte b = UTF8Byte[j];
-            // for non-ascii character: make it positive, then escape
-            if (b < 0) {
-                int ch = b + 256;
-                char tempStr[2] = "\0";
-                sprintf(tempStr, "%02X", ch);
-                encoded[bufferIndex++] = '%';
-                encoded[bufferIndex++] = (XMLCh)tempStr[0];
-                encoded[bufferIndex++] = (XMLCh)tempStr[1];
-            }
-            else if (needEscapeMap[b])
+            if (b >= 128 || needEscapeMap[b])
             {
-                char tempStr[2] = "\0";
+                char tempStr[3] = "\0";
                 sprintf(tempStr, "%02X", b);
-                encoded[bufferIndex++] = '%';
-                encoded[bufferIndex++] = (XMLCh)tempStr[0];
-                encoded[bufferIndex++] = (XMLCh)tempStr[1];
+                encoded.append('%');
+                encoded.append((XMLCh)tempStr[0]);
+                encoded.append((XMLCh)tempStr[1]);
             }
             else
             {
-                encoded[bufferIndex++] = (XMLCh)b;
+                encoded.append((XMLCh)b);
             }
         }
         manager->deallocate(UTF8Byte);
     }
-
-    encoded[bufferIndex] = (XMLCh)0;
 }
 
 /***

Modified: xerces/c/branches/xerces-2/src/xercesc/validators/datatype/AnyURIDatatypeValidator.hpp
URL: http://svn.apache.org/viewvc/xerces/c/branches/xerces-2/src/xercesc/validators/datatype/AnyURIDatatypeValidator.hpp?rev=608953&r1=608952&r2=608953&view=diff
==============================================================================
--- xerces/c/branches/xerces-2/src/xercesc/validators/datatype/AnyURIDatatypeValidator.hpp (original)
+++ xerces/c/branches/xerces-2/src/xercesc/validators/datatype/AnyURIDatatypeValidator.hpp Fri Jan  4 10:25:55 2008
@@ -27,6 +27,8 @@
 
 XERCES_CPP_NAMESPACE_BEGIN
 
+class XMLBuffer;
+
 class VALIDATORS_EXPORT AnyURIDatatypeValidator : public AbstractStringValidator
 {
 public:
@@ -81,7 +83,7 @@
     // -----------------------------------------------------------------------
     AnyURIDatatypeValidator(const AnyURIDatatypeValidator&);
     AnyURIDatatypeValidator& operator=(const AnyURIDatatypeValidator&);    
-    void encode(const XMLCh* const content, const unsigned int len, XMLCh* encoded, MemoryManager* const manager);
+    void encode(const XMLCh* const content, const unsigned int len, XMLBuffer& encoded, MemoryManager* const manager);
 };
 
 XERCES_CPP_NAMESPACE_END



---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@xerces.apache.org
For additional commands, e-mail: commits-help@xerces.apache.org