You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@xerces.apache.org by am...@apache.org on 2006/09/22 10:09:12 UTC

svn commit: r448859 - /xerces/c/trunk/src/xercesc/util/regx/RangeToken.cpp

Author: amassari
Date: Fri Sep 22 01:09:09 2006
New Revision: 448859

URL: http://svn.apache.org/viewvc?view=rev&rev=448859
Log:
If ICU 2.4 or later is available, ask it to compute all the case variants of a case-insensitive range

Modified:
    xerces/c/trunk/src/xercesc/util/regx/RangeToken.cpp

Modified: xerces/c/trunk/src/xercesc/util/regx/RangeToken.cpp
URL: http://svn.apache.org/viewvc/xerces/c/trunk/src/xercesc/util/regx/RangeToken.cpp?view=diff&rev=448859&r1=448858&r2=448859
==============================================================================
--- xerces/c/trunk/src/xercesc/util/regx/RangeToken.cpp (original)
+++ xerces/c/trunk/src/xercesc/util/regx/RangeToken.cpp Fri Sep 22 01:09:09 2006
@@ -30,11 +30,16 @@
 #include <xercesc/util/regx/RangeToken.hpp>
 #include <xercesc/util/regx/TokenFactory.hpp>
 #include <xercesc/util/IllegalArgumentException.hpp>
+#include <xercesc/util/XMLUniDefs.hpp>
 
 #if XERCES_USE_TRANSCODER_ICU
   #include <unicode/uchar.h>
-#else
-  #include <xercesc/util/XMLUniDefs.hpp>
+
+#if (U_ICU_VERSION_MAJOR_NUM >= 2) || (U_ICU_VERSION_MAJOR_NUM == 2 && U_ICU_VERSION_MINOR_NUM >=4)
+  #include <unicode/uset.h>
+  #include <xercesc/util/XMLString.hpp>
+  #include <xercesc/util/Janitor.hpp>
+#endif
 #endif
 
 XERCES_CPP_NAMESPACE_BEGIN
@@ -81,6 +86,64 @@
         bool isNRange = (getTokenType() == T_NRANGE) ? true : false;
         RangeToken* lwrToken = tokFactory->createRange(isNRange);
 
+#if XERCES_USE_TRANSCODER_ICU && ((U_ICU_VERSION_MAJOR_NUM >= 2) || (U_ICU_VERSION_MAJOR_NUM == 2 && U_ICU_VERSION_MINOR_NUM >=4))
+        UChar* rangeStr=(UChar*)fMemoryManager->allocate(40*fElemCount*sizeof(UChar));
+        ArrayJanitor<UChar> janRange(rangeStr, fMemoryManager);
+        int c=0;
+        rangeStr[c++] = chOpenSquare;
+        for (unsigned int i = 0;  i < fElemCount - 1;  i += 2) {
+            XMLCh buffer[10];
+            unsigned int len, j;
+
+            rangeStr[c++] = chBackSlash;
+            rangeStr[c++] = chLatin_U;
+            XMLString::binToText(fRanges[i], buffer, 10, 16, fMemoryManager);
+            len = XMLString::stringLen(buffer);
+            for(j=0;j<(8-len);j++)
+                rangeStr[c++] = chDigit_0;
+            XMLCh* p=buffer;
+            while(*p)
+                rangeStr[c++] = *p++;
+            if(fRanges[i+1]!=fRanges[i])
+            {
+                rangeStr[c++] = chDash;
+                rangeStr[c++] = chBackSlash;
+                rangeStr[c++] = chLatin_U;
+                XMLString::binToText(fRanges[i+1], buffer, 10, 16, fMemoryManager);
+                len = XMLString::stringLen(buffer);
+                for(j=0;j<(8-len);j++)
+                    rangeStr[c++] = chDigit_0;
+                p=buffer;
+                while(*p)
+                    rangeStr[c++] = *p++;
+            }
+        }
+        rangeStr[c++] = chCloseSquare;
+        rangeStr[c++] = chNull;
+        UErrorCode ec=U_ZERO_ERROR;
+        USet* range=uset_openPatternOptions(rangeStr, -1, USET_CASE_INSENSITIVE, &ec);
+        if(range)
+        {
+            ec = U_ZERO_ERROR;
+            uint32_t cbCount=uset_serialize(range, NULL, 0, &ec);
+            uint16_t* buffer=(uint16_t*)fMemoryManager->allocate(cbCount*sizeof(uint16_t));
+            ArrayJanitor<uint16_t> janSet(buffer, fMemoryManager);
+            ec = U_ZERO_ERROR;
+            uset_serialize(range, buffer, cbCount, &ec);
+            USerializedSet serializedSet;
+            uset_getSerializedSet(&serializedSet, buffer, cbCount);
+            int32_t nSets=uset_getSerializedRangeCount(&serializedSet);
+            for(int32_t i=0; i<nSets; i++)
+            {
+                UChar32 start, end;
+                uset_getSerializedRange(&serializedSet, i, &start, &end);
+                lwrToken->addRange(start, end);
+            }
+            // does this release the memory allocated by the set?
+            uset_setSerializedToOne(&serializedSet, 32);
+            uset_close(range);
+        }
+#else
         for (unsigned int i = 0;  i < fElemCount - 1;  i += 2) {
             for (XMLInt32 ch = fRanges[i];  ch <= fRanges[i + 1];  ++ch) {
 #if XERCES_USE_TRANSCODER_ICU
@@ -122,6 +185,7 @@
         }
 
         lwrToken->mergeRanges(this);
+#endif
         lwrToken->compactRanges();
         lwrToken->createMap();
 



---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@xerces.apache.org
For additional commands, e-mail: commits-help@xerces.apache.org