You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@activemq.apache.org by ta...@apache.org on 2009/04/03 16:08:21 UTC

svn commit: r761684 - /activemq/activemq-cpp/trunk/activemq-cpp/src/main/activemq/wireformat/openwire/utils/OpenwireStringSupport.cpp

Author: tabish
Date: Fri Apr  3 14:08:21 2009
New Revision: 761684

URL: http://svn.apache.org/viewvc?rev=761684&view=rev
Log:
https://issues.apache.org/activemq/browse/AMQCPP-232

New patch for the UTF-8 reader/writer code in the OpenWire wireformat.

Modified:
    activemq/activemq-cpp/trunk/activemq-cpp/src/main/activemq/wireformat/openwire/utils/OpenwireStringSupport.cpp

Modified: activemq/activemq-cpp/trunk/activemq-cpp/src/main/activemq/wireformat/openwire/utils/OpenwireStringSupport.cpp
URL: http://svn.apache.org/viewvc/activemq/activemq-cpp/trunk/activemq-cpp/src/main/activemq/wireformat/openwire/utils/OpenwireStringSupport.cpp?rev=761684&r1=761683&r2=761684&view=diff
==============================================================================
--- activemq/activemq-cpp/trunk/activemq-cpp/src/main/activemq/wireformat/openwire/utils/OpenwireStringSupport.cpp (original)
+++ activemq/activemq-cpp/trunk/activemq-cpp/src/main/activemq/wireformat/openwire/utils/OpenwireStringSupport.cpp Fri Apr  3 14:08:21 2009
@@ -36,56 +36,66 @@
 
         short utflen = dataIn.readShort();
 
-        if( utflen > -1 ) {
-
+        if( utflen > -1 )
+        {
             // Let the stream get us all that data.
             std::vector<unsigned char> value;
             value.resize( utflen );
             dataIn.readFully( value );
 
-            std::vector<unsigned char> byteArr;
-            byteArr.resize( utflen );
-
+            unsigned char c = 0;
             int count = 0;
-            for( unsigned int i = 0; i < value.size(); ++i ) {
 
-                unsigned int z = (unsigned char) value[i];
+            // x counts the number of 2-byte UTF8 sequences decoded
+            int x = 0;
 
-                if( (z >= 0x0001) && (z <= 0x007F) )  {
-                    byteArr[count++] = (unsigned char)z;
-                } else if( (z >= 0x00C0) && (z <= 0x00DF) ) {
-                    unsigned int y = (unsigned char) value[++i];
-                    byteArr[count++] = (unsigned char)((z-192)*64 + (y-128));
-                } else if ((z >= 0x00E0) && (z <= 0x00EF) ) {
-                    unsigned int y = (unsigned char) value[++i];
-                    unsigned int x = (unsigned char) value[++i];
-                    byteArr[count++] = (unsigned char)((z-224)*4096 + (y-128)*64 + (x-128));
-                } else if( (z >= 0x00F0) && (z <= 0x00F7) ) {
-                    unsigned int y = (unsigned char) value[++i];
-                    unsigned int x = (unsigned char) value[++i];
-                    unsigned int w = (unsigned char) value[++i];
-                    byteArr[count++] = (unsigned char)((z-240)*262144 + (y-128)*4096 + (x-128)*64 + (w-128));
-                } else if( (z >= 0x00F8) && (z <= 0x00FB) ) {
-                    unsigned int y = (unsigned char) value[++i];
-                    unsigned int x = (unsigned char) value[++i];
-                    unsigned int w = (unsigned char) value[++i];
-                    unsigned int v = (unsigned char) value[++i];
-                    byteArr[count++] = (unsigned char)((z-248)*16777216 + (y-128)*262144 + (x-128)*4096 + (w-128)*64 + (v-128));
-                } else if( (z >= 0x00FC) && (z <= 0x00FD) ) {
-                    unsigned int y = (unsigned char) value[++i];
-                    unsigned int x = (unsigned char) value[++i];
-                    unsigned int w = (unsigned char) value[++i];
-                    unsigned int v = (unsigned char) value[++i];
-                    unsigned int u = (unsigned char) value[++i];
-                    byteArr[count++] = (unsigned char)((z-252)*1073741824 + (y-128)*16777216 + (x-128)*262144 + (w-128)*4096 + (v-128)*64 + (u-128));
+            while( count+x < utflen )
+            {
+                c = value[count+x];
+                switch( c >> 4 )
+                {
+                    case 0:
+                    case 1:
+                    case 2:
+                    case 3:
+                    case 4:
+                    case 5:
+                    case 6:
+                    case 7:
+                        // 1-byte UTF8 encoding: 0xxxxxxx
+                        value[count] = c;
+                        count++;
+                        break;
+                    case 12:
+                    case 13:
+                        // 2-byte UTF8 encoding: 110X XXxx 10xx xxxx
+                        // Bits set at 'X' means we have encountered a UTF8 encoded value
+                        // greater than 255, which is not supported.
+                        if( c & 0x1C ) {
+                            throw IOException(
+                                __FILE__,
+                                __LINE__,
+                                "OpenwireStringSupport::readString - Encoding not supported" );
+                        }
+                        // Place the decoded UTF8 character back into the value array
+                        value[count] = ((c & 0x1F) << 6) | (value[count+x+1] & 0x3F);
+                        count++;
+                        x++;
+                        break;
+                    case 14:
+                    default:
+                    {
+                        // 3-byte UTF8 encoding: 1110 xxxx  10xx xxxx  10xx xxxx
+                        throw IOException(
+                            __FILE__,
+                            __LINE__,
+                            "OpenwireStringSupport::readString - Encoding not supported" );
+                    }
                 }
             }
 
-            // C++ strings need a NULL terminator
-            byteArr.push_back( '\0' );
-
             // Let the Compiler give us a string.
-            return reinterpret_cast<const char*>( &byteArr[0] );
+            return std::string(reinterpret_cast<const char*>(&value[0]), count);
         }
 
         return "";
@@ -98,7 +108,7 @@
 ////////////////////////////////////////////////////////////////////////////////
 void OpenwireStringSupport::writeString( decaf::io::DataOutputStream& dataOut,
                                          const std::string* str )
-                                            throw ( decaf::io::IOException ) {
+                                         throw ( decaf::io::IOException ) {
 
     try {
 
@@ -115,16 +125,15 @@
             }
 
             unsigned short utflen = 0;
-            unsigned int c, count = 0;
+            int count = 0;
+            unsigned char c;
 
             std::string::const_iterator iter = str->begin();
 
             for(; iter != str->end(); ++iter ) {
-                c = (unsigned char) *iter;
-                if( (c >= 0x0001) && (c <= 0x007F) ) {
+                c = *iter;
+                if( c < 0x80 ) {
                     utflen++;
-                } else if( c > 0x07FF ) {
-                    utflen += 3;
                 } else {
                     utflen += 2;
                 }
@@ -136,16 +145,12 @@
 
             for( iter = str->begin(); iter != str->end(); ++iter ) {
 
-                c = (unsigned char) *iter;
-                if( (c >= 0x0001) && (c <= 0x007F) ) {
+                c = *iter;
+                if( c < 0x80 ) {
                     byteArr[count++] = (unsigned char)c;
-                } else if( c <= 0x07FF ) {
-                    byteArr[count++] = (unsigned char)( 192 + (c / 64));
-                    byteArr[count++] = (unsigned char)( 128 + (c % 64));
                 } else {
-                    byteArr[count++] = (unsigned char)( 224 + (c / 4096));
-                    byteArr[count++] = (unsigned char)( 192 + ((c / 64) % 64));
-                    byteArr[count++] = (unsigned char)( 128 + (c % 64));
+                    byteArr[count++] = (unsigned char)( 0xC0 | ( (c >> 6) & 0x1F) );
+                    byteArr[count++] = (unsigned char)( 0x80 | ( (c >> 0) & 0x3F) );
                 }
             }