You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nifi.apache.org by al...@apache.org on 2017/03/24 16:58:16 UTC

nifi-minifi-cpp git commit: MINIFI-193: Ensure safe UTF encoding

Repository: nifi-minifi-cpp
Updated Branches:
  refs/heads/master 45e6d5165 -> 070d8758f


MINIFI-193: Ensure safe UTF encoding

Because the C++ library is agnostic of UTF-8, we can safely write the raw bytes.
Since the core library never interprets the UTF-8 content, we do not need any
additional dependencies, nor do we need to worry about encoding beyond proper
serialization and deserialization of the byte array.

This closes #68.

Signed-off-by: Aldrin Piri <al...@apache.org>


Project: http://git-wip-us.apache.org/repos/asf/nifi-minifi-cpp/repo
Commit: http://git-wip-us.apache.org/repos/asf/nifi-minifi-cpp/commit/070d8758
Tree: http://git-wip-us.apache.org/repos/asf/nifi-minifi-cpp/tree/070d8758
Diff: http://git-wip-us.apache.org/repos/asf/nifi-minifi-cpp/diff/070d8758

Branch: refs/heads/master
Commit: 070d8758fded4836f17816c1b1a34a48aa1bba20
Parents: 45e6d51
Author: Marc Parisi <ph...@apache.org>
Authored: Fri Mar 17 13:11:21 2017 -0400
Committer: Aldrin Piri <al...@apache.org>
Committed: Fri Mar 24 12:57:32 2017 -0400

----------------------------------------------------------------------
 libminifi/src/io/Serializable.cpp          | 61 ++++-------------------
 libminifi/test/unit/SerializationTests.cpp | 64 ++++++++++++++++++++++++-
 2 files changed, 71 insertions(+), 54 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/nifi-minifi-cpp/blob/070d8758/libminifi/src/io/Serializable.cpp
----------------------------------------------------------------------
diff --git a/libminifi/src/io/Serializable.cpp b/libminifi/src/io/Serializable.cpp
index 8d67f15..78dc63c 100644
--- a/libminifi/src/io/Serializable.cpp
+++ b/libminifi/src/io/Serializable.cpp
@@ -15,7 +15,7 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-
+#include <iostream>
 #include <vector>
 #include <string>
 #include <cstdio>
@@ -100,17 +100,14 @@ int Serializable::read(uint8_t *value, int len,DataStream *stream) {
 }
 
 int Serializable::read(uint16_t &value,DataStream *stream, bool is_little_endian) {
-
     return stream->read(value, is_little_endian);
 }
 
 int Serializable::read(uint32_t &value,DataStream *stream, bool is_little_endian) {
-
     return stream->read(value, is_little_endian);
 
 }
 int Serializable::read(uint64_t &value,DataStream *stream, bool is_little_endian) {
-
     return stream->read(value, is_little_endian);
 
 }
@@ -138,9 +135,8 @@ int Serializable::write(uint16_t base_value,DataStream *stream, bool is_little_e
 }
 
 int Serializable::readUTF(std::string &str,DataStream *stream, bool widen) {
-    uint32_t utflen;
+    uint32_t utflen=0;
     int ret = 1;
-
     if (!widen) {
         uint16_t shortLength = 0;
         ret = read(shortLength,stream);
@@ -151,11 +147,14 @@ int Serializable::readUTF(std::string &str,DataStream *stream, bool widen) {
     } else {
         uint32_t len;
         ret = read(len,stream);
+
         if (ret <= 0)
             return ret;
         utflen = len;
     }
 
+
+
     if (utflen == 0)
         return 1;
 
@@ -166,12 +165,6 @@ int Serializable::readUTF(std::string &str,DataStream *stream, bool widen) {
     str = std::string((const char*)&buf[0],utflen);
 
     return utflen;
-    /*
-    if (!widen)
-        return (2 + utflen);
-    else
-        return (4 + utflen);
-        */
 }
 
 int Serializable::writeUTF(std::string str,DataStream *stream, bool widen) {
@@ -179,16 +172,7 @@ int Serializable::writeUTF(std::string str,DataStream *stream, bool widen) {
     uint32_t utflen = 0;
     int currentPtr = 0;
 
-    /* use charAt instead of copying String to char array */
-    for (auto c : str) {
-        if (IS_ASCII(c)) {
-            utflen++;
-        }else if (c > 2047){
-        	utflen += 3;
-        } else {
-            utflen += 2;
-        }
-    }
+   utflen = str.length();
 
     if (utflen > 65535)
         return -1;
@@ -199,7 +183,7 @@ int Serializable::writeUTF(std::string str,DataStream *stream, bool widen) {
             uint16_t shortLen = utflen;
             write(shortLen,stream);
         } else {
-
+          write(utflen,stream);
         }
         return 1;
     }
@@ -207,13 +191,8 @@ int Serializable::writeUTF(std::string str,DataStream *stream, bool widen) {
     std::vector<uint8_t> utf_to_write;
     if (!widen) {
         utf_to_write.resize(utflen);
-
-        uint16_t shortLen = utflen;
-
     } else {
-
         utf_to_write.resize(utflen);
-
     }
 
     int i = 0;
@@ -221,26 +200,7 @@ int Serializable::writeUTF(std::string str,DataStream *stream, bool widen) {
 
     uint8_t *underlyingPtr = &utf_to_write[0];
     for (auto c : str) {
-        if (IS_ASCII(c)) {
-            writeData(c, underlyingPtr++);
-        } else if (c > 2047){
-
-        	auto t = (uint8_t) (((c >> 0x0C) & 15) | 192);
-        	writeData(t, underlyingPtr++);
-        	t = (uint8_t) (((c >> 0x06) & 63) | 128);
-        	writeData(t, underlyingPtr++);
-        	t = (uint8_t) (((c >> 0) & 63) | 128);
-			writeData(t, underlyingPtr++);
-
-        } else {
-            auto t = (uint8_t) (((c >> 0x06) & 31) | 192);
-            writeData(t, underlyingPtr++);
-            currentPtr++;
-            t = (uint8_t) (((c >> 0x00) & 63) | 128);
-            writeData(t, underlyingPtr++);
-            currentPtr++;
-
-        }
+      writeData(c, underlyingPtr++);
     }
     int ret;
 
@@ -248,11 +208,6 @@ int Serializable::writeUTF(std::string str,DataStream *stream, bool widen) {
 
         uint16_t short_length = utflen;
         write(short_length,stream);
-
-        for (int i = 0; i < utflen; i++) {
-        }
-        for (auto c : utf_to_write) {
-        }
         ret = stream->writeData(utf_to_write.data(), utflen);
     } else {
         //utflen += 4;

http://git-wip-us.apache.org/repos/asf/nifi-minifi-cpp/blob/070d8758/libminifi/test/unit/SerializationTests.cpp
----------------------------------------------------------------------
diff --git a/libminifi/test/unit/SerializationTests.cpp b/libminifi/test/unit/SerializationTests.cpp
index 6b6e81e..96540bd 100644
--- a/libminifi/test/unit/SerializationTests.cpp
+++ b/libminifi/test/unit/SerializationTests.cpp
@@ -17,7 +17,7 @@
  */
 
 
-
+#include "io/BaseStream.h"
 #include "Site2SitePeer.h"
 #include "Site2SiteClientProtocol.h"
 #include <uuid/uuid.h>
@@ -78,6 +78,68 @@ TEST_CASE("TestSetPortIdUppercase", "[S2S2]"){
 }
 
 
+TEST_CASE("TestWriteUTF", "[MINIFI193]"){
+
+  DataStream baseStream;
+
+  Serializable ser;
+
+  std::string stringOne = "helo world"; // yes, this has a typo.
+  std::string verifyString;
+  ser.writeUTF(stringOne,&baseStream,false);
+
+
+  ser.readUTF(verifyString,&baseStream,false);
+
+  REQUIRE(verifyString == stringOne);
+
+
+
+
+}
+
+
+
+
+TEST_CASE("TestWriteUTF2", "[MINIFI193]"){
+
+  DataStream baseStream;
+
+  Serializable ser;
+
+  std::string stringOne = "hel\xa1o world";
+  REQUIRE(11 == stringOne.length());
+  std::string verifyString;
+  ser.writeUTF(stringOne,&baseStream,false);
 
 
+  ser.readUTF(verifyString,&baseStream,false);
 
+  REQUIRE(verifyString == stringOne);
+
+
+
+
+}
+
+
+TEST_CASE("TestWriteUTF3", "[MINIFI193]"){
+
+  DataStream baseStream;
+
+  Serializable ser;
+
+  std::string stringOne = "\xe4\xbd\xa0\xe5\xa5\xbd\xe4\xb8\x96\xe7\x95\x8c";
+  REQUIRE(12 == stringOne.length());
+  std::string verifyString;
+  ser.writeUTF(stringOne,&baseStream,false);
+
+
+  ser.readUTF(verifyString,&baseStream,false);
+
+  REQUIRE(verifyString == stringOne);
+
+
+
+
+}