You are viewing a plain text version of this content. The canonical link for it is here.
Posted to user@thrift.apache.org by Jean Rodier <Je...@tatacommunications.com> on 2018/01/18 15:45:41 UTC

Is the Thrift serialization compatible both directions?

Hi,

Is this statement true, especially the last part?  (from: https://diwakergupta.github.io/thrift-missing-guide/)

Any new fields that you add should be optional. This means that any messages serialized by code using your "old" message format can be parsed by your new generated code, as they won’t be missing any required elements. Similarly, messages created by your new code can be parsed by your old code: old binaries simply ignore the new field when parsing. However, the unknown fields are not discarded, and if the message is later serialized, the unknown fields are serialized along with it — so if the message is passed on to new code, the new fields are still available.

I tested it using the following files in C++ and apparently it is not…

File msg1.thrift
--------------------
namespace cpp msg1
// "Old" message schema: only fields 1 (version) and 2 (time), both declared
// without an explicit requiredness keyword.
struct msgStruct {
    1:                        i32 version,
    2:                        i64 time
}

File msg2.thrift
--------------------
namespace cpp msg2
// "New" message schema: the same fields 1 and 2 as msg1.thrift, plus the
// optional field 3 (flag) that the old schema does not know about.
struct msgStruct {
    1:                        i32 version,
    2:                        i64 time,
    3: optional i32 flag
}

File main.cpp
------------------
/*
Build commands:
   thrift --gen cpp msg1.thrift
   thrift --gen cpp msg2.thrift

   g++ main.cpp -o test -lthrift -lthriftz
*/

#include <vector>
using namespace std;

#include <thrift/Thrift.h>
#include <thrift/protocol/TProtocol.h>
#include <thrift/protocol/TCompactProtocol.h>
#include <thrift/protocol/TJSONProtocol.h>
#include <thrift/transport/TTransport.h>

#include "gen-cpp/msg1_types.h"      // Thrift generated
#include "gen-cpp/msg2_types.h"      // Thrift generated

/**
 * Serializes a Thrift-generated struct into a byte vector using TJSONProtocol.
 *
 * @tparam T   type exposing a `write(protocol*)` member (called on `obj` below)
 * @param obj  object to serialize
 * @param s    receives the serialized bytes; any previous contents are replaced
 */
template<typename T>
void serialize(const T & obj, vector<uint8_t> & s) {
   ::boost::shared_ptr<apache::thrift::transport::TMemoryBuffer> buffer(new apache::thrift::transport::TMemoryBuffer());
   // Alternative wire format kept for experimentation: swap the two protocol lines.
   //::boost::shared_ptr<apache::thrift::protocol::TCompactProtocol> protocol(new apache::thrift::protocol::TCompactProtocol(buffer));
   ::boost::shared_ptr<apache::thrift::protocol::TJSONProtocol> protocol(new apache::thrift::protocol::TJSONProtocol(buffer));

   obj.write(protocol.get());

   // getBuffer() reports the data pointer and byte count through out-parameters;
   // initialize them so they never hold indeterminate values.
   uint8_t * buf = 0;
   uint32_t sz = 0;
   buffer->getBuffer(&buf, &sz);

   // assign() replaces the whole contents of `s`, so a preceding resize() is
   // redundant work and has been dropped.
   s.assign(buf, buf + sz);
}

/**
 * Deserializes a Thrift-generated struct from a byte vector using TJSONProtocol.
 *
 * @tparam T   type exposing a `read(protocol*)` member (called on `obj` below)
 * @param s    bytes previously produced by the matching serialize() call
 * @param obj  object populated from the bytes
 */
template<typename T>
void deserialize(const vector<uint8_t> & s, T & obj) {
   // TMemoryBuffer's constructor takes a non-const pointer and a 32-bit length.
   // Spell both conversions out with named casts instead of a C-style cast that
   // silently drops const.
   // NOTE(review): relies on TMemoryBuffer's default memory policy only reading
   // the caller's buffer - confirm against TBufferTransports.h.
   uint8_t * const data = const_cast<uint8_t*>(s.data());
   const uint32_t size = static_cast<uint32_t>(s.size());

   ::boost::shared_ptr<apache::thrift::transport::TMemoryBuffer> buffer(new apache::thrift::transport::TMemoryBuffer(data, size));
   // Alternative wire format kept for experimentation: swap the two protocol lines.
   //::boost::shared_ptr<apache::thrift::protocol::TCompactProtocol> protocol(new apache::thrift::protocol::TCompactProtocol(buffer));
   ::boost::shared_ptr<apache::thrift::protocol::TJSONProtocol> protocol(new apache::thrift::protocol::TJSONProtocol(buffer));

   obj.read(protocol.get());
}

int main(int argc, char** argv) {
   msg2::msgStruct m2;
   m2.__set_version(123);
  m2.__set_time(456);
   m2.__set_flag(789);

   vector<uint8_t> buf;
   serialize<msg2::msgStruct>(m2, buf);

   string output(buf.begin(), buf.end());
   cout << output << endl;
   // {"1":{"i32":123},"2":{"i64":456},"3":{"i32":789}}

   msg1::msgStruct m1;
   deserialize<msg1::msgStruct>(buf, m1);

   vector<uint8_t> buf2;
   serialize<msg1::msgStruct>(m1, buf2);

   string output2(buf2.begin(), buf2.end());
   cout << output2 << endl;
   // {"1":{"i32":123},"2":{"i64":456}} expecting {"1":{"i32":123},"2":{"i64":456},"3":{"i32":789}}

   msg2::msgStruct m2_2;
   deserialize<msg2::msgStruct>(buf2, m2_2);

   vector<uint8_t> buf3;
   serialize<msg2::msgStruct>(m2_2, buf3);

   string output3(buf3.begin(), buf3.end());
   cout << output3 << endl;
   // {"1":{"i32":123},"2":{"i64":456}} expecting {"1":{"i32":123},"2":{"i64":456},"3":{"i32":789}}
}

#include "gen-cpp/msg1_types.cpp"      // Thrift generated
#include "gen-cpp/msg1_constants.cpp"  // Thrift generated
#include "gen-cpp/msg2_types.cpp"      // Thrift generated
#include "gen-cpp/msg2_constants.cpp"  // Thrift generated

Re: Is the Thrift serialization compatible both directions?

Posted by Randy Abernethy <ra...@apache.org>.
Taking this apart:

- Any new fields that you add should be optional.
I disagree; default requiredness with a default value works fine as well
(and is my preference if you are not interested in optimizing the field
away in procs that know about it). On the server side, if the default field
is there, great; if not, the default value is used. On the client side, if the
field is known it is sent; if not, it is not.

- This means that any messages serialized by code using your "old" message
format can be parsed by your new generated code, as they won’t be missing
any required elements.
True and true with default requiredness as well.

- Similarly, messages created by your new code can be parsed by your old
code: old binaries simply ignore the new field when parsing.
True and true with default requiredness as well.

- However, the unknown fields are not discarded, and if the message is
later serialized, the unknown fields are serialized along with it — so if
the message is passed on to new code, the new fields are still available.
This is inaccurate. If you use Thrift "normally", when you deserialize, the
struct (message) will only contain the fields you know (in any
requiredness).

--Randy

On Thu, Jan 18, 2018 at 7:59 AM, Jean Rodier <
Jean.Rodier@tatacommunications.com> wrote:

> Hi,
>
> Is this statement true, especially the last part?
>
> Any new fields that you add should be optional. This means that any
> messages serialized by code using your "old" message format can be parsed
> by your new generated code, as they won’t be missing any required elements.
> Similarly, messages created by your new code can be parsed by your old
> code: old binaries simply ignore the new field when parsing. However, the
> unknown fields are not discarded, and if the message is later serialized,
> the unknown fields are serialized along with it — so if the message is
> passed on to new code, the new fields are still available.
>
> Jean
>

Re: Is the Thrift serialization compatible both directions?

Posted by Randy Abernethy <ra...@apache.org>.
Taking this apart:

- Any new fields that you add should be optional.
I disagree; default requiredness with a default value works fine as well
(and is my preference if you are not interested in optimizing the field
away in procs that know about it). On the server side, if the default field
is there, great; if not, the default value is used. On the client side, if the
field is known it is sent; if not, it is not.

- This means that any messages serialized by code using your "old" message
format can be parsed by your new generated code, as they won’t be missing
any required elements.
True and true with default requiredness as well.

- Similarly, messages created by your new code can be parsed by your old
code: old binaries simply ignore the new field when parsing.
True and true with default requiredness as well.

- However, the unknown fields are not discarded, and if the message is
later serialized, the unknown fields are serialized along with it — so if
the message is passed on to new code, the new fields are still available.
This is inaccurate. If you use Thrift "normally", when you deserialize, the
struct (message) will only contain the fields you know (in any
requiredness).

--Randy

On Thu, Jan 18, 2018 at 7:59 AM, Jean Rodier <
Jean.Rodier@tatacommunications.com> wrote:

> Hi,
>
> Is this statement true, especially the last part?
>
> Any new fields that you add should be optional. This means that any
> messages serialized by code using your "old" message format can be parsed
> by your new generated code, as they won’t be missing any required elements.
> Similarly, messages created by your new code can be parsed by your old
> code: old binaries simply ignore the new field when parsing. However, the
> unknown fields are not discarded, and if the message is later serialized,
> the unknown fields are serialized along with it — so if the message is
> passed on to new code, the new fields are still available.
>
> Jean
>

Is the Thrift serialization compatible both directions?

Posted by Jean Rodier <Je...@tatacommunications.com>.
Hi,

Is this statement true, especially the last part?

Any new fields that you add should be optional. This means that any messages serialized by code using your "old" message format can be parsed by your new generated code, as they won’t be missing any required elements. Similarly, messages created by your new code can be parsed by your old code: old binaries simply ignore the new field when parsing. However, the unknown fields are not discarded, and if the message is later serialized, the unknown fields are serialized along with it — so if the message is passed on to new code, the new fields are still available.

Jean

Is the Thrift serialization compatible both directions?

Posted by Jean Rodier <Je...@tatacommunications.com>.
Hi,

Is this statement true, especially the last part?

Any new fields that you add should be optional. This means that any messages serialized by code using your "old" message format can be parsed by your new generated code, as they won’t be missing any required elements. Similarly, messages created by your new code can be parsed by your old code: old binaries simply ignore the new field when parsing. However, the unknown fields are not discarded, and if the message is later serialized, the unknown fields are serialized along with it — so if the message is passed on to new code, the new fields are still available.

Jean

Is the Thrift serialization compatible both directions?

Posted by Jean Rodier <Je...@tatacommunications.com>.
Hi,

Is this statement true, especially the last part?  (from: https://diwakergupta.github.io/thrift-missing-guide/)

Any new fields that you add should be optional. This means that any messages serialized by code using your "old" message format can be parsed by your new generated code, as they won’t be missing any required elements. Similarly, messages created by your new code can be parsed by your old code: old binaries simply ignore the new field when parsing. However, the unknown fields are not discarded, and if the message is later serialized, the unknown fields are serialized along with it — so if the message is passed on to new code, the new fields are still available.

I tested it using the following files in C++ and apparently it is not…

File msg1.thrift
--------------------
namespace cpp msg1
// "Old" message schema: only fields 1 (version) and 2 (time), both declared
// without an explicit requiredness keyword.
struct msgStruct {
    1:                        i32 version,
    2:                        i64 time
}

File msg2.thrift
--------------------
namespace cpp msg2
// "New" message schema: the same fields 1 and 2 as msg1.thrift, plus the
// optional field 3 (flag) that the old schema does not know about.
struct msgStruct {
    1:                        i32 version,
    2:                        i64 time,
    3: optional i32 flag
}

File main.cpp
------------------
/*
Build commands:
   thrift --gen cpp msg1.thrift
   thrift --gen cpp msg2.thrift

   g++ main.cpp -o test -lthrift -lthriftz
*/

#include <vector>
using namespace std;

#include <thrift/Thrift.h>
#include <thrift/protocol/TProtocol.h>
#include <thrift/protocol/TCompactProtocol.h>
#include <thrift/protocol/TJSONProtocol.h>
#include <thrift/transport/TTransport.h>

#include "gen-cpp/msg1_types.h"      // Thrift generated
#include "gen-cpp/msg2_types.h"      // Thrift generated

/**
 * Serializes a Thrift-generated struct into a byte vector using TJSONProtocol.
 *
 * @tparam T   type exposing a `write(protocol*)` member (called on `obj` below)
 * @param obj  object to serialize
 * @param s    receives the serialized bytes; any previous contents are replaced
 */
template<typename T>
void serialize(const T & obj, vector<uint8_t> & s) {
   ::boost::shared_ptr<apache::thrift::transport::TMemoryBuffer> buffer(new apache::thrift::transport::TMemoryBuffer());
   // Alternative wire format kept for experimentation: swap the two protocol lines.
   //::boost::shared_ptr<apache::thrift::protocol::TCompactProtocol> protocol(new apache::thrift::protocol::TCompactProtocol(buffer));
   ::boost::shared_ptr<apache::thrift::protocol::TJSONProtocol> protocol(new apache::thrift::protocol::TJSONProtocol(buffer));

   obj.write(protocol.get());

   // getBuffer() reports the data pointer and byte count through out-parameters;
   // initialize them so they never hold indeterminate values.
   uint8_t * buf = 0;
   uint32_t sz = 0;
   buffer->getBuffer(&buf, &sz);

   // assign() replaces the whole contents of `s`, so a preceding resize() is
   // redundant work and has been dropped.
   s.assign(buf, buf + sz);
}

/**
 * Deserializes a Thrift-generated struct from a byte vector using TJSONProtocol.
 *
 * @tparam T   type exposing a `read(protocol*)` member (called on `obj` below)
 * @param s    bytes previously produced by the matching serialize() call
 * @param obj  object populated from the bytes
 */
template<typename T>
void deserialize(const vector<uint8_t> & s, T & obj) {
   // TMemoryBuffer's constructor takes a non-const pointer and a 32-bit length.
   // Spell both conversions out with named casts instead of a C-style cast that
   // silently drops const.
   // NOTE(review): relies on TMemoryBuffer's default memory policy only reading
   // the caller's buffer - confirm against TBufferTransports.h.
   uint8_t * const data = const_cast<uint8_t*>(s.data());
   const uint32_t size = static_cast<uint32_t>(s.size());

   ::boost::shared_ptr<apache::thrift::transport::TMemoryBuffer> buffer(new apache::thrift::transport::TMemoryBuffer(data, size));
   // Alternative wire format kept for experimentation: swap the two protocol lines.
   //::boost::shared_ptr<apache::thrift::protocol::TCompactProtocol> protocol(new apache::thrift::protocol::TCompactProtocol(buffer));
   ::boost::shared_ptr<apache::thrift::protocol::TJSONProtocol> protocol(new apache::thrift::protocol::TJSONProtocol(buffer));

   obj.read(protocol.get());
}

int main(int argc, char** argv) {
   msg2::msgStruct m2;
   m2.__set_version(123);
  m2.__set_time(456);
   m2.__set_flag(789);

   vector<uint8_t> buf;
   serialize<msg2::msgStruct>(m2, buf);

   string output(buf.begin(), buf.end());
   cout << output << endl;
   // {"1":{"i32":123},"2":{"i64":456},"3":{"i32":789}}

   msg1::msgStruct m1;
   deserialize<msg1::msgStruct>(buf, m1);

   vector<uint8_t> buf2;
   serialize<msg1::msgStruct>(m1, buf2);

   string output2(buf2.begin(), buf2.end());
   cout << output2 << endl;
   // {"1":{"i32":123},"2":{"i64":456}} expecting {"1":{"i32":123},"2":{"i64":456},"3":{"i32":789}}

   msg2::msgStruct m2_2;
   deserialize<msg2::msgStruct>(buf2, m2_2);

   vector<uint8_t> buf3;
   serialize<msg2::msgStruct>(m2_2, buf3);

   string output3(buf3.begin(), buf3.end());
   cout << output3 << endl;
   // {"1":{"i32":123},"2":{"i64":456}} expecting {"1":{"i32":123},"2":{"i64":456},"3":{"i32":789}}
}

#include "gen-cpp/msg1_types.cpp"      // Thrift generated
#include "gen-cpp/msg1_constants.cpp"  // Thrift generated
#include "gen-cpp/msg2_types.cpp"      // Thrift generated
#include "gen-cpp/msg2_constants.cpp"  // Thrift generated