You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pulsar.apache.org by pe...@apache.org on 2021/09/29 10:40:38 UTC
[pulsar] 08/08: [Python] Do not sort schema fields by default
(#12232)
This is an automated email from the ASF dual-hosted git repository.
penghui pushed a commit to branch branch-2.8
in repository https://gitbox.apache.org/repos/asf/pulsar.git
commit 9ec4f909625036ea7bb67eac9fcda620418cd027
Author: Matteo Merli <mm...@apache.org>
AuthorDate: Wed Sep 29 04:16:08 2021 -0600
[Python] Do not sort schema fields by default (#12232)
### Motivation
In an Avro schema, the order of fields is significant in the validation process. If the Python client sorts the fields, it generates an unexpected schema for a Python producer/consumer, making it non-interoperable with Java and other clients.
(cherry picked from commit 2f3ad4d369e8a2ae558c6f9ee85f0b407e5e78b2)
---
.../python/pulsar/schema/definition.py | 19 ++++++++---
pulsar-client-cpp/python/schema_test.py | 38 ++++++++++++++++++++++
2 files changed, 52 insertions(+), 5 deletions(-)
diff --git a/pulsar-client-cpp/python/pulsar/schema/definition.py b/pulsar-client-cpp/python/pulsar/schema/definition.py
index 9335176..fd778f3 100644
--- a/pulsar-client-cpp/python/pulsar/schema/definition.py
+++ b/pulsar-client-cpp/python/pulsar/schema/definition.py
@@ -60,6 +60,9 @@ class Record(with_metaclass(RecordMeta, object)):
# This field is used to set namespace for Avro Record schema.
_avro_namespace = None
+ # Generate a schema where fields are sorted alphabetically
+ _sorted_fields = False
+
def __init__(self, default=None, required_default=False, required=False, *args, **kwargs):
self._required_default = required_default
self._default = default
@@ -114,20 +117,26 @@ class Record(with_metaclass(RecordMeta, object)):
defined_names.add(namespace_name)
- schema = {'name': str(cls.__name__)}
+ schema = {
+ 'type': 'record',
+ 'name': str(cls.__name__)
+ }
if cls._avro_namespace is not None:
schema['namespace'] = cls._avro_namespace
- schema['type'] = 'record'
schema['fields'] = []
- for name in sorted(cls._fields.keys()):
+ if cls._sorted_fields:
+ fields = sorted(cls._fields.keys())
+ else:
+ fields = cls._fields.keys()
+ for name in fields:
field = cls._fields[name]
field_type = field.schema_info(defined_names) \
if field._required else ['null', field.schema_info(defined_names)]
schema['fields'].append({
'name': name,
- 'type': field_type,
- 'default': field.default()
+ 'default': field.default(),
+ 'type': field_type
}) if field.required_default() else schema['fields'].append({
'name': name,
'type': field_type,
diff --git a/pulsar-client-cpp/python/schema_test.py b/pulsar-client-cpp/python/schema_test.py
index 40497ad..7adbcbe 100755
--- a/pulsar-client-cpp/python/schema_test.py
+++ b/pulsar-client-cpp/python/schema_test.py
@@ -38,6 +38,7 @@ class SchemaTest(TestCase):
blue = 3
class Example(Record):
+ _sorted_fields = True
a = String()
b = Integer()
c = Array(String())
@@ -78,11 +79,13 @@ class SchemaTest(TestCase):
def test_complex(self):
class MySubRecord(Record):
+ _sorted_fields = True
x = Integer()
y = Long()
z = String()
class Example(Record):
+ _sorted_fields = True
a = String()
sub = MySubRecord # Test with class
sub2 = MySubRecord() # Test with instance
@@ -348,6 +351,34 @@ class SchemaTest(TestCase):
self.assertEqual(r2.__class__.__name__, 'Example')
self.assertEqual(r2, r)
+ def test_non_sorted_fields(self):
+ class T1(Record):
+ a = Integer()
+ b = Integer()
+ c = Double()
+ d = String()
+
+ class T2(Record):
+ b = Integer()
+ a = Integer()
+ d = String()
+ c = Double()
+
+ self.assertNotEqual(T1.schema()['fields'], T2.schema()['fields'])
+
+ def test_sorted_fields(self):
+ class T1(Record):
+ _sorted_fields = True
+ a = Integer()
+ b = Integer()
+
+ class T2(Record):
+ _sorted_fields = True
+ b = Integer()
+ a = Integer()
+
+ self.assertEqual(T1.schema()['fields'], T2.schema()['fields'])
+
def test_schema_version(self):
class Example(Record):
a = Integer()
@@ -691,6 +722,7 @@ class SchemaTest(TestCase):
def test_avro_required_default(self):
class MySubRecord(Record):
+ _sorted_fields = True
x = Integer()
y = Long()
z = String()
@@ -707,7 +739,9 @@ class SchemaTest(TestCase):
i = Map(String())
j = MySubRecord()
+
class ExampleRequiredDefault(Record):
+ _sorted_fields = True
a = Integer(required_default=True)
b = Boolean(required=True, required_default=True)
c = Long(required_default=True)
@@ -879,10 +913,12 @@ class SchemaTest(TestCase):
def test_serialize_schema_complex(self):
class NestedObj1(Record):
+ _sorted_fields = True
na1 = String()
nb1 = Double()
class NestedObj2(Record):
+ _sorted_fields = True
na2 = Integer()
nb2 = Boolean()
nc2 = NestedObj1()
@@ -892,6 +928,7 @@ class SchemaTest(TestCase):
class NestedObj4(Record):
_avro_namespace = 'xxx4'
+ _sorted_fields = True
na4 = String()
nb4 = Integer()
@@ -902,6 +939,7 @@ class SchemaTest(TestCase):
class ComplexRecord(Record):
_avro_namespace = 'xxx.xxx'
+ _sorted_fields = True
a = Integer()
b = Integer()
color = Color