You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pulsar.apache.org by pe...@apache.org on 2021/09/29 10:40:38 UTC

[pulsar] 08/08: [Python] Do not sort schema fields by default (#12232)

This is an automated email from the ASF dual-hosted git repository.

penghui pushed a commit to branch branch-2.8
in repository https://gitbox.apache.org/repos/asf/pulsar.git

commit 9ec4f909625036ea7bb67eac9fcda620418cd027
Author: Matteo Merli <mm...@apache.org>
AuthorDate: Wed Sep 29 04:16:08 2021 -0600

    [Python] Do not sort schema fields by default (#12232)
    
    ### Motivation
    
    In an Avro schema, the order of fields is significant during validation. Sorting the fields therefore generates an unexpected schema for a Python producer/consumer and makes it non-interoperable with Java and other clients.
    
    (cherry picked from commit 2f3ad4d369e8a2ae558c6f9ee85f0b407e5e78b2)
---
 .../python/pulsar/schema/definition.py             | 19 ++++++++---
 pulsar-client-cpp/python/schema_test.py            | 38 ++++++++++++++++++++++
 2 files changed, 52 insertions(+), 5 deletions(-)

diff --git a/pulsar-client-cpp/python/pulsar/schema/definition.py b/pulsar-client-cpp/python/pulsar/schema/definition.py
index 9335176..fd778f3 100644
--- a/pulsar-client-cpp/python/pulsar/schema/definition.py
+++ b/pulsar-client-cpp/python/pulsar/schema/definition.py
@@ -60,6 +60,9 @@ class Record(with_metaclass(RecordMeta, object)):
     # This field is used to set namespace for Avro Record schema.
     _avro_namespace = None
 
+    # Generate a schema where fields are sorted alphabetically
+    _sorted_fields = False
+
     def __init__(self, default=None, required_default=False, required=False, *args, **kwargs):
         self._required_default = required_default
         self._default = default
@@ -114,20 +117,26 @@ class Record(with_metaclass(RecordMeta, object)):
 
         defined_names.add(namespace_name)
 
-        schema = {'name': str(cls.__name__)}
+        schema = {
+            'type': 'record',
+            'name': str(cls.__name__)
+        }
         if cls._avro_namespace is not None:
             schema['namespace'] = cls._avro_namespace
-        schema['type'] = 'record'
         schema['fields'] = []
 
-        for name in sorted(cls._fields.keys()):
+        if cls._sorted_fields:
+            fields = sorted(cls._fields.keys())
+        else:
+            fields = cls._fields.keys()
+        for name in fields:
             field = cls._fields[name]
             field_type = field.schema_info(defined_names) \
                 if field._required else ['null', field.schema_info(defined_names)]
             schema['fields'].append({
                 'name': name,
-                'type': field_type,
-                'default': field.default()
+                'default': field.default(),
+                'type': field_type
             }) if field.required_default() else schema['fields'].append({
                 'name': name,
                 'type': field_type,
diff --git a/pulsar-client-cpp/python/schema_test.py b/pulsar-client-cpp/python/schema_test.py
index 40497ad..7adbcbe 100755
--- a/pulsar-client-cpp/python/schema_test.py
+++ b/pulsar-client-cpp/python/schema_test.py
@@ -38,6 +38,7 @@ class SchemaTest(TestCase):
             blue = 3
 
         class Example(Record):
+            _sorted_fields = True
             a = String()
             b = Integer()
             c = Array(String())
@@ -78,11 +79,13 @@ class SchemaTest(TestCase):
 
     def test_complex(self):
         class MySubRecord(Record):
+            _sorted_fields = True
             x = Integer()
             y = Long()
             z = String()
 
         class Example(Record):
+            _sorted_fields = True
             a = String()
             sub = MySubRecord     # Test with class
             sub2 = MySubRecord()  # Test with instance
@@ -348,6 +351,34 @@ class SchemaTest(TestCase):
         self.assertEqual(r2.__class__.__name__, 'Example')
         self.assertEqual(r2, r)
 
+    def test_non_sorted_fields(self):
+        class T1(Record):
+            a = Integer()
+            b = Integer()
+            c = Double()
+            d = String()
+
+        class T2(Record):
+            b = Integer()
+            a = Integer()
+            d = String()
+            c = Double()
+
+        self.assertNotEqual(T1.schema()['fields'], T2.schema()['fields'])
+
+    def test_sorted_fields(self):
+        class T1(Record):
+            _sorted_fields = True
+            a = Integer()
+            b = Integer()
+
+        class T2(Record):
+            _sorted_fields = True
+            b = Integer()
+            a = Integer()
+
+        self.assertEqual(T1.schema()['fields'], T2.schema()['fields'])
+
     def test_schema_version(self):
         class Example(Record):
             a = Integer()
@@ -691,6 +722,7 @@ class SchemaTest(TestCase):
 
     def test_avro_required_default(self):
         class MySubRecord(Record):
+            _sorted_fields = True
             x = Integer()
             y = Long()
             z = String()
@@ -707,7 +739,9 @@ class SchemaTest(TestCase):
             i = Map(String())
             j = MySubRecord()
 
+
         class ExampleRequiredDefault(Record):
+            _sorted_fields = True
             a = Integer(required_default=True)
             b = Boolean(required=True, required_default=True)
             c = Long(required_default=True)
@@ -879,10 +913,12 @@ class SchemaTest(TestCase):
 
     def test_serialize_schema_complex(self):
         class NestedObj1(Record):
+            _sorted_fields = True
             na1 = String()
             nb1 = Double()
 
         class NestedObj2(Record):
+            _sorted_fields = True
             na2 = Integer()
             nb2 = Boolean()
             nc2 = NestedObj1()
@@ -892,6 +928,7 @@ class SchemaTest(TestCase):
 
         class NestedObj4(Record):
             _avro_namespace = 'xxx4'
+            _sorted_fields = True
             na4 = String()
             nb4 = Integer()
 
@@ -902,6 +939,7 @@ class SchemaTest(TestCase):
 
         class ComplexRecord(Record):
             _avro_namespace = 'xxx.xxx'
+            _sorted_fields = True
             a = Integer()
             b = Integer()
             color = Color