You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pulsar.apache.org by pe...@apache.org on 2021/08/09 09:36:23 UTC

[pulsar] branch master updated: [Python Schema] Fix redefined Record or Enum in Python schema (#11595)

This is an automated email from the ASF dual-hosted git repository.

penghui pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pulsar.git


The following commit(s) were added to refs/heads/master by this push:
     new d66cdbd  [Python Schema] Fix redefined Record or Enum in Python schema (#11595)
d66cdbd is described below

commit d66cdbd01eead279f83dda9a30626ff647d6e8f4
Author: ran <ga...@126.com>
AuthorDate: Mon Aug 9 17:35:46 2021 +0800

    [Python Schema] Fix redefined Record or Enum in Python schema (#11595)
    
    Fixes #11533
    
    ### Motivation
    
    As described in issue #11533, when the same `Record` or `Enum` is used more than once within a `Record`, the generated schema info currently repeats the full definition instead of reusing the already-defined name, which does not match the Avro schema format.
    
    ### Modifications
    
    Add a new method `schema_info(self, defined_names)` to `Record`, `Array`, `Map`, and `Enum`. Every defined name is added to the `defined_names` parameter; when an already-defined `Record` or `Enum` is used again, the schema info uses the name of that `Record` or `Enum` as the type.
---
 .../python/pulsar/schema/definition.py             | 40 +++++++++++++--
 pulsar-client-cpp/python/schema_test.py            | 58 ++++++++++++++++++----
 2 files changed, 83 insertions(+), 15 deletions(-)

diff --git a/pulsar-client-cpp/python/pulsar/schema/definition.py b/pulsar-client-cpp/python/pulsar/schema/definition.py
index 41c094d..6db71d8 100644
--- a/pulsar-client-cpp/python/pulsar/schema/definition.py
+++ b/pulsar-client-cpp/python/pulsar/schema/definition.py
@@ -97,15 +97,23 @@ class Record(with_metaclass(RecordMeta, object)):
 
     @classmethod
     def schema(cls):
+        return cls.schema_info(set())
+
+    @classmethod
+    def schema_info(cls, defined_names):
+        if cls.__name__ in defined_names:
+            return cls.__name__
+
+        defined_names.add(cls.__name__)
         schema = {
             'name': str(cls.__name__),
             'type': 'record',
             'fields': []
         }
-
         for name in sorted(cls._fields.keys()):
             field = cls._fields[name]
-            field_type = field.schema() if field._required else ['null', field.schema()]
+            field_type = field.schema_info(defined_names) \
+                if field._required else ['null', field.schema_info(defined_names)]
             schema['fields'].append({
                 'name': name,
                 'type': field_type,
@@ -198,6 +206,9 @@ class Field(object):
         # For primitive types, the schema would just be the type itself
         return self.type()
 
+    def schema_info(self, defined_names):
+        return self.type()
+
     def default(self):
         return self._default
 
@@ -347,6 +358,9 @@ class _Enum(Field):
         return self.enum_type
 
     def validate_type(self, name, val):
+        if val is None:
+            return None
+
         if type(val) is str:
             # The enum was passed as a string, we need to check it against the possible values
             if val in self.enum_type.__members__:
@@ -367,6 +381,12 @@ class _Enum(Field):
             return val
 
     def schema(self):
+        return self.schema_info(set())
+
+    def schema_info(self, defined_names):
+        if self.enum_type.__name__ in defined_names:
+            return self.enum_type.__name__
+        defined_names.add(self.enum_type.__name__)
         return {
             'type': self.type(),
             'name': self.enum_type.__name__,
@@ -393,6 +413,9 @@ class Array(Field):
         return list
 
     def validate_type(self, name, val):
+        if val is None:
+            return None
+
         super(Array, self).validate_type(name, val)
 
         for x in val:
@@ -402,9 +425,12 @@ class Array(Field):
         return val
 
     def schema(self):
+        return self.schema_info(set())
+
+    def schema_info(self, defined_names):
         return {
             'type': self.type(),
-            'items': self.array_type.schema() if isinstance(self.array_type, (Array, Map, Record))
+            'items': self.array_type.schema_info(defined_names) if isinstance(self.array_type, (Array, Map, Record))
                 else self.array_type.type()
         }
 
@@ -428,6 +454,9 @@ class Map(Field):
         return dict
 
     def validate_type(self, name, val):
+        if val is None:
+            return None
+
         super(Map, self).validate_type(name, val)
 
         for k, v in val.items():
@@ -440,9 +469,12 @@ class Map(Field):
         return val
 
     def schema(self):
+        return self.schema_info(set())
+
+    def schema_info(self, defined_names):
         return {
             'type': self.type(),
-            'values': self.value_type.schema() if isinstance(self.value_type, (Array, Map, Record))
+            'values': self.value_type.schema_info(defined_names) if isinstance(self.value_type, (Array, Map, Record))
                 else self.value_type.type()
         }
 
diff --git a/pulsar-client-cpp/python/schema_test.py b/pulsar-client-cpp/python/schema_test.py
index 7ec0c9a..35d9316 100755
--- a/pulsar-client-cpp/python/schema_test.py
+++ b/pulsar-client-cpp/python/schema_test.py
@@ -19,6 +19,8 @@
 #
 
 from unittest import TestCase, main
+
+import fastavro
 import pulsar
 from pulsar.schema import *
 from enum import Enum
@@ -46,6 +48,7 @@ class SchemaTest(TestCase):
             h = Bytes()
             i = Map(String())
 
+        fastavro.parse_schema(Example.schema())
         self.assertEqual(Example.schema(), {
             "name": "Example",
             "type": "record",
@@ -84,6 +87,7 @@ class SchemaTest(TestCase):
             sub = MySubRecord     # Test with class
             sub2 = MySubRecord()  # Test with instance
 
+        fastavro.parse_schema(Example.schema())
         self.assertEqual(Example.schema(), {
             "name": "Example",
             "type": "record",
@@ -99,13 +103,7 @@ class SchemaTest(TestCase):
                  }]
                  },
                  {"name": "sub2",
-                  "type": ["null", {
-                     "name": "MySubRecord",
-                     "type": "record",
-                     "fields": [{"name": "x", "type": ["null", "int"]},
-                                {"name": "y", "type": ["null", "long"]},
-                                {"name": "z", "type": ["null", "string"]}]
-                 }]
+                  "type": ["null", 'MySubRecord']
                  }
             ]
         })
@@ -896,12 +894,22 @@ class SchemaTest(TestCase):
             na4 = String()
             nb4 = Integer()
 
+        class Color(Enum):
+            red = 1
+            green = 2
+            blue = 3
+
         class ComplexRecord(Record):
             a = Integer()
             b = Integer()
+            color = Color
+            color2 = Color
             nested = NestedObj2()
+            nested2 = NestedObj2()
             mapNested = Map(NestedObj3())
+            mapNested2 = Map(NestedObj3())
             arrayNested = Array(NestedObj4())
+            arrayNested2 = Array(NestedObj4())
 
         print('complex schema: ', ComplexRecord.schema())
         self.assertEqual(ComplexRecord.schema(), {
@@ -909,18 +917,23 @@ class SchemaTest(TestCase):
             "type": "record",
             "fields": [
                 {"name": "a", "type": ["null", "int"]},
-                {'name': 'arrayNested', 'type': ['null',
-                    {'type': 'array', 'items': {'name': 'NestedObj4', 'type': 'record', 'fields': [
+                {'name': 'arrayNested', 'type': ['null', {'type': 'array', 'items':
+                    {'name': 'NestedObj4', 'type': 'record', 'fields': [
                         {'name': 'na4', 'type': ['null', 'string']},
                         {'name': 'nb4', 'type': ['null', 'int']}
                     ]}}
                 ]},
+                {'name': 'arrayNested2', 'type': ['null', {'type': 'array', 'items': 'NestedObj4'}]},
                 {"name": "b", "type": ["null", "int"]},
+                {'name': 'color', 'type': ['null', {'type': 'enum', 'name': 'Color', 'symbols': [
+                    'red', 'green', 'blue']}]},
+                {'name': 'color2', 'type': ['null', 'Color']},
                 {'name': 'mapNested', 'type': ['null', {'type': 'map', 'values':
                     {'name': 'NestedObj3', 'type': 'record', 'fields': [
                         {'name': 'na3', 'type': ['null', 'int']}
                     ]}}
                 ]},
+                {'name': 'mapNested2', 'type': ['null', {'type': 'map', 'values': 'NestedObj3'}]},
                 {"name": "nested", "type": ['null', {'name': 'NestedObj2', 'type': 'record', 'fields': [
                     {'name': 'na2', 'type': ['null', 'int']},
                     {'name': 'nb2', 'type': ['null', 'boolean']},
@@ -928,7 +941,8 @@ class SchemaTest(TestCase):
                         {'name': 'na1', 'type': ['null', 'string']},
                         {'name': 'nb1', 'type': ['null', 'double']}
                     ]}]}
-                ]}]}
+                ]}]},
+                {"name": "nested2", "type": ['null', 'NestedObj2']}
             ]
         })
 
@@ -939,13 +953,22 @@ class SchemaTest(TestCase):
 
             nested_obj1 = NestedObj1(na1='na1 value', nb1=20.5)
             nested_obj2 = NestedObj2(na2=22, nb2=True, nc2=nested_obj1)
-            r = ComplexRecord(a=1, b=2, nested=nested_obj2, mapNested={
+            r = ComplexRecord(a=1, b=2, color=Color.red, color2=Color.blue,
+                              nested=nested_obj2, nested2=nested_obj2,
+            mapNested={
                 'a': NestedObj3(na3=1),
                 'b': NestedObj3(na3=2),
                 'c': NestedObj3(na3=3)
+            }, mapNested2={
+                'd': NestedObj3(na3=4),
+                'e': NestedObj3(na3=5),
+                'f': NestedObj3(na3=6)
             }, arrayNested=[
                 NestedObj4(na4='value na4 1', nb4=100),
                 NestedObj4(na4='value na4 2', nb4=200)
+            ], arrayNested2=[
+                NestedObj4(na4='value na4 3', nb4=300),
+                NestedObj4(na4='value na4 4', nb4=400)
             ])
             data_encode = data_schema.encode(r)
 
@@ -954,17 +977,30 @@ class SchemaTest(TestCase):
             self.assertEqual(data_decode, r)
             self.assertEqual(data_decode.a, 1)
             self.assertEqual(data_decode.b, 2)
+            self.assertEqual(data_decode.color, Color.red)
+            self.assertEqual(data_decode.color2, Color.blue)
             self.assertEqual(data_decode.nested.na2, 22)
             self.assertEqual(data_decode.nested.nb2, True)
             self.assertEqual(data_decode.nested.nc2.na1, 'na1 value')
             self.assertEqual(data_decode.nested.nc2.nb1, 20.5)
+            self.assertEqual(data_decode.nested2.na2, 22)
+            self.assertEqual(data_decode.nested2.nb2, True)
+            self.assertEqual(data_decode.nested2.nc2.na1, 'na1 value')
+            self.assertEqual(data_decode.nested2.nc2.nb1, 20.5)
             self.assertEqual(data_decode.mapNested['a'].na3, 1)
             self.assertEqual(data_decode.mapNested['b'].na3, 2)
             self.assertEqual(data_decode.mapNested['c'].na3, 3)
+            self.assertEqual(data_decode.mapNested2['d'].na3, 4)
+            self.assertEqual(data_decode.mapNested2['e'].na3, 5)
+            self.assertEqual(data_decode.mapNested2['f'].na3, 6)
             self.assertEqual(data_decode.arrayNested[0].na4, 'value na4 1')
             self.assertEqual(data_decode.arrayNested[0].nb4, 100)
             self.assertEqual(data_decode.arrayNested[1].na4, 'value na4 2')
             self.assertEqual(data_decode.arrayNested[1].nb4, 200)
+            self.assertEqual(data_decode.arrayNested2[0].na4, 'value na4 3')
+            self.assertEqual(data_decode.arrayNested2[0].nb4, 300)
+            self.assertEqual(data_decode.arrayNested2[1].na4, 'value na4 4')
+            self.assertEqual(data_decode.arrayNested2[1].nb4, 400)
             print('Encode and decode complex schema finish. schema_type: ', schema_type)
 
         encode_and_decode('avro')