You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@avro.apache.org by ko...@apache.org on 2021/06/10 02:10:04 UTC
[avro] branch master updated: AVRO-2921: Type Hints for avro.schema (#1252)
This is an automated email from the ASF dual-hosted git repository.
kojiromike pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/avro.git
The following commit(s) were added to refs/heads/master by this push:
new 6dc6055 AVRO-2921: Type Hints for avro.schema (#1252)
6dc6055 is described below
commit 6dc6055e04188919886bd1b4ed95a7c6f4fac23e
Author: Michael A. Smith <mi...@smith-li.com>
AuthorDate: Wed Jun 9 22:09:54 2021 -0400
AVRO-2921: Type Hints for avro.schema (#1252)
---
lang/py/avro/compatibility.py | 12 +--
lang/py/avro/schema.py | 192 ++++++++++++++++++------------------------
2 files changed, 89 insertions(+), 115 deletions(-)
diff --git a/lang/py/avro/compatibility.py b/lang/py/avro/compatibility.py
index 1200773..0ce084a 100644
--- a/lang/py/avro/compatibility.py
+++ b/lang/py/avro/compatibility.py
@@ -18,7 +18,7 @@
# limitations under the License.
from copy import copy
from enum import Enum
-from typing import List, Optional, Set, cast
+from typing import Container, Iterable, List, Optional, Set, cast
from avro.errors import AvroRuntimeException
from avro.schema import (
@@ -374,16 +374,18 @@ def incompatible(incompat_type: SchemaIncompatibilityType, message: str, locatio
def schema_name_equals(reader: NamedSchema, writer: NamedSchema) -> bool:
- if reader.name == writer.name:
- return True
- return writer.fullname in reader.props.get("aliases", [])
+ aliases = reader.props.get("aliases")
+ return (reader.name == writer.name) or (isinstance(aliases, Container) and writer.fullname in aliases)
def lookup_writer_field(writer_schema: RecordSchema, reader_field: Field) -> Optional[Field]:
direct = writer_schema.fields_dict.get(reader_field.name)
if direct:
return cast(Field, direct)
- for alias in reader_field.props.get("aliases", []):
+ aliases = reader_field.props.get("aliases")
+ if not isinstance(aliases, Iterable):
+ return None
+ for alias in aliases:
writer_field = writer_schema.fields_dict.get(alias)
if writer_field is not None:
return cast(Field, writer_field)
diff --git a/lang/py/avro/schema.py b/lang/py/avro/schema.py
index ddd02f9..9cd7551 100644
--- a/lang/py/avro/schema.py
+++ b/lang/py/avro/schema.py
@@ -47,6 +47,7 @@ import math
import sys
import uuid
import warnings
+from typing import MutableMapping, Optional, Sequence, cast
import avro.constants
import avro.errors
@@ -99,7 +100,7 @@ LONG_MIN_VALUE = -(1 << 63)
LONG_MAX_VALUE = (1 << 63) - 1
-def _is_timezone_aware_datetime(dt):
+def _is_timezone_aware_datetime(dt: datetime.datetime) -> bool:
return dt.tzinfo is not None and dt.tzinfo.utcoffset(dt) is not None
@@ -108,32 +109,62 @@ def _is_timezone_aware_datetime(dt):
#
+class PropertiesMixin:
+ """A mixin that provides basic properties."""
+
+ _reserved_properties: Sequence[str] = ()
+ _props: Optional[MutableMapping[str, object]] = None
+
+ @property
+ def props(self) -> MutableMapping[str, object]:
+ if self._props is None:
+ self._props = {}
+ return self._props
+
+ def get_prop(self, key: str) -> Optional[object]:
+ return self.props.get(key)
+
+ def set_prop(self, key: str, value: object) -> None:
+ self.props[key] = value
+
+ def check_props(self, other: "PropertiesMixin", props: Sequence[str]) -> bool:
+ """Check that the given props are identical in two schemas.
+
+ @arg other: The other schema to check
+ @arg props: An iterable of properties to check
+ @return bool: True if all the properties match
+ """
+ return all(getattr(self, prop) == getattr(other, prop) for prop in props)
+
+ @property
+ def other_props(self) -> MutableMapping[str, object]:
+ """Dictionary of non-reserved properties"""
+ return get_other_props(self.props, self._reserved_properties)
+
+
class EqualByJsonMixin:
- """Equal if the json serializations are equal."""
+ """A mixin that defines equality as equal if the json serializations are equal."""
- def __eq__(self, that):
+ def __eq__(self, that: object) -> bool:
try:
that_str = json.loads(str(that))
except json.decoder.JSONDecodeError:
return False
- return json.loads(str(self)) == that_str
+ return cast(bool, json.loads(str(self)) == that_str)
-class EqualByPropsMixin:
- """Equal if the props are equal."""
+class EqualByPropsMixin(PropertiesMixin):
+ """A mixin that defines equality as equal if the props are equal."""
- def __eq__(self, that):
- try:
- return self.props == that.props
- except AttributeError:
- return False
+ def __eq__(self, that: object) -> bool:
+ return hasattr(that, "props") and self.props == getattr(that, "props")
-class CanonicalPropertiesMixin:
+class CanonicalPropertiesMixin(PropertiesMixin):
"""A Mixin that provides canonical properties to Schema and Field types."""
@property
- def canonical_properties(self):
+ def canonical_properties(self) -> MutableMapping[str, object]:
props = self.props
return collections.OrderedDict((key, props[key]) for key in CANONICAL_FIELD_ORDER if key in props)
@@ -141,62 +172,30 @@ class CanonicalPropertiesMixin:
class Schema(abc.ABC, CanonicalPropertiesMixin):
"""Base class for all Schema classes."""
- _props = None
-
- def __init__(self, type, other_props=None):
- # Ensure valid ctor args
- if not isinstance(type, str):
- fail_msg = "Schema type must be a string."
- raise avro.errors.SchemaParseException(fail_msg)
- elif type not in VALID_TYPES:
- fail_msg = f"{type} is not a valid type."
- raise avro.errors.SchemaParseException(fail_msg)
-
- # add members
- if self._props is None:
- self._props = {}
- self.set_prop("type", type)
- self.type = type
- self._props.update(other_props or {})
-
- @property
- def props(self):
- return self._props
-
- @property
- def other_props(self):
- """Dictionary of non-reserved properties"""
- return get_other_props(self.props, SCHEMA_RESERVED_PROPS)
+ _reserved_properties = SCHEMA_RESERVED_PROPS
- def check_props(self, other, props):
- """Check that the given props are identical in two schemas.
-
- @arg other: The other schema to check
- @arg props: An iterable of properties to check
- @return bool: True if all the properties match
- """
- return all(getattr(self, prop) == getattr(other, prop) for prop in props)
+ def __init__(self, type_: str, other_props: Optional[MutableMapping[str, object]] = None) -> None:
+ if not isinstance(type_, str):
+ raise avro.errors.SchemaParseException("Schema type must be a string.")
+ if type_ not in VALID_TYPES:
+ raise avro.errors.SchemaParseException(f"{type_} is not a valid type.")
+ self.set_prop("type", type_)
+ self.type = type_
+ self.props.update(other_props or {})
@abc.abstractmethod
- def match(self, writer):
+ def match(self, writer: "Schema") -> bool:
"""Return True if the current schema (as reader) matches the writer schema.
@arg writer: the writer schema to match against.
@return bool
"""
- # utility functions to manipulate properties dict
- def get_prop(self, key):
- return self._props.get(key)
-
- def set_prop(self, key, value):
- self._props[key] = value
-
- def __str__(self):
+ def __str__(self) -> str:
return json.dumps(self.to_json())
@abc.abstractmethod
- def to_json(self, names):
+ def to_json(self, names: Optional[Names] = None) -> object:
"""
Converts the schema object into its AVRO specification representation.
@@ -206,7 +205,7 @@ class Schema(abc.ABC, CanonicalPropertiesMixin):
"""
@abc.abstractmethod
- def validate(self, datum):
+ def validate(self, datum: object) -> Optional["Schema"]:
"""Returns the appropriate schema object if datum is valid for that schema, else None.
To be implemented in subclasses.
@@ -220,7 +219,7 @@ class Schema(abc.ABC, CanonicalPropertiesMixin):
"""
@abc.abstractmethod
- def to_canonical_json(self, names=None):
+ def to_canonical_json(self, names: Optional[Names] = None) -> object:
"""
Converts the schema object into its Canonical Form
http://avro.apache.org/docs/current/spec.html#Parsing+Canonical+Form+for+Schemas
@@ -229,12 +228,12 @@ class Schema(abc.ABC, CanonicalPropertiesMixin):
"""
@property
- def canonical_form(self):
+ def canonical_form(self) -> str:
# The separators eliminate whitespace around commas and colons.
return json.dumps(self.to_canonical_json(), separators=(",", ":"))
@abc.abstractmethod
- def __eq__(self, that):
+ def __eq__(self, that: object) -> bool:
"""
Determines how two schema are compared.
Consider the mixins EqualByPropsMixin and EqualByJsonMixin
@@ -244,22 +243,16 @@ class Schema(abc.ABC, CanonicalPropertiesMixin):
class NamedSchema(Schema):
"""Named Schemas specified in NAMED_TYPES."""
- def __init__(self, type, name, namespace=None, names=None, other_props=None):
+ def __init__(self, type_, name, namespace=None, names=None, other_props=None):
# Ensure valid ctor args
if not name:
- fail_msg = "Named Schemas must have a non-empty name."
- raise avro.errors.SchemaParseException(fail_msg)
- elif not isinstance(name, str):
- fail_msg = "The name property must be a string."
- raise avro.errors.SchemaParseException(fail_msg)
- elif namespace is not None and not isinstance(namespace, str):
- fail_msg = "The namespace property must be a string."
- raise avro.errors.SchemaParseException(fail_msg)
-
+ raise avro.errors.SchemaParseException("Named Schemas must have a non-empty name.")
+ if not isinstance(name, str):
+ raise avro.errors.SchemaParseException("The name property must be a string.")
+ if namespace is not None and not isinstance(namespace, str):
+ raise avro.errors.SchemaParseException("The namespace property must be a string.")
namespace = namespace or None # Empty string -> None
-
- # Call parent ctor
- Schema.__init__(self, type, other_props)
+ super().__init__(type_, other_props)
# Add class members
new_name = names.add_name(name, namespace, self)
@@ -314,9 +307,11 @@ class DecimalLogicalSchema(LogicalSchema):
class Field(CanonicalPropertiesMixin, EqualByJsonMixin):
+ _reserved_properties: Sequence[str] = FIELD_RESERVED_PROPS
+
def __init__(
self,
- type,
+ type_,
name,
has_default,
default=None,
@@ -325,30 +320,22 @@ class Field(CanonicalPropertiesMixin, EqualByJsonMixin):
doc=None,
other_props=None,
):
- # Ensure valid ctor args
if not name:
- fail_msg = "Fields must have a non-empty name."
- raise avro.errors.SchemaParseException(fail_msg)
- elif not isinstance(name, str):
- fail_msg = "The name property must be a string."
- raise avro.errors.SchemaParseException(fail_msg)
- elif order is not None and order not in VALID_FIELD_SORT_ORDERS:
- fail_msg = f"The order property {order} is not valid."
- raise avro.errors.SchemaParseException(fail_msg)
-
- # add members
- self._props = {}
+ raise avro.errors.SchemaParseException("Fields must have a non-empty name.")
+ if not isinstance(name, str):
+ raise avro.errors.SchemaParseException("The name property must be a string.")
+ if order is not None and order not in VALID_FIELD_SORT_ORDERS:
+ raise avro.errors.SchemaParseException(f"The order property {order} is not valid.")
self._has_default = has_default
- self._props.update(other_props or {})
+ self.props.update(other_props or {})
- if isinstance(type, str) and names is not None and names.has_name(type, None):
- type_schema = names.get_name(type, None)
+ if isinstance(type_, str) and names is not None and names.has_name(type_, None):
+ type_schema = names.get_name(type_, None)
else:
try:
- type_schema = make_avsc_object(type, names)
+ type_schema = make_avsc_object(type_, names)
except Exception as e:
- fail_msg = f'Type property "{type}" not a valid Avro schema: {e}'
- raise avro.errors.SchemaParseException(fail_msg)
+ raise avro.errors.SchemaParseException(f'Type property "{type_}" not a valid Avro schema: {e}')
self.set_prop("type", type_schema)
self.set_prop("name", name)
self.type = type_schema
@@ -366,20 +353,6 @@ class Field(CanonicalPropertiesMixin, EqualByJsonMixin):
has_default = property(lambda self: self._has_default)
order = property(lambda self: self.get_prop("order"))
doc = property(lambda self: self.get_prop("doc"))
- props = property(lambda self: self._props)
-
- # Read-only property dict. Non-reserved properties
- other_props = property(
- lambda self: get_other_props(self._props, FIELD_RESERVED_PROPS),
- doc="dictionary of non-reserved properties",
- )
-
- # utility functions to manipulate properties dict
- def get_prop(self, key):
- return self._props.get(key)
-
- def set_prop(self, key, value):
- self._props[key] = value
def __str__(self):
return json.dumps(self.to_json())
@@ -1080,13 +1053,12 @@ class UUIDSchema(LogicalSchema, PrimitiveSchema):
#
-def get_other_props(all_props, reserved_props):
+def get_other_props(all_props: MutableMapping[str, object], reserved_props: Sequence[str]) -> MutableMapping[str, object]:
"""
Retrieve the non-reserved properties from a dictionary of properties
@args reserved_props: The set of reserved properties to exclude
"""
- if callable(getattr(all_props, "items", None)):
- return {k: v for k, v in all_props.items() if k not in reserved_props}
+ return {k: v for k, v in all_props.items() if k not in reserved_props}
def make_bytes_decimal_schema(other_props):