You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@avro.apache.org by ko...@apache.org on 2021/06/09 00:59:36 UTC
[avro] branch master updated: AVRO-2921: Add Type Hints for avro.name (#1251)
This is an automated email from the ASF dual-hosted git repository.
kojiromike pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/avro.git
The following commit(s) were added to refs/heads/master by this push:
new 95edb5b AVRO-2921: Add Type Hints for avro.name (#1251)
95edb5b is described below
commit 95edb5b6506015093dad197377bd021fa94776e2
Author: Michael A. Smith <mi...@smith-li.com>
AuthorDate: Tue Jun 8 20:58:01 2021 -0400
AVRO-2921: Add Type Hints for avro.name (#1251)
Refactor name registry into avro.name and add type hints.
---
lang/py/avro/constants.py | 20 ++++++
lang/py/avro/name.py | 165 ++++++++++++++++++++++++++++++++++++++++++++++
lang/py/avro/schema.py | 159 +-------------------------------------------
3 files changed, 187 insertions(+), 157 deletions(-)
diff --git a/lang/py/avro/constants.py b/lang/py/avro/constants.py
index ec6dc61..6bdf5a9 100644
--- a/lang/py/avro/constants.py
+++ b/lang/py/avro/constants.py
@@ -37,3 +37,23 @@ SUPPORTED_LOGICAL_TYPE = [
TIME_MILLIS,
UUID,
]
+
+PRIMITIVE_TYPES = (
+ "null",
+ "boolean",
+ "string",
+ "bytes",
+ "int",
+ "long",
+ "float",
+ "double",
+)
+
+NAMED_TYPES = (
+ "fixed",
+ "enum",
+ "record",
+ "error",
+)
+
+VALID_TYPES = PRIMITIVE_TYPES + NAMED_TYPES + ("array", "map", "union", "request", "error_union")
diff --git a/lang/py/avro/name.py b/lang/py/avro/name.py
new file mode 100644
index 0000000..d5f6ada
--- /dev/null
+++ b/lang/py/avro/name.py
@@ -0,0 +1,165 @@
+#!/usr/bin/env python3
+
+##
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Contains the Name classes."""
+from typing import TYPE_CHECKING, Dict, Optional
+
+from avro.constants import VALID_TYPES
+
+if TYPE_CHECKING:
+ from avro.schema import NamedSchema
+
+import re
+import warnings
+
+import avro.errors
+
+# The name portion of a fullname, record field names, and enum symbols must:
+# start with [A-Za-z_]
+# subsequently contain only [A-Za-z0-9_]
+_BASE_NAME_PATTERN = re.compile(r"(?:^|\.)[A-Za-z_][A-Za-z0-9_]*$")
+
+
+def validate_basename(basename: str) -> None:
+ """Raise InvalidName if the given basename is not a valid name."""
+ if not _BASE_NAME_PATTERN.search(basename):
+ raise avro.errors.InvalidName(f"{basename!s} is not a valid Avro name because it does not match the pattern {_BASE_NAME_PATTERN.pattern!s}")
+
+
+def _validate_fullname(fullname: str) -> None:
+ for name in fullname.split("."):
+ validate_basename(name)
+
+
+class Name:
+ """Class to describe Avro name."""
+
+ _full: Optional[str] = None
+
+ def __init__(self, name_attr: Optional[str] = None, space_attr: Optional[str] = None, default_space: Optional[str] = None) -> None:
+ """The fullname is determined in one of the following ways:
+
+ - A name and namespace are both specified. For example, one might use "name": "X",
+ "namespace": "org.foo" to indicate the fullname org.foo.X.
+ - A fullname is specified. If the name specified contains a dot,
+ then it is assumed to be a fullname, and any namespace also specified is ignored.
+ For example, use "name": "org.foo.X" to indicate the fullname org.foo.X.
+ - A name only is specified, i.e., a name that contains no dots.
+ In this case the namespace is taken from the most tightly enclosing schema or protocol.
+ For example, if "name": "X" is specified, and this occurs within a field of
+ the record definition of org.foo.Y, then the fullname is org.foo.X.
+ If there is no enclosing namespace then the null namespace is used.
+
+ References to previously defined names are as in the latter two cases above:
+ if they contain a dot they are a fullname,
+ if they do not contain a dot, the namespace is the namespace of the enclosing definition.
+
+ @arg name_attr: name value read in schema or None.
+ @arg space_attr: namespace value read in schema or None. The empty string may be used as a namespace
+ to indicate the null namespace.
+ @arg default_space: the current default space or None.
+ """
+ if name_attr is None:
+ return
+ if name_attr == "":
+ raise avro.errors.SchemaParseException("Name must not be the empty string.")
+ # The empty string may be used as a namespace to indicate the null namespace.
+ self._full = (
+ name_attr
+ if "." in name_attr or space_attr == "" or not (space_attr or default_space)
+ else f"{space_attr or default_space!s}.{name_attr!s}"
+ )
+ _validate_fullname(self._full)
+
+ def __eq__(self, other: object) -> bool:
+ """Equality of names is defined on the fullname and is case-sensitive."""
+ return hasattr(other, "fullname") and self.fullname == getattr(other, "fullname")
+
+ @property
+ def fullname(self) -> Optional[str]:
+ return self._full
+
+ @property
+ def space(self) -> Optional[str]:
+ """Back out a namespace from full name."""
+ full = self._full or ""
+ return full.rsplit(".", 1)[0] if "." in full else None
+
+ def get_space(self) -> Optional[str]:
+ warnings.warn("Name.get_space() is deprecated in favor of Name.space")
+ return self.space
+
+
+class Names:
+ """Track name set and default namespace during parsing."""
+
+ names: Dict[str, "NamedSchema"]
+
+ def __init__(self, default_namespace: Optional[str] = None) -> None:
+ self.names = {}
+ self.default_namespace = default_namespace
+
+ def has_name(self, name_attr: str, space_attr: Optional[str] = None) -> bool:
+ test = Name(name_attr, space_attr, self.default_namespace).fullname
+ return test in self.names
+
+ def get_name(self, name_attr: str, space_attr: Optional[str] = None) -> Optional["NamedSchema"]:
+ test = Name(name_attr, space_attr, self.default_namespace).fullname
+ return None if test is None else self.names.get(test)
+
+ def prune_namespace(self, properties: Dict[str, object]) -> Dict[str, object]:
+ """given a properties, return properties with namespace removed if
+ it matches the own default namespace"""
+ if self.default_namespace is None:
+ # I have no default -- no change
+ return properties
+
+ if "namespace" not in properties:
+ # he has no namespace - no change
+ return properties
+
+ if properties["namespace"] != self.default_namespace:
+ # we're different - leave his stuff alone
+ return properties
+
+ # we each have a namespace and it's redundant. delete his.
+ prunable = properties.copy()
+ del prunable["namespace"]
+ return prunable
+
+ def add_name(self, name_attr: str, space_attr: str, new_schema: "NamedSchema") -> Name:
+ """
+ Add a new schema object to the name set.
+
+ @arg name_attr: name value read in schema
+ @arg space_attr: namespace value read in schema.
+
+ @return: the Name that was just added.
+ """
+ to_add = Name(name_attr, space_attr, self.default_namespace)
+
+ if to_add.fullname in VALID_TYPES:
+ raise avro.errors.SchemaParseException(f"{to_add.fullname} is a reserved type name.")
+ if to_add.fullname in self.names:
+ raise avro.errors.SchemaParseException(f'The name "{to_add.fullname}" is already in use.')
+ if to_add.fullname is None:
+ raise avro.errors.SchemaParseException(f'The name built from "{space_attr}.{name_attr}" is None')
+
+ self.names[to_add.fullname] = new_schema
+ return to_add
diff --git a/lang/py/avro/schema.py b/lang/py/avro/schema.py
index 43c264d..ddd02f9 100644
--- a/lang/py/avro/schema.py
+++ b/lang/py/avro/schema.py
@@ -44,43 +44,19 @@ import datetime
import decimal
import json
import math
-import re
import sys
import uuid
import warnings
import avro.constants
import avro.errors
+from avro.constants import NAMED_TYPES, PRIMITIVE_TYPES, VALID_TYPES
+from avro.name import Name, Names, validate_basename
#
# Constants
#
-# The name portion of a fullname, record field names, and enum symbols must:
-# start with [A-Za-z_]
-# subsequently contain only [A-Za-z0-9_]
-_BASE_NAME_PATTERN = re.compile(r"(?:^|\.)[A-Za-z_][A-Za-z0-9_]*$")
-
-PRIMITIVE_TYPES = (
- "null",
- "boolean",
- "string",
- "bytes",
- "int",
- "long",
- "float",
- "double",
-)
-
-NAMED_TYPES = (
- "fixed",
- "enum",
- "record",
- "error",
-)
-
-VALID_TYPES = PRIMITIVE_TYPES + NAMED_TYPES + ("array", "map", "union", "request", "error_union")
-
SCHEMA_RESERVED_PROPS = (
"type",
"name",
@@ -123,12 +99,6 @@ LONG_MIN_VALUE = -(1 << 63)
LONG_MAX_VALUE = (1 << 63) - 1
-def validate_basename(basename):
- """Raise InvalidName if the given basename is not a valid name."""
- if not _BASE_NAME_PATTERN.search(basename):
- raise avro.errors.InvalidName(f"{basename!s} is not a valid Avro name because it does not match the pattern {_BASE_NAME_PATTERN.pattern!s}")
-
-
def _is_timezone_aware_datetime(dt):
return dt.tzinfo is not None and dt.tzinfo.utcoffset(dt) is not None
@@ -271,131 +241,6 @@ class Schema(abc.ABC, CanonicalPropertiesMixin):
"""
-class Name:
- """Class to describe Avro name."""
-
- _full = None
-
- def __init__(self, name_attr, space_attr, default_space):
- """The fullname is determined in one of the following ways:
-
- - A name and namespace are both specified. For example, one might use "name": "X",
- "namespace": "org.foo" to indicate the fullname org.foo.X.
- - A fullname is specified. If the name specified contains a dot,
- then it is assumed to be a fullname, and any namespace also specified is ignored.
- For example, use "name": "org.foo.X" to indicate the fullname org.foo.X.
- - A name only is specified, i.e., a name that contains no dots.
- In this case the namespace is taken from the most tightly enclosing schema or protocol.
- For example, if "name": "X" is specified, and this occurs within a field of
- the record definition of org.foo.Y, then the fullname is org.foo.X.
- If there is no enclosing namespace then the null namespace is used.
-
- References to previously defined names are as in the latter two cases above:
- if they contain a dot they are a fullname,
- if they do not contain a dot, the namespace is the namespace of the enclosing definition.
-
- @arg name_attr: name value read in schema or None.
- @arg space_attr: namespace value read in schema or None. The empty string may be used as a namespace
- to indicate the null namespace.
- @arg default_space: the current default space or None.
- """
- if name_attr is None:
- return
- if name_attr == "":
- raise avro.errors.SchemaParseException("Name must not be the empty string.")
-
- if "." in name_attr or space_attr == "" or not (space_attr or default_space):
- # The empty string may be used as a namespace to indicate the null namespace.
- self._full = name_attr
- else:
- self._full = f"{space_attr or default_space!s}.{name_attr!s}"
-
- self._validate_fullname(self._full)
-
- def _validate_fullname(self, fullname):
- for name in fullname.split("."):
- validate_basename(name)
-
- def __eq__(self, other):
- """Equality of names is defined on the fullname and is case-sensitive."""
- try:
- return self.fullname == other.fullname
- except AttributeError:
- return False
-
- @property
- def fullname(self):
- return self._full
-
- @property
- def space(self):
- """Back out a namespace from full name."""
- if self._full is None:
- return None
- return self._full.rsplit(".", 1)[0] if "." in self._full else None
-
- def get_space(self):
- warnings.warn("Name.get_space() is deprecated in favor of Name.space")
- return self.space
-
-
-class Names:
- """Track name set and default namespace during parsing."""
-
- def __init__(self, default_namespace=None):
- self.names = {}
- self.default_namespace = default_namespace
-
- def has_name(self, name_attr, space_attr):
- test = Name(name_attr, space_attr, self.default_namespace).fullname
- return test in self.names
-
- def get_name(self, name_attr, space_attr):
- test = Name(name_attr, space_attr, self.default_namespace).fullname
- return self.names.get(test)
-
- def prune_namespace(self, properties):
- """given a properties, return properties with namespace removed if
- it matches the own default namespace"""
- if self.default_namespace is None:
- # I have no default -- no change
- return properties
-
- if "namespace" not in properties:
- # he has no namespace - no change
- return properties
-
- if properties["namespace"] != self.default_namespace:
- # we're different - leave his stuff alone
- return properties
-
- # we each have a namespace and it's redundant. delete his.
- prunable = properties.copy()
- del prunable["namespace"]
- return prunable
-
- def add_name(self, name_attr, space_attr, new_schema):
- """
- Add a new schema object to the name set.
-
- @arg name_attr: name value read in schema
- @arg space_attr: namespace value read in schema.
-
- @return: the Name that was just added.
- """
- to_add = Name(name_attr, space_attr, self.default_namespace)
-
- if to_add.fullname in VALID_TYPES:
- fail_msg = f"{to_add.fullname} is a reserved type name."
- raise avro.errors.SchemaParseException(fail_msg)
- elif to_add.fullname in self.names:
- fail_msg = f'The name "{to_add.fullname}" is already in use.'
- raise avro.errors.SchemaParseException(fail_msg)
-
- self.names[to_add.fullname] = new_schema
- return to_add
-
-
class NamedSchema(Schema):
"""Named Schemas specified in NAMED_TYPES."""