You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@avro.apache.org by ko...@apache.org on 2019/11/21 03:36:12 UTC
[avro] branch master updated: AVRO-621: Stricter Name Validation
(#718)
This is an automated email from the ASF dual-hosted git repository.
kojiromike pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/avro.git
View the commit online:
https://github.com/apache/avro/commit/f8afef1f98b1f27deb9e73ef804ed74ba2d20bbb
The following commit(s) were added to refs/heads/master by this push:
new f8afef1 AVRO-621: Stricter Name Validation (#718)
f8afef1 is described below
commit f8afef1f98b1f27deb9e73ef804ed74ba2d20bbb
Author: Michael A. Smith <mi...@smith-li.com>
AuthorDate: Wed Nov 20 22:36:03 2019 -0500
AVRO-621: Stricter Name Validation (#718)
* AVRO-621: Test an Invalid Name
* AVRO-621: Stricter Name Checking
* AVRO-621 Adhere Better to Spec
* AVRO-621 More Complete Name Tests
---
lang/py/src/avro/schema.py | 108 ++++++++++++++++++++++----------------------
lang/py/test/test_schema.py | 73 ++++++++++++++++--------------
2 files changed, 91 insertions(+), 90 deletions(-)
diff --git a/lang/py/src/avro/schema.py b/lang/py/src/avro/schema.py
index 60a826d..a6116e2 100644
--- a/lang/py/src/avro/schema.py
+++ b/lang/py/src/avro/schema.py
@@ -42,6 +42,7 @@ from __future__ import absolute_import, division, print_function
import json
import math
+import re
import sys
import warnings
@@ -113,6 +114,9 @@ class AvroException(Exception):
class SchemaParseException(AvroException):
pass
+class InvalidName(SchemaParseException):
+ """User attempted to parse a schema with an invalid name."""
+
class AvroWarning(UserWarning):
"""Base class for warnings."""
@@ -173,68 +177,65 @@ class Schema(object):
class Name(object):
"""Class to describe Avro name."""
- def __init__(self, name_attr, space_attr, default_space):
- """
- Formulate full name according to the specification.
-
- @arg name_attr: name value read in schema or None.
- @arg space_attr: namespace value read in schema or None.
- @ard default_space: the current default space or None.
- """
- # Ensure valid ctor args
- if not (isinstance(name_attr, basestring) or (name_attr is None)):
- fail_msg = 'Name must be non-empty string or None.'
- raise SchemaParseException(fail_msg)
- elif name_attr == "":
- fail_msg = 'Name must be non-empty string or None.'
- raise SchemaParseException(fail_msg)
+ # The name portion of a fullname, record field names, and enum symbols must:
+ # start with [A-Za-z_]
+ # subsequently contain only [A-Za-z0-9_]
+ _base_name_pattern = re.compile(r'(?:^|\.)[A-Za-z_][A-Za-z0-9_]*$')
- if not (isinstance(space_attr, basestring) or (space_attr is None)):
- fail_msg = 'Space must be non-empty string or None.'
- raise SchemaParseException(fail_msg)
- elif name_attr == "":
- fail_msg = 'Space must be non-empty string or None.'
- raise SchemaParseException(fail_msg)
+ _full = None
- if not (isinstance(default_space, basestring) or (default_space is None)):
- fail_msg = 'Default space must be non-empty string or None.'
- raise SchemaParseException(fail_msg)
- elif name_attr == "":
- fail_msg = 'Default must be non-empty string or None.'
- raise SchemaParseException(fail_msg)
+ def __init__(self, name_attr, space_attr, default_space):
+ """The fullname is determined in one of the following ways:
- self._full = None;
+ - A name and namespace are both specified. For example, one might use "name": "X", "namespace": "org.foo" to indicate the fullname org.foo.X.
+ - A fullname is specified. If the name specified contains a dot, then it is assumed to be a fullname, and any namespace also specified is ignored. For example, use "name": "org.foo.X" to indicate the fullname org.foo.X.
+ - A name only is specified, i.e., a name that contains no dots. In this case the namespace is taken from the most tightly enclosing schema or protocol. For example, if "name": "X" is specified, and this occurs within a field of the record definition of org.foo.Y, then the fullname is org.foo.X. If there is no enclosing namespace then the null namespace is used.
- if name_attr is None or name_attr == "":
- return;
+ References to previously defined names are as in the latter two cases above: if they contain a dot they are a fullname, if they do not contain a dot, the namespace is the namespace of the enclosing definition.
- if (name_attr.find('.') < 0):
- if (space_attr is not None) and (space_attr != ""):
- self._full = "%s.%s" % (space_attr, name_attr)
- else:
- if (default_space is not None) and (default_space != ""):
- self._full = "%s.%s" % (default_space, name_attr)
- else:
- self._full = name_attr
+ @arg name_attr: name value read in schema or None.
+ @arg space_attr: namespace value read in schema or None. The empty string may be used as a namespace to indicate the null namespace.
+ @arg default_space: the current default space or None.
+ """
+ if name_attr is None:
+ return
+ if name_attr == "":
+ raise SchemaParseException('Name must not be the empty string.')
+
+ if '.' in name_attr or space_attr == "" or not (space_attr or default_space):
+ # The empty string may be used as a namespace to indicate the null namespace.
+ self._full = name_attr
else:
- self._full = name_attr
+ self._full = "{!s}.{!s}".format(space_attr or default_space, name_attr)
+
+ self._validate_fullname(self._full)
+
+ def _validate_fullname(self, fullname):
+ for name in fullname.split('.'):
+ if not self._base_name_pattern.search(name):
+ raise InvalidName("{!s} is not a valid Avro name because it "
+ "does not match the pattern {!s}".format(
+ name, self._base_name_pattern.pattern))
def __eq__(self, other):
- if not isinstance(other, Name):
- return False
- return (self.fullname == other.fullname)
+ """Equality of names is defined on the fullname and is case-sensitive."""
+ return isinstance(other, Name) and self.fullname == other.fullname
- fullname = property(lambda self: self._full)
+ @property
+ def fullname(self):
+ return self._full
- def get_space(self):
+ @property
+ def space(self):
"""Back out a namespace from full name."""
if self._full is None:
- return None
+ return None
+ return self._full.rsplit(".", 1)[0] if "." in self._full else None
+
+ def get_space(self):
+ warnings.warn('Name.get_space() is deprecated in favor of Name.space')
+ return self.space
- if (self._full.find('.') > 0):
- return self._full.rsplit(".", 1)[0]
- else:
- return ""
class Names(object):
"""Track name set and default namespace during parsing."""
@@ -313,16 +314,13 @@ class NamedSchema(Schema):
# Store name and namespace as they were read in origin schema
self.set_prop('name', name)
if namespace is not None:
- self.set_prop('namespace', new_name.get_space())
+ self.set_prop('namespace', new_name.space)
# Store full name as calculated from name, namespace
self._fullname = new_name.fullname
def name_ref(self, names):
- if self.namespace == names.default_namespace:
- return self.name
- else:
- return self.fullname
+ return self.name if self.namespace == names.default_namespace else self.fullname
# read-only properties
name = property(lambda self: self.get_prop('name'))
@@ -751,7 +749,7 @@ class RecordSchema(NamedSchema):
if schema_type == 'record':
old_default = names.default_namespace
names.default_namespace = Name(name, namespace,
- names.default_namespace).get_space()
+ names.default_namespace).space
# Add class members
field_objects = RecordSchema.make_field_objects(fields, names)
diff --git a/lang/py/test/test_schema.py b/lang/py/test/test_schema.py
index ea6779d..63b0705 100644
--- a/lang/py/test/test_schema.py
+++ b/lang/py/test/test_schema.py
@@ -71,6 +71,7 @@ FIXED_EXAMPLES = [
"namespace": "org.apache.hadoop.avro"}),
InvalidTestSchema({"type": "fixed", "name": "Missing size"}),
InvalidTestSchema({"type": "fixed", "size": 314}),
+ InvalidTestSchema({"type": "fixed", "size": 314, "name": "dr. spaceman"}, comment='AVRO-621'),
]
ENUM_EXAMPLES = [
@@ -315,61 +316,63 @@ class TestSchema(unittest.TestCase):
# If we've made it this far, the subschema was reasonably stringified; it ccould be reparsed.
self.assertEqual("X", t.fields[0].type.name)
- # TODO(hammer): more tests
- def test_fullname(self):
- """Test schema full names
-
- The fullname is determined in one of the following ways:
- * A name and namespace are both specified. For example,
- one might use "name": "X", "namespace": "org.foo"
- to indicate the fullname "org.foo.X".
- * A fullname is specified. If the name specified contains
- a dot, then it is assumed to be a fullname, and any
- namespace also specified is ignored. For example,
- use "name": "org.foo.X" to indicate the
- fullname "org.foo.X".
- * A name only is specified, i.e., a name that contains no
- dots. In this case the namespace is taken from the most
- tightly encosing schema or protocol. For example,
- if "name": "X" is specified, and this occurs
- within a field of the record definition
- of "org.foo.Y", then the fullname is "org.foo.X".
-
- References to previously defined names are as in the latter
- two cases above: if they contain a dot they are a fullname, if
- they do not contain a dot, the namespace is the namespace of
- the enclosing definition.
-
- Primitive type names have no namespace and their names may
- not be defined in any namespace. A schema may only contain
- multiple definitions of a fullname if the definitions are
- equivalent.
- """
+ def test_name_is_none(self):
+ """When a name is None its namespace is None."""
+ self.assertIsNone(schema.Name(None, None, None).fullname)
+ self.assertIsNone(schema.Name(None, None, None).space)
+
+ def test_name_not_empty_string(self):
+ """A name cannot be the empty string."""
+ self.assertRaises(schema.SchemaParseException, schema.Name, "", None, None)
+ def test_name_space_specified(self):
+ """Space combines with a name to become the fullname."""
# name and namespace specified
fullname = schema.Name('a', 'o.a.h', None).fullname
self.assertEqual(fullname, 'o.a.h.a')
- # fullname and namespace specified
+ def test_fullname_space_specified(self):
+ """When name contains dots, namespace should be ignored."""
fullname = schema.Name('a.b.c.d', 'o.a.h', None).fullname
self.assertEqual(fullname, 'a.b.c.d')
- # name and default namespace specified
+ def test_name_default_specified(self):
+ """Default space becomes the namespace when the namespace is None."""
fullname = schema.Name('a', None, 'b.c.d').fullname
self.assertEqual(fullname, 'b.c.d.a')
- # fullname and default namespace specified
+ def test_fullname_default_specified(self):
+ """When a name contains dots, default space should be ignored."""
fullname = schema.Name('a.b.c.d', None, 'o.a.h').fullname
self.assertEqual(fullname, 'a.b.c.d')
- # fullname, namespace, default namespace specified
+ def test_fullname_space_default_specified(self):
+ """When a name contains dots, namespace and default space should be ignored."""
fullname = schema.Name('a.b.c.d', 'o.a.a', 'o.a.h').fullname
self.assertEqual(fullname, 'a.b.c.d')
- # name, namespace, default namespace specified
+ def test_name_space_default_specified(self):
+ """When name and space are specified, default space should be ignored."""
fullname = schema.Name('a', 'o.a.a', 'o.a.h').fullname
self.assertEqual(fullname, 'o.a.a.a')
+ def test_equal_names(self):
+ """Equality of names is defined on the fullname and is case-sensitive."""
+ self.assertEqual(schema.Name('a.b.c.d', None, None), schema.Name('d', 'a.b.c', None))
+ self.assertNotEqual(schema.Name('C.d', None, None), schema.Name('c.d', None, None))
+
+ def test_invalid_name(self):
+ """The name portion of a fullname, record field names, and enum symbols must:
+ start with [A-Za-z_] and subsequently contain only [A-Za-z0-9_]"""
+ self.assertRaises(schema.InvalidName, schema.Name, 'an especially spacey cowboy', None, None)
+ self.assertRaises(schema.InvalidName, schema.Name, '99 problems but a name aint one', None, None)
+
+ def test_null_namespace(self):
+ """The empty string may be used as a namespace to indicate the null namespace."""
+ name = schema.Name('name', "", None)
+ self.assertEqual(name.fullname, "name")
+ self.assertIsNone(name.space)
+
def test_exception_is_not_swallowed_on_parse_error(self):
"""A specific exception message should appear on a json parse error."""
try: