You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@avro.apache.org by ha...@apache.org on 2010/06/10 02:00:17 UTC

svn commit: r953190 - in /avro/trunk: CHANGES.txt lang/py/src/avro/protocol.py lang/py/src/avro/schema.py lang/py/test/test_protocol.py lang/py/test/test_schema.py

Author: hammer
Date: Thu Jun 10 00:00:17 2010
New Revision: 953190

URL: http://svn.apache.org/viewvc?rev=953190&view=rev
Log:
AVRO-284. Handle namespaces correctly in new Python implementation
(Patrick Wendell via hammer)


Modified:
    avro/trunk/CHANGES.txt
    avro/trunk/lang/py/src/avro/protocol.py
    avro/trunk/lang/py/src/avro/schema.py
    avro/trunk/lang/py/test/test_protocol.py
    avro/trunk/lang/py/test/test_schema.py

Modified: avro/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/avro/trunk/CHANGES.txt?rev=953190&r1=953189&r2=953190&view=diff
==============================================================================
--- avro/trunk/CHANGES.txt (original)
+++ avro/trunk/CHANGES.txt Thu Jun 10 00:00:17 2010
@@ -28,6 +28,9 @@ Avro 1.4.0 (unreleased)
 
     AVRO-540. Java: Make GenericArray reversible.  (Eric Evans via cutting)
 
+    AVRO-284. Handle namespaces correctly in new Python implementation
+    (Patrick Wendell via hammer)
+
   BUG FIXES
 
     AVRO-502. Memory leak from parsing JSON schema.

Modified: avro/trunk/lang/py/src/avro/protocol.py
URL: http://svn.apache.org/viewvc/avro/trunk/lang/py/src/avro/protocol.py?rev=953190&r1=953189&r2=953190&view=diff
==============================================================================
--- avro/trunk/lang/py/src/avro/protocol.py (original)
+++ avro/trunk/lang/py/src/avro/protocol.py Thu Jun 10 00:00:17 2010
@@ -92,8 +92,10 @@ class Protocol(object):
 
     self._props = {}
     self.set_prop('name', name)
-    if namespace is not None: self.set_prop('namespace', namespace)
-    type_names = {}
+    type_names = schema.Names()
+    if namespace is not None: 
+      self.set_prop('namespace', namespace)
+      type_names.default_namespace = namespace
     if types is not None:
       self.set_prop('types', self._parse_types(types, type_names))
     if messages is not None:
@@ -107,7 +109,7 @@ class Protocol(object):
   name = property(lambda self: self.get_prop('name'))
   namespace = property(lambda self: self.get_prop('namespace'))
   fullname = property(lambda self: 
-                      schema.Name.make_fullname(self.name, self.namespace))
+                      schema.Name(self.name, self.namespace).get_full_name())
   types = property(lambda self: self.get_prop('types'))
   types_dict = property(lambda self: dict([(type.name, type)
                                            for type in self.types]))
@@ -149,9 +151,9 @@ class Message(object):
     return schema.RecordSchema(None, None, request, names, 'request')
   
   def _parse_response(self, response, names):
-    if isinstance(response, basestring) and names.has_key(response):
+    if isinstance(response, basestring) and names.has_name(response, None):
       self._response_from_names = True
-      return names.get(response)
+      return names.get_name(response, None)
     else:
       return schema.make_avsc_object(response, names)
 

Modified: avro/trunk/lang/py/src/avro/schema.py
URL: http://svn.apache.org/viewvc/avro/trunk/lang/py/src/avro/schema.py?rev=953190&r1=953189&r2=953190&view=diff
==============================================================================
--- avro/trunk/lang/py/src/avro/schema.py (original)
+++ avro/trunk/lang/py/src/avro/schema.py Thu Jun 10 00:00:17 2010
@@ -125,36 +125,108 @@ class Schema(object):
     self.props[key] = value
 
 class Name(object):
-  """Container class for static methods on Avro names."""
-  @staticmethod
-  def make_fullname(name, namespace):
-    if name.find('.') < 0 and namespace is not None:
-      return '.'.join([namespace, name])
-    else:
-      return name
-
-  @staticmethod
-  def extract_namespace(name, namespace):
-    parts = name.rsplit('.', 1)
-    if len(parts) > 1:
-      namespace, name = parts
-    return name, namespace
+  """Class to describe Avro name."""
+  
+  def __init__(self, name_attr, space_attr, default_space):
+    """
+    Formulate full name according to the specification.
+    
+    @arg name_attr: name value read in schema or None.
+    @arg space_attr: namespace value read in schema or None.
+    @ard default_space: the current default space or None.
+    """
+    # Ensure valid ctor args
+    if not (isinstance(name_attr, basestring) or (name_attr is None)):
+      fail_msg = 'Name must be non-empty string or None.'
+      raise SchemaParseException(fail_msg)
+    elif name_attr == "":
+      fail_msg = 'Name must be non-empty string or None.'
+      raise SchemaParseException(fail_msg)
 
-  @staticmethod
-  def add_name(names, new_schema):
-    """Add a new schema object to the names dictionary (in place)."""
-    new_fullname = new_schema.fullname
-    if new_fullname in VALID_TYPES:
-      fail_msg = '%s is a reserved type name.' % new_fullname
+    if not (isinstance(space_attr, basestring) or (space_attr is None)):
+      fail_msg = 'Space must be non-empty string or None.'
+      raise SchemaParseException(fail_msg)
+    elif name_attr == "":
+      fail_msg = 'Space must be non-empty string or None.'
+      raise SchemaParseException(fail_msg)
+  
+    if not (isinstance(default_space, basestring) or (default_space is None)):
+      fail_msg = 'Default space must be non-empty string or None.'
+      raise SchemaParseException(fail_msg)
+    elif name_attr == "":
+      fail_msg = 'Default must be non-empty string or None.'
+      raise SchemaParseException(fail_msg)
+    
+    self._full = None; 
+    
+    if name_attr is None or name_attr == "":
+        return;
+    
+    if (name_attr.find('.') < 0):
+      if (space_attr is not None) and (space_attr != ""):
+        self._full = "%s.%s" % (space_attr, name_attr)
+      else:
+        if (default_space is not None) and (default_space != ""):
+           self._full = "%s.%s" % (default_space, name_attr)
+        else:
+          self._full = name_attr
+    else:
+        self._full = name_attr         
+    
+  def __eq__(self, other):
+    if not isinstance(other, Name):
+        return False
+    return (self.get_full_name() == other.get_full_name())
+      
+  def get_full_name(self):
+    return self._full;
+
+  def get_space(self):
+    """Back out a namespace from full name."""
+    if self._full is None:
+        return None
+    
+    if (self._full.find('.') > 0):
+      return self._full.rsplit(".", 1)[0]
+    else:
+      return ""
+
+class Names(object):
+  """Track name set and default namespace during parsing."""
+  def __init__(self, default_namespace=None):
+      self.names = {}
+      self.default_namespace = default_namespace
+      
+  def has_name(self, name_attr, space_attr):
+      test = Name(name_attr, space_attr, self.default_namespace).get_full_name()
+      return self.names.has_key(test)
+  
+  def get_name(self, name_attr, space_attr):    
+      test = Name(name_attr, space_attr, self.default_namespace).get_full_name()
+      if not self.names.has_key(test):
+          return None
+      return self.names[test]
+      
+  def add_name(self, name_attr, space_attr, new_schema):
+    """
+    Add a new schema object to the name set.
+    
+      @arg name_attr: name value read in schema
+      @arg space_attr: namespace value read in schema.
+      
+      @return: the Name that was just added.
+    """
+    to_add = Name(name_attr, space_attr, self.default_namespace)
+    
+    if to_add.get_full_name() in VALID_TYPES:
+      fail_msg = '%s is a reserved type name.' % to_add.get_full_name()
       raise SchemaParseException(fail_msg)
-    elif names is not None and names.has_key(new_fullname):
-      fail_msg = 'The name "%s" is already in use.' % new_fullname
+    elif self.names.has_key(to_add.get_full_name()):
+      fail_msg = 'The name "%s" is already in use.' % to_add.get_full_name()
       raise SchemaParseException(fail_msg)
-    elif names is None:
-      names = {}
 
-    names[new_fullname] = new_schema
-    return names
+    self.names[to_add.get_full_name()] = new_schema
+    return to_add
 
 class NamedSchema(Schema):
   """Named Schemas specified in NAMED_TYPES."""
@@ -174,18 +246,17 @@ class NamedSchema(Schema):
     Schema.__init__(self, type)
 
     # Add class members
-    name, namespace = Name.extract_namespace(name, namespace)
-    self.set_prop('name', name)
-    if namespace is not None: self.set_prop('namespace', namespace)
-
-    # Add name to names dictionary
-    names = Name.add_name(names, self)
+    new_name = names.add_name(name, namespace, self)
+    self.set_prop('name', new_name.get_full_name())
+    if new_name.get_space() is not None: 
+        self.set_prop('namespace', new_name.get_space())
+    self.set_prop('fullname', new_name.get_full_name())
+    
 
   # read-only properties
   name = property(lambda self: self.get_prop('name'))
   namespace = property(lambda self: self.get_prop('namespace'))
-  fullname = property(lambda self: 
-                      Name.make_fullname(self.name, self.namespace))
+  fullname = property(lambda self: self.get_prop('fullname'))
 
 class Field(object):
   def __init__(self, type, name, has_default, default=None, order=None, names=None):
@@ -204,15 +275,16 @@ class Field(object):
     self._props = {}
     self._type_from_names = False
     self._has_default = has_default
+
     if (isinstance(type, basestring) and names is not None
-        and names.has_key(type)):
-      type_schema = names[type]
+        and names.has_name(type, None)):
+      type_schema = names.get_name(type, None)
       self._type_from_names = True
     else:
       try:
         type_schema = make_avsc_object(type, names)
       except:
-        fail_msg = 'Type property not a valid Avro schema.'
+        fail_msg = 'Type property "%s" not a valid Avro schema.' % type
         raise SchemaParseException(fail_msg)
     self.set_prop('type', type_schema)
     self.set_prop('name', name)
@@ -335,10 +407,10 @@ class ArraySchema(Schema):
 
     # Call parent ctor
     Schema.__init__(self, 'array')
-
     # Add class members
-    if isinstance(items, basestring) and names.has_key(items):
-      items_schema = names[items]
+
+    if isinstance(items, basestring) and names.has_name(items, None):
+      items_schema = names.get_name(items, None)
       self._items_schema_from_names = True
     else:
       try:
@@ -374,8 +446,8 @@ class MapSchema(Schema):
     Schema.__init__(self, 'map')
 
     # Add class members
-    if isinstance(values, basestring) and names.has_key(values):
-      values_schema = names[values]
+    if isinstance(values, basestring) and names.has_name(values, None):
+      values_schema = names.get_name(values)
       self._values_schema_from_names = True
     else:
       try:
@@ -421,8 +493,8 @@ class UnionSchema(Schema):
     self._schema_from_names_indices = []
     for i, schema in enumerate(schemas):
       from_names = False
-      if isinstance(schema, basestring) and names.has_key(schema):
-        new_schema = names[schema]
+      if isinstance(schema, basestring) and names.has_name(schema, None):
+        new_schema = names.get_name(schema, None)
         from_names = True
       else:
         try:
@@ -434,7 +506,7 @@ class UnionSchema(Schema):
           and new_schema.type in [schema.type for schema in schema_objects]):
         raise SchemaParseException('%s type already in Union' % new_schema.type)
       elif new_schema.type == 'union':
-        raise SchemaParseException('Unions cannont contain other unions.')
+        raise SchemaParseException('Unions cannot contain other unions.')
       else:
         schema_objects.append(new_schema)
         if from_names: self._schema_from_names_indices.append(i)
@@ -482,7 +554,7 @@ class RecordSchema(NamedSchema):
     field_names = []
     for i, field in enumerate(field_data):
       if hasattr(field, 'get') and callable(field.get):
-        type = field.get('type')
+        type = field.get('type')        
         name = field.get('name')
 
         # null values can have a default value of None
@@ -519,9 +591,17 @@ class RecordSchema(NamedSchema):
     else:
       NamedSchema.__init__(self, schema_type, name, namespace, names)
 
+    if schema_type == 'record': 
+      old_default = names.default_namespace
+      names.default_namespace = Name(name, namespace,
+                                     names.default_namespace).get_space()
+
     # Add class members
     field_objects = RecordSchema.make_field_objects(fields, names)
     self.set_prop('fields', field_objects)
+    
+    if schema_type == 'record':
+      names.default_namespace = old_default
 
   # read-only properties
   fields = property(lambda self: self.get_prop('fields'))
@@ -554,8 +634,11 @@ def make_avsc_object(json_data, names=No
   """
   Build Avro Schema from data parsed out of JSON string.
 
-  @arg names: dict of schema name, object pairs
+  @arg names: A Name object (tracks seen names and default space)
   """
+  if names == None:
+    names = Names()
+  
   # JSON object (non-union)
   if hasattr(json_data, 'get') and callable(json_data.get):
     type = json_data.get('type')
@@ -612,8 +695,8 @@ def parse(json_string):
   except:
     raise SchemaParseException('Error parsing JSON: %s' % json_string)
 
-  # Initialize the names dictionary
-  names = {}
+  # Initialize the names object
+  names = Names()
 
   # construct the Avro Schema object
   return make_avsc_object(json_data, names)

Modified: avro/trunk/lang/py/test/test_protocol.py
URL: http://svn.apache.org/viewvc/avro/trunk/lang/py/test/test_protocol.py?rev=953190&r1=953189&r2=953190&view=diff
==============================================================================
--- avro/trunk/lang/py/test/test_protocol.py (original)
+++ avro/trunk/lang/py/test/test_protocol.py Thu Jun 10 00:00:17 2010
@@ -146,6 +146,142 @@ EXAMPLES = [
 
 }
     """, True),
+ExampleProtocol("""\
+{"namespace": "org.apache.avro.test.namespace",
+ "protocol": "TestImplicitNamespace",
+
+ "types": [
+     {"name": "org.apache.avro.test.util.MD5", "type": "fixed", "size": 16},
+     {"name": "ReferencedRecord", "type": "record", 
+       "fields": [ {"name": "foo", "type": "string"} ] },
+     {"name": "TestRecord", "type": "record",
+      "fields": [ {"name": "hash", "type": "org.apache.avro.test.util.MD5"},
+                  {"name": "unqalified", "type": "ReferencedRecord"} ]
+     },
+     {"name": "TestError",
+      "type": "error", "fields": [ {"name": "message", "type": "string"} ]
+     }
+ ],
+
+ "messages": {
+     "echo": {
+         "request": [{"name": "qualified", 
+             "type": "org.apache.avro.test.namespace.TestRecord"}],
+         "response": "TestRecord"
+     },
+
+     "error": {
+         "request": [],
+         "response": "null",
+         "errors": ["org.apache.avro.test.namespace.TestError"]
+     }
+
+ }
+
+}
+    """, True),
+ExampleProtocol("""\
+{"namespace": "org.apache.avro.test.namespace",
+ "protocol": "TestNamespaceTwo",
+
+ "types": [
+     {"name": "org.apache.avro.test.util.MD5", "type": "fixed", "size": 16},
+     {"name": "ReferencedRecord", "type": "record", 
+       "namespace": "org.apache.avro.other.namespace", 
+       "fields": [ {"name": "foo", "type": "string"} ] },
+     {"name": "TestRecord", "type": "record",
+      "fields": [ {"name": "hash", "type": "org.apache.avro.test.util.MD5"},
+                  {"name": "qualified", 
+                    "type": "org.apache.avro.other.namespace.ReferencedRecord"} 
+                ]
+     },
+     {"name": "TestError",
+      "type": "error", "fields": [ {"name": "message", "type": "string"} ]
+     }
+ ],
+
+ "messages": {
+     "echo": {
+         "request": [{"name": "qualified", 
+             "type": "org.apache.avro.test.namespace.TestRecord"}],
+         "response": "TestRecord"
+     },
+
+     "error": {
+         "request": [],
+         "response": "null",
+         "errors": ["org.apache.avro.test.namespace.TestError"]
+     }
+
+ }
+
+}
+    """, True),
+ExampleProtocol("""\
+{"namespace": "org.apache.avro.test.namespace",
+ "protocol": "TestValidRepeatedName",
+
+ "types": [
+     {"name": "org.apache.avro.test.util.MD5", "type": "fixed", "size": 16},
+     {"name": "ReferencedRecord", "type": "record", 
+       "namespace": "org.apache.avro.other.namespace", 
+       "fields": [ {"name": "foo", "type": "string"} ] },
+     {"name": "ReferencedRecord", "type": "record", 
+       "fields": [ {"name": "bar", "type": "double"} ] },
+     {"name": "TestError",
+      "type": "error", "fields": [ {"name": "message", "type": "string"} ]
+     }
+ ],
+
+ "messages": {
+     "echo": {
+         "request": [{"name": "qualified", 
+             "type": "ReferencedRecord"}],
+         "response": "org.apache.avro.other.namespace.ReferencedRecord"
+     },
+
+     "error": {
+         "request": [],
+         "response": "null",
+         "errors": ["org.apache.avro.test.namespace.TestError"]
+     }
+
+ }
+
+}
+    """, True),
+ExampleProtocol("""\
+{"namespace": "org.apache.avro.test.namespace",
+ "protocol": "TestInvalidRepeatedName",
+
+ "types": [
+     {"name": "org.apache.avro.test.util.MD5", "type": "fixed", "size": 16},
+     {"name": "ReferencedRecord", "type": "record", 
+       "fields": [ {"name": "foo", "type": "string"} ] },
+     {"name": "ReferencedRecord", "type": "record", 
+       "fields": [ {"name": "bar", "type": "double"} ] },
+     {"name": "TestError",
+      "type": "error", "fields": [ {"name": "message", "type": "string"} ]
+     }
+ ],
+
+ "messages": {
+     "echo": {
+         "request": [{"name": "qualified", 
+             "type": "ReferencedRecord"}],
+         "response": "org.apache.avro.other.namespace.ReferencedRecord"
+     },
+
+     "error": {
+         "request": [],
+         "response": "null",
+         "errors": ["org.apache.avro.test.namespace.TestError"]
+     }
+
+ }
+
+}
+    """, False),
   ExampleProtocol("""\
 {"namespace": "org.apache.avro.test",
  "protocol": "BulkData",

Modified: avro/trunk/lang/py/test/test_schema.py
URL: http://svn.apache.org/viewvc/avro/trunk/lang/py/test/test_schema.py?rev=953190&r1=953189&r2=953190&view=diff
==============================================================================
--- avro/trunk/lang/py/test/test_schema.py (original)
+++ avro/trunk/lang/py/test/test_schema.py Thu Jun 10 00:00:17 2010
@@ -367,8 +367,30 @@ class TestSchema(unittest.TestCase):
     equivalent.
     """
     print_test_name('TEST FULLNAME')
-    fullname = schema.Name.make_fullname('a', 'o.a.h')
+
+    # name and namespace specified    
+    fullname = schema.Name('a', 'o.a.h', None).get_full_name()
     self.assertEqual(fullname, 'o.a.h.a')
 
+    # fullname and namespace specified
+    fullname = schema.Name('a.b.c.d', 'o.a.h', None).get_full_name()
+    self.assertEqual(fullname, 'a.b.c.d')
+    
+    # name and default namespace specified
+    fullname = schema.Name('a', None, 'b.c.d').get_full_name()
+    self.assertEqual(fullname, 'b.c.d.a')
+
+    # fullname and default namespace specified
+    fullname = schema.Name('a.b.c.d', None, 'o.a.h').get_full_name()
+    self.assertEqual(fullname, 'a.b.c.d')
+
+    # fullname, namespace, default namespace specified
+    fullname = schema.Name('a.b.c.d', 'o.a.a', 'o.a.h').get_full_name()
+    self.assertEqual(fullname, 'a.b.c.d')
+
+    # name, namespace, default namespace specified
+    fullname = schema.Name('a', 'o.a.a', 'o.a.h').get_full_name()
+    self.assertEqual(fullname, 'o.a.a.a')
+
 if __name__ == '__main__':
   unittest.main()