You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@avro.apache.org by cu...@apache.org on 2009/05/20 01:35:40 UTC

svn commit: r776496 [1/2] - in /hadoop/avro/trunk: ./ lib/py/ src/doc/content/xdocs/ src/java/org/apache/avro/ src/java/org/apache/avro/generic/ src/java/org/apache/avro/reflect/ src/java/org/apache/avro/specific/ src/py/avro/ src/test/java/org/apache/...

Author: cutting
Date: Tue May 19 23:35:39 2009
New Revision: 776496

URL: http://svn.apache.org/viewvc?rev=776496&view=rev
Log:
AVRO-18.  Add support for enum types.

Added:
    hadoop/avro/trunk/lib/py/odict.py
Modified:
    hadoop/avro/trunk/CHANGES.txt
    hadoop/avro/trunk/src/doc/content/xdocs/spec.xml
    hadoop/avro/trunk/src/java/org/apache/avro/Protocol.java
    hadoop/avro/trunk/src/java/org/apache/avro/Schema.java
    hadoop/avro/trunk/src/java/org/apache/avro/generic/GenericData.java
    hadoop/avro/trunk/src/java/org/apache/avro/generic/GenericDatumReader.java
    hadoop/avro/trunk/src/java/org/apache/avro/generic/GenericDatumWriter.java
    hadoop/avro/trunk/src/java/org/apache/avro/generic/package.html
    hadoop/avro/trunk/src/java/org/apache/avro/reflect/ReflectData.java
    hadoop/avro/trunk/src/java/org/apache/avro/reflect/ReflectDatumReader.java
    hadoop/avro/trunk/src/java/org/apache/avro/reflect/ReflectDatumWriter.java
    hadoop/avro/trunk/src/java/org/apache/avro/reflect/ReflectResponder.java
    hadoop/avro/trunk/src/java/org/apache/avro/specific/SpecificCompiler.java
    hadoop/avro/trunk/src/py/avro/generic.py
    hadoop/avro/trunk/src/py/avro/protocol.py
    hadoop/avro/trunk/src/py/avro/reflect.py
    hadoop/avro/trunk/src/py/avro/schema.py
    hadoop/avro/trunk/src/test/java/org/apache/avro/RandomData.java
    hadoop/avro/trunk/src/test/java/org/apache/avro/TestProtocolGeneric.java
    hadoop/avro/trunk/src/test/java/org/apache/avro/TestProtocolSpecific.java
    hadoop/avro/trunk/src/test/java/org/apache/avro/TestSchema.java
    hadoop/avro/trunk/src/test/py/testio.py
    hadoop/avro/trunk/src/test/py/testipc.py
    hadoop/avro/trunk/src/test/py/testipcreflect.py
    hadoop/avro/trunk/src/test/schemata/interop.js
    hadoop/avro/trunk/src/test/schemata/simple.js

Modified: hadoop/avro/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/CHANGES.txt?rev=776496&r1=776495&r2=776496&view=diff
==============================================================================
--- hadoop/avro/trunk/CHANGES.txt (original)
+++ hadoop/avro/trunk/CHANGES.txt Tue May 19 23:35:39 2009
@@ -19,6 +19,8 @@
 
     AVRO-33.  C support for primitive types.  (Matt Massie via cutting)
 
+    AVRO-18.  Add support for enum types.  (cutting & sharad)
+
   IMPROVEMENTS
 
     AVRO-11.  Re-implement specific and reflect datum readers and

Added: hadoop/avro/trunk/lib/py/odict.py
URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/lib/py/odict.py?rev=776496&view=auto
==============================================================================
--- hadoop/avro/trunk/lib/py/odict.py (added)
+++ hadoop/avro/trunk/lib/py/odict.py Tue May 19 23:35:39 2009
@@ -0,0 +1,1399 @@
+# odict.py
+# An Ordered Dictionary object
+# Copyright (C) 2005 Nicola Larosa, Michael Foord
+# E-mail: nico AT tekNico DOT net, fuzzyman AT voidspace DOT org DOT uk
+
+# This software is licensed under the terms of the BSD license.
+# http://www.voidspace.org.uk/python/license.shtml
+# Basically you're free to copy, modify, distribute and relicense it,
+# So long as you keep a copy of the license with it.
+
+# Documentation at http://www.voidspace.org.uk/python/odict.html
+# For information about bugfixes, updates and support, please join the
+# Pythonutils mailing list:
+# http://groups.google.com/group/pythonutils/
+# Comments, suggestions and bug reports welcome.
+
+"""A dict that keeps keys in insertion order"""
+from __future__ import generators
+
+__author__ = ('Nicola Larosa <ni...@m-tekNico.net>,'
+    'Michael Foord <fuzzyman AT voidspace DOT org DOT uk>')
+
+__docformat__ = "restructuredtext en"
+
+__revision__ = '$Id: odict.py 129 2005-09-12 18:15:28Z teknico $'
+
+__version__ = '0.2.2'
+
+__all__ = ['OrderedDict', 'SequenceOrderedDict']
+
+import sys
+INTP_VER = sys.version_info[:2]
+if INTP_VER < (2, 2):
+    raise RuntimeError("Python v.2.2 or later required")
+
+import types, warnings
+
+class OrderedDict(dict):
+    """
+    A class of dictionary that keeps the insertion order of keys.
+    
+    All appropriate methods return keys, items, or values in an ordered way.
+    
+    All normal dictionary methods are available. Updates take an OrderedDict or
+    (key, value) pairs; ordering comparisons require another OrderedDict.
+    
+    Various sequence methods are available, including the ability to explicitly
+    mutate the key ordering.
+    
+    __contains__ tests:
+    
+    >>> d = OrderedDict(((1, 3),))
+    >>> 1 in d
+    1
+    >>> 4 in d
+    0
+    
+    __getitem__ tests:
+    
+    >>> OrderedDict(((1, 3), (3, 2), (2, 1)))[2]
+    1
+    >>> OrderedDict(((1, 3), (3, 2), (2, 1)))[4]
+    Traceback (most recent call last):
+    KeyError: 4
+    
+    __len__ tests:
+    
+    >>> len(OrderedDict())
+    0
+    >>> len(OrderedDict(((1, 3), (3, 2), (2, 1))))
+    3
+    
+    get tests:
+    
+    >>> d = OrderedDict(((1, 3), (3, 2), (2, 1)))
+    >>> d.get(1)
+    3
+    >>> d.get(4) is None
+    1
+    >>> d.get(4, 5)
+    5
+    >>> d
+    OrderedDict([(1, 3), (3, 2), (2, 1)])
+    
+    has_key tests:
+    
+    >>> d = OrderedDict(((1, 3), (3, 2), (2, 1)))
+    >>> d.has_key(1)
+    1
+    >>> d.has_key(4)
+    0
+    """
+
+    def __init__(self, init_val=(), strict=False):
+        """
+        Create a new ordered dictionary. Cannot init from a normal dict,
+        nor from kwargs, since items order is undefined in those cases.
+        
+        If the ``strict`` keyword argument is ``True`` (``False`` is the
+        default) then when doing slice assignment - the ``OrderedDict`` you are
+        assigning from *must not* contain any keys in the remaining dict.
+        
+        >>> OrderedDict()
+        OrderedDict([])
+        >>> OrderedDict({1: 1})
+        Traceback (most recent call last):
+        TypeError: undefined order, cannot get items from dict
+        >>> OrderedDict({1: 1}.items())
+        OrderedDict([(1, 1)])
+        >>> d = OrderedDict(((1, 3), (3, 2), (2, 1)))
+        >>> d
+        OrderedDict([(1, 3), (3, 2), (2, 1)])
+        >>> OrderedDict(d)
+        OrderedDict([(1, 3), (3, 2), (2, 1)])
+        """
+        self.strict = strict
+        dict.__init__(self)
+        if isinstance(init_val, OrderedDict):
+            self._sequence = init_val.keys()
+            dict.update(self, init_val)
+        elif isinstance(init_val, dict):
+            # we lose compatibility with other ordered dict types this way
+            raise TypeError('undefined order, cannot get items from dict')
+        else:
+            self._sequence = []
+            self.update(init_val)
+
+### Special methods ###
+
+    def __delitem__(self, key):
+        """
+        >>> d = OrderedDict(((1, 3), (3, 2), (2, 1)))
+        >>> del d[3]
+        >>> d
+        OrderedDict([(1, 3), (2, 1)])
+        >>> del d[3]
+        Traceback (most recent call last):
+        KeyError: 3
+        >>> d[3] = 2
+        >>> d
+        OrderedDict([(1, 3), (2, 1), (3, 2)])
+        >>> del d[0:1]
+        >>> d
+        OrderedDict([(2, 1), (3, 2)])
+        """
+        if isinstance(key, types.SliceType):
+            # FIXME: efficiency?
+            keys = self._sequence[key]
+            for entry in keys:
+                dict.__delitem__(self, entry)
+            del self._sequence[key]
+        else:
+            # do the dict.__delitem__ *first* as it raises
+            # the more appropriate error
+            dict.__delitem__(self, key)
+            self._sequence.remove(key)
+
+    def __eq__(self, other):
+        """
+        >>> d = OrderedDict(((1, 3), (3, 2), (2, 1)))
+        >>> d == OrderedDict(d)
+        True
+        >>> d == OrderedDict(((1, 3), (2, 1), (3, 2)))
+        False
+        >>> d == OrderedDict(((1, 0), (3, 2), (2, 1)))
+        False
+        >>> d == OrderedDict(((0, 3), (3, 2), (2, 1)))
+        False
+        >>> d == dict(d)
+        False
+        >>> d == False
+        False
+        """
+        if isinstance(other, OrderedDict):
+            # FIXME: efficiency?
+            #   Generate both item lists for each compare
+            return (self.items() == other.items())
+        else:
+            return False
+
+    def __lt__(self, other):
+        """
+        >>> d = OrderedDict(((1, 3), (3, 2), (2, 1)))
+        >>> c = OrderedDict(((0, 3), (3, 2), (2, 1)))
+        >>> c < d
+        True
+        >>> d < c
+        False
+        >>> d < dict(c)
+        Traceback (most recent call last):
+        TypeError: Can only compare with other OrderedDicts
+        """
+        if not isinstance(other, OrderedDict):
+            raise TypeError('Can only compare with other OrderedDicts')
+        # FIXME: efficiency?
+        #   Generate both item lists for each compare
+        return (self.items() < other.items())
+
+    def __le__(self, other):
+        """
+        >>> d = OrderedDict(((1, 3), (3, 2), (2, 1)))
+        >>> c = OrderedDict(((0, 3), (3, 2), (2, 1)))
+        >>> e = OrderedDict(d)
+        >>> c <= d
+        True
+        >>> d <= c
+        False
+        >>> d <= dict(c)
+        Traceback (most recent call last):
+        TypeError: Can only compare with other OrderedDicts
+        >>> d <= e
+        True
+        """
+        if not isinstance(other, OrderedDict):
+            raise TypeError('Can only compare with other OrderedDicts')
+        # FIXME: efficiency?
+        #   Generate both item lists for each compare
+        return (self.items() <= other.items())
+
+    def __ne__(self, other):
+        """
+        >>> d = OrderedDict(((1, 3), (3, 2), (2, 1)))
+        >>> d != OrderedDict(d)
+        False
+        >>> d != OrderedDict(((1, 3), (2, 1), (3, 2)))
+        True
+        >>> d != OrderedDict(((1, 0), (3, 2), (2, 1)))
+        True
+        >>> d != OrderedDict(((0, 3), (3, 2), (2, 1)))
+        True
+        >>> d != dict(d)
+        True
+        >>> d != False
+        True
+        """
+        if isinstance(other, OrderedDict):
+            # FIXME: efficiency?
+            #   Generate both item lists for each compare
+            return not (self.items() == other.items())
+        else:
+            return True
+
+    def __gt__(self, other):
+        """
+        >>> d = OrderedDict(((1, 3), (3, 2), (2, 1)))
+        >>> c = OrderedDict(((0, 3), (3, 2), (2, 1)))
+        >>> d > c
+        True
+        >>> c > d
+        False
+        >>> d > dict(c)
+        Traceback (most recent call last):
+        TypeError: Can only compare with other OrderedDicts
+        """
+        if not isinstance(other, OrderedDict):
+            raise TypeError('Can only compare with other OrderedDicts')
+        # FIXME: efficiency?
+        #   Generate both item lists for each compare
+        return (self.items() > other.items())
+
+    def __ge__(self, other):
+        """
+        >>> d = OrderedDict(((1, 3), (3, 2), (2, 1)))
+        >>> c = OrderedDict(((0, 3), (3, 2), (2, 1)))
+        >>> e = OrderedDict(d)
+        >>> c >= d
+        False
+        >>> d >= c
+        True
+        >>> d >= dict(c)
+        Traceback (most recent call last):
+        TypeError: Can only compare with other OrderedDicts
+        >>> e >= d
+        True
+        """
+        if not isinstance(other, OrderedDict):
+            raise TypeError('Can only compare with other OrderedDicts')
+        # FIXME: efficiency?
+        #   Generate both item lists for each compare
+        return (self.items() >= other.items())
+
+    def __repr__(self):
+        """
+        Used for __repr__ and __str__
+        
+        >>> r1 = repr(OrderedDict((('a', 'b'), ('c', 'd'), ('e', 'f'))))
+        >>> r1
+        "OrderedDict([('a', 'b'), ('c', 'd'), ('e', 'f')])"
+        >>> r2 = repr(OrderedDict((('a', 'b'), ('e', 'f'), ('c', 'd'))))
+        >>> r2
+        "OrderedDict([('a', 'b'), ('e', 'f'), ('c', 'd')])"
+        >>> r1 == str(OrderedDict((('a', 'b'), ('c', 'd'), ('e', 'f'))))
+        True
+        >>> r2 == str(OrderedDict((('a', 'b'), ('e', 'f'), ('c', 'd'))))
+        True
+        """
+        return '%s([%s])' % (self.__class__.__name__, ', '.join(
+            ['(%r, %r)' % (key, self[key]) for key in self._sequence]))
+
+    def __setitem__(self, key, val):
+        """
+        Allows slice assignment, so long as the slice is an OrderedDict
+        >>> d = OrderedDict()
+        >>> d['a'] = 'b'
+        >>> d['b'] = 'a'
+        >>> d[3] = 12
+        >>> d
+        OrderedDict([('a', 'b'), ('b', 'a'), (3, 12)])
+        >>> d[:] = OrderedDict(((1, 2), (2, 3), (3, 4)))
+        >>> d
+        OrderedDict([(1, 2), (2, 3), (3, 4)])
+        >>> d[::2] = OrderedDict(((7, 8), (9, 10)))
+        >>> d
+        OrderedDict([(7, 8), (2, 3), (9, 10)])
+        >>> d = OrderedDict(((0, 1), (1, 2), (2, 3), (3, 4)))
+        >>> d[1:3] = OrderedDict(((1, 2), (5, 6), (7, 8)))
+        >>> d
+        OrderedDict([(0, 1), (1, 2), (5, 6), (7, 8), (3, 4)])
+        >>> d = OrderedDict(((0, 1), (1, 2), (2, 3), (3, 4)), strict=True)
+        >>> d[1:3] = OrderedDict(((1, 2), (5, 6), (7, 8)))
+        >>> d
+        OrderedDict([(0, 1), (1, 2), (5, 6), (7, 8), (3, 4)])
+        
+        >>> a = OrderedDict(((0, 1), (1, 2), (2, 3)), strict=True)
+        >>> a[3] = 4
+        >>> a
+        OrderedDict([(0, 1), (1, 2), (2, 3), (3, 4)])
+        >>> a[::1] = OrderedDict([(0, 1), (1, 2), (2, 3), (3, 4)])
+        >>> a
+        OrderedDict([(0, 1), (1, 2), (2, 3), (3, 4)])
+        >>> a[:2] = OrderedDict([(0, 1), (1, 2), (2, 3), (3, 4), (4, 5)])
+        Traceback (most recent call last):
+        ValueError: slice assignment must be from unique keys
+        >>> a = OrderedDict(((0, 1), (1, 2), (2, 3)))
+        >>> a[3] = 4
+        >>> a
+        OrderedDict([(0, 1), (1, 2), (2, 3), (3, 4)])
+        >>> a[::1] = OrderedDict([(0, 1), (1, 2), (2, 3), (3, 4)])
+        >>> a
+        OrderedDict([(0, 1), (1, 2), (2, 3), (3, 4)])
+        >>> a[:2] = OrderedDict([(0, 1), (1, 2), (2, 3), (3, 4)])
+        >>> a
+        OrderedDict([(0, 1), (1, 2), (2, 3), (3, 4)])
+        >>> a[::-1] = OrderedDict([(0, 1), (1, 2), (2, 3), (3, 4)])
+        >>> a
+        OrderedDict([(3, 4), (2, 3), (1, 2), (0, 1)])
+        
+        >>> d = OrderedDict([(0, 1), (1, 2), (2, 3), (3, 4)])
+        >>> d[:1] = 3
+        Traceback (most recent call last):
+        TypeError: slice assignment requires an OrderedDict
+        
+        >>> d = OrderedDict([(0, 1), (1, 2), (2, 3), (3, 4)])
+        >>> d[:1] = OrderedDict([(9, 8)])
+        >>> d
+        OrderedDict([(9, 8), (1, 2), (2, 3), (3, 4)])
+        """
+        if isinstance(key, types.SliceType):
+            if not isinstance(val, OrderedDict):
+                # FIXME: allow a list of tuples?
+                raise TypeError('slice assignment requires an OrderedDict')
+            keys = self._sequence[key]
+            # NOTE: Could use ``range(*key.indices(len(self._sequence)))``
+            indexes = range(len(self._sequence))[key]
+            if key.step is None:
+                # NOTE: new slice may not be the same size as the one being
+                #   overwritten !
+                # NOTE: What is the algorithm for an impossible slice?
+                #   e.g. d[5:3]
+                pos = key.start or 0
+                del self[key]
+                newkeys = val.keys()
+                for k in newkeys:
+                    if k in self:
+                        if self.strict:
+                            raise ValueError('slice assignment must be from '
+                                'unique keys')
+                        else:
+                            # NOTE: This removes duplicate keys *first*
+                            #   so start position might have changed?
+                            del self[k]
+                self._sequence = (self._sequence[:pos] + newkeys +
+                    self._sequence[pos:])
+                dict.update(self, val)
+            else:
+                # extended slice - length of new slice must be the same
+                # as the one being replaced
+                if len(keys) != len(val):
+                    raise ValueError('attempt to assign sequence of size %s '
+                        'to extended slice of size %s' % (len(val), len(keys)))
+                # FIXME: efficiency?
+                del self[key]
+                item_list = zip(indexes, val.items())
+                # smallest indexes first - higher indexes not guaranteed to
+                # exist
+                item_list.sort()
+                for pos, (newkey, newval) in item_list:
+                    if self.strict and newkey in self:
+                        raise ValueError('slice assignment must be from unique'
+                            ' keys')
+                    self.insert(pos, newkey, newval)
+        else:
+            if key not in self:
+                self._sequence.append(key)
+            dict.__setitem__(self, key, val)
+
+    def __getitem__(self, key):
+        """
+        Allows slicing. Returns an OrderedDict if you slice.
+        >>> b = OrderedDict([(7, 0), (6, 1), (5, 2), (4, 3), (3, 4), (2, 5), (1, 6)])
+        >>> b[::-1]
+        OrderedDict([(1, 6), (2, 5), (3, 4), (4, 3), (5, 2), (6, 1), (7, 0)])
+        >>> b[2:5]
+        OrderedDict([(5, 2), (4, 3), (3, 4)])
+        >>> type(b[2:4])
+        <class '__main__.OrderedDict'>
+        """
+        if isinstance(key, types.SliceType):
+            # FIXME: does this raise the error we want?
+            keys = self._sequence[key]
+            # FIXME: efficiency?
+            return OrderedDict([(entry, self[entry]) for entry in keys])
+        else:
+            return dict.__getitem__(self, key)
+
+    __str__ = __repr__
+
+    def __setattr__(self, name, value):
+        """
+        Implemented so that accesses to ``sequence`` raise a warning and are
+        diverted to the new ``setkeys`` method.
+        """
+        if name == 'sequence':
+            warnings.warn('Use of the sequence attribute is deprecated.'
+                ' Use the keys method instead.', DeprecationWarning)
+            # NOTE: doesn't return anything
+            self.setkeys(value)
+        else:
+            # FIXME: do we want to allow arbitrary setting of attributes?
+            #   Or do we want to manage it?
+            object.__setattr__(self, name, value)
+
+    def __getattr__(self, name):
+        """
+        Implemented so that access to ``sequence`` raises a warning.
+        
+        >>> d = OrderedDict()
+        >>> d.sequence
+        []
+        """
+        if name == 'sequence':
+            warnings.warn('Use of the sequence attribute is deprecated.'
+                ' Use the keys method instead.', DeprecationWarning)
+            # NOTE: Still (currently) returns a direct reference. Need to
+            #   because code that uses sequence will expect to be able to
+            #   mutate it in place.
+            return self._sequence
+        else:
+            # raise the appropriate error
+            raise AttributeError("OrderedDict has no '%s' attribute" % name)
+
+    def __deepcopy__(self, memo):
+        """
+        To allow deepcopy to work with OrderedDict.
+        
+        >>> from copy import deepcopy
+        >>> a = OrderedDict([(1, 1), (2, 2), (3, 3)])
+        >>> a['test'] = {}
+        >>> b = deepcopy(a)
+        >>> b == a
+        True
+        >>> b is a
+        False
+        >>> a['test'] is b['test']
+        False
+        """
+        from copy import deepcopy
+        return self.__class__(deepcopy(self.items(), memo), self.strict)
+
+
+### Read-only methods ###
+
+    def copy(self):
+        """
+        >>> OrderedDict(((1, 3), (3, 2), (2, 1))).copy()
+        OrderedDict([(1, 3), (3, 2), (2, 1)])
+        """
+        return OrderedDict(self)
+
+    def items(self):
+        """
+        ``items`` returns a list of tuples representing all the 
+        ``(key, value)`` pairs in the dictionary.
+        
+        >>> d = OrderedDict(((1, 3), (3, 2), (2, 1)))
+        >>> d.items()
+        [(1, 3), (3, 2), (2, 1)]
+        >>> d.clear()
+        >>> d.items()
+        []
+        """
+        return zip(self._sequence, self.values())
+
+    def keys(self):
+        """
+        Return a list of keys in the ``OrderedDict``.
+        
+        >>> d = OrderedDict(((1, 3), (3, 2), (2, 1)))
+        >>> d.keys()
+        [1, 3, 2]
+        """
+        return self._sequence[:]
+
+    def values(self, values=None):
+        """
+        Return a list of all the values in the OrderedDict.
+        
+        The ``values`` argument is accepted but ignored here; to replace the
+        current values, use the ``setvalues`` method.
+        
+        >>> d = OrderedDict(((1, 3), (3, 2), (2, 1)))
+        >>> d.values()
+        [3, 2, 1]
+        """
+        return [self[key] for key in self._sequence]
+
+    def iteritems(self):
+        """
+        >>> ii = OrderedDict(((1, 3), (3, 2), (2, 1))).iteritems()
+        >>> ii.next()
+        (1, 3)
+        >>> ii.next()
+        (3, 2)
+        >>> ii.next()
+        (2, 1)
+        >>> ii.next()
+        Traceback (most recent call last):
+        StopIteration
+        """
+        def make_iter(self=self):
+            keys = self.iterkeys()
+            while True:
+                key = keys.next()
+                yield (key, self[key])
+        return make_iter()
+
+    def iterkeys(self):
+        """
+        >>> ii = OrderedDict(((1, 3), (3, 2), (2, 1))).iterkeys()
+        >>> ii.next()
+        1
+        >>> ii.next()
+        3
+        >>> ii.next()
+        2
+        >>> ii.next()
+        Traceback (most recent call last):
+        StopIteration
+        """
+        return iter(self._sequence)
+
+    __iter__ = iterkeys
+
+    def itervalues(self):
+        """
+        >>> iv = OrderedDict(((1, 3), (3, 2), (2, 1))).itervalues()
+        >>> iv.next()
+        3
+        >>> iv.next()
+        2
+        >>> iv.next()
+        1
+        >>> iv.next()
+        Traceback (most recent call last):
+        StopIteration
+        """
+        def make_iter(self=self):
+            keys = self.iterkeys()
+            while True:
+                yield self[keys.next()]
+        return make_iter()
+
+### Read-write methods ###
+
+    def clear(self):
+        """
+        >>> d = OrderedDict(((1, 3), (3, 2), (2, 1)))
+        >>> d.clear()
+        >>> d
+        OrderedDict([])
+        """
+        dict.clear(self)
+        self._sequence = []
+
+    def pop(self, key, *args):
+        """
+        No dict.pop in Python 2.2, gotta reimplement it
+        
+        >>> d = OrderedDict(((1, 3), (3, 2), (2, 1)))
+        >>> d.pop(3)
+        2
+        >>> d
+        OrderedDict([(1, 3), (2, 1)])
+        >>> d.pop(4)
+        Traceback (most recent call last):
+        KeyError: 4
+        >>> d.pop(4, 0)
+        0
+        >>> d.pop(4, 0, 1)
+        Traceback (most recent call last):
+        TypeError: pop expected at most 2 arguments, got 3
+        """
+        if len(args) > 1:
+            raise TypeError, ('pop expected at most 2 arguments, got %s' %
+                (len(args) + 1))
+        if key in self:
+            val = self[key]
+            del self[key]
+        else:
+            try:
+                val = args[0]
+            except IndexError:
+                raise KeyError(key)
+        return val
+
+    def popitem(self, i=-1):
+        """
+        Delete and return an item specified by index, not a random one as in
+        dict. The index is -1 by default (the last item).
+        
+        >>> d = OrderedDict(((1, 3), (3, 2), (2, 1)))
+        >>> d.popitem()
+        (2, 1)
+        >>> d
+        OrderedDict([(1, 3), (3, 2)])
+        >>> d.popitem(0)
+        (1, 3)
+        >>> OrderedDict().popitem()
+        Traceback (most recent call last):
+        KeyError: 'popitem(): dictionary is empty'
+        >>> d.popitem(2)
+        Traceback (most recent call last):
+        IndexError: popitem(): index 2 not valid
+        """
+        if not self._sequence:
+            raise KeyError('popitem(): dictionary is empty')
+        try:
+            key = self._sequence[i]
+        except IndexError:
+            raise IndexError('popitem(): index %s not valid' % i)
+        return (key, self.pop(key))
+
+    def setdefault(self, key, defval = None):
+        """
+        >>> d = OrderedDict(((1, 3), (3, 2), (2, 1)))
+        >>> d.setdefault(1)
+        3
+        >>> d.setdefault(4) is None
+        True
+        >>> d
+        OrderedDict([(1, 3), (3, 2), (2, 1), (4, None)])
+        >>> d.setdefault(5, 0)
+        0
+        >>> d
+        OrderedDict([(1, 3), (3, 2), (2, 1), (4, None), (5, 0)])
+        """
+        if key in self:
+            return self[key]
+        else:
+            self[key] = defval
+            return defval
+
+    def update(self, from_od):
+        """
+        Update from another OrderedDict or sequence of (key, value) pairs
+        
+        >>> d = OrderedDict(((1, 0), (0, 1)))
+        >>> d.update(OrderedDict(((1, 3), (3, 2), (2, 1))))
+        >>> d
+        OrderedDict([(1, 3), (0, 1), (3, 2), (2, 1)])
+        >>> d.update({4: 4})
+        Traceback (most recent call last):
+        TypeError: undefined order, cannot get items from dict
+        >>> d.update((4, 4))
+        Traceback (most recent call last):
+        TypeError: cannot convert dictionary update sequence element "4" to a 2-item sequence
+        """
+        if isinstance(from_od, OrderedDict):
+            for key, val in from_od.items():
+                self[key] = val
+        elif isinstance(from_od, dict):
+            # we lose compatibility with other ordered dict types this way
+            raise TypeError('undefined order, cannot get items from dict')
+        else:
+            # FIXME: efficiency?
+            # sequence of 2-item sequences, or error
+            for item in from_od:
+                try:
+                    key, val = item
+                except TypeError:
+                    raise TypeError('cannot convert dictionary update'
+                        ' sequence element "%s" to a 2-item sequence' % item)
+                self[key] = val
+
+    def rename(self, old_key, new_key):
+        """
+        Rename the key for a given value, without modifying sequence order.
+        
+        If new_key already exists this raises a ``ValueError``, since it
+        would be ambiguous what should happen to the associated values and
+        to the position of new_key in the sequence.
+        
+        >>> od = OrderedDict()
+        >>> od['a'] = 1
+        >>> od['b'] = 2
+        >>> od.items()
+        [('a', 1), ('b', 2)]
+        >>> od.rename('b', 'c')
+        >>> od.items()
+        [('a', 1), ('c', 2)]
+        >>> od.rename('c', 'a')
+        Traceback (most recent call last):
+        ValueError: New key already exists: 'a'
+        >>> od.rename('d', 'b')
+        Traceback (most recent call last):
+        KeyError: 'd'
+        """
+        if new_key == old_key:
+            # no-op
+            return
+        if new_key in self:
+            raise ValueError("New key already exists: %r" % new_key)
+        # rename sequence entry
+        value = self[old_key] 
+        old_idx = self._sequence.index(old_key)
+        self._sequence[old_idx] = new_key
+        # rename internal dict entry
+        dict.__delitem__(self, old_key)
+        dict.__setitem__(self, new_key, value)
+
+    def setitems(self, items):
+        """
+        This method allows you to set the items in the dict.
+        
+        It takes a list of tuples - of the same sort returned by the ``items``
+        method.
+        
+        >>> d = OrderedDict()
+        >>> d.setitems(((3, 1), (2, 3), (1, 2)))
+        >>> d
+        OrderedDict([(3, 1), (2, 3), (1, 2)])
+        """
+        self.clear()
+        # FIXME: this allows you to pass in an OrderedDict as well :-)
+        self.update(items)
+
+    def setkeys(self, keys):
+        """
+        ``setkeys`` allows you to pass in a new list of keys which will
+        replace the current set. This must contain the same set of keys, but
+        need not be in the same order.
+        
+        If you pass in new keys that don't match, a ``KeyError`` will be
+        raised.
+        
+        >>> d = OrderedDict(((1, 3), (3, 2), (2, 1)))
+        >>> d.keys()
+        [1, 3, 2]
+        >>> d.setkeys((1, 2, 3))
+        >>> d
+        OrderedDict([(1, 3), (2, 1), (3, 2)])
+        >>> d.setkeys(['a', 'b', 'c'])
+        Traceback (most recent call last):
+        KeyError: 'Keylist is not the same as current keylist.'
+        """
+        # FIXME: Efficiency? (use set for Python 2.4 :-)
+        # NOTE: list(keys) rather than keys[:] because keys[:] returns
+        #   a tuple, if keys is a tuple.
+        kcopy = list(keys)
+        kcopy.sort()
+        self._sequence.sort()
+        if kcopy != self._sequence:
+            raise KeyError('Keylist is not the same as current keylist.')
+        # NOTE: This makes the _sequence attribute a new object, instead
+        #       of changing it in place.
+        # FIXME: efficiency?
+        self._sequence = list(keys)
+
+    def setvalues(self, values):
+        """
+        You can pass in a list of values, which will replace the
+        current list. The value list must be the same len as the OrderedDict.
+        
+        (Or a ``ValueError`` is raised.)
+        
+        >>> d = OrderedDict(((1, 3), (3, 2), (2, 1)))
+        >>> d.setvalues((1, 2, 3))
+        >>> d
+        OrderedDict([(1, 1), (3, 2), (2, 3)])
+        >>> d.setvalues([6])
+        Traceback (most recent call last):
+        ValueError: Value list is not the same length as the OrderedDict.
+        """
+        if len(values) != len(self):
+            # FIXME: correct error to raise?
+            raise ValueError('Value list is not the same length as the '
+                'OrderedDict.')
+        self.update(zip(self, values))
+
+### Sequence Methods ###
+
+    def index(self, key):
+        """
+        Return the position of the specified key in the OrderedDict.
+        
+        >>> d = OrderedDict(((1, 3), (3, 2), (2, 1)))
+        >>> d.index(3)
+        1
+        >>> d.index(4)
+        Traceback (most recent call last):
+        ValueError: list.index(x): x not in list
+        """
+        return self._sequence.index(key)
+
+    def insert(self, index, key, value):
+        """
+        Takes ``index``, ``key``, and ``value`` as arguments.
+        
+        Sets ``key`` to ``value``, so that ``key`` is at position ``index`` in
+        the OrderedDict.
+        
+        >>> d = OrderedDict(((1, 3), (3, 2), (2, 1)))
+        >>> d.insert(0, 4, 0)
+        >>> d
+        OrderedDict([(4, 0), (1, 3), (3, 2), (2, 1)])
+        >>> d.insert(0, 2, 1)
+        >>> d
+        OrderedDict([(2, 1), (4, 0), (1, 3), (3, 2)])
+        >>> d.insert(8, 8, 1)
+        >>> d
+        OrderedDict([(2, 1), (4, 0), (1, 3), (3, 2), (8, 1)])
+        """
+        if key in self:
+            # FIXME: efficiency?
+            del self[key]
+        self._sequence.insert(index, key)
+        dict.__setitem__(self, key, value)
+
+    def reverse(self):
+        """
+        Reverse the order of the OrderedDict.
+        
+        Only the key order (``self._sequence``) is reversed, in place; the
+        stored values are not touched.
+        
+        >>> d = OrderedDict(((1, 3), (3, 2), (2, 1)))
+        >>> d.reverse()
+        >>> d
+        OrderedDict([(2, 1), (3, 2), (1, 3)])
+        """
+        self._sequence.reverse()
+
+    def sort(self, *args, **kwargs):
+        """
+        Sort the key order in the OrderedDict.
+        
+        This method takes the same arguments as the ``list.sort`` method on
+        your version of Python.
+        
+        All arguments are forwarded unchanged to ``self._sequence.sort``;
+        the keys are sorted in place and the values stay attached to them.
+        
+        >>> d = OrderedDict(((4, 1), (2, 2), (3, 3), (1, 4)))
+        >>> d.sort()
+        >>> d
+        OrderedDict([(1, 4), (2, 2), (3, 3), (4, 1)])
+        """
+        self._sequence.sort(*args, **kwargs)
+
+class Keys(object):
+    # FIXME: should this object be a subclass of list?
+    """
+    Custom object for accessing the keys of an OrderedDict.
+    
+    Can be called like the normal ``OrderedDict.keys`` method, but also
+    supports indexing and sequence methods.
+    
+    The object keeps a reference to the owning dictionary (``main``) and
+    reads its ``_sequence`` list on every access, so it always reflects the
+    dictionary's current key order.  Keys can be re-ordered through this
+    object, but the methods that would add or remove keys raise
+    ``TypeError``.
+    """
+
+    def __init__(self, main):
+        """Remember ``main``, the dictionary whose keys are exposed."""
+        self._main = main
+
+    def __call__(self):
+        """Pretend to be the keys method."""
+        # ``_keys`` is the original bound ``keys`` method that
+        # SequenceOrderedDict.__init__ saves before installing this object
+        return self._main._keys()
+
+    def __getitem__(self, index):
+        """Fetch the key at position i."""
+        # NOTE: this automatically supports slicing :-)
+        return self._main._sequence[index]
+
+    def __setitem__(self, index, name):
+        """
+        You cannot assign to keys, but you can do slice assignment to re-order
+        them.
+        
+        You can only do slice assignment if the new set of keys is a reordering
+        of the original set.
+        
+        Raises ``ValueError`` for a non-slice index or a length mismatch, and
+        ``KeyError`` when the new keys differ from the keys being replaced.
+        """
+        if isinstance(index, types.SliceType):
+            # FIXME: efficiency?
+            # check length is the same
+            # (``indexes`` are the positions selected by the slice)
+            indexes = range(len(self._main._sequence))[index]
+            if len(indexes) != len(name):
+                raise ValueError('attempt to assign sequence of size %s '
+                    'to slice of size %s' % (len(name), len(indexes)))
+            # check they are the same keys
+            # FIXME: Use set
+            old_keys = self._main._sequence[index]
+            new_keys = list(name)
+            old_keys.sort()
+            new_keys.sort()
+            if old_keys != new_keys:
+                raise KeyError('Keylist is not the same as current keylist.')
+            # capture the values, in the new key order, before the entries
+            # are removed from the dictionary
+            orig_vals = [self._main[k] for k in name]
+            # NOTE(review): relies on the dictionary's __delitem__ accepting
+            # a slice (defined outside this class) - confirm
+            del self._main[index]
+            vals = zip(indexes, name, orig_vals)
+            # re-insert in ascending position order
+            vals.sort()
+            for i, k, v in vals:
+                if self._main.strict and k in self._main:
+                    raise ValueError('slice assignment must be from '
+                        'unique keys')
+                self._main.insert(i, k, v)
+        else:
+            raise ValueError('Cannot assign to keys')
+
+    ### following methods pinched from UserList and adapted ###
+    def __repr__(self): return repr(self._main._sequence)
+
+    # FIXME: do we need to check if we are comparing with another ``Keys``
+    #   object? (like the __cast method of UserList)
+    # The comparisons below compare the underlying key list with ``other``.
+    def __lt__(self, other): return self._main._sequence <  other
+    def __le__(self, other): return self._main._sequence <= other
+    def __eq__(self, other): return self._main._sequence == other
+    def __ne__(self, other): return self._main._sequence != other
+    def __gt__(self, other): return self._main._sequence >  other
+    def __ge__(self, other): return self._main._sequence >= other
+    # FIXME: do we need __cmp__ as well as rich comparisons?
+    def __cmp__(self, other): return cmp(self._main._sequence, other)
+
+    def __contains__(self, item): return item in self._main._sequence
+    def __len__(self): return len(self._main._sequence)
+    def __iter__(self): return self._main.iterkeys()
+    def count(self, item): return self._main._sequence.count(item)
+    def index(self, item, *args): return self._main._sequence.index(item, *args)
+    # reverse() and sort() re-order the dictionary's key list in place
+    def reverse(self): self._main._sequence.reverse()
+    def sort(self, *args, **kwds): self._main._sequence.sort(*args, **kwds)
+    # the arithmetic operators return plain lists, not Keys objects
+    def __mul__(self, n): return self._main._sequence*n
+    __rmul__ = __mul__
+    def __add__(self, other): return self._main._sequence + other
+    def __radd__(self, other): return other + self._main._sequence
+
+    ## following methods not implemented for keys ##
+    def __delitem__(self, i): raise TypeError('Can\'t delete items from keys')
+    def __iadd__(self, other): raise TypeError('Can\'t add in place to keys')
+    def __imul__(self, n): raise TypeError('Can\'t multiply keys in place')
+    def append(self, item): raise TypeError('Can\'t append items to keys')
+    def insert(self, i, item): raise TypeError('Can\'t insert items into keys')
+    def pop(self, i=-1): raise TypeError('Can\'t pop items from keys')
+    def remove(self, item): raise TypeError('Can\'t remove items from keys')
+    def extend(self, other): raise TypeError('Can\'t extend keys')
+
+class Items(object):
+    """
+    Custom object for accessing the items of an OrderedDict.
+    
+    Can be called like the normal ``OrderedDict.items`` method, but also
+    supports indexing and sequence methods.
+    """
+
+    def __init__(self, main):
+        self._main = main
+
+    def __call__(self):
+        """Pretend to be the items method."""
+        return self._main._items()
+
+    def __getitem__(self, index):
+        """Fetch the item at position i."""
+        if isinstance(index, types.SliceType):
+            # fetching a slice returns an OrderedDict
+            return self._main[index].items()
+        key = self._main._sequence[index]
+        return (key, self._main[key])
+
+    def __setitem__(self, index, item):
+        """Set item at position i to item."""
+        if isinstance(index, types.SliceType):
+            # NOTE: item must be an iterable (list of tuples)
+            self._main[index] = OrderedDict(item)
+        else:
+            # FIXME: Does this raise a sensible error?
+            orig = self._main.keys[index]
+            key, value = item
+            if self._main.strict and key in self and (key != orig):
+                raise ValueError('slice assignment must be from '
+                        'unique keys')
+            # delete the current one
+            del self._main[self._main._sequence[index]]
+            self._main.insert(index, key, value)
+
+    def __delitem__(self, i):
+        """Delete the item at position i."""
+        key = self._main._sequence[i]
+        if isinstance(i, types.SliceType):
+            for k in key:
+                # FIXME: efficiency?
+                del self._main[k]
+        else:
+            del self._main[key]
+
+    ### following methods pinched from UserList and adapted ###
+    def __repr__(self): return repr(self._main.items())
+
+    # FIXME: do we need to check if we are comparing with another ``Items``
+    #   object? (like the __cast method of UserList)
+    def __lt__(self, other): return self._main.items() <  other
+    def __le__(self, other): return self._main.items() <= other
+    def __eq__(self, other): return self._main.items() == other
+    def __ne__(self, other): return self._main.items() != other
+    def __gt__(self, other): return self._main.items() >  other
+    def __ge__(self, other): return self._main.items() >= other
+    def __cmp__(self, other): return cmp(self._main.items(), other)
+
+    def __contains__(self, item): return item in self._main.items()
+    def __len__(self): return len(self._main._sequence) # easier :-)
+    def __iter__(self): return self._main.iteritems()
+    def count(self, item): return self._main.items().count(item)
+    def index(self, item, *args): return self._main.items().index(item, *args)
+    def reverse(self): self._main.reverse()
+    def sort(self, *args, **kwds): self._main.sort(*args, **kwds)
+    def __mul__(self, n): return self._main.items()*n
+    __rmul__ = __mul__
+    def __add__(self, other): return self._main.items() + other
+    def __radd__(self, other): return other + self._main.items()
+
+    def append(self, item):
+        """Add an item to the end."""
+        # FIXME: this is only append if the key isn't already present
+        key, value = item
+        self._main[key] = value
+
+    def insert(self, i, item):
+        key, value = item
+        self._main.insert(i, key, value)
+
+    def pop(self, i=-1):
+        key = self._main._sequence[i]
+        return (key, self._main.pop(key))
+
+    def remove(self, item):
+        key, value = item
+        try:
+            assert value == self._main[key]
+        except (KeyError, AssertionError):
+            raise ValueError('ValueError: list.remove(x): x not in list')
+        else:
+            del self._main[key]
+
+    def extend(self, other):
+        # FIXME: is only a true extend if none of the keys already present
+        for item in other:
+            key, value = item
+            self._main[key] = value
+
+    def __iadd__(self, other):
+        self.extend(other)
+
+    ## following methods not implemented for items ##
+
+    def __imul__(self, n): raise TypeError('Can\'t multiply items in place')
+
+class Values(object):
+    """
+    Custom object for accessing the values of an OrderedDict.
+    
+    Can be called like the normal ``OrderedDict.values`` method, but also
+    supports indexing and sequence methods.
+    """
+
+    def __init__(self, main):
+        self._main = main
+
+    def __call__(self):
+        """Pretend to be the values method."""
+        return self._main._values()
+
+    def __getitem__(self, index):
+        """Fetch the value at position i."""
+        if isinstance(index, types.SliceType):
+            return [self._main[key] for key in self._main._sequence[index]]
+        else:
+            return self._main[self._main._sequence[index]]
+
+    def __setitem__(self, index, value):
+        """
+        Set the value at position i to value.
+        
+        You can only do slice assignment to values if you supply a sequence of
+        equal length to the slice you are replacing.
+        """
+        if isinstance(index, types.SliceType):
+            keys = self._main._sequence[index]
+            if len(keys) != len(value):
+                raise ValueError('attempt to assign sequence of size %s '
+                    'to slice of size %s' % (len(name), len(keys)))
+            # FIXME: efficiency?  Would be better to calculate the indexes
+            #   directly from the slice object
+            # NOTE: the new keys can collide with existing keys (or even
+            #   contain duplicates) - these will overwrite
+            for key, val in zip(keys, value):
+                self._main[key] = val
+        else:
+            self._main[self._main._sequence[index]] = value
+
+    ### following methods pinched from UserList and adapted ###
+    def __repr__(self): return repr(self._main.values())
+
+    # FIXME: do we need to check if we are comparing with another ``Values``
+    #   object? (like the __cast method of UserList)
+    def __lt__(self, other): return self._main.values() <  other
+    def __le__(self, other): return self._main.values() <= other
+    def __eq__(self, other): return self._main.values() == other
+    def __ne__(self, other): return self._main.values() != other
+    def __gt__(self, other): return self._main.values() >  other
+    def __ge__(self, other): return self._main.values() >= other
+    def __cmp__(self, other): return cmp(self._main.values(), other)
+
+    def __contains__(self, item): return item in self._main.values()
+    def __len__(self): return len(self._main._sequence) # easier :-)
+    def __iter__(self): return self._main.itervalues()
+    def count(self, item): return self._main.values().count(item)
+    def index(self, item, *args): return self._main.values().index(item, *args)
+
+    def reverse(self):
+        """Reverse the values"""
+        vals = self._main.values()
+        vals.reverse()
+        # FIXME: efficiency
+        self[:] = vals
+
+    def sort(self, *args, **kwds):
+        """Sort the values."""
+        vals = self._main.values()
+        vals.sort(*args, **kwds)
+        self[:] = vals
+
+    def __mul__(self, n): return self._main.values()*n
+    __rmul__ = __mul__
+    def __add__(self, other): return self._main.values() + other
+    def __radd__(self, other): return other + self._main.values()
+
+    ## following methods not implemented for values ##
+    def __delitem__(self, i): raise TypeError('Can\'t delete items from values')
+    def __iadd__(self, other): raise TypeError('Can\'t add in place to values')
+    def __imul__(self, n): raise TypeError('Can\'t multiply values in place')
+    def append(self, item): raise TypeError('Can\'t append items to values')
+    def insert(self, i, item): raise TypeError('Can\'t insert items into values')
+    def pop(self, i=-1): raise TypeError('Can\'t pop items from values')
+    def remove(self, item): raise TypeError('Can\'t remove items from values')
+    def extend(self, other): raise TypeError('Can\'t extend values')
+
+class SequenceOrderedDict(OrderedDict):
+    """
+    Experimental version of OrderedDict that has a custom object for ``keys``,
+    ``values``, and ``items``.
+    
+    These are callable sequence objects that work as methods, or can be
+    manipulated directly as sequences.
+    
+    Test for ``keys``, ``items`` and ``values``.
+    
+    >>> d = SequenceOrderedDict(((1, 2), (2, 3), (3, 4)))
+    >>> d
+    SequenceOrderedDict([(1, 2), (2, 3), (3, 4)])
+    >>> d.keys
+    [1, 2, 3]
+    >>> d.keys()
+    [1, 2, 3]
+    >>> d.setkeys((3, 2, 1))
+    >>> d
+    SequenceOrderedDict([(3, 4), (2, 3), (1, 2)])
+    >>> d.setkeys((1, 2, 3))
+    >>> d.keys[0]
+    1
+    >>> d.keys[:]
+    [1, 2, 3]
+    >>> d.keys[-1]
+    3
+    >>> d.keys[-2]
+    2
+    >>> d.keys[0:2] = [2, 1]
+    >>> d
+    SequenceOrderedDict([(2, 3), (1, 2), (3, 4)])
+    >>> d.keys.reverse()
+    >>> d.keys
+    [3, 1, 2]
+    >>> d.keys = [1, 2, 3]
+    >>> d
+    SequenceOrderedDict([(1, 2), (2, 3), (3, 4)])
+    >>> d.keys = [3, 1, 2]
+    >>> d
+    SequenceOrderedDict([(3, 4), (1, 2), (2, 3)])
+    >>> a = SequenceOrderedDict()
+    >>> b = SequenceOrderedDict()
+    >>> a.keys == b.keys
+    1
+    >>> a['a'] = 3
+    >>> a.keys == b.keys
+    0
+    >>> b['a'] = 3
+    >>> a.keys == b.keys
+    1
+    >>> b['b'] = 3
+    >>> a.keys == b.keys
+    0
+    >>> a.keys > b.keys
+    0
+    >>> a.keys < b.keys
+    1
+    >>> 'a' in a.keys
+    1
+    >>> len(b.keys)
+    2
+    >>> 'c' in d.keys
+    0
+    >>> 1 in d.keys
+    1
+    >>> [v for v in d.keys]
+    [3, 1, 2]
+    >>> d.keys.sort()
+    >>> d.keys
+    [1, 2, 3]
+    >>> d = SequenceOrderedDict(((1, 2), (2, 3), (3, 4)), strict=True)
+    >>> d.keys[::-1] = [1, 2, 3]
+    >>> d
+    SequenceOrderedDict([(3, 4), (2, 3), (1, 2)])
+    >>> d.keys[:2]
+    [3, 2]
+    >>> d.keys[:2] = [1, 3]
+    Traceback (most recent call last):
+    KeyError: 'Keylist is not the same as current keylist.'
+
+    >>> d = SequenceOrderedDict(((1, 2), (2, 3), (3, 4)))
+    >>> d
+    SequenceOrderedDict([(1, 2), (2, 3), (3, 4)])
+    >>> d.values
+    [2, 3, 4]
+    >>> d.values()
+    [2, 3, 4]
+    >>> d.setvalues((4, 3, 2))
+    >>> d
+    SequenceOrderedDict([(1, 4), (2, 3), (3, 2)])
+    >>> d.values[::-1]
+    [2, 3, 4]
+    >>> d.values[0]
+    4
+    >>> d.values[-2]
+    3
+    >>> del d.values[0]
+    Traceback (most recent call last):
+    TypeError: Can't delete items from values
+    >>> d.values[::2] = [2, 4]
+    >>> d
+    SequenceOrderedDict([(1, 2), (2, 3), (3, 4)])
+    >>> 7 in d.values
+    0
+    >>> len(d.values)
+    3
+    >>> [val for val in d.values]
+    [2, 3, 4]
+    >>> d.values[-1] = 2
+    >>> d.values.count(2)
+    2
+    >>> d.values.index(2)
+    0
+    >>> d.values[-1] = 7
+    >>> d.values
+    [2, 3, 7]
+    >>> d.values.reverse()
+    >>> d.values
+    [7, 3, 2]
+    >>> d.values.sort()
+    >>> d.values
+    [2, 3, 7]
+    >>> d.values.append('anything')
+    Traceback (most recent call last):
+    TypeError: Can't append items to values
+    >>> d.values = (1, 2, 3)
+    >>> d
+    SequenceOrderedDict([(1, 1), (2, 2), (3, 3)])
+    
+    >>> d = SequenceOrderedDict(((1, 2), (2, 3), (3, 4)))
+    >>> d
+    SequenceOrderedDict([(1, 2), (2, 3), (3, 4)])
+    >>> d.items()
+    [(1, 2), (2, 3), (3, 4)]
+    >>> d.setitems([(3, 4), (2 ,3), (1, 2)])
+    >>> d
+    SequenceOrderedDict([(3, 4), (2, 3), (1, 2)])
+    >>> d.items[0]
+    (3, 4)
+    >>> d.items[:-1]
+    [(3, 4), (2, 3)]
+    >>> d.items[1] = (6, 3)
+    >>> d.items
+    [(3, 4), (6, 3), (1, 2)]
+    >>> d.items[1:2] = [(9, 9)]
+    >>> d
+    SequenceOrderedDict([(3, 4), (9, 9), (1, 2)])
+    >>> del d.items[1:2]
+    >>> d
+    SequenceOrderedDict([(3, 4), (1, 2)])
+    >>> (3, 4) in d.items
+    1
+    >>> (4, 3) in d.items
+    0
+    >>> len(d.items)
+    2
+    >>> [v for v in d.items]
+    [(3, 4), (1, 2)]
+    >>> d.items.count((3, 4))
+    1
+    >>> d.items.index((1, 2))
+    1
+    >>> d.items.index((2, 1))
+    Traceback (most recent call last):
+    ValueError: list.index(x): x not in list
+    >>> d.items.reverse()
+    >>> d.items
+    [(1, 2), (3, 4)]
+    >>> d.items.reverse()
+    >>> d.items.sort()
+    >>> d.items
+    [(1, 2), (3, 4)]
+    >>> d.items.append((5, 6))
+    >>> d.items
+    [(1, 2), (3, 4), (5, 6)]
+    >>> d.items.insert(0, (0, 0))
+    >>> d.items
+    [(0, 0), (1, 2), (3, 4), (5, 6)]
+    >>> d.items.insert(-1, (7, 8))
+    >>> d.items
+    [(0, 0), (1, 2), (3, 4), (7, 8), (5, 6)]
+    >>> d.items.pop()
+    (5, 6)
+    >>> d.items
+    [(0, 0), (1, 2), (3, 4), (7, 8)]
+    >>> d.items.remove((1, 2))
+    >>> d.items
+    [(0, 0), (3, 4), (7, 8)]
+    >>> d.items.extend([(1, 2), (5, 6)])
+    >>> d.items
+    [(0, 0), (3, 4), (7, 8), (1, 2), (5, 6)]
+    """
+
+    def __init__(self, init_val=(), strict=True):
+        """
+        Build the dictionary, then replace ``keys``, ``values`` and
+        ``items`` with callable sequence objects.
+        
+        ``init_val`` and ``strict`` are passed straight through to
+        ``OrderedDict.__init__``.
+        """
+        OrderedDict.__init__(self, init_val, strict=strict)
+        # keep the original bound methods; the Keys/Values/Items objects
+        # call them from their ``__call__``
+        self._keys = self.keys
+        self._values = self.values
+        self._items = self.items
+        self.keys = Keys(self)
+        self.values = Values(self)
+        self.items = Items(self)
+        # must be assigned last: once ``_att_dict`` is present,
+        # ``__setattr__`` routes assignments to these names to the setters
+        self._att_dict = {
+            'keys': self.setkeys,
+            'items': self.setitems,
+            'values': self.setvalues,
+        }
+
+    def __setattr__(self, name, value):
+        """Protect keys, items, and values."""
+        if not '_att_dict' in self.__dict__:
+            object.__setattr__(self, name, value)
+        else:
+            try:
+                fun = self._att_dict[name]
+            except KeyError:
+                OrderedDict.__setattr__(self, name, value)
+            else:
+                fun(value)
+
+# When run as a script, execute this module's doctests.
+if __name__ == '__main__':
+    if INTP_VER < (2, 3):
+        raise RuntimeError("Tests require Python v.2.3 or later")
+    # turn off warnings for tests
+    warnings.filterwarnings('ignore')
+    # run the code tests in doctest format
+    import doctest
+    m = sys.modules.get('__main__')
+    # give the doctests a copy of the module namespace, with INTP_VER
+    # set explicitly
+    globs = m.__dict__.copy()
+    globs.update({
+        'INTP_VER': INTP_VER,
+    })
+    doctest.testmod(m, globs=globs)
+

Modified: hadoop/avro/trunk/src/doc/content/xdocs/spec.xml
URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/src/doc/content/xdocs/spec.xml?rev=776496&r1=776495&r2=776496&view=diff
==============================================================================
--- hadoop/avro/trunk/src/doc/content/xdocs/spec.xml (original)
+++ hadoop/avro/trunk/src/doc/content/xdocs/spec.xml Tue May 19 23:35:39 2009
@@ -82,11 +82,11 @@
 
       </section>
 
-      <section id="schema_compound">
-        <title>Compound Types</title>
+      <section id="schema_complex">
+        <title>Complex Types</title>
         
-        <p>Avro supports three kinds of compound types: records,
-        arrays and unions.</p>
+        <p>Avro supports five kinds of complex types: records, enums,
+        arrays, maps and unions.</p>
 
         <section>
           <title>Records</title>
@@ -94,7 +94,7 @@
 	  <p>Records use the type name "record" and support two attributes:</p>
 	  <ul>
 	    <li><code>name</code>: a JSON string providing the name
-	    of the record (optional).</li>
+	    of the record (required).</li>
 	    <li><code>fields</code>: a JSON array, listing fields (required).
 	    Each field is a JSON object with the following attributes:
 	      <ul>
@@ -119,6 +119,7 @@
 		    <tr><td>boolean</td><td>boolean</td><td>true</td></tr>
 		    <tr><td>null</td><td>null</td><td>null</td></tr>
 		    <tr><td>record</td><td>object</td><td>{"a": 1}</td></tr>
+		    <tr><td>enum</td><td>string</td><td>"FOO"</td></tr>
 		    <tr><td>array</td><td>array</td><td>[1]</td></tr>
 		    <tr><td>map</td><td>object</td><td>{"a": 1}</td></tr>
 		  </table>
@@ -141,6 +142,26 @@
 	</section>
         
         <section>
+          <title>Enums</title>
+          
+	  <p>Enums use the type name "enum" and support the following
+	  attributes:</p>
+	  <ul>
+	    <li><code>name</code>: a JSON string providing the name
+	    of the enum (required).</li>
+	    <li><code>symbols</code>: a JSON array, listing symbols,
+	    as JSON strings (required).</li>
+	  </ul>
+	  <p>For example, playing card suits might be defined with:</p>
+	  <source>
+{ "type": "enum",
+  "name": "Suit",
+  "symbols" : ["SPADES", "HEARTS", "DIAMONDS", "CLUBS"]
+}
+	  </source>
+	</section>
+        
+        <section>
           <title>Arrays</title>
           <p>Arrays use the type name <code>"array"</code> and support
           a single attribute:</p>
@@ -178,11 +199,11 @@
 	  reading and writing unions.)</p>
 	  <p>Unions may not immediately contain other unions.</p>
         </section>
-      </section> <!-- end compound types -->
+      </section> <!-- end complex types -->
 
       <section>
 	<title>Identifiers</title>
-        <p>Record and field names must:</p>
+        <p>Record, field and enum names must:</p>
 	<ul>
           <li>start with <code>[A-Za-z_]</code></li>
           <li>subsequently contain only <code>[A-Za-z0-9_]</code></li>
@@ -252,9 +273,9 @@
       </section>
 
 
-      <section id="serialize_compound">
-        <title>Compound Type Serialization</title>
-        <p>Compound types are serialized as follows:</p>
+      <section id="serialize_complex">
+        <title>Complex Type Serialization</title>
+        <p>Complex types are serialized as follows:</p>
 
         <section>
           <title>Records</title>
@@ -284,6 +305,20 @@
 	</section>
         
         <section>
+          <title>Enums</title>
+          <p>An enum is serialized by an <code>int</code>, representing
+          the zero-based position of the symbol in the schema.</p>
+	  <p>For example, consider the enum:</p>
+	  <source>
+{"type": "enum", "name": "Foo", "symbols": ["A", "B", "C", "D"] }
+	  </source>
+	  <p>This would be serialized by an <code>int</code> between
+	  zero and three, with zero indicating "A", and three indicating
+	  "D".</p>
+	</section>
+
+
+        <section>
           <title>Arrays</title>
           <p>Arrays are serialized as a series of <em>blocks</em>.
           Each block consists of a <code>long</code> <em>count</em>
@@ -363,7 +398,7 @@
 	      <source>00 02 61</source></li>
           </ul>
         </section>
-      </section> <!-- end compound types -->
+      </section> <!-- end complex types -->
 
     </section>
 
@@ -446,11 +481,11 @@
       <ul>
 	<li><em>name</em>, string, to distinguish it from other protocols;</li>
 	<li><em>namespace</em>, a string which qualifies the name;</li>
-	<li><em>types</em>, a list of record and error definitions.
-	  An error definition is just like a record definition except
-	  it uses "error" instead of "record".  Note that forward
-	  references to records and errors are not currently
-	  supported.</li>
+	<li><em>types</em>, a list of record, enum and error
+	  definitions.  An error definition is just like a record
+	  definition except it uses "error" instead of "record".  Note
+	  that forward references to records, enums and errors are not
+	  currently supported.</li>
 	<li><em>messages</em>, a JSON object whose keys are message
 	  names and whose values are objects whose attributes are
 	  described below.  No two messages may have the same name.</li>
@@ -667,6 +702,7 @@
 	  <ul>
 	    <li>both schemas are arrays whose item types match</li>
 	    <li>both schemas are maps whose value types match</li>
+	    <li>both schemas are enums whose names match</li>
 	    <li>both schemas are records with the same name</li>
 	    <li>either schema is a union</li>
 	    <li>both schemas have same primitive type</li>
@@ -695,6 +731,11 @@
 	  list order may also vary.</p>
 	</li>
 
+	<li><strong>if both are enums:</strong>
+	  <p>if the writer's symbol is not present in the reader's
+	    enum, then the enum value is unset.</p>
+	</li>
+
 	<li><strong>if both are arrays:</strong>
 	  <p>This resolution algorithm is applied recursively to the reader's and
 	    writer's array item schemas.</p>

Modified: hadoop/avro/trunk/src/java/org/apache/avro/Protocol.java
URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/src/java/org/apache/avro/Protocol.java?rev=776496&r1=776495&r2=776496&view=diff
==============================================================================
--- hadoop/avro/trunk/src/java/org/apache/avro/Protocol.java (original)
+++ hadoop/avro/trunk/src/java/org/apache/avro/Protocol.java Tue May 19 23:35:39 2009
@@ -151,7 +151,7 @@
   public String getNamespace() { return namespace; }
 
   /** The types of this protocol. */
-  public Map<String,Schema> getTypes() { return types; }
+  public LinkedHashMap<String,Schema> getTypes() { return types; }
 
   /** The messages of this protocol. */
   public Map<String,Message> getMessages() { return messages; }

Modified: hadoop/avro/trunk/src/java/org/apache/avro/Schema.java
URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/src/java/org/apache/avro/Schema.java?rev=776496&r1=776495&r2=776496&view=diff
==============================================================================
--- hadoop/avro/trunk/src/java/org/apache/avro/Schema.java (original)
+++ hadoop/avro/trunk/src/java/org/apache/avro/Schema.java Tue May 19 23:35:39 2009
@@ -24,6 +24,7 @@
 import java.io.StringReader;
 import java.util.ArrayList;
 import java.util.Iterator;
+import java.util.HashMap;
 import java.util.LinkedHashMap;
 import java.util.List;
 import java.util.Map;
@@ -37,7 +38,8 @@
 /** An abstract data type.
  * <p>A schema may be one of:
  * <ul>
- * <li>An <i>record</i>, mapping field names to field value data;
+ * <li>A <i>record</i>, mapping field names to field value data;
+ * <li>An <i>enum</i>, containing one of a small set of symbols;
  * <li>An <i>array</i> of values, all of the same schema;
  * <li>A <i>map</i>, containing string/value pairs, of a declared schema;
  * <li>A <i>union</i> of other schemas;
@@ -61,7 +63,7 @@
 
   /** The type of a schema. */
   public enum Type
-  { RECORD, ARRAY, MAP, UNION, STRING, BYTES,
+  { RECORD, ENUM, ARRAY, MAP, UNION, STRING, BYTES,
       INT, LONG, FLOAT, DOUBLE, BOOLEAN, NULL };
 
   private final Type type;
@@ -96,6 +98,12 @@
      return new RecordSchema(name, namespace, isError);
   }
 
+  /** Create an enum schema.
+   * @param name the name of the enum
+   * @param namespace the namespace of the enum
+   * @param values the enum's symbols; a symbol's position in this list
+   *        becomes its ordinal
+   */
+  public static Schema createEnum(String name, String namespace,
+                                  List<String> values) {
+    return new EnumSchema(name, namespace, values);
+  }
+
   /** Create an array schema. */
   public static Schema createArray(Schema elementType) {
     return new ArraySchema(elementType);
@@ -129,14 +137,25 @@
     throw new AvroRuntimeException("Not a record: "+this);
   }
 
-  /** If this is a record, returns its name, if any. */
+  /** If this is an enum, return its symbols.
+   * This base implementation always throws {@link AvroRuntimeException};
+   * the enum schema class overrides it. */
+  public List<String> getEnumSymbols() {
+    throw new AvroRuntimeException("Not an enum: "+this);
+  }    
+
+  /** If this is an enum, return a symbol's ordinal value.
+   * This base implementation always throws {@link AvroRuntimeException};
+   * the enum schema class overrides it. */
+  public int getEnumOrdinal(String symbol) {
+    throw new AvroRuntimeException("Not an enum: "+this);
+  }    
+
+
+  /** If this is a record or enum, returns its name, if any. */
   public String getName() {
-    throw new AvroRuntimeException("Not a record: "+this);
+    throw new AvroRuntimeException("Not a record or enum: "+this);
   }
 
-  /** If this is a record, returns its namespace, if any. */
+  /** If this is a record or enum, returns its namespace, if any. */
   public String getNamespace() {
-    throw new AvroRuntimeException("Not a record: "+this);
+    throw new AvroRuntimeException("Not a record or enum: "+this);
   }
 
   /** Returns true if this record is an error type. */
@@ -283,6 +302,44 @@
     }
   }
 
+  /** Schema for an enum: a named schema holding an ordered list of symbols,
+   * plus a symbol-to-position index built once in the constructor. */
+  private static class EnumSchema extends NamedSchema {
+    // symbols in declaration order; the caller's list is stored as-is,
+    // not copied
+    private final List<String> symbols;
+    // maps each symbol to its zero-based position in 'symbols'
+    private final Map<String,Integer> ordinals;
+    public EnumSchema(String name, String space, List<String> symbols) {
+      super(Type.ENUM, name, space);
+      this.symbols = symbols;
+      this.ordinals = new HashMap<String,Integer>();
+      int i = 0;
+      // a repeated symbol overwrites the earlier entry, so it maps to its
+      // last position
+      for (String symbol : symbols)
+        ordinals.put(symbol, i++);
+    }
+    public List<String> getEnumSymbols() { return symbols; }
+    // NOTE(review): for a symbol that is not in this enum, ordinals.get()
+    // returns null and unboxing it to int throws NullPointerException -
+    // confirm whether callers rely on that before changing it.
+    public int getEnumOrdinal(String symbol) { return ordinals.get(symbol); }
+    /** Two enum schemas are equal when equalNames() holds and their symbol
+     * lists are equal (same symbols, same order). */
+    public boolean equals(Object o) {
+      if (o == this) return true;
+      if (!(o instanceof EnumSchema)) return false;
+      EnumSchema that = (EnumSchema)o;
+      return equalNames(that) && symbols.equals(that.symbols);
+    }
+    public int hashCode() { return super.hashCode() + symbols.hashCode(); }
+    /** Render this schema as JSON text.  If an equal schema is already
+     * registered in 'names' under this name, only the quoted name is
+     * emitted; otherwise a named schema registers itself and the full
+     * definition is written. */
+    public String toString(Names names) {
+      if (this.equals(names.get(name))) return "\""+name+"\"";
+      else if (name != null) names.put(name, this);
+      StringBuilder buffer = new StringBuilder();
+      // NOTE(review): name and symbols are written without JSON escaping
+      buffer.append("{\"type\": \"enum\", "
+                    +"\"name\": \""+name+"\", "
+                    +"\"symbols\": [");
+      int count = 0;
+      for (String symbol : symbols) {
+        buffer.append("\""+symbol+"\"");
+        // separator after every symbol except the last
+        if (++count < symbols.size())
+          buffer.append(", ");
+      }
+      buffer.append("]}");
+      return buffer.toString();
+    }
+  }
+
   private static class ArraySchema extends Schema {
     private final Schema elementType;
     public ArraySchema(Schema elementType) {
@@ -336,14 +393,18 @@
       this.types = types;
       int seen = 0;
       for (Schema type : types) {                 // check legality of union
-        if (type.getType() == Type.UNION)
+        switch (type.getType()) {
+        case UNION: 
           throw new AvroRuntimeException("Nested union: "+this);
-        int mask = 1 << type.getType().ordinal();
-        if (type.getType() == Type.RECORD && type.getName() != null)
-          continue;
-        if ((seen & mask) != 0)
-          throw new AvroRuntimeException("Ambiguous union: "+this);
-        seen |= mask;
+        case RECORD:
+          if (type.getName() != null)
+            continue;
+        default:
+          int mask = 1 << type.getType().ordinal();
+          if ((seen & mask) != 0)
+            throw new AvroRuntimeException("Ambiguous union: "+this);
+          seen |= mask;
+        }
       }
     }
     public List<Schema> getTypes() { return types; }
@@ -509,6 +570,20 @@
         }
         result.setFields(fields);
         return result;
+      } else if (type.equals("enum")) {           // enum
+        JsonNode nameNode = schema.getFieldValue("name");
+        String name = nameNode != null ? nameNode.getTextValue() : null;
+        JsonNode spaceNode = schema.getFieldValue("namespace");
+        String space = spaceNode!=null?spaceNode.getTextValue():names.space();
+        JsonNode symbolsNode = schema.getFieldValue("symbols");
+        if (symbolsNode == null || !symbolsNode.isArray())
+          throw new SchemaParseException("Enum has no symbols: "+schema);
+        List<String> symbols = new ArrayList<String>();
+        for (JsonNode n : symbolsNode)
+          symbols.add(n.getTextValue());
+        Schema result = new EnumSchema(name, space, symbols);
+        if (name != null) names.put(name, result);
+        return result;
       } else if (type.equals("array")) {          // array
         return new ArraySchema(parse(schema.getFieldValue("items"), names));
       } else if (type.equals("map")) {            // map

Modified: hadoop/avro/trunk/src/java/org/apache/avro/generic/GenericData.java
URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/src/java/org/apache/avro/generic/GenericData.java?rev=776496&r1=776495&r2=776496&view=diff
==============================================================================
--- hadoop/avro/trunk/src/java/org/apache/avro/generic/GenericData.java (original)
+++ hadoop/avro/trunk/src/java/org/apache/avro/generic/GenericData.java Tue May 19 23:35:39 2009
@@ -107,6 +107,8 @@
         if (!validate(entry.getValue(), fields.get(entry.getKey())))
           return false;
       return true;
+    case ENUM:
+      return schema.getEnumSymbols().contains(datum);
     case ARRAY:
       if (!(datum instanceof GenericArray)) return false;
       for (Object element : (GenericArray)datum)

Modified: hadoop/avro/trunk/src/java/org/apache/avro/generic/GenericDatumReader.java
URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/src/java/org/apache/avro/generic/GenericDatumReader.java?rev=776496&r1=776495&r2=776496&view=diff
==============================================================================
--- hadoop/avro/trunk/src/java/org/apache/avro/generic/GenericDatumReader.java (original)
+++ hadoop/avro/trunk/src/java/org/apache/avro/generic/GenericDatumReader.java Tue May 19 23:35:39 2009
@@ -67,6 +67,7 @@
       expected = resolveExpected(actual, expected);
     switch (actual.getType()) {
     case RECORD:  return readRecord(old, actual, expected, in);
+    case ENUM:    return readEnum(actual, expected, in);
     case ARRAY:   return readArray(old, actual, expected, in);
     case MAP:     return readMap(old, actual, expected, in);
     case STRING:  return readString(old, in);
@@ -85,12 +86,15 @@
     // first scan for exact match
     for (Schema branch : expected.getTypes())
       if (branch.getType() == actual.getType())
-        if (branch.getType() == Type.RECORD) {
+        switch (branch.getType()) {
+        case RECORD:
           String name = branch.getName();
           if (name == null || name.equals(actual.getName()))
             return branch;
-        } else
+          break;
+        default:
           return branch;
+        }
     // then scan match via numeric promotion
     for (Schema branch : expected.getTypes())
       switch (actual.getType()) {
@@ -203,6 +207,8 @@
         }
       }
       return record;
+    case ENUM:
+      return createEnum(json.getTextValue(), schema);
     case ARRAY:
       Object array = newArray(old, json.size());
       Schema element = schema.getElementType();
@@ -231,6 +237,20 @@
     }
   }
 
+  /** Called to read an enum value. May be overridden for alternate enum
+   * representations.  By default, returns the symbol as a String. */
+  protected Object readEnum(Schema actual, Schema expected, ValueReader in)
+    throws IOException {
+    String name = expected.getName();
+    if (name != null && !name.equals(actual.getName()))
+      throw new AvroTypeException("Expected "+expected+", found "+actual);
+    return createEnum(actual.getEnumSymbols().get(in.readInt()), expected);
+  }
+
+  /** Called to create an enum value. May be overridden for alternate enum
+   * representations.  By default, returns the symbol as a String. */
+  protected Object createEnum(String symbol, Schema schema) { return symbol; }
+
   /** Called to read an array instance.  May be overridden for alternate array
    * representations.*/
   @SuppressWarnings(value="unchecked")
@@ -355,6 +375,9 @@
       for (Map.Entry<String, Schema> entry : schema.getFieldSchemas())
         skip(entry.getValue(), in);
       break;
+    case ENUM:
+      in.readInt();
+      break;
     case ARRAY:
       Schema elementType = schema.getElementType();
       for (int l = (int)in.readLong(); l > 0; l = (int)in.readLong())

Modified: hadoop/avro/trunk/src/java/org/apache/avro/generic/GenericDatumWriter.java
URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/src/java/org/apache/avro/generic/GenericDatumWriter.java?rev=776496&r1=776495&r2=776496&view=diff
==============================================================================
--- hadoop/avro/trunk/src/java/org/apache/avro/generic/GenericDatumWriter.java (original)
+++ hadoop/avro/trunk/src/java/org/apache/avro/generic/GenericDatumWriter.java Tue May 19 23:35:39 2009
@@ -53,6 +53,7 @@
     throws IOException {
     switch (schema.getType()) {
     case RECORD: writeRecord(schema, datum, out); break;
+    case ENUM:   writeEnum(schema, datum, out);   break;
     case ARRAY:  writeArray(schema, datum, out);  break;
     case MAP:    writeMap(schema, datum, out);    break;
     case UNION:
@@ -89,6 +90,13 @@
     return ((GenericRecord) record).get(field);
   }
   
+  /** Called to write an enum value.  May be overridden for alternate enum
+   * representations.*/
+  protected void writeEnum(Schema schema, Object datum, ValueWriter out)
+    throws IOException {
+    out.writeInt(schema.getEnumOrdinal((String)datum));
+  }
+  
   /** Called to write a array.  May be overridden for alternate array
    * representations.*/
   protected void writeArray(Schema schema, Object datum, ValueWriter out)
@@ -178,6 +186,7 @@
       if (!isRecord(datum)) return false;
       return (schema.getName() == null) ||
         schema.getName().equals(((GenericRecord)datum).getSchema().getName());
+    case ENUM:    return isEnum(datum);
     case ARRAY:   return isArray(datum);
     case MAP:     return isMap(datum);
     case STRING:  return isString(datum);
@@ -191,7 +200,7 @@
     default: throw new AvroRuntimeException("Unexpected type: " +schema);
     }
   }
-  
+
   /** Called by the default implementation of {@link #instanceOf}.*/
   protected boolean isArray(Object datum) {
     return datum instanceof GenericArray;
@@ -203,6 +212,11 @@
   }
 
   /** Called by the default implementation of {@link #instanceOf}.*/
+  protected boolean isEnum(Object datum) {
+    return datum instanceof String;
+  }
+  
+  /** Called by the default implementation of {@link #instanceOf}.*/
   protected boolean isMap(Object datum) {
     return (datum instanceof Map) && (!(datum instanceof GenericRecord));
   }

Modified: hadoop/avro/trunk/src/java/org/apache/avro/generic/package.html
URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/src/java/org/apache/avro/generic/package.html?rev=776496&r1=776495&r2=776496&view=diff
==============================================================================
--- hadoop/avro/trunk/src/java/org/apache/avro/generic/package.html (original)
+++ hadoop/avro/trunk/src/java/org/apache/avro/generic/package.html Tue May 19 23:35:39 2009
@@ -24,6 +24,7 @@
 <p>Uses the following mapping:
 <ul>
 <li>Schema records are implemented as {@link org.apache.avro.generic.GenericRecord}.
+<li>Schema enums are implemented as {@link java.lang.String}.
 <li>Schema arrays are implemented as {@link org.apache.avro.generic.GenericArray}.
 <li>Schema maps are implemented as {@link java.util.Map}.
 <li>Schema strings are implemented as {@link org.apache.avro.util.Utf8}.

Modified: hadoop/avro/trunk/src/java/org/apache/avro/reflect/ReflectData.java
URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/src/java/org/apache/avro/reflect/ReflectData.java?rev=776496&r1=776495&r2=776496&view=diff
==============================================================================
--- hadoop/avro/trunk/src/java/org/apache/avro/reflect/ReflectData.java (original)
+++ hadoop/avro/trunk/src/java/org/apache/avro/reflect/ReflectData.java Tue May 19 23:35:39 2009
@@ -24,6 +24,7 @@
 import java.nio.ByteBuffer;
 import java.util.ArrayList;
 import java.util.HashMap;
+import java.util.Collections;
 import java.util.LinkedHashMap;
 import java.util.List;
 import java.util.Map;
@@ -64,6 +65,9 @@
         }
       }
       return true;
+    case ENUM:
+      return datum instanceof Enum
+        && schema.getEnumSymbols().contains(((Enum)datum).name());
     case ARRAY:
       if (!(datum instanceof GenericArray)) return false;
       for (Object element : (GenericArray)datum)
@@ -99,7 +103,7 @@
   public static Schema getSchema(java.lang.reflect.Type type) {
     Schema schema = SCHEMA_CACHE.get(type);
     if (schema == null) {
-      schema = createSchema(type, new HashMap<String,Schema>());
+      schema = createSchema(type, new LinkedHashMap<String,Schema>());
       SCHEMA_CACHE.put(type, schema);
     }
     return schema;
@@ -142,14 +146,27 @@
           throw new AvroTypeException("Map key class not Utf8: "+key);
         return Schema.createMap(createSchema(value, names));
       }
-    } else if (type instanceof Class) {             // record
+    } else if (type instanceof Class) {
       Class c = (Class)type;
-      String name = c.getSimpleName();            // FIXME: ignoring package
+      String name = c.getSimpleName();
+      String space = c.getPackage().getName();
+      
       Schema schema = names.get(name);
       if (schema == null) {
+
+        if (c.isEnum()) {                         // enum
+          List<String> symbols = new ArrayList<String>();
+          Enum[] constants = (Enum[])c.getEnumConstants();
+          for (int i = 0; i < constants.length; i++)
+            symbols.add(constants[i].name());
+          schema = Schema.createEnum(name, space, symbols);
+          names.put(name, schema);
+          return schema;
+        }
+                                                  // record
         LinkedHashMap<String,Schema.Field> fields =
           new LinkedHashMap<String,Schema.Field>();
-        schema = Schema.createRecord(name, c.getPackage().getName(),
+        schema = Schema.createRecord(name, space,
                                      Throwable.class.isAssignableFrom(c));
         if (!names.containsKey(name))
           names.put(name, schema);
@@ -178,6 +195,16 @@
       if ((method.getModifiers() & Modifier.STATIC) == 0)
         protocol.getMessages().put(method.getName(),
                                    getMessage(method, protocol));
+
+    // reverse types, since they were defined in reference order
+    List<Map.Entry<String,Schema>> names =
+      new ArrayList<Map.Entry<String,Schema>>();
+    names.addAll(protocol.getTypes().entrySet());
+    Collections.reverse(names);
+    protocol.getTypes().clear();
+    for (Map.Entry<String,Schema> name : names)
+      protocol.getTypes().put(name.getKey(), name.getValue());
+
     return protocol;
   }
 

Modified: hadoop/avro/trunk/src/java/org/apache/avro/reflect/ReflectDatumReader.java
URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/src/java/org/apache/avro/reflect/ReflectDatumReader.java?rev=776496&r1=776495&r2=776496&view=diff
==============================================================================
--- hadoop/avro/trunk/src/java/org/apache/avro/reflect/ReflectDatumReader.java (original)
+++ hadoop/avro/trunk/src/java/org/apache/avro/reflect/ReflectDatumReader.java Tue May 19 23:35:39 2009
@@ -43,12 +43,7 @@
   }
 
   protected Object newRecord(Object old, Schema schema) {
-    Class c;
-    try {
-      c = Class.forName(packageName+schema.getName());
-    } catch (ClassNotFoundException e) {
-      throw new AvroRuntimeException(e);
-    }
+    Class c = getClass(schema);
     return(c.isInstance(old) ? old : newInstance(c));
   }
 
@@ -74,10 +69,31 @@
     addField(record, name, position, null);
   }
 
+  @SuppressWarnings("unchecked")
+  protected Object createEnum(String symbol, Schema schema) {
+    return Enum.valueOf(getClass(schema), symbol);
+  }
+
   private static final Class<?>[] EMPTY_ARRAY = new Class[]{};
   private static final Map<Class,Constructor> CTOR_CACHE =
     new ConcurrentHashMap<Class,Constructor>();
 
+  private Map<String,Class> classCache = new ConcurrentHashMap<String,Class>();
+
+  private Class getClass(Schema schema) {
+    String name = schema.getName();
+    Class c = classCache.get(name);
+    if (c == null) {
+      try {
+        c = Class.forName(packageName + name);
+        classCache.put(name, c);
+      } catch (ClassNotFoundException e) {
+        throw new AvroRuntimeException(e);
+      }
+    }
+    return c;
+  }
+
   /** Create a new instance of the named class. */
   @SuppressWarnings("unchecked")
   protected static Object newInstance(Class c) {
@@ -95,4 +111,6 @@
     }
     return result;
   }
+
 }
+

Modified: hadoop/avro/trunk/src/java/org/apache/avro/reflect/ReflectDatumWriter.java
URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/src/java/org/apache/avro/reflect/ReflectDatumWriter.java?rev=776496&r1=776495&r2=776496&view=diff
==============================================================================
--- hadoop/avro/trunk/src/java/org/apache/avro/reflect/ReflectDatumWriter.java (original)
+++ hadoop/avro/trunk/src/java/org/apache/avro/reflect/ReflectDatumWriter.java Tue May 19 23:35:39 2009
@@ -45,6 +45,15 @@
     }
   }
   
+  protected void writeEnum(Schema schema, Object datum, ValueWriter out)
+    throws IOException {
+    out.writeInt(((Enum)datum).ordinal());
+  }
+
+  protected boolean isEnum(Object datum) {
+    return datum instanceof Enum;
+  }
+
   @Override
   protected boolean isRecord(Object datum) {
     return ReflectData.getSchema(datum.getClass()).getType() == Type.RECORD;

Modified: hadoop/avro/trunk/src/java/org/apache/avro/reflect/ReflectResponder.java
URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/src/java/org/apache/avro/reflect/ReflectResponder.java?rev=776496&r1=776495&r2=776496&view=diff
==============================================================================
--- hadoop/avro/trunk/src/java/org/apache/avro/reflect/ReflectResponder.java (original)
+++ hadoop/avro/trunk/src/java/org/apache/avro/reflect/ReflectResponder.java Tue May 19 23:35:39 2009
@@ -102,7 +102,8 @@
 
   private Class paramType(Schema schema) throws ClassNotFoundException {
     switch (schema.getType()) {
-    case RECORD:  return Class.forName(packageName+schema.getName()); 
+    case RECORD:
+    case ENUM:    return Class.forName(packageName+schema.getName());
     case ARRAY:   return GenericArray.class;
     case MAP:     return Map.class;
     case UNION:   return Object.class;

Modified: hadoop/avro/trunk/src/java/org/apache/avro/specific/SpecificCompiler.java
URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/src/java/org/apache/avro/specific/SpecificCompiler.java?rev=776496&r1=776495&r2=776496&view=diff
==============================================================================
--- hadoop/avro/trunk/src/java/org/apache/avro/specific/SpecificCompiler.java (original)
+++ hadoop/avro/trunk/src/java/org/apache/avro/specific/SpecificCompiler.java Tue May 19 23:35:39 2009
@@ -171,6 +171,19 @@
           compile(field.getValue(), null, d+1);
 
       break;
+    case ENUM:
+      buffer.append("\n");
+      line(d, ((d==0)?"public ":"")+"enum "+type+" { ");
+      StringBuilder b = new StringBuilder();
+      int count = 0;
+      for (String symbol : schema.getEnumSymbols()) {
+        b.append(symbol);
+        if (++count < schema.getEnumSymbols().size())
+          b.append(", ");
+      }
+      line(d+1, b.toString());
+      line(d, "}");
+      break;
     case ARRAY:
       compile(schema.getElementType(), name+"Element", d);
       break;
@@ -195,6 +208,7 @@
   private String type(Schema schema, String name) {
     switch (schema.getType()) {
     case RECORD:
+    case ENUM:
       return schema.getName() == null ? cap(name) : schema.getName();
     case ARRAY:
       return "GenericArray<"+type(schema.getElementType(),name+"Element")+">";

Modified: hadoop/avro/trunk/src/py/avro/generic.py
URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/src/py/avro/generic.py?rev=776496&r1=776495&r2=776496&view=diff
==============================================================================
--- hadoop/avro/trunk/src/py/avro/generic.py (original)
+++ hadoop/avro/trunk/src/py/avro/generic.py Tue May 19 23:35:39 2009
@@ -77,6 +77,8 @@
      schema.LONG : lambda schm, object: ((isinstance(object, long) or 
                                           isinstance(object, int)) and 
                             io._LONG_MIN_VALUE <= object <= io._LONG_MAX_VALUE),
+     schema.ENUM : lambda schm, object:
+                                schm.getenumsymbols().__contains__(object),
      schema.ARRAY : _validatearray,
      schema.MAP : _validatemap,
      schema.RECORD : _validaterecord,
@@ -107,6 +109,7 @@
      schema.ARRAY : self.readarray,
      schema.MAP : self.readmap,
      schema.RECORD : self.readrecord,
+     schema.ENUM : self.readenum,
      schema.UNION : self.readunion
      }
 
@@ -150,6 +153,10 @@
       result[field] = self.readdata(fieldschema, valuereader)
     return result
 
+  def readenum(self, schm, valuereader):
+    index = valuereader.readint()
+    return schm.getenumsymbols()[index]
+
   def readunion(self, schm, valuereader):
     index = int(valuereader.readlong())
     return self.readdata(schm.getelementtypes()[index], valuereader)
@@ -177,6 +184,7 @@
      schema.ARRAY : self.writearray,
      schema.MAP : self.writemap,
      schema.RECORD : self.writerecord,
+     schema.ENUM : self.writeenum,
      schema.UNION : self.writeunion
      }
 
@@ -227,6 +235,10 @@
     valuewriter.writelong(index)
     self.writedata(schm.getelementtypes()[index], datum, valuewriter)
 
+  def writeenum(self, schm, datum, valuewriter):
+    index = schm.getenumordinal(datum)
+    valuewriter.writeint(index)
+
   def resolveunion(self, schm, datum):
     index = 0
     for elemtype in schm.getelementtypes():

Modified: hadoop/avro/trunk/src/py/avro/protocol.py
URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/src/py/avro/protocol.py?rev=776496&r1=776495&r2=776496&view=diff
==============================================================================
--- hadoop/avro/trunk/src/py/avro/protocol.py (original)
+++ hadoop/avro/trunk/src/py/avro/protocol.py Tue May 19 23:35:39 2009
@@ -97,7 +97,8 @@
     count = 0
     for type in self.__types.values():
       typesCopy = self.__types
-      if isinstance(type, schema._RecordSchema):
+      if (isinstance(type, schema._RecordSchema) or 
+          isinstance(type, schema._EnumSchema)):
         typesCopy = self.__types.copy()
         typesCopy.pop(type.getname(), None)
       str.write(type.str(typesCopy)+"\n")

Modified: hadoop/avro/trunk/src/py/avro/reflect.py
URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/src/py/avro/reflect.py?rev=776496&r1=776495&r2=776496&view=diff
==============================================================================
--- hadoop/avro/trunk/src/py/avro/reflect.py (original)
+++ hadoop/avro/trunk/src/py/avro/reflect.py Tue May 19 23:35:39 2009
@@ -69,6 +69,8 @@
      schema.LONG : lambda schm, pkgname, object: ((isinstance(object, long) or 
                                           isinstance(object, int)) and 
                             io._LONG_MIN_VALUE <= object <= io._LONG_MAX_VALUE),
+     schema.ENUM : lambda schm, pkgname, object:
+                                schm.getenumsymbols().__contains__(object),
      schema.ARRAY : _validatearray,
      schema.MAP : _validatemap,
      schema.RECORD : _validaterecord,

Modified: hadoop/avro/trunk/src/py/avro/schema.py
URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/src/py/avro/schema.py?rev=776496&r1=776495&r2=776496&view=diff
==============================================================================
--- hadoop/avro/trunk/src/py/avro/schema.py (original)
+++ hadoop/avro/trunk/src/py/avro/schema.py Tue May 19 23:35:39 2009
@@ -17,6 +17,7 @@
 """ Contains the Schema classes.
 A schema may be one of:
   An record, mapping field names to field value data;
+  An enum, containing one of a small set of symbols;
   An array of values, all of the same schema;
   A map containing string/value pairs, each of a declared schema;
   A union of other schemas;
@@ -29,10 +30,10 @@
   A boolean."""
 
 import cStringIO
-import simplejson
+import simplejson, odict
 
 #The schema types
-STRING, BYTES, INT, LONG, FLOAT, DOUBLE, BOOLEAN, NULL, ARRAY, MAP, UNION, RECORD = range(12)
+STRING, BYTES, INT, LONG, FLOAT, DOUBLE, BOOLEAN, NULL, ARRAY, MAP, UNION, RECORD, ENUM = range(13)
 
 class Schema(object):
   """Base class for all Schema classes."""
@@ -282,6 +283,73 @@
       hash = hash + elem.__hash__(seen)
     return hash
 
+class _EnumSchema(Schema):
+  def __init__(self, name, space, symbols):
+    Schema.__init__(self, ENUM)
+    self.__name = name
+    self.__space = space
+    self.__symbols = symbols
+    self.__ordinals = dict()
+    i = 0
+    for symbol in symbols:
+      self.__ordinals[symbol] = i
+      i+=1
+
+  def getname(self):
+    return self.__name
+
+  def getnamespace(self):
+    return self.__namespace
+
+  def getenumsymbols(self):
+    return self.__symbols
+
+  def getenumordinal(self, symbol):
+    return self.__ordinals.get(symbol)
+
+  def str(self, names):
+    if names.get(self.__name) is self:
+      return "\""+self.__name+"\""
+    elif self.__name is not None:
+      names[self.__name] = self
+    str = cStringIO.StringIO()
+    str.write("{\"type\": \"enum\", ")
+    if self.__name is not None:
+      str.write("\"name\": \""+self.__name+"\", ")
+    str.write("\"symbols\": [")
+    count = 0
+    for symbol in self.__symbols:
+      str.write("\""+symbol+"\"")
+      count+=1
+      if count < len(self.__symbols):
+        str.write(",")
+    str.write("]}")
+    return str.getvalue()
+
+  def __eq__(self, other, seen={}):
+    if self is other or seen.get(id(self)) is other:
+      return True
+    if isinstance(other, _EnumSchema):
+      size = len(self.__symbols)
+      if len(other.__symbols) != size:
+        return False
+      seen[id(self)] = other
+      for i in range(0, size):
+        if not self.__symbols[i].__eq__(other.__symbols[i]):
+          return False
+      return True
+    else:
+      return False
+
+  def __hash__(self, seen=set()):
+    if seen.__contains__(id(self)):
+      return 0
+    seen.add(id(self))
+    hash = self.gettype().__hash__()
+    for symbol in self.__symbols:
+      hash += symbol.__hash__()
+    return hash
+
 _PRIMITIVES = {'string':_StringSchema(),
         'bytes':_BytesSchema(),
         'int':_IntSchema(),
@@ -291,20 +359,21 @@
         'boolean':_BooleanSchema(),
         'null':_NullSchema()}    
 
-class _Names(dict):
+class _Names(odict.OrderedDict):
   def __init__(self, names=_PRIMITIVES):
+    odict.OrderedDict.__init__(self)
     self.__defaults = names
 
   def get(self, key):
-    val = dict.get(self, key)
+    val = odict.OrderedDict.get(self, key)
     if val is None:
       val = self.__defaults.get(key)
     return val
 
   def __setitem__(self, key, val):
-    if dict.get(self, key) is not None:
+    if odict.OrderedDict.get(self, key) is not None:
       raise SchemaParseException("Can't redefine: "+ key.__str__())
-    dict.__setitem__(self, key, val)
+    odict.OrderedDict.__setitem__(self, key, val)
 
 class AvroException(Exception):
   pass
@@ -342,6 +411,19 @@
           raise SchemaParseException("No field type: "+field.__str__())
         fields.append((fieldname, _parse(fieldtype, names)))
       return schema
+    elif type == "enum":
+      name = obj.get("name")
+      namespace = obj.get("namespace")
+      symbolsnode = obj.get("symbols")
+      if symbolsnode == None or not isinstance(symbolsnode, list):
+        raise SchemaParseException("Enum has no symbols: "+obj.__str__())
+      symbols = list()
+      for symbol in symbolsnode:
+        symbols.append(symbol)
+      schema = _EnumSchema(name, namespace, symbols)
+      if name is not None:
+        names[name] = schema
+      return schema
     elif type == "array":
       return _ArraySchema(_parse(obj.get("items"), names))
     elif type == "map":

Modified: hadoop/avro/trunk/src/test/java/org/apache/avro/RandomData.java
URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/src/test/java/org/apache/avro/RandomData.java?rev=776496&r1=776495&r2=776496&view=diff
==============================================================================
--- hadoop/avro/trunk/src/test/java/org/apache/avro/RandomData.java (original)
+++ hadoop/avro/trunk/src/test/java/org/apache/avro/RandomData.java Tue May 19 23:35:39 2009
@@ -66,6 +66,9 @@
       for (Map.Entry<String, Schema> entry : schema.getFieldSchemas())
         record.put(entry.getKey(), generate(entry.getValue(), random, d+1));
       return record;
+    case ENUM:
+      List<String> symbols = schema.getEnumSymbols();
+      return symbols.get(random.nextInt(symbols.size()));
     case ARRAY:
       int length = (random.nextInt(5)+2)-d;
       GenericArray<Object> array = new GenericData.Array(length<=0?0:length);

Modified: hadoop/avro/trunk/src/test/java/org/apache/avro/TestProtocolGeneric.java
URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/src/test/java/org/apache/avro/TestProtocolGeneric.java?rev=776496&r1=776495&r2=776496&view=diff
==============================================================================
--- hadoop/avro/trunk/src/test/java/org/apache/avro/TestProtocolGeneric.java (original)
+++ hadoop/avro/trunk/src/test/java/org/apache/avro/TestProtocolGeneric.java Tue May 19 23:35:39 2009
@@ -114,6 +114,7 @@
     GenericRecord record =
       new GenericData.Record(PROTOCOL.getTypes().get("TestRecord"));
     record.put("name", new Utf8("foo"));
+    record.put("kind", "BAR");
     GenericRecord params =
       new GenericData.Record(PROTOCOL.getMessages().get("echo").getRequest());
     params.put("record", record);