You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@avro.apache.org by cu...@apache.org on 2009/05/20 01:35:40 UTC
svn commit: r776496 [1/2] - in /hadoop/avro/trunk: ./ lib/py/
src/doc/content/xdocs/ src/java/org/apache/avro/
src/java/org/apache/avro/generic/ src/java/org/apache/avro/reflect/
src/java/org/apache/avro/specific/ src/py/avro/ src/test/java/org/apache/...
Author: cutting
Date: Tue May 19 23:35:39 2009
New Revision: 776496
URL: http://svn.apache.org/viewvc?rev=776496&view=rev
Log:
AVRO-18. Add support for enum types.
Added:
hadoop/avro/trunk/lib/py/odict.py
Modified:
hadoop/avro/trunk/CHANGES.txt
hadoop/avro/trunk/src/doc/content/xdocs/spec.xml
hadoop/avro/trunk/src/java/org/apache/avro/Protocol.java
hadoop/avro/trunk/src/java/org/apache/avro/Schema.java
hadoop/avro/trunk/src/java/org/apache/avro/generic/GenericData.java
hadoop/avro/trunk/src/java/org/apache/avro/generic/GenericDatumReader.java
hadoop/avro/trunk/src/java/org/apache/avro/generic/GenericDatumWriter.java
hadoop/avro/trunk/src/java/org/apache/avro/generic/package.html
hadoop/avro/trunk/src/java/org/apache/avro/reflect/ReflectData.java
hadoop/avro/trunk/src/java/org/apache/avro/reflect/ReflectDatumReader.java
hadoop/avro/trunk/src/java/org/apache/avro/reflect/ReflectDatumWriter.java
hadoop/avro/trunk/src/java/org/apache/avro/reflect/ReflectResponder.java
hadoop/avro/trunk/src/java/org/apache/avro/specific/SpecificCompiler.java
hadoop/avro/trunk/src/py/avro/generic.py
hadoop/avro/trunk/src/py/avro/protocol.py
hadoop/avro/trunk/src/py/avro/reflect.py
hadoop/avro/trunk/src/py/avro/schema.py
hadoop/avro/trunk/src/test/java/org/apache/avro/RandomData.java
hadoop/avro/trunk/src/test/java/org/apache/avro/TestProtocolGeneric.java
hadoop/avro/trunk/src/test/java/org/apache/avro/TestProtocolSpecific.java
hadoop/avro/trunk/src/test/java/org/apache/avro/TestSchema.java
hadoop/avro/trunk/src/test/py/testio.py
hadoop/avro/trunk/src/test/py/testipc.py
hadoop/avro/trunk/src/test/py/testipcreflect.py
hadoop/avro/trunk/src/test/schemata/interop.js
hadoop/avro/trunk/src/test/schemata/simple.js
Modified: hadoop/avro/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/CHANGES.txt?rev=776496&r1=776495&r2=776496&view=diff
==============================================================================
--- hadoop/avro/trunk/CHANGES.txt (original)
+++ hadoop/avro/trunk/CHANGES.txt Tue May 19 23:35:39 2009
@@ -19,6 +19,8 @@
AVRO-33. C support for primitive types. (Matt Massie via cutting)
+ AVRO-18. Add support for enum types. (cutting & sharad)
+
IMPROVEMENTS
AVRO-11. Re-implement specific and reflect datum readers and
Added: hadoop/avro/trunk/lib/py/odict.py
URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/lib/py/odict.py?rev=776496&view=auto
==============================================================================
--- hadoop/avro/trunk/lib/py/odict.py (added)
+++ hadoop/avro/trunk/lib/py/odict.py Tue May 19 23:35:39 2009
@@ -0,0 +1,1399 @@
+# odict.py
+# An Ordered Dictionary object
+# Copyright (C) 2005 Nicola Larosa, Michael Foord
+# E-mail: nico AT tekNico DOT net, fuzzyman AT voidspace DOT org DOT uk
+
+# This software is licensed under the terms of the BSD license.
+# http://www.voidspace.org.uk/python/license.shtml
+# Basically you're free to copy, modify, distribute and relicense it,
+# So long as you keep a copy of the license with it.
+
+# Documentation at http://www.voidspace.org.uk/python/odict.html
+# For information about bugfixes, updates and support, please join the
+# Pythonutils mailing list:
+# http://groups.google.com/group/pythonutils/
+# Comments, suggestions and bug reports welcome.
+
+"""A dict that keeps keys in insertion order"""
+from __future__ import generators
+
+__author__ = ('Nicola Larosa <ni...@m-tekNico.net>,'
+ 'Michael Foord <fuzzyman AT voidspace DOT org DOT uk>')
+
+__docformat__ = "restructuredtext en"
+
+__revision__ = '$Id: odict.py 129 2005-09-12 18:15:28Z teknico $'
+
+__version__ = '0.2.2'
+
+__all__ = ['OrderedDict', 'SequenceOrderedDict']
+
+import sys
+INTP_VER = sys.version_info[:2]
+if INTP_VER < (2, 2):
+ raise RuntimeError("Python v.2.2 or later required")
+
+import types, warnings
+
+class OrderedDict(dict):
+ """
+ A class of dictionary that keeps the insertion order of keys.
+
+ All appropriate methods return keys, items, or values in an ordered way.
+
+ All normal dictionary methods are available. Update and comparison is
+ restricted to other OrderedDict objects.
+
+ Various sequence methods are available, including the ability to explicitly
+ mutate the key ordering.
+
+ __contains__ tests:
+
+ >>> d = OrderedDict(((1, 3),))
+ >>> 1 in d
+ 1
+ >>> 4 in d
+ 0
+
+ __getitem__ tests:
+
+ >>> OrderedDict(((1, 3), (3, 2), (2, 1)))[2]
+ 1
+ >>> OrderedDict(((1, 3), (3, 2), (2, 1)))[4]
+ Traceback (most recent call last):
+ KeyError: 4
+
+ __len__ tests:
+
+ >>> len(OrderedDict())
+ 0
+ >>> len(OrderedDict(((1, 3), (3, 2), (2, 1))))
+ 3
+
+ get tests:
+
+ >>> d = OrderedDict(((1, 3), (3, 2), (2, 1)))
+ >>> d.get(1)
+ 3
+ >>> d.get(4) is None
+ 1
+ >>> d.get(4, 5)
+ 5
+ >>> d
+ OrderedDict([(1, 3), (3, 2), (2, 1)])
+
+ has_key tests:
+
+ >>> d = OrderedDict(((1, 3), (3, 2), (2, 1)))
+ >>> d.has_key(1)
+ 1
+ >>> d.has_key(4)
+ 0
+ """
+
+ def __init__(self, init_val=(), strict=False):
+ """
+ Create a new ordered dictionary. Cannot init from a normal dict,
+ nor from kwargs, since items order is undefined in those cases.
+
+ If the ``strict`` keyword argument is ``True`` (``False`` is the
+ default) then when doing slice assignment - the ``OrderedDict`` you are
+ assigning from *must not* contain any keys in the remaining dict.
+
+ >>> OrderedDict()
+ OrderedDict([])
+ >>> OrderedDict({1: 1})
+ Traceback (most recent call last):
+ TypeError: undefined order, cannot get items from dict
+ >>> OrderedDict({1: 1}.items())
+ OrderedDict([(1, 1)])
+ >>> d = OrderedDict(((1, 3), (3, 2), (2, 1)))
+ >>> d
+ OrderedDict([(1, 3), (3, 2), (2, 1)])
+ >>> OrderedDict(d)
+ OrderedDict([(1, 3), (3, 2), (2, 1)])
+ """
+ self.strict = strict
+ dict.__init__(self)
+ if isinstance(init_val, OrderedDict):
+ self._sequence = init_val.keys()
+ dict.update(self, init_val)
+ elif isinstance(init_val, dict):
+ # we lose compatibility with other ordered dict types this way
+ raise TypeError('undefined order, cannot get items from dict')
+ else:
+ self._sequence = []
+ self.update(init_val)
+
+### Special methods ###
+
+ def __delitem__(self, key):
+ """
+ >>> d = OrderedDict(((1, 3), (3, 2), (2, 1)))
+ >>> del d[3]
+ >>> d
+ OrderedDict([(1, 3), (2, 1)])
+ >>> del d[3]
+ Traceback (most recent call last):
+ KeyError: 3
+ >>> d[3] = 2
+ >>> d
+ OrderedDict([(1, 3), (2, 1), (3, 2)])
+ >>> del d[0:1]
+ >>> d
+ OrderedDict([(2, 1), (3, 2)])
+ """
+ if isinstance(key, types.SliceType):
+ # FIXME: efficiency?
+ keys = self._sequence[key]
+ for entry in keys:
+ dict.__delitem__(self, entry)
+ del self._sequence[key]
+ else:
+ # do the dict.__delitem__ *first* as it raises
+ # the more appropriate error
+ dict.__delitem__(self, key)
+ self._sequence.remove(key)
+
+ def __eq__(self, other):
+ """
+ >>> d = OrderedDict(((1, 3), (3, 2), (2, 1)))
+ >>> d == OrderedDict(d)
+ True
+ >>> d == OrderedDict(((1, 3), (2, 1), (3, 2)))
+ False
+ >>> d == OrderedDict(((1, 0), (3, 2), (2, 1)))
+ False
+ >>> d == OrderedDict(((0, 3), (3, 2), (2, 1)))
+ False
+ >>> d == dict(d)
+ False
+ >>> d == False
+ False
+ """
+ if isinstance(other, OrderedDict):
+ # FIXME: efficiency?
+ # Generate both item lists for each compare
+ return (self.items() == other.items())
+ else:
+ return False
+
+ def __lt__(self, other):
+ """
+ >>> d = OrderedDict(((1, 3), (3, 2), (2, 1)))
+ >>> c = OrderedDict(((0, 3), (3, 2), (2, 1)))
+ >>> c < d
+ True
+ >>> d < c
+ False
+ >>> d < dict(c)
+ Traceback (most recent call last):
+ TypeError: Can only compare with other OrderedDicts
+ """
+ if not isinstance(other, OrderedDict):
+ raise TypeError('Can only compare with other OrderedDicts')
+ # FIXME: efficiency?
+ # Generate both item lists for each compare
+ return (self.items() < other.items())
+
+ def __le__(self, other):
+ """
+ >>> d = OrderedDict(((1, 3), (3, 2), (2, 1)))
+ >>> c = OrderedDict(((0, 3), (3, 2), (2, 1)))
+ >>> e = OrderedDict(d)
+ >>> c <= d
+ True
+ >>> d <= c
+ False
+ >>> d <= dict(c)
+ Traceback (most recent call last):
+ TypeError: Can only compare with other OrderedDicts
+ >>> d <= e
+ True
+ """
+ if not isinstance(other, OrderedDict):
+ raise TypeError('Can only compare with other OrderedDicts')
+ # FIXME: efficiency?
+ # Generate both item lists for each compare
+ return (self.items() <= other.items())
+
+ def __ne__(self, other):
+ """
+ >>> d = OrderedDict(((1, 3), (3, 2), (2, 1)))
+ >>> d != OrderedDict(d)
+ False
+ >>> d != OrderedDict(((1, 3), (2, 1), (3, 2)))
+ True
+ >>> d != OrderedDict(((1, 0), (3, 2), (2, 1)))
+ True
+ >>> d == OrderedDict(((0, 3), (3, 2), (2, 1)))
+ False
+ >>> d != dict(d)
+ True
+ >>> d != False
+ True
+ """
+ if isinstance(other, OrderedDict):
+ # FIXME: efficiency?
+ # Generate both item lists for each compare
+ return not (self.items() == other.items())
+ else:
+ return True
+
+ def __gt__(self, other):
+ """
+ >>> d = OrderedDict(((1, 3), (3, 2), (2, 1)))
+ >>> c = OrderedDict(((0, 3), (3, 2), (2, 1)))
+ >>> d > c
+ True
+ >>> c > d
+ False
+ >>> d > dict(c)
+ Traceback (most recent call last):
+ TypeError: Can only compare with other OrderedDicts
+ """
+ if not isinstance(other, OrderedDict):
+ raise TypeError('Can only compare with other OrderedDicts')
+ # FIXME: efficiency?
+ # Generate both item lists for each compare
+ return (self.items() > other.items())
+
+ def __ge__(self, other):
+ """
+ >>> d = OrderedDict(((1, 3), (3, 2), (2, 1)))
+ >>> c = OrderedDict(((0, 3), (3, 2), (2, 1)))
+ >>> e = OrderedDict(d)
+ >>> c >= d
+ False
+ >>> d >= c
+ True
+ >>> d >= dict(c)
+ Traceback (most recent call last):
+ TypeError: Can only compare with other OrderedDicts
+ >>> e >= d
+ True
+ """
+ if not isinstance(other, OrderedDict):
+ raise TypeError('Can only compare with other OrderedDicts')
+ # FIXME: efficiency?
+ # Generate both item lists for each compare
+ return (self.items() >= other.items())
+
+ def __repr__(self):
+ """
+ Used for __repr__ and __str__
+
+ >>> r1 = repr(OrderedDict((('a', 'b'), ('c', 'd'), ('e', 'f'))))
+ >>> r1
+ "OrderedDict([('a', 'b'), ('c', 'd'), ('e', 'f')])"
+ >>> r2 = repr(OrderedDict((('a', 'b'), ('e', 'f'), ('c', 'd'))))
+ >>> r2
+ "OrderedDict([('a', 'b'), ('e', 'f'), ('c', 'd')])"
+ >>> r1 == str(OrderedDict((('a', 'b'), ('c', 'd'), ('e', 'f'))))
+ True
+ >>> r2 == str(OrderedDict((('a', 'b'), ('e', 'f'), ('c', 'd'))))
+ True
+ """
+ return '%s([%s])' % (self.__class__.__name__, ', '.join(
+ ['(%r, %r)' % (key, self[key]) for key in self._sequence]))
+
+ def __setitem__(self, key, val):
+ """
+ Allows slice assignment, so long as the slice is an OrderedDict
+ >>> d = OrderedDict()
+ >>> d['a'] = 'b'
+ >>> d['b'] = 'a'
+ >>> d[3] = 12
+ >>> d
+ OrderedDict([('a', 'b'), ('b', 'a'), (3, 12)])
+ >>> d[:] = OrderedDict(((1, 2), (2, 3), (3, 4)))
+ >>> d
+ OrderedDict([(1, 2), (2, 3), (3, 4)])
+ >>> d[::2] = OrderedDict(((7, 8), (9, 10)))
+ >>> d
+ OrderedDict([(7, 8), (2, 3), (9, 10)])
+ >>> d = OrderedDict(((0, 1), (1, 2), (2, 3), (3, 4)))
+ >>> d[1:3] = OrderedDict(((1, 2), (5, 6), (7, 8)))
+ >>> d
+ OrderedDict([(0, 1), (1, 2), (5, 6), (7, 8), (3, 4)])
+ >>> d = OrderedDict(((0, 1), (1, 2), (2, 3), (3, 4)), strict=True)
+ >>> d[1:3] = OrderedDict(((1, 2), (5, 6), (7, 8)))
+ >>> d
+ OrderedDict([(0, 1), (1, 2), (5, 6), (7, 8), (3, 4)])
+
+ >>> a = OrderedDict(((0, 1), (1, 2), (2, 3)), strict=True)
+ >>> a[3] = 4
+ >>> a
+ OrderedDict([(0, 1), (1, 2), (2, 3), (3, 4)])
+ >>> a[::1] = OrderedDict([(0, 1), (1, 2), (2, 3), (3, 4)])
+ >>> a
+ OrderedDict([(0, 1), (1, 2), (2, 3), (3, 4)])
+ >>> a[:2] = OrderedDict([(0, 1), (1, 2), (2, 3), (3, 4), (4, 5)])
+ Traceback (most recent call last):
+ ValueError: slice assignment must be from unique keys
+ >>> a = OrderedDict(((0, 1), (1, 2), (2, 3)))
+ >>> a[3] = 4
+ >>> a
+ OrderedDict([(0, 1), (1, 2), (2, 3), (3, 4)])
+ >>> a[::1] = OrderedDict([(0, 1), (1, 2), (2, 3), (3, 4)])
+ >>> a
+ OrderedDict([(0, 1), (1, 2), (2, 3), (3, 4)])
+ >>> a[:2] = OrderedDict([(0, 1), (1, 2), (2, 3), (3, 4)])
+ >>> a
+ OrderedDict([(0, 1), (1, 2), (2, 3), (3, 4)])
+ >>> a[::-1] = OrderedDict([(0, 1), (1, 2), (2, 3), (3, 4)])
+ >>> a
+ OrderedDict([(3, 4), (2, 3), (1, 2), (0, 1)])
+
+ >>> d = OrderedDict([(0, 1), (1, 2), (2, 3), (3, 4)])
+ >>> d[:1] = 3
+ Traceback (most recent call last):
+ TypeError: slice assignment requires an OrderedDict
+
+ >>> d = OrderedDict([(0, 1), (1, 2), (2, 3), (3, 4)])
+ >>> d[:1] = OrderedDict([(9, 8)])
+ >>> d
+ OrderedDict([(9, 8), (1, 2), (2, 3), (3, 4)])
+ """
+ if isinstance(key, types.SliceType):
+ if not isinstance(val, OrderedDict):
+ # FIXME: allow a list of tuples?
+ raise TypeError('slice assignment requires an OrderedDict')
+ keys = self._sequence[key]
+ # NOTE: Could use ``range(*key.indices(len(self._sequence)))``
+ indexes = range(len(self._sequence))[key]
+ if key.step is None:
+ # NOTE: new slice may not be the same size as the one being
+ # overwritten !
+ # NOTE: What is the algorithm for an impossible slice?
+ # e.g. d[5:3]
+ pos = key.start or 0
+ del self[key]
+ newkeys = val.keys()
+ for k in newkeys:
+ if k in self:
+ if self.strict:
+ raise ValueError('slice assignment must be from '
+ 'unique keys')
+ else:
+ # NOTE: This removes duplicate keys *first*
+ # so start position might have changed?
+ del self[k]
+ self._sequence = (self._sequence[:pos] + newkeys +
+ self._sequence[pos:])
+ dict.update(self, val)
+ else:
+ # extended slice - length of new slice must be the same
+ # as the one being replaced
+ if len(keys) != len(val):
+ raise ValueError('attempt to assign sequence of size %s '
+ 'to extended slice of size %s' % (len(val), len(keys)))
+ # FIXME: efficiency?
+ del self[key]
+ item_list = zip(indexes, val.items())
+ # smallest indexes first - higher indexes not guaranteed to
+ # exist
+ item_list.sort()
+ for pos, (newkey, newval) in item_list:
+ if self.strict and newkey in self:
+ raise ValueError('slice assignment must be from unique'
+ ' keys')
+ self.insert(pos, newkey, newval)
+ else:
+ if key not in self:
+ self._sequence.append(key)
+ dict.__setitem__(self, key, val)
+
+ def __getitem__(self, key):
+ """
+ Allows slicing. Returns an OrderedDict if you slice.
+ >>> b = OrderedDict([(7, 0), (6, 1), (5, 2), (4, 3), (3, 4), (2, 5), (1, 6)])
+ >>> b[::-1]
+ OrderedDict([(1, 6), (2, 5), (3, 4), (4, 3), (5, 2), (6, 1), (7, 0)])
+ >>> b[2:5]
+ OrderedDict([(5, 2), (4, 3), (3, 4)])
+ >>> type(b[2:4])
+ <class '__main__.OrderedDict'>
+ """
+ if isinstance(key, types.SliceType):
+ # FIXME: does this raise the error we want?
+ keys = self._sequence[key]
+ # FIXME: efficiency?
+ return OrderedDict([(entry, self[entry]) for entry in keys])
+ else:
+ return dict.__getitem__(self, key)
+
+ __str__ = __repr__
+
+ def __setattr__(self, name, value):
+ """
+ Implemented so that accesses to ``sequence`` raise a warning and are
+ diverted to the new ``setkeys`` method.
+ """
+ if name == 'sequence':
+ warnings.warn('Use of the sequence attribute is deprecated.'
+ ' Use the keys method instead.', DeprecationWarning)
+ # NOTE: doesn't return anything
+ self.setkeys(value)
+ else:
+ # FIXME: do we want to allow arbitrary setting of attributes?
+ # Or do we want to manage it?
+ object.__setattr__(self, name, value)
+
+ def __getattr__(self, name):
+ """
+ Implemented so that access to ``sequence`` raises a warning.
+
+ >>> d = OrderedDict()
+ >>> d.sequence
+ []
+ """
+ if name == 'sequence':
+ warnings.warn('Use of the sequence attribute is deprecated.'
+ ' Use the keys method instead.', DeprecationWarning)
+ # NOTE: Still (currently) returns a direct reference. Need to
+ # because code that uses sequence will expect to be able to
+ # mutate it in place.
+ return self._sequence
+ else:
+ # raise the appropriate error
+ raise AttributeError("OrderedDict has no '%s' attribute" % name)
+
+ def __deepcopy__(self, memo):
+ """
+ To allow deepcopy to work with OrderedDict.
+
+ >>> from copy import deepcopy
+ >>> a = OrderedDict([(1, 1), (2, 2), (3, 3)])
+ >>> a['test'] = {}
+ >>> b = deepcopy(a)
+ >>> b == a
+ True
+ >>> b is a
+ False
+ >>> a['test'] is b['test']
+ False
+ """
+ from copy import deepcopy
+ return self.__class__(deepcopy(self.items(), memo), self.strict)
+
+
+### Read-only methods ###
+
+ def copy(self):
+ """
+ >>> OrderedDict(((1, 3), (3, 2), (2, 1))).copy()
+ OrderedDict([(1, 3), (3, 2), (2, 1)])
+ """
+ return OrderedDict(self)
+
+ def items(self):
+ """
+ ``items`` returns a list of tuples representing all the
+ ``(key, value)`` pairs in the dictionary.
+
+ >>> d = OrderedDict(((1, 3), (3, 2), (2, 1)))
+ >>> d.items()
+ [(1, 3), (3, 2), (2, 1)]
+ >>> d.clear()
+ >>> d.items()
+ []
+ """
+ return zip(self._sequence, self.values())
+
+ def keys(self):
+ """
+ Return a list of keys in the ``OrderedDict``.
+
+ >>> d = OrderedDict(((1, 3), (3, 2), (2, 1)))
+ >>> d.keys()
+ [1, 3, 2]
+ """
+ return self._sequence[:]
+
+ def values(self, values=None):
+ """
+ Return a list of all the values in the OrderedDict, in key order.
+
+ NOTE: the ``values`` argument is accepted but ignored here - this method
+ never replaces anything. Use ``setvalues`` to replace the values.
+
+ >>> d = OrderedDict(((1, 3), (3, 2), (2, 1)))
+ >>> d.values()
+ [3, 2, 1]
+ """
+ return [self[key] for key in self._sequence]
+
+    def iteritems(self):
+        """
+        Return a lazy iterator over the ``(key, value)`` pairs, in key order.
+
+        >>> ii = OrderedDict(((1, 3), (3, 2), (2, 1))).iteritems()
+        >>> ii.next()
+        (1, 3)
+        >>> ii.next()
+        (3, 2)
+        >>> ii.next()
+        (2, 1)
+        >>> ii.next()
+        Traceback (most recent call last):
+        StopIteration
+        """
+        # A plain ``for`` loop ends the generator by returning.  Letting a
+        # StopIteration from ``keys.next()`` escape the generator body is
+        # turned into RuntimeError by interpreters implementing PEP 479.
+        for key in self.iterkeys():
+            yield (key, self[key])
+
+ def iterkeys(self):
+ """
+ >>> ii = OrderedDict(((1, 3), (3, 2), (2, 1))).iterkeys()
+ >>> ii.next()
+ 1
+ >>> ii.next()
+ 3
+ >>> ii.next()
+ 2
+ >>> ii.next()
+ Traceback (most recent call last):
+ StopIteration
+ """
+ return iter(self._sequence)
+
+ __iter__ = iterkeys
+
+    def itervalues(self):
+        """
+        Return a lazy iterator over the values, in key order.
+
+        >>> iv = OrderedDict(((1, 3), (3, 2), (2, 1))).itervalues()
+        >>> iv.next()
+        3
+        >>> iv.next()
+        2
+        >>> iv.next()
+        1
+        >>> iv.next()
+        Traceback (most recent call last):
+        StopIteration
+        """
+        # A plain ``for`` loop ends the generator by returning.  Letting a
+        # StopIteration from ``keys.next()`` escape the generator body is
+        # turned into RuntimeError by interpreters implementing PEP 479.
+        for key in self.iterkeys():
+            yield self[key]
+
+### Read-write methods ###
+
+ def clear(self):
+ """
+ >>> d = OrderedDict(((1, 3), (3, 2), (2, 1)))
+ >>> d.clear()
+ >>> d
+ OrderedDict([])
+ """
+ dict.clear(self)
+ self._sequence = []
+
+    def pop(self, key, *args):
+        """
+        Remove ``key`` and return its value.
+
+        No dict.pop in Python 2.2, gotta reimplement it.  At most one extra
+        positional argument is accepted; it is returned when ``key`` is not
+        present.  Without it a missing key raises ``KeyError``.  More than
+        one extra argument raises ``TypeError``.
+
+        >>> d = OrderedDict(((1, 3), (3, 2), (2, 1)))
+        >>> d.pop(3)
+        2
+        >>> d
+        OrderedDict([(1, 3), (2, 1)])
+        >>> d.pop(4)
+        Traceback (most recent call last):
+        KeyError: 4
+        >>> d.pop(4, 0)
+        0
+        >>> d.pop(4, 0, 1)
+        Traceback (most recent call last):
+        TypeError: pop expected at most 2 arguments, got 3
+        """
+        if len(args) > 1:
+            # Call-form raise: the ``raise Class, value`` statement form is a
+            # syntax error on Python 3 and would stop the module from loading.
+            raise TypeError('pop expected at most 2 arguments, got %s' %
+                            (len(args) + 1))
+        if key in self:
+            val = self[key]
+            # __delitem__ also removes the key from the ordering list
+            del self[key]
+            return val
+        if args:
+            # caller supplied a default for the missing key
+            return args[0]
+        raise KeyError(key)
+
+ def popitem(self, i=-1):
+ """
+ Delete and return an item specified by index, not a random one as in
+ dict. The index is -1 by default (the last item).
+
+ >>> d = OrderedDict(((1, 3), (3, 2), (2, 1)))
+ >>> d.popitem()
+ (2, 1)
+ >>> d
+ OrderedDict([(1, 3), (3, 2)])
+ >>> d.popitem(0)
+ (1, 3)
+ >>> OrderedDict().popitem()
+ Traceback (most recent call last):
+ KeyError: 'popitem(): dictionary is empty'
+ >>> d.popitem(2)
+ Traceback (most recent call last):
+ IndexError: popitem(): index 2 not valid
+ """
+ if not self._sequence:
+ raise KeyError('popitem(): dictionary is empty')
+ try:
+ key = self._sequence[i]
+ except IndexError:
+ raise IndexError('popitem(): index %s not valid' % i)
+ return (key, self.pop(key))
+
+ def setdefault(self, key, defval = None):
+ """
+ >>> d = OrderedDict(((1, 3), (3, 2), (2, 1)))
+ >>> d.setdefault(1)
+ 3
+ >>> d.setdefault(4) is None
+ True
+ >>> d
+ OrderedDict([(1, 3), (3, 2), (2, 1), (4, None)])
+ >>> d.setdefault(5, 0)
+ 0
+ >>> d
+ OrderedDict([(1, 3), (3, 2), (2, 1), (4, None), (5, 0)])
+ """
+ if key in self:
+ return self[key]
+ else:
+ self[key] = defval
+ return defval
+
+ def update(self, from_od):
+ """
+ Update from another OrderedDict or sequence of (key, value) pairs
+
+ >>> d = OrderedDict(((1, 0), (0, 1)))
+ >>> d.update(OrderedDict(((1, 3), (3, 2), (2, 1))))
+ >>> d
+ OrderedDict([(1, 3), (0, 1), (3, 2), (2, 1)])
+ >>> d.update({4: 4})
+ Traceback (most recent call last):
+ TypeError: undefined order, cannot get items from dict
+ >>> d.update((4, 4))
+ Traceback (most recent call last):
+ TypeError: cannot convert dictionary update sequence element "4" to a 2-item sequence
+ """
+ if isinstance(from_od, OrderedDict):
+ for key, val in from_od.items():
+ self[key] = val
+ elif isinstance(from_od, dict):
+ # we lose compatibility with other ordered dict types this way
+ raise TypeError('undefined order, cannot get items from dict')
+ else:
+ # FIXME: efficiency?
+ # sequence of 2-item sequences, or error
+ for item in from_od:
+ try:
+ key, val = item
+ except TypeError:
+ raise TypeError('cannot convert dictionary update'
+ ' sequence element "%s" to a 2-item sequence' % item)
+ self[key] = val
+
+ def rename(self, old_key, new_key):
+ """
+ Rename the key for a given value, without modifying sequence order.
+
+ If ``new_key`` already exists this raises a ``ValueError``, since it
+ would be ambiguous what happens to the associated values and to the
+ position of new_key in the sequence. A missing ``old_key`` raises
+ ``KeyError``. Renaming a key to itself is a no-op.
+
+ >>> od = OrderedDict()
+ >>> od['a'] = 1
+ >>> od['b'] = 2
+ >>> od.items()
+ [('a', 1), ('b', 2)]
+ >>> od.rename('b', 'c')
+ >>> od.items()
+ [('a', 1), ('c', 2)]
+ >>> od.rename('c', 'a')
+ Traceback (most recent call last):
+ ValueError: New key already exists: 'a'
+ >>> od.rename('d', 'b')
+ Traceback (most recent call last):
+ KeyError: 'd'
+ """
+ if new_key == old_key:
+ # no-op
+ return
+ if new_key in self:
+ raise ValueError("New key already exists: %r" % new_key)
+ # rename sequence entry
+ # (the value lookup comes first, so a missing old_key raises KeyError
+ # before anything has been modified)
+ value = self[old_key]
+ old_idx = self._sequence.index(old_key)
+ self._sequence[old_idx] = new_key
+ # rename internal dict entry
+ dict.__delitem__(self, old_key)
+ dict.__setitem__(self, new_key, value)
+
+ def setitems(self, items):
+ """
+ This method allows you to set the items in the dict.
+
+ It takes a list of tuples - of the same sort returned by the ``items``
+ method.
+
+ >>> d = OrderedDict()
+ >>> d.setitems(((3, 1), (2, 3), (1, 2)))
+ >>> d
+ OrderedDict([(3, 1), (2, 3), (1, 2)])
+ """
+ self.clear()
+ # FIXME: this allows you to pass in an OrderedDict as well :-)
+ self.update(items)
+
+    def setkeys(self, keys):
+        """
+        ``setkeys`` allows you to pass in a new list of keys which will
+        replace the current set. This must contain the same set of keys, but
+        need not be in the same order.
+
+        If you pass in new keys that don't match, a ``KeyError`` will be
+        raised and the existing key order is left untouched.
+
+        >>> d = OrderedDict(((1, 3), (3, 2), (2, 1)))
+        >>> d.keys()
+        [1, 3, 2]
+        >>> d.setkeys((1, 2, 3))
+        >>> d
+        OrderedDict([(1, 3), (2, 1), (3, 2)])
+        >>> d.setkeys(['a', 'b', 'c'])
+        Traceback (most recent call last):
+        KeyError: 'Keylist is not the same as current keylist.'
+        >>> d.setkeys((3, 1, 2))
+        >>> d.setkeys([3, 1])
+        Traceback (most recent call last):
+        KeyError: 'Keylist is not the same as current keylist.'
+        >>> d.keys()
+        [3, 1, 2]
+        """
+        # FIXME: Efficiency? (use set for Python 2.4 :-)
+        # NOTE: list(keys) rather than keys[:] because keys[:] returns
+        # a tuple, if keys is a tuple.  Materialise it exactly once so that
+        # one-shot iterators are not exhausted by the validation step.
+        new_order = list(keys)
+        # Compare sorted *copies*: sorting self._sequence in place would
+        # scramble the current order whenever the new key list is rejected.
+        wanted = new_order[:]
+        wanted.sort()
+        current = self._sequence[:]
+        current.sort()
+        if wanted != current:
+            raise KeyError('Keylist is not the same as current keylist.')
+        # NOTE: This makes the _sequence attribute a new object, instead
+        # of changing it in place.
+        self._sequence = new_order
+
+ def setvalues(self, values):
+ """
+ You can pass in a list of values, which will replace the
+ current list. The value list must be the same len as the OrderedDict.
+
+ (Or a ``ValueError`` is raised.)
+
+ >>> d = OrderedDict(((1, 3), (3, 2), (2, 1)))
+ >>> d.setvalues((1, 2, 3))
+ >>> d
+ OrderedDict([(1, 1), (3, 2), (2, 3)])
+ >>> d.setvalues([6])
+ Traceback (most recent call last):
+ ValueError: Value list is not the same length as the OrderedDict.
+ """
+ if len(values) != len(self):
+ # FIXME: correct error to raise?
+ raise ValueError('Value list is not the same length as the '
+ 'OrderedDict.')
+ self.update(zip(self, values))
+
+### Sequence Methods ###
+
+ def index(self, key):
+ """
+ Return the position of the specified key in the OrderedDict.
+
+ >>> d = OrderedDict(((1, 3), (3, 2), (2, 1)))
+ >>> d.index(3)
+ 1
+ >>> d.index(4)
+ Traceback (most recent call last):
+ ValueError: list.index(x): x not in list
+ """
+ return self._sequence.index(key)
+
+ def insert(self, index, key, value):
+ """
+ Takes ``index``, ``key``, and ``value`` as arguments.
+
+ Sets ``key`` to ``value``, so that ``key`` is at position ``index`` in
+ the OrderedDict.
+
+ >>> d = OrderedDict(((1, 3), (3, 2), (2, 1)))
+ >>> d.insert(0, 4, 0)
+ >>> d
+ OrderedDict([(4, 0), (1, 3), (3, 2), (2, 1)])
+ >>> d.insert(0, 2, 1)
+ >>> d
+ OrderedDict([(2, 1), (4, 0), (1, 3), (3, 2)])
+ >>> d.insert(8, 8, 1)
+ >>> d
+ OrderedDict([(2, 1), (4, 0), (1, 3), (3, 2), (8, 1)])
+ """
+ if key in self:
+ # FIXME: efficiency?
+ del self[key]
+ self._sequence.insert(index, key)
+ dict.__setitem__(self, key, value)
+
+ def reverse(self):
+ """
+ Reverse the order of the OrderedDict.
+
+ >>> d = OrderedDict(((1, 3), (3, 2), (2, 1)))
+ >>> d.reverse()
+ >>> d
+ OrderedDict([(2, 1), (3, 2), (1, 3)])
+ """
+ self._sequence.reverse()
+
+ def sort(self, *args, **kwargs):
+ """
+ Sort the key order in the OrderedDict.
+
+ This method takes the same arguments as the ``list.sort`` method on
+ your version of Python.
+
+ >>> d = OrderedDict(((4, 1), (2, 2), (3, 3), (1, 4)))
+ >>> d.sort()
+ >>> d
+ OrderedDict([(1, 4), (2, 2), (3, 3), (4, 1)])
+ """
+ self._sequence.sort(*args, **kwargs)
+
+class Keys(object):
+ # FIXME: should this object be a subclass of list?
+ """
+ Custom object for accessing the keys of an OrderedDict.
+
+ Can be called like the normal ``OrderedDict.keys`` method, but also
+ supports indexing and sequence methods.
+ """
+
+ def __init__(self, main):
+ self._main = main
+
+ def __call__(self):
+ """Pretend to be the keys method."""
+ return self._main._keys()
+
+ def __getitem__(self, index):
+ """Fetch the key at position i."""
+ # NOTE: this automatically supports slicing :-)
+ return self._main._sequence[index]
+
+ def __setitem__(self, index, name):
+ """
+ You cannot assign to keys, but you can do slice assignment to re-order
+ them.
+
+ You can only do slice assignment if the new set of keys is a reordering
+ of the original set.
+ """
+ if isinstance(index, types.SliceType):
+ # FIXME: efficiency?
+ # check length is the same
+ indexes = range(len(self._main._sequence))[index]
+ if len(indexes) != len(name):
+ raise ValueError('attempt to assign sequence of size %s '
+ 'to slice of size %s' % (len(name), len(indexes)))
+ # check they are the same keys
+ # FIXME: Use set
+ old_keys = self._main._sequence[index]
+ new_keys = list(name)
+ old_keys.sort()
+ new_keys.sort()
+ if old_keys != new_keys:
+ raise KeyError('Keylist is not the same as current keylist.')
+ orig_vals = [self._main[k] for k in name]
+ del self._main[index]
+ vals = zip(indexes, name, orig_vals)
+ vals.sort()
+ for i, k, v in vals:
+ if self._main.strict and k in self._main:
+ raise ValueError('slice assignment must be from '
+ 'unique keys')
+ self._main.insert(i, k, v)
+ else:
+ raise ValueError('Cannot assign to keys')
+
+ ### following methods pinched from UserList and adapted ###
+ def __repr__(self): return repr(self._main._sequence)
+
+ # FIXME: do we need to check if we are comparing with another ``Keys``
+ # object? (like the __cast method of UserList)
+ def __lt__(self, other): return self._main._sequence < other
+ def __le__(self, other): return self._main._sequence <= other
+ def __eq__(self, other): return self._main._sequence == other
+ def __ne__(self, other): return self._main._sequence != other
+ def __gt__(self, other): return self._main._sequence > other
+ def __ge__(self, other): return self._main._sequence >= other
+ # FIXME: do we need __cmp__ as well as rich comparisons?
+ def __cmp__(self, other): return cmp(self._main._sequence, other)
+
+ def __contains__(self, item): return item in self._main._sequence
+ def __len__(self): return len(self._main._sequence)
+ def __iter__(self): return self._main.iterkeys()
+ def count(self, item): return self._main._sequence.count(item)
+ def index(self, item, *args): return self._main._sequence.index(item, *args)
+ def reverse(self): self._main._sequence.reverse()
+ def sort(self, *args, **kwds): self._main._sequence.sort(*args, **kwds)
+ def __mul__(self, n): return self._main._sequence*n
+ __rmul__ = __mul__
+ def __add__(self, other): return self._main._sequence + other
+ def __radd__(self, other): return other + self._main._sequence
+
+ ## following methods not implemented for keys ##
+ def __delitem__(self, i): raise TypeError('Can\'t delete items from keys')
+ def __iadd__(self, other): raise TypeError('Can\'t add in place to keys')
+ def __imul__(self, n): raise TypeError('Can\'t multiply keys in place')
+ def append(self, item): raise TypeError('Can\'t append items to keys')
+ def insert(self, i, item): raise TypeError('Can\'t insert items into keys')
+ def pop(self, i=-1): raise TypeError('Can\'t pop items from keys')
+ def remove(self, item): raise TypeError('Can\'t remove items from keys')
+ def extend(self, other): raise TypeError('Can\'t extend keys')
+
+class Items(object):
+ """
+ Custom object for accessing the items of an OrderedDict.
+
+ Can be called like the normal ``OrderedDict.items`` method, but also
+ supports indexing and sequence methods.
+ """
+
+ def __init__(self, main):
+ self._main = main
+
+ def __call__(self):
+ """Pretend to be the items method."""
+ return self._main._items()
+
+ def __getitem__(self, index):
+ """Fetch the item at position i."""
+ if isinstance(index, types.SliceType):
+ # fetching a slice returns an OrderedDict
+ return self._main[index].items()
+ key = self._main._sequence[index]
+ return (key, self._main[key])
+
+ def __setitem__(self, index, item):
+ """Set item at position i to item."""
+ if isinstance(index, types.SliceType):
+ # NOTE: item must be an iterable (list of tuples)
+ self._main[index] = OrderedDict(item)
+ else:
+ # FIXME: Does this raise a sensible error?
+ orig = self._main.keys[index]
+ key, value = item
+ if self._main.strict and key in self and (key != orig):
+ raise ValueError('slice assignment must be from '
+ 'unique keys')
+ # delete the current one
+ del self._main[self._main._sequence[index]]
+ self._main.insert(index, key, value)
+
+ def __delitem__(self, i):
+ """Delete the item at position i."""
+ key = self._main._sequence[i]
+ if isinstance(i, types.SliceType):
+ for k in key:
+ # FIXME: efficiency?
+ del self._main[k]
+ else:
+ del self._main[key]
+
+ ### following methods pinched from UserList and adapted ###
+ def __repr__(self): return repr(self._main.items())
+
+ # FIXME: do we need to check if we are comparing with another ``Items``
+ # object? (like the __cast method of UserList)
+ def __lt__(self, other): return self._main.items() < other
+ def __le__(self, other): return self._main.items() <= other
+ def __eq__(self, other): return self._main.items() == other
+ def __ne__(self, other): return self._main.items() != other
+ def __gt__(self, other): return self._main.items() > other
+ def __ge__(self, other): return self._main.items() >= other
+ def __cmp__(self, other): return cmp(self._main.items(), other)
+
+ def __contains__(self, item): return item in self._main.items()
+ def __len__(self): return len(self._main._sequence) # easier :-)
+ def __iter__(self): return self._main.iteritems()
+ def count(self, item): return self._main.items().count(item)
+ def index(self, item, *args): return self._main.items().index(item, *args)
+ def reverse(self): self._main.reverse()
+ def sort(self, *args, **kwds): self._main.sort(*args, **kwds)
+ def __mul__(self, n): return self._main.items()*n
+ __rmul__ = __mul__
+ def __add__(self, other): return self._main.items() + other
+ def __radd__(self, other): return other + self._main.items()
+
+ def append(self, item):
+ """Add an item to the end."""
+ # FIXME: this is only append if the key isn't already present
+ key, value = item
+ self._main[key] = value
+
+ def insert(self, i, item):
+ """
+ Insert the ``(key, value)`` pair ``item`` at position ``i`` by
+ delegating to the mapping's ``insert(i, key, value)``.
+ """
+ key, value = item
+ self._main.insert(i, key, value)
+
+ def pop(self, i=-1):
+ """
+ Remove the item at position ``i`` (default: the last position) and
+ return it as a ``(key, value)`` tuple.
+ """
+ key = self._main._sequence[i]
+ return (key, self._main.pop(key))
+
+ def remove(self, item):
+ """
+ Remove the ``(key, value)`` pair ``item`` from the mapping.
+
+ Raises ``ValueError`` if the key is missing or is mapped to a
+ different value.
+ """
+ key, value = item
+ try:
+ # compare explicitly rather than with ``assert``: assert statements
+ # are dropped under ``python -O``, which would let a pair with the
+ # wrong value delete the key anyway
+ found = (value == self._main[key])
+ except KeyError:
+ found = False
+ if not found:
+ # no 'ValueError: ' prefix in the text; the traceback already
+ # prints the exception type
+ raise ValueError('list.remove(x): x not in list')
+ del self._main[key]
+
+ def extend(self, other):
+ """
+ Assign each ``(key, value)`` pair from ``other`` into the mapping, in
+ iteration order.
+ """
+ # FIXME: is only a true extend if none of the keys already present
+ for item in other:
+ key, value = item
+ self._main[key] = value
+
+ def __iadd__(self, other):
+ """
+ Extend the items in place with the pairs in ``other``.
+
+ Returns ``self``: whatever ``__iadd__`` returns is bound back to the
+ target of ``+=``, so returning nothing would rebind it to ``None``.
+ """
+ self.extend(other)
+ return self
+
+ ## following methods not implemented for items ##
+
+ def __imul__(self, n): raise TypeError('Can\'t multiply items in place')
+
+class Values(object):
+ """
+ Custom object for accessing the values of an OrderedDict.
+
+ Can be called like the normal ``OrderedDict.values`` method, but also
+ supports indexing and sequence methods.
+ """
+
+ def __init__(self, main):
+ self._main = main
+
+ def __call__(self):
+ """Pretend to be the values method."""
+ return self._main._values()
+
+ def __getitem__(self, index):
+ """Fetch the value at position i."""
+ if isinstance(index, types.SliceType):
+ return [self._main[key] for key in self._main._sequence[index]]
+ else:
+ return self._main[self._main._sequence[index]]
+
+ def __setitem__(self, index, value):
+ """
+ Set the value at position ``index`` to ``value``.
+
+ You can only do slice assignment to values if you supply a sequence of
+ equal length to the slice you are replacing; otherwise ``ValueError``
+ is raised.
+ """
+ if isinstance(index, types.SliceType):
+ keys = self._main._sequence[index]
+ if len(keys) != len(value):
+ # first %s is the size of the sequence being assigned (``value``),
+ # second is the size of the slice it was meant to fill
+ raise ValueError('attempt to assign sequence of size %s '
+ 'to slice of size %s' % (len(value), len(keys)))
+ # FIXME: efficiency? Would be better to calculate the indexes
+ # directly from the slice object
+ # NOTE: the new keys can collide with existing keys (or even
+ # contain duplicates) - these will overwrite
+ for key, val in zip(keys, value):
+ self._main[key] = val
+ else:
+ self._main[self._main._sequence[index]] = value
+
+ ### following methods pinched from UserList and adapted ###
+ def __repr__(self): return repr(self._main.values())
+
+ # FIXME: do we need to check if we are comparing with another ``Values``
+ # object? (like the __cast method of UserList)
+ def __lt__(self, other): return self._main.values() < other
+ def __le__(self, other): return self._main.values() <= other
+ def __eq__(self, other): return self._main.values() == other
+ def __ne__(self, other): return self._main.values() != other
+ def __gt__(self, other): return self._main.values() > other
+ def __ge__(self, other): return self._main.values() >= other
+ def __cmp__(self, other): return cmp(self._main.values(), other)
+
+ def __contains__(self, item): return item in self._main.values()
+ def __len__(self): return len(self._main._sequence) # easier :-)
+ def __iter__(self): return self._main.itervalues()
+ def count(self, item): return self._main.values().count(item)
+ def index(self, item, *args): return self._main.values().index(item, *args)
+
+ def reverse(self):
+ """Reverse the values"""
+ vals = self._main.values()
+ vals.reverse()
+ # FIXME: efficiency
+ self[:] = vals
+
+ def sort(self, *args, **kwds):
+ """Sort the values."""
+ vals = self._main.values()
+ vals.sort(*args, **kwds)
+ self[:] = vals
+
+ def __mul__(self, n): return self._main.values()*n
+ __rmul__ = __mul__
+ def __add__(self, other): return self._main.values() + other
+ def __radd__(self, other): return other + self._main.values()
+
+ ## following methods not implemented for values ##
+ def __delitem__(self, i): raise TypeError('Can\'t delete items from values')
+ def __iadd__(self, other): raise TypeError('Can\'t add in place to values')
+ def __imul__(self, n): raise TypeError('Can\'t multiply values in place')
+ def append(self, item): raise TypeError('Can\'t append items to values')
+ def insert(self, i, item): raise TypeError('Can\'t insert items into values')
+ def pop(self, i=-1): raise TypeError('Can\'t pop items from values')
+ def remove(self, item): raise TypeError('Can\'t remove items from values')
+ def extend(self, other): raise TypeError('Can\'t extend values')
+
+class SequenceOrderedDict(OrderedDict):
+ """
+ Experimental version of OrderedDict that has a custom object for ``keys``,
+ ``values``, and ``items``.
+
+ These are callable sequence objects that work as methods, or can be
+ manipulated directly as sequences.
+
+ Test for ``keys``, ``items`` and ``values``.
+
+ >>> d = SequenceOrderedDict(((1, 2), (2, 3), (3, 4)))
+ >>> d
+ SequenceOrderedDict([(1, 2), (2, 3), (3, 4)])
+ >>> d.keys
+ [1, 2, 3]
+ >>> d.keys()
+ [1, 2, 3]
+ >>> d.setkeys((3, 2, 1))
+ >>> d
+ SequenceOrderedDict([(3, 4), (2, 3), (1, 2)])
+ >>> d.setkeys((1, 2, 3))
+ >>> d.keys[0]
+ 1
+ >>> d.keys[:]
+ [1, 2, 3]
+ >>> d.keys[-1]
+ 3
+ >>> d.keys[-2]
+ 2
+ >>> d.keys[0:2] = [2, 1]
+ >>> d
+ SequenceOrderedDict([(2, 3), (1, 2), (3, 4)])
+ >>> d.keys.reverse()
+ >>> d.keys
+ [3, 1, 2]
+ >>> d.keys = [1, 2, 3]
+ >>> d
+ SequenceOrderedDict([(1, 2), (2, 3), (3, 4)])
+ >>> d.keys = [3, 1, 2]
+ >>> d
+ SequenceOrderedDict([(3, 4), (1, 2), (2, 3)])
+ >>> a = SequenceOrderedDict()
+ >>> b = SequenceOrderedDict()
+ >>> a.keys == b.keys
+ 1
+ >>> a['a'] = 3
+ >>> a.keys == b.keys
+ 0
+ >>> b['a'] = 3
+ >>> a.keys == b.keys
+ 1
+ >>> b['b'] = 3
+ >>> a.keys == b.keys
+ 0
+ >>> a.keys > b.keys
+ 0
+ >>> a.keys < b.keys
+ 1
+ >>> 'a' in a.keys
+ 1
+ >>> len(b.keys)
+ 2
+ >>> 'c' in d.keys
+ 0
+ >>> 1 in d.keys
+ 1
+ >>> [v for v in d.keys]
+ [3, 1, 2]
+ >>> d.keys.sort()
+ >>> d.keys
+ [1, 2, 3]
+ >>> d = SequenceOrderedDict(((1, 2), (2, 3), (3, 4)), strict=True)
+ >>> d.keys[::-1] = [1, 2, 3]
+ >>> d
+ SequenceOrderedDict([(3, 4), (2, 3), (1, 2)])
+ >>> d.keys[:2]
+ [3, 2]
+ >>> d.keys[:2] = [1, 3]
+ Traceback (most recent call last):
+ KeyError: 'Keylist is not the same as current keylist.'
+
+ >>> d = SequenceOrderedDict(((1, 2), (2, 3), (3, 4)))
+ >>> d
+ SequenceOrderedDict([(1, 2), (2, 3), (3, 4)])
+ >>> d.values
+ [2, 3, 4]
+ >>> d.values()
+ [2, 3, 4]
+ >>> d.setvalues((4, 3, 2))
+ >>> d
+ SequenceOrderedDict([(1, 4), (2, 3), (3, 2)])
+ >>> d.values[::-1]
+ [2, 3, 4]
+ >>> d.values[0]
+ 4
+ >>> d.values[-2]
+ 3
+ >>> del d.values[0]
+ Traceback (most recent call last):
+ TypeError: Can't delete items from values
+ >>> d.values[::2] = [2, 4]
+ >>> d
+ SequenceOrderedDict([(1, 2), (2, 3), (3, 4)])
+ >>> 7 in d.values
+ 0
+ >>> len(d.values)
+ 3
+ >>> [val for val in d.values]
+ [2, 3, 4]
+ >>> d.values[-1] = 2
+ >>> d.values.count(2)
+ 2
+ >>> d.values.index(2)
+ 0
+ >>> d.values[-1] = 7
+ >>> d.values
+ [2, 3, 7]
+ >>> d.values.reverse()
+ >>> d.values
+ [7, 3, 2]
+ >>> d.values.sort()
+ >>> d.values
+ [2, 3, 7]
+ >>> d.values.append('anything')
+ Traceback (most recent call last):
+ TypeError: Can't append items to values
+ >>> d.values = (1, 2, 3)
+ >>> d
+ SequenceOrderedDict([(1, 1), (2, 2), (3, 3)])
+
+ >>> d = SequenceOrderedDict(((1, 2), (2, 3), (3, 4)))
+ >>> d
+ SequenceOrderedDict([(1, 2), (2, 3), (3, 4)])
+ >>> d.items()
+ [(1, 2), (2, 3), (3, 4)]
+ >>> d.setitems([(3, 4), (2 ,3), (1, 2)])
+ >>> d
+ SequenceOrderedDict([(3, 4), (2, 3), (1, 2)])
+ >>> d.items[0]
+ (3, 4)
+ >>> d.items[:-1]
+ [(3, 4), (2, 3)]
+ >>> d.items[1] = (6, 3)
+ >>> d.items
+ [(3, 4), (6, 3), (1, 2)]
+ >>> d.items[1:2] = [(9, 9)]
+ >>> d
+ SequenceOrderedDict([(3, 4), (9, 9), (1, 2)])
+ >>> del d.items[1:2]
+ >>> d
+ SequenceOrderedDict([(3, 4), (1, 2)])
+ >>> (3, 4) in d.items
+ 1
+ >>> (4, 3) in d.items
+ 0
+ >>> len(d.items)
+ 2
+ >>> [v for v in d.items]
+ [(3, 4), (1, 2)]
+ >>> d.items.count((3, 4))
+ 1
+ >>> d.items.index((1, 2))
+ 1
+ >>> d.items.index((2, 1))
+ Traceback (most recent call last):
+ ValueError: list.index(x): x not in list
+ >>> d.items.reverse()
+ >>> d.items
+ [(1, 2), (3, 4)]
+ >>> d.items.reverse()
+ >>> d.items.sort()
+ >>> d.items
+ [(1, 2), (3, 4)]
+ >>> d.items.append((5, 6))
+ >>> d.items
+ [(1, 2), (3, 4), (5, 6)]
+ >>> d.items.insert(0, (0, 0))
+ >>> d.items
+ [(0, 0), (1, 2), (3, 4), (5, 6)]
+ >>> d.items.insert(-1, (7, 8))
+ >>> d.items
+ [(0, 0), (1, 2), (3, 4), (7, 8), (5, 6)]
+ >>> d.items.pop()
+ (5, 6)
+ >>> d.items
+ [(0, 0), (1, 2), (3, 4), (7, 8)]
+ >>> d.items.remove((1, 2))
+ >>> d.items
+ [(0, 0), (3, 4), (7, 8)]
+ >>> d.items.extend([(1, 2), (5, 6)])
+ >>> d.items
+ [(0, 0), (3, 4), (7, 8), (1, 2), (5, 6)]
+ """
+
+ def __init__(self, init_val=(), strict=True):
+ """
+ Build the dictionary, then replace ``keys``, ``values`` and ``items``
+ with callable sequence objects.
+
+ ``init_val`` and ``strict`` are passed straight through to
+ ``OrderedDict.__init__``.
+ """
+ OrderedDict.__init__(self, init_val, strict=strict)
+ # keep the original bound methods reachable under private names; the
+ # wrapper objects call them (e.g. Items.__call__ uses ``_items`` and
+ # Values.__call__ uses ``_values``)
+ self._keys = self.keys
+ self._values = self.values
+ self._items = self.items
+ self.keys = Keys(self)
+ self.values = Values(self)
+ self.items = Items(self)
+ # assigned last: once ``_att_dict`` is in the instance dict,
+ # __setattr__ routes assignments to keys/items/values through these
+ # setter methods instead of rebinding the attributes
+ self._att_dict = {
+ 'keys': self.setkeys,
+ 'items': self.setitems,
+ 'values': self.setvalues,
+ }
+
+ def __setattr__(self, name, value):
+ """
+ Protect keys, items, and values.
+
+ Assigning to one of those names calls the matching setter stored in
+ ``_att_dict`` with ``value``; any other name is set normally.
+ """
+ if not '_att_dict' in self.__dict__:
+ # ``_att_dict`` does not exist yet (it is the last thing __init__
+ # assigns), so store the attribute directly
+ object.__setattr__(self, name, value)
+ else:
+ try:
+ fun = self._att_dict[name]
+ except KeyError:
+ # not one of the protected names
+ OrderedDict.__setattr__(self, name, value)
+ else:
+ fun(value)
+
+if __name__ == '__main__':
+ if INTP_VER < (2, 3):
+ raise RuntimeError("Tests require Python v.2.3 or later")
+ # turn off warnings for tests
+ warnings.filterwarnings('ignore')
+ # run the code tests in doctest format
+ import doctest
+ m = sys.modules.get('__main__')
+ globs = m.__dict__.copy()
+ globs.update({
+ 'INTP_VER': INTP_VER,
+ })
+ doctest.testmod(m, globs=globs)
+
Modified: hadoop/avro/trunk/src/doc/content/xdocs/spec.xml
URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/src/doc/content/xdocs/spec.xml?rev=776496&r1=776495&r2=776496&view=diff
==============================================================================
--- hadoop/avro/trunk/src/doc/content/xdocs/spec.xml (original)
+++ hadoop/avro/trunk/src/doc/content/xdocs/spec.xml Tue May 19 23:35:39 2009
@@ -82,11 +82,11 @@
</section>
- <section id="schema_compound">
- <title>Compound Types</title>
+ <section id="schema_complex">
+ <title>Complex Types</title>
- <p>Avro supports three kinds of compound types: records,
- arrays and unions.</p>
+ <p>Avro supports five kinds of complex types: records, enums,
+ arrays, maps and unions.</p>
<section>
<title>Records</title>
@@ -94,7 +94,7 @@
<p>Records use the type name "record" and support two attributes:</p>
<ul>
<li><code>name</code>: a JSON string providing the name
- of the record (optional).</li>
+ of the record (required).</li>
<li><code>fields</code>: a JSON array, listing fields (required).
Each field is a JSON object with the following attributes:
<ul>
@@ -119,6 +119,7 @@
<tr><td>boolean</td><td>boolean</td><td>true</td></tr>
<tr><td>null</td><td>null</td><td>null</td></tr>
<tr><td>record</td><td>object</td><td>{"a": 1}</td></tr>
+ <tr><td>enum</td><td>string</td><td>"FOO"</td></tr>
<tr><td>array</td><td>array</td><td>[1]</td></tr>
<tr><td>map</td><td>object</td><td>{"a": 1}</td></tr>
</table>
@@ -141,6 +142,26 @@
</section>
<section>
+ <title>Enums</title>
+
+ <p>Enums use the type name "enum" and support the following
+ attributes:</p>
+ <ul>
+ <li><code>name</code>: a JSON string providing the name
+ of the enum (required).</li>
+ <li><code>symbols</code>: a JSON array, listing symbols,
+ as JSON strings (required).</li>
+ </ul>
+ <p>For example, playing card suits might be defined with:</p>
+ <source>
+{ "type": "enum",
+ "name": "Suit",
+ "symbols" : ["SPADES", "HEARTS", "DIAMONDS", "CLUBS"]
+}
+ </source>
+ </section>
+
+ <section>
<title>Arrays</title>
<p>Arrays use the type name <code>"array"</code> and support
a single attribute:</p>
@@ -178,11 +199,11 @@
reading and writing unions.)</p>
<p>Unions may not immediately contain other unions.</p>
</section>
- </section> <!-- end compound types -->
+ </section> <!-- end complex types -->
<section>
<title>Identifiers</title>
- <p>Record and field names must:</p>
+ <p>Record, field and enum names must:</p>
<ul>
<li>start with <code>[A-Za-z_]</code></li>
<li>subsequently contain only <code>[A-Za-z0-9_]</code></li>
@@ -252,9 +273,9 @@
</section>
- <section id="serialize_compound">
- <title>Compound Type Serialization</title>
- <p>Compound types are serialized as follows:</p>
+ <section id="serialize_complex">
+ <title>Complex Type Serialization</title>
+ <p>Complex types are serialized as follows:</p>
<section>
<title>Records</title>
@@ -284,6 +305,20 @@
</section>
<section>
+ <title>Enums</title>
+ <p>An enum is serialized by an <code>int</code>, representing
+ the zero-based position of the symbol in the schema.</p>
+ <p>For example, consider the enum:</p>
+ <source>
+{"type": "enum", "name": "Foo", "symbols": ["A", "B", "C", "D"] }
+ </source>
+ <p>This would be serialized by an <code>int</code> between
+ zero and three, with zero indicating "A", and three indicating
+ "D".</p>
+ </section>
+
+
+ <section>
<title>Arrays</title>
<p>Arrays are serialized as a series of <em>blocks</em>.
Each block consists of a <code>long</code> <em>count</em>
@@ -363,7 +398,7 @@
<source>00 02 61</source></li>
</ul>
</section>
- </section> <!-- end compound types -->
+ </section> <!-- end complex types -->
</section>
@@ -446,11 +481,11 @@
<ul>
<li><em>name</em>, string, to distinguish it from other protocols;</li>
<li><em>namespace</em>, a string which qualifies the name;</li>
- <li><em>types</em>, a list of record and error definitions.
- An error definition is just like a record definition except
- it uses "error" instead of "record". Note that forward
- references to records and errors are not currently
- supported.</li>
+ <li><em>types</em>, a list of record, enum and error
+ definitions. An error definition is just like a record
+ definition except it uses "error" instead of "record". Note
+ that forward references to records, enums and errors are not
+ currently supported.</li>
<li><em>messages</em>, a JSON object whose keys are message
names and whose values are objects whose attributes are
described below. No two messages may have the same name.</li>
@@ -667,6 +702,7 @@
<ul>
<li>both schemas are arrays whose item types match</li>
<li>both schemas are maps whose value types match</li>
+ <li>both schemas are enums whose names match</li>
<li>both schemas are records with the same name</li>
<li>either schema is a union</li>
<li>both schemas have same primitive type</li>
@@ -695,6 +731,11 @@
list order may also vary.</p>
</li>
+ <li><strong>if both are enums:</strong>
+ <p>if the writer's symbol is not present in the reader's
+ enum, then the enum value is unset.</p>
+ </li>
+
<li><strong>if both are arrays:</strong>
<p>This resolution algorithm is applied recursively to the reader's and
writer's array item schemas.</p>
Modified: hadoop/avro/trunk/src/java/org/apache/avro/Protocol.java
URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/src/java/org/apache/avro/Protocol.java?rev=776496&r1=776495&r2=776496&view=diff
==============================================================================
--- hadoop/avro/trunk/src/java/org/apache/avro/Protocol.java (original)
+++ hadoop/avro/trunk/src/java/org/apache/avro/Protocol.java Tue May 19 23:35:39 2009
@@ -151,7 +151,7 @@
public String getNamespace() { return namespace; }
/** The types of this protocol. */
- public Map<String,Schema> getTypes() { return types; }
+ public LinkedHashMap<String,Schema> getTypes() { return types; }
/** The messages of this protocol. */
public Map<String,Message> getMessages() { return messages; }
Modified: hadoop/avro/trunk/src/java/org/apache/avro/Schema.java
URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/src/java/org/apache/avro/Schema.java?rev=776496&r1=776495&r2=776496&view=diff
==============================================================================
--- hadoop/avro/trunk/src/java/org/apache/avro/Schema.java (original)
+++ hadoop/avro/trunk/src/java/org/apache/avro/Schema.java Tue May 19 23:35:39 2009
@@ -24,6 +24,7 @@
import java.io.StringReader;
import java.util.ArrayList;
import java.util.Iterator;
+import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
@@ -37,7 +38,8 @@
/** An abstract data type.
* <p>A schema may be one of:
* <ul>
- * <li>An <i>record</i>, mapping field names to field value data;
+ * <li>A <i>record</i>, mapping field names to field value data;
+ * <li>An <i>enum</i>, containing one of a small set of symbols;
* <li>An <i>array</i> of values, all of the same schema;
* <li>A <i>map</i>, containing string/value pairs, of a declared schema;
* <li>A <i>union</i> of other schemas;
@@ -61,7 +63,7 @@
/** The type of a schema. */
public enum Type
- { RECORD, ARRAY, MAP, UNION, STRING, BYTES,
+ { RECORD, ENUM, ARRAY, MAP, UNION, STRING, BYTES,
INT, LONG, FLOAT, DOUBLE, BOOLEAN, NULL };
private final Type type;
@@ -96,6 +98,12 @@
return new RecordSchema(name, namespace, isError);
}
+ /** Create an enum schema. */
+ public static Schema createEnum(String name, String namespace,
+ List<String> values) {
+ return new EnumSchema(name, namespace, values);
+ }
+
/** Create an array schema. */
public static Schema createArray(Schema elementType) {
return new ArraySchema(elementType);
@@ -129,14 +137,25 @@
throw new AvroRuntimeException("Not a record: "+this);
}
- /** If this is a record, returns its name, if any. */
+ /** If this is an enum, return its symbols. */
+ public List<String> getEnumSymbols() {
+ throw new AvroRuntimeException("Not an enum: "+this);
+ }
+
+ /** If this is an enum, return a symbol's ordinal value. */
+ public int getEnumOrdinal(String symbol) {
+ throw new AvroRuntimeException("Not an enum: "+this);
+ }
+
+
+ /** If this is a record or enum, returns its name, if any. */
public String getName() {
- throw new AvroRuntimeException("Not a record: "+this);
+ throw new AvroRuntimeException("Not a record or enum: "+this);
}
- /** If this is a record, returns its namespace, if any. */
+ /** If this is a record or enum, returns its namespace, if any. */
public String getNamespace() {
- throw new AvroRuntimeException("Not a record: "+this);
+ throw new AvroRuntimeException("Not a record or enum: "+this);
}
/** Returns true if this record is an error type. */
@@ -283,6 +302,44 @@
}
}
+ private static class EnumSchema extends NamedSchema {
+ private final List<String> symbols;
+ private final Map<String,Integer> ordinals;
+ public EnumSchema(String name, String space, List<String> symbols) {
+ super(Type.ENUM, name, space);
+ this.symbols = symbols;
+ this.ordinals = new HashMap<String,Integer>();
+ int i = 0;
+ for (String symbol : symbols)
+ ordinals.put(symbol, i++);
+ }
+ public List<String> getEnumSymbols() { return symbols; }
+ public int getEnumOrdinal(String symbol) { return ordinals.get(symbol); }
+ public boolean equals(Object o) {
+ if (o == this) return true;
+ if (!(o instanceof EnumSchema)) return false;
+ EnumSchema that = (EnumSchema)o;
+ return equalNames(that) && symbols.equals(that.symbols);
+ }
+ public int hashCode() { return super.hashCode() + symbols.hashCode(); }
+ public String toString(Names names) {
+ if (this.equals(names.get(name))) return "\""+name+"\"";
+ else if (name != null) names.put(name, this);
+ StringBuilder buffer = new StringBuilder();
+ buffer.append("{\"type\": \"enum\", "
+ +"\"name\": \""+name+"\", "
+ +"\"symbols\": [");
+ int count = 0;
+ for (String symbol : symbols) {
+ buffer.append("\""+symbol+"\"");
+ if (++count < symbols.size())
+ buffer.append(", ");
+ }
+ buffer.append("]}");
+ return buffer.toString();
+ }
+ }
+
private static class ArraySchema extends Schema {
private final Schema elementType;
public ArraySchema(Schema elementType) {
@@ -336,14 +393,18 @@
this.types = types;
int seen = 0;
for (Schema type : types) { // check legality of union
- if (type.getType() == Type.UNION)
+ switch (type.getType()) {
+ case UNION:
throw new AvroRuntimeException("Nested union: "+this);
- int mask = 1 << type.getType().ordinal();
- if (type.getType() == Type.RECORD && type.getName() != null)
- continue;
- if ((seen & mask) != 0)
- throw new AvroRuntimeException("Ambiguous union: "+this);
- seen |= mask;
+ case RECORD:
+ if (type.getName() != null)
+ continue;
+ default:
+ int mask = 1 << type.getType().ordinal();
+ if ((seen & mask) != 0)
+ throw new AvroRuntimeException("Ambiguous union: "+this);
+ seen |= mask;
+ }
}
}
public List<Schema> getTypes() { return types; }
@@ -509,6 +570,20 @@
}
result.setFields(fields);
return result;
+ } else if (type.equals("enum")) { // enum
+ JsonNode nameNode = schema.getFieldValue("name");
+ String name = nameNode != null ? nameNode.getTextValue() : null;
+ JsonNode spaceNode = schema.getFieldValue("namespace");
+ String space = spaceNode!=null?spaceNode.getTextValue():names.space();
+ JsonNode symbolsNode = schema.getFieldValue("symbols");
+ if (symbolsNode == null || !symbolsNode.isArray())
+ throw new SchemaParseException("Enum has no symbols: "+schema);
+ List<String> symbols = new ArrayList<String>();
+ for (JsonNode n : symbolsNode)
+ symbols.add(n.getTextValue());
+ Schema result = new EnumSchema(name, space, symbols);
+ if (name != null) names.put(name, result);
+ return result;
} else if (type.equals("array")) { // array
return new ArraySchema(parse(schema.getFieldValue("items"), names));
} else if (type.equals("map")) { // map
Modified: hadoop/avro/trunk/src/java/org/apache/avro/generic/GenericData.java
URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/src/java/org/apache/avro/generic/GenericData.java?rev=776496&r1=776495&r2=776496&view=diff
==============================================================================
--- hadoop/avro/trunk/src/java/org/apache/avro/generic/GenericData.java (original)
+++ hadoop/avro/trunk/src/java/org/apache/avro/generic/GenericData.java Tue May 19 23:35:39 2009
@@ -107,6 +107,8 @@
if (!validate(entry.getValue(), fields.get(entry.getKey())))
return false;
return true;
+ case ENUM:
+ return schema.getEnumSymbols().contains(datum);
case ARRAY:
if (!(datum instanceof GenericArray)) return false;
for (Object element : (GenericArray)datum)
Modified: hadoop/avro/trunk/src/java/org/apache/avro/generic/GenericDatumReader.java
URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/src/java/org/apache/avro/generic/GenericDatumReader.java?rev=776496&r1=776495&r2=776496&view=diff
==============================================================================
--- hadoop/avro/trunk/src/java/org/apache/avro/generic/GenericDatumReader.java (original)
+++ hadoop/avro/trunk/src/java/org/apache/avro/generic/GenericDatumReader.java Tue May 19 23:35:39 2009
@@ -67,6 +67,7 @@
expected = resolveExpected(actual, expected);
switch (actual.getType()) {
case RECORD: return readRecord(old, actual, expected, in);
+ case ENUM: return readEnum(actual, expected, in);
case ARRAY: return readArray(old, actual, expected, in);
case MAP: return readMap(old, actual, expected, in);
case STRING: return readString(old, in);
@@ -85,12 +86,15 @@
// first scan for exact match
for (Schema branch : expected.getTypes())
if (branch.getType() == actual.getType())
- if (branch.getType() == Type.RECORD) {
+ switch (branch.getType()) {
+ case RECORD:
String name = branch.getName();
if (name == null || name.equals(actual.getName()))
return branch;
- } else
+ break;
+ default:
return branch;
+ }
// then scan match via numeric promotion
for (Schema branch : expected.getTypes())
switch (actual.getType()) {
@@ -203,6 +207,8 @@
}
}
return record;
+ case ENUM:
+ return createEnum(json.getTextValue(), schema);
case ARRAY:
Object array = newArray(old, json.size());
Schema element = schema.getElementType();
@@ -231,6 +237,20 @@
}
}
+ /** Called to read an enum value. May be overridden for alternate enum
+ * representations. By default, returns the symbol as a String. */
+ protected Object readEnum(Schema actual, Schema expected, ValueReader in)
+ throws IOException {
+ String name = expected.getName();
+ if (name != null && !name.equals(actual.getName()))
+ throw new AvroTypeException("Expected "+expected+", found "+actual);
+ return createEnum(actual.getEnumSymbols().get(in.readInt()), expected);
+ }
+
+ /** Called to create an enum value. May be overridden for alternate enum
+ * representations. By default, returns the symbol as a String. */
+ protected Object createEnum(String symbol, Schema schema) { return symbol; }
+
/** Called to read an array instance. May be overridden for alternate array
* representations.*/
@SuppressWarnings(value="unchecked")
@@ -355,6 +375,9 @@
for (Map.Entry<String, Schema> entry : schema.getFieldSchemas())
skip(entry.getValue(), in);
break;
+ case ENUM:
+ in.readInt();
+ break;
case ARRAY:
Schema elementType = schema.getElementType();
for (int l = (int)in.readLong(); l > 0; l = (int)in.readLong())
Modified: hadoop/avro/trunk/src/java/org/apache/avro/generic/GenericDatumWriter.java
URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/src/java/org/apache/avro/generic/GenericDatumWriter.java?rev=776496&r1=776495&r2=776496&view=diff
==============================================================================
--- hadoop/avro/trunk/src/java/org/apache/avro/generic/GenericDatumWriter.java (original)
+++ hadoop/avro/trunk/src/java/org/apache/avro/generic/GenericDatumWriter.java Tue May 19 23:35:39 2009
@@ -53,6 +53,7 @@
throws IOException {
switch (schema.getType()) {
case RECORD: writeRecord(schema, datum, out); break;
+ case ENUM: writeEnum(schema, datum, out); break;
case ARRAY: writeArray(schema, datum, out); break;
case MAP: writeMap(schema, datum, out); break;
case UNION:
@@ -89,6 +90,13 @@
return ((GenericRecord) record).get(field);
}
+ /** Called to write an enum value. May be overridden for alternate enum
+ * representations.*/
+ protected void writeEnum(Schema schema, Object datum, ValueWriter out)
+ throws IOException {
+ out.writeInt(schema.getEnumOrdinal((String)datum));
+ }
+
/** Called to write a array. May be overridden for alternate array
* representations.*/
protected void writeArray(Schema schema, Object datum, ValueWriter out)
@@ -178,6 +186,7 @@
if (!isRecord(datum)) return false;
return (schema.getName() == null) ||
schema.getName().equals(((GenericRecord)datum).getSchema().getName());
+ case ENUM: return isEnum(datum);
case ARRAY: return isArray(datum);
case MAP: return isMap(datum);
case STRING: return isString(datum);
@@ -191,7 +200,7 @@
default: throw new AvroRuntimeException("Unexpected type: " +schema);
}
}
-
+
/** Called by the default implementation of {@link #instanceOf}.*/
protected boolean isArray(Object datum) {
return datum instanceof GenericArray;
@@ -203,6 +212,11 @@
}
/** Called by the default implementation of {@link #instanceOf}.*/
+ protected boolean isEnum(Object datum) {
+ return datum instanceof String;
+ }
+
+ /** Called by the default implementation of {@link #instanceOf}.*/
protected boolean isMap(Object datum) {
return (datum instanceof Map) && (!(datum instanceof GenericRecord));
}
Modified: hadoop/avro/trunk/src/java/org/apache/avro/generic/package.html
URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/src/java/org/apache/avro/generic/package.html?rev=776496&r1=776495&r2=776496&view=diff
==============================================================================
--- hadoop/avro/trunk/src/java/org/apache/avro/generic/package.html (original)
+++ hadoop/avro/trunk/src/java/org/apache/avro/generic/package.html Tue May 19 23:35:39 2009
@@ -24,6 +24,7 @@
<p>Uses the following mapping:
<ul>
<li>Schema records are implemented as {@link org.apache.avro.generic.GenericRecord}.
+<li>Schema enums are implemented as {@link java.lang.String}.
<li>Schema arrays are implemented as {@link org.apache.avro.generic.GenericArray}.
<li>Schema maps are implemented as {@link java.util.Map}.
<li>Schema strings are implemented as {@link org.apache.avro.util.Utf8}.
Modified: hadoop/avro/trunk/src/java/org/apache/avro/reflect/ReflectData.java
URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/src/java/org/apache/avro/reflect/ReflectData.java?rev=776496&r1=776495&r2=776496&view=diff
==============================================================================
--- hadoop/avro/trunk/src/java/org/apache/avro/reflect/ReflectData.java (original)
+++ hadoop/avro/trunk/src/java/org/apache/avro/reflect/ReflectData.java Tue May 19 23:35:39 2009
@@ -24,6 +24,7 @@
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.HashMap;
+import java.util.Collections;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
@@ -64,6 +65,9 @@
}
}
return true;
+ case ENUM:
+ return datum instanceof Enum
+ && schema.getEnumSymbols().contains(((Enum)datum).name());
case ARRAY:
if (!(datum instanceof GenericArray)) return false;
for (Object element : (GenericArray)datum)
@@ -99,7 +103,7 @@
public static Schema getSchema(java.lang.reflect.Type type) {
Schema schema = SCHEMA_CACHE.get(type);
if (schema == null) {
- schema = createSchema(type, new HashMap<String,Schema>());
+ schema = createSchema(type, new LinkedHashMap<String,Schema>());
SCHEMA_CACHE.put(type, schema);
}
return schema;
@@ -142,14 +146,27 @@
throw new AvroTypeException("Map key class not Utf8: "+key);
return Schema.createMap(createSchema(value, names));
}
- } else if (type instanceof Class) { // record
+ } else if (type instanceof Class) {
Class c = (Class)type;
- String name = c.getSimpleName(); // FIXME: ignoring package
+ String name = c.getSimpleName();
+ String space = c.getPackage().getName();
+
Schema schema = names.get(name);
if (schema == null) {
+
+ if (c.isEnum()) { // enum
+ List<String> symbols = new ArrayList<String>();
+ Enum[] constants = (Enum[])c.getEnumConstants();
+ for (int i = 0; i < constants.length; i++)
+ symbols.add(constants[i].name());
+ schema = Schema.createEnum(name, space, symbols);
+ names.put(name, schema);
+ return schema;
+ }
+ // record
LinkedHashMap<String,Schema.Field> fields =
new LinkedHashMap<String,Schema.Field>();
- schema = Schema.createRecord(name, c.getPackage().getName(),
+ schema = Schema.createRecord(name, space,
Throwable.class.isAssignableFrom(c));
if (!names.containsKey(name))
names.put(name, schema);
@@ -178,6 +195,16 @@
if ((method.getModifiers() & Modifier.STATIC) == 0)
protocol.getMessages().put(method.getName(),
getMessage(method, protocol));
+
+ // reverse types, since they were defined in reference order
+ List<Map.Entry<String,Schema>> names =
+ new ArrayList<Map.Entry<String,Schema>>();
+ names.addAll(protocol.getTypes().entrySet());
+ Collections.reverse(names);
+ protocol.getTypes().clear();
+ for (Map.Entry<String,Schema> name : names)
+ protocol.getTypes().put(name.getKey(), name.getValue());
+
return protocol;
}
Modified: hadoop/avro/trunk/src/java/org/apache/avro/reflect/ReflectDatumReader.java
URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/src/java/org/apache/avro/reflect/ReflectDatumReader.java?rev=776496&r1=776495&r2=776496&view=diff
==============================================================================
--- hadoop/avro/trunk/src/java/org/apache/avro/reflect/ReflectDatumReader.java (original)
+++ hadoop/avro/trunk/src/java/org/apache/avro/reflect/ReflectDatumReader.java Tue May 19 23:35:39 2009
@@ -43,12 +43,7 @@
}
protected Object newRecord(Object old, Schema schema) {
- Class c;
- try {
- c = Class.forName(packageName+schema.getName());
- } catch (ClassNotFoundException e) {
- throw new AvroRuntimeException(e);
- }
+ Class c = getClass(schema);
return(c.isInstance(old) ? old : newInstance(c));
}
@@ -74,10 +69,31 @@
addField(record, name, position, null);
}
+  /** Returns the constant named <code>symbol</code> of the enum class
+   * resolved for <code>schema</code>. */
+  @SuppressWarnings("unchecked")
+  protected Object createEnum(String symbol, Schema schema) {
+    Class enumClass = getClass(schema);
+    return Enum.valueOf(enumClass, symbol);
+  }
+
private static final Class<?>[] EMPTY_ARRAY = new Class[]{};
private static final Map<Class,Constructor> CTOR_CACHE =
new ConcurrentHashMap<Class,Constructor>();
+ private Map<String,Class> classCache = new ConcurrentHashMap<String,Class>();
+
+ private Class getClass(Schema schema) {
+ String name = schema.getName();
+ Class c = classCache.get(name);
+ if (c == null) {
+ try {
+ c = Class.forName(packageName + name);
+ classCache.put(name, c);
+ } catch (ClassNotFoundException e) {
+ throw new AvroRuntimeException(e);
+ }
+ }
+ return c;
+ }
+
/** Create a new instance of the named class. */
@SuppressWarnings("unchecked")
protected static Object newInstance(Class c) {
@@ -95,4 +111,6 @@
}
return result;
}
+
}
+
Modified: hadoop/avro/trunk/src/java/org/apache/avro/reflect/ReflectDatumWriter.java
URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/src/java/org/apache/avro/reflect/ReflectDatumWriter.java?rev=776496&r1=776495&r2=776496&view=diff
==============================================================================
--- hadoop/avro/trunk/src/java/org/apache/avro/reflect/ReflectDatumWriter.java (original)
+++ hadoop/avro/trunk/src/java/org/apache/avro/reflect/ReflectDatumWriter.java Tue May 19 23:35:39 2009
@@ -45,6 +45,15 @@
}
}
+ protected void writeEnum(Schema schema, Object datum, ValueWriter out)
+ throws IOException {
+ out.writeInt(((Enum)datum).ordinal());
+ }
+
+ protected boolean isEnum(Object datum) {
+ return datum instanceof Enum;
+ }
+
@Override
protected boolean isRecord(Object datum) {
return ReflectData.getSchema(datum.getClass()).getType() == Type.RECORD;
Modified: hadoop/avro/trunk/src/java/org/apache/avro/reflect/ReflectResponder.java
URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/src/java/org/apache/avro/reflect/ReflectResponder.java?rev=776496&r1=776495&r2=776496&view=diff
==============================================================================
--- hadoop/avro/trunk/src/java/org/apache/avro/reflect/ReflectResponder.java (original)
+++ hadoop/avro/trunk/src/java/org/apache/avro/reflect/ReflectResponder.java Tue May 19 23:35:39 2009
@@ -102,7 +102,8 @@
private Class paramType(Schema schema) throws ClassNotFoundException {
switch (schema.getType()) {
- case RECORD: return Class.forName(packageName+schema.getName());
+ case RECORD:
+ case ENUM: return Class.forName(packageName+schema.getName());
case ARRAY: return GenericArray.class;
case MAP: return Map.class;
case UNION: return Object.class;
Modified: hadoop/avro/trunk/src/java/org/apache/avro/specific/SpecificCompiler.java
URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/src/java/org/apache/avro/specific/SpecificCompiler.java?rev=776496&r1=776495&r2=776496&view=diff
==============================================================================
--- hadoop/avro/trunk/src/java/org/apache/avro/specific/SpecificCompiler.java (original)
+++ hadoop/avro/trunk/src/java/org/apache/avro/specific/SpecificCompiler.java Tue May 19 23:35:39 2009
@@ -171,6 +171,19 @@
compile(field.getValue(), null, d+1);
break;
+ case ENUM:
+ buffer.append("\n");
+ line(d, ((d==0)?"public ":"")+"enum "+type+" { ");
+ StringBuilder b = new StringBuilder();
+ int count = 0;
+ for (String symbol : schema.getEnumSymbols()) {
+ b.append(symbol);
+ if (++count < schema.getEnumSymbols().size())
+ b.append(", ");
+ }
+ line(d+1, b.toString());
+ line(d, "}");
+ break;
case ARRAY:
compile(schema.getElementType(), name+"Element", d);
break;
@@ -195,6 +208,7 @@
private String type(Schema schema, String name) {
switch (schema.getType()) {
case RECORD:
+ case ENUM:
return schema.getName() == null ? cap(name) : schema.getName();
case ARRAY:
return "GenericArray<"+type(schema.getElementType(),name+"Element")+">";
Modified: hadoop/avro/trunk/src/py/avro/generic.py
URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/src/py/avro/generic.py?rev=776496&r1=776495&r2=776496&view=diff
==============================================================================
--- hadoop/avro/trunk/src/py/avro/generic.py (original)
+++ hadoop/avro/trunk/src/py/avro/generic.py Tue May 19 23:35:39 2009
@@ -77,6 +77,8 @@
schema.LONG : lambda schm, object: ((isinstance(object, long) or
isinstance(object, int)) and
io._LONG_MIN_VALUE <= object <= io._LONG_MAX_VALUE),
+ schema.ENUM : lambda schm, object:
+ schm.getenumsymbols().__contains__(object),
schema.ARRAY : _validatearray,
schema.MAP : _validatemap,
schema.RECORD : _validaterecord,
@@ -107,6 +109,7 @@
schema.ARRAY : self.readarray,
schema.MAP : self.readmap,
schema.RECORD : self.readrecord,
+ schema.ENUM : self.readenum,
schema.UNION : self.readunion
}
@@ -150,6 +153,10 @@
result[field] = self.readdata(fieldschema, valuereader)
return result
+ def readenum(self, schm, valuereader):
+ index = valuereader.readint()
+ return schm.getenumsymbols()[index]
+
def readunion(self, schm, valuereader):
index = int(valuereader.readlong())
return self.readdata(schm.getelementtypes()[index], valuereader)
@@ -177,6 +184,7 @@
schema.ARRAY : self.writearray,
schema.MAP : self.writemap,
schema.RECORD : self.writerecord,
+ schema.ENUM : self.writeenum,
schema.UNION : self.writeunion
}
@@ -227,6 +235,10 @@
valuewriter.writelong(index)
self.writedata(schm.getelementtypes()[index], datum, valuewriter)
+ def writeenum(self, schm, datum, valuewriter):
+ index = schm.getenumordinal(datum)
+ valuewriter.writeint(index)
+
def resolveunion(self, schm, datum):
index = 0
for elemtype in schm.getelementtypes():
Modified: hadoop/avro/trunk/src/py/avro/protocol.py
URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/src/py/avro/protocol.py?rev=776496&r1=776495&r2=776496&view=diff
==============================================================================
--- hadoop/avro/trunk/src/py/avro/protocol.py (original)
+++ hadoop/avro/trunk/src/py/avro/protocol.py Tue May 19 23:35:39 2009
@@ -97,7 +97,8 @@
count = 0
for type in self.__types.values():
typesCopy = self.__types
- if isinstance(type, schema._RecordSchema):
+ if (isinstance(type, schema._RecordSchema) or
+ isinstance(type, schema._EnumSchema)):
typesCopy = self.__types.copy()
typesCopy.pop(type.getname(), None)
str.write(type.str(typesCopy)+"\n")
Modified: hadoop/avro/trunk/src/py/avro/reflect.py
URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/src/py/avro/reflect.py?rev=776496&r1=776495&r2=776496&view=diff
==============================================================================
--- hadoop/avro/trunk/src/py/avro/reflect.py (original)
+++ hadoop/avro/trunk/src/py/avro/reflect.py Tue May 19 23:35:39 2009
@@ -69,6 +69,8 @@
schema.LONG : lambda schm, pkgname, object: ((isinstance(object, long) or
isinstance(object, int)) and
io._LONG_MIN_VALUE <= object <= io._LONG_MAX_VALUE),
+ schema.ENUM : lambda schm, pkgname, object:
+ schm.getenumsymbols().__contains__(object),
schema.ARRAY : _validatearray,
schema.MAP : _validatemap,
schema.RECORD : _validaterecord,
Modified: hadoop/avro/trunk/src/py/avro/schema.py
URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/src/py/avro/schema.py?rev=776496&r1=776495&r2=776496&view=diff
==============================================================================
--- hadoop/avro/trunk/src/py/avro/schema.py (original)
+++ hadoop/avro/trunk/src/py/avro/schema.py Tue May 19 23:35:39 2009
@@ -17,6 +17,7 @@
""" Contains the Schema classes.
A schema may be one of:
A record, mapping field names to field value data;
+ An enum, containing one of a small set of symbols;
An array of values, all of the same schema;
A map containing string/value pairs, each of a declared schema;
A union of other schemas;
@@ -29,10 +30,10 @@
A boolean."""
import cStringIO
-import simplejson
+import simplejson, odict
#The schema types
-STRING, BYTES, INT, LONG, FLOAT, DOUBLE, BOOLEAN, NULL, ARRAY, MAP, UNION, RECORD = range(12)
+STRING, BYTES, INT, LONG, FLOAT, DOUBLE, BOOLEAN, NULL, ARRAY, MAP, UNION, RECORD, ENUM = range(13)
class Schema(object):
"""Base class for all Schema classes."""
@@ -282,6 +283,73 @@
hash = hash + elem.__hash__(seen)
return hash
+class _EnumSchema(Schema):
+ def __init__(self, name, space, symbols):
+ Schema.__init__(self, ENUM)
+ self.__name = name
+ self.__space = space
+ self.__symbols = symbols
+ self.__ordinals = dict()
+ i = 0
+ for symbol in symbols:
+ self.__ordinals[symbol] = i
+ i+=1
+
+ def getname(self):
+ return self.__name
+
+ def getnamespace(self):
+ return self.__namespace
+
+ def getenumsymbols(self):
+ return self.__symbols
+
+ def getenumordinal(self, symbol):
+ return self.__ordinals.get(symbol)
+
+ def str(self, names):
+ if names.get(self.__name) is self:
+ return "\""+self.__name+"\""
+ elif self.__name is not None:
+ names[self.__name] = self
+ str = cStringIO.StringIO()
+ str.write("{\"type\": \"enum\", ")
+ if self.__name is not None:
+ str.write("\"name\": \""+self.__name+"\", ")
+ str.write("\"symbols\": [")
+ count = 0
+ for symbol in self.__symbols:
+ str.write("\""+symbol+"\"")
+ count+=1
+ if count < len(self.__symbols):
+ str.write(",")
+ str.write("]}")
+ return str.getvalue()
+
+ def __eq__(self, other, seen={}):
+ if self is other or seen.get(id(self)) is other:
+ return True
+ if isinstance(other, _EnumSchema):
+ size = len(self.__symbols)
+ if len(other.__symbols) != size:
+ return False
+ seen[id(self)] = other
+ for i in range(0, size):
+ if not self.__symbols[i].__eq__(other.__symbols[i]):
+ return False
+ return True
+ else:
+ return False
+
+ def __hash__(self, seen=set()):
+ if seen.__contains__(id(self)):
+ return 0
+ seen.add(id(self))
+ hash = self.gettype().__hash__()
+ for symbol in self.__symbols:
+ hash += symbol.__hash__()
+ return hash
+
_PRIMITIVES = {'string':_StringSchema(),
'bytes':_BytesSchema(),
'int':_IntSchema(),
@@ -291,20 +359,21 @@
'boolean':_BooleanSchema(),
'null':_NullSchema()}
-class _Names(dict):
+class _Names(odict.OrderedDict):
def __init__(self, names=_PRIMITIVES):
+ odict.OrderedDict.__init__(self)
self.__defaults = names
def get(self, key):
- val = dict.get(self, key)
+ val = odict.OrderedDict.get(self, key)
if val is None:
val = self.__defaults.get(key)
return val
def __setitem__(self, key, val):
- if dict.get(self, key) is not None:
+ if odict.OrderedDict.get(self, key) is not None:
raise SchemaParseException("Can't redefine: "+ key.__str__())
- dict.__setitem__(self, key, val)
+ odict.OrderedDict.__setitem__(self, key, val)
class AvroException(Exception):
pass
@@ -342,6 +411,19 @@
raise SchemaParseException("No field type: "+field.__str__())
fields.append((fieldname, _parse(fieldtype, names)))
return schema
+ elif type == "enum":
+ name = obj.get("name")
+ namespace = obj.get("namespace")
+ symbolsnode = obj.get("symbols")
+ if symbolsnode == None or not isinstance(symbolsnode, list):
+ raise SchemaParseException("Enum has no symbols: "+obj.__str__())
+ symbols = list()
+ for symbol in symbolsnode:
+ symbols.append(symbol)
+ schema = _EnumSchema(name, namespace, symbols)
+ if name is not None:
+ names[name] = schema
+ return schema
elif type == "array":
return _ArraySchema(_parse(obj.get("items"), names))
elif type == "map":
Modified: hadoop/avro/trunk/src/test/java/org/apache/avro/RandomData.java
URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/src/test/java/org/apache/avro/RandomData.java?rev=776496&r1=776495&r2=776496&view=diff
==============================================================================
--- hadoop/avro/trunk/src/test/java/org/apache/avro/RandomData.java (original)
+++ hadoop/avro/trunk/src/test/java/org/apache/avro/RandomData.java Tue May 19 23:35:39 2009
@@ -66,6 +66,9 @@
for (Map.Entry<String, Schema> entry : schema.getFieldSchemas())
record.put(entry.getKey(), generate(entry.getValue(), random, d+1));
return record;
+ case ENUM:
+ List<String> symbols = schema.getEnumSymbols();
+ return symbols.get(random.nextInt(symbols.size()));
case ARRAY:
int length = (random.nextInt(5)+2)-d;
GenericArray<Object> array = new GenericData.Array(length<=0?0:length);
Modified: hadoop/avro/trunk/src/test/java/org/apache/avro/TestProtocolGeneric.java
URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/src/test/java/org/apache/avro/TestProtocolGeneric.java?rev=776496&r1=776495&r2=776496&view=diff
==============================================================================
--- hadoop/avro/trunk/src/test/java/org/apache/avro/TestProtocolGeneric.java (original)
+++ hadoop/avro/trunk/src/test/java/org/apache/avro/TestProtocolGeneric.java Tue May 19 23:35:39 2009
@@ -114,6 +114,7 @@
GenericRecord record =
new GenericData.Record(PROTOCOL.getTypes().get("TestRecord"));
record.put("name", new Utf8("foo"));
+ record.put("kind", "BAR");
GenericRecord params =
new GenericData.Record(PROTOCOL.getMessages().get("echo").getRequest());
params.put("record", record);