Why does overriding __contains__ break OrderedDict.keys?

I’m subclassing OrderedDict (CPython, 2.7.3) to represent a datafile. __getitem__ pulls a field out of the datafile and sets it on the current instance, similar to the code I’ve posted below. Now I would like to override __contains__ to return True if the field is in the dictionary or in the file on disk, since it can be read either way. However, this seems to break OrderedDict’s ability to inspect its keys.

from collections import OrderedDict

dictclass = OrderedDict

class Foo(dictclass):
    """Mapping that fills in missing keys the first time they are read.

    Per the surrounding description this stands in for a datafile wrapper:
    a missing key is "loaded" (here simply ``key*2``), stored on the
    instance, and returned.
    """

    def __getitem__(self,key):
        """Return the stored value for *key*, generating and storing it if absent."""
        try:
            return dictclass.__getitem__(self,key)
        except KeyError:
            # Not stored yet -- fall through and generate the value below.
            pass

        data = key*2
        # Item assignment dispatches to __setitem__, which this class inherits
        # from dictclass.
        self[key] = data
        return data

    def __contains__(self,whatever):
        """Return True if *whatever* is stored, or if ``'bar' in whatever`` holds."""
        # The second operand makes `in` answer True for keys that have never
        # been stored on this instance.
        return dictclass.__contains__(self,whatever) or 'bar' in whatever

a = Foo()
print a['bar']
print a.keys()

If you run the code above, you’ll get this output:

barbar
[]

Note that if you change dictclass = dict in the above code, it still seems to work (giving the following output).

barbar
['bar']

Am I doing something horribly wrong?

Best answer

When **Foo.__contains__** is not defined:

a['bar']

calls Foo.__getitem__, which executes

    self[key] = data

This calls OrderedDict.__setitem__, which is defined this way:

def __setitem__(self, key, value, PREV=0, NEXT=1, dict_setitem=dict.__setitem__):
    'od.__setitem__(i, y) <==> od[i]=y'
    # Setting a new item creates a new link at the end of the linked list,
    # and the inherited dictionary is updated with the new key/value pair.
    # NOTE: `key not in self` is answered by type(self).__contains__, so a
    # subclass that overrides __contains__ decides whether a link is created.
    if key not in self:
        root = self.__root
        last = root[PREV]
        # Each link is a list [previous, next, key]; the new link is placed
        # between the current last link and root.
        last[NEXT] = root[PREV] = self.__map[key] = [last, root, key]
    # The key/value pair is stored in the underlying dict in every case.
    dict_setitem(self, key, value)

Since Foo.__contains__ is not defined,

    if key not in self:

is True. So the key is properly added to self.__root and self.__map.

When **Foo.__contains__** is defined,

    if key not in self:

is False. So the key is not properly added to self.__root and self.__map.
Foo.__contains__ effectively fools OrderedDict.__setitem__ into thinking that the 'bar' key has already been added.


I found it helpful to play with the following code (adding print statements in __setitem__ and __iter__):

from collections import OrderedDict

dictclass = OrderedDict

class Foo(dictclass):
    """Instrumented copy of the question's class for tracing the failure.

    __setitem__ and __iter__ reproduce the OrderedDict logic with extra
    print calls so the effect of the __contains__ override can be observed.
    """

    def __getitem__(self,key):
        """Return the stored value for *key*, generating and storing it if absent."""
        try:
            return dictclass.__getitem__(self,key)
        except KeyError:
            # Not stored yet -- fall through and generate the value below.
            pass

        data = key*2
        # Dispatches to the instrumented __setitem__ defined below.
        self[key] = data
        return data

    def __contains__(self,whatever):
        """Print the probed key, then answer as the question's override does."""
        print('contains: {}'.format(whatever))
        return dictclass.__contains__(self,whatever) or 'bar' in whatever

    def __setitem__(self, key, value, PREV=0, NEXT=1, dict_setitem=dict.__setitem__):
        'od.__setitem__(i, y) <==> od[i]=y'
        # Setting a new item creates a new link at the end of the linked list,
        # and the inherited dictionary is updated with the new key/value pair.
        # The print below evaluates `key not in self` once more, so
        # __contains__ runs (and prints) twice per assignment.
        print('key not in self: {}'.format(key not in self))
        if key not in self:
            # The private attributes are spelled with their mangled names
            # (_OrderedDict__root, _OrderedDict__map) because this code sits in
            # Foo's class body rather than OrderedDict's.
            root = self._OrderedDict__root
            last = root[PREV]
            # Each link is a list [previous, next, key]; the new link is placed
            # between the current last link and root.
            last[NEXT] = root[PREV] = self._OrderedDict__map[key] = [last, root, key]
        dict_setitem(self, key, value)

    def __iter__(self):
        'od.__iter__() <==> iter(od)'
        # Traverse the linked list in order.
        NEXT, KEY = 1, 2

        root = self._OrderedDict__root
        curr = root[NEXT]
        # Debug output: when the first link is root itself, the loop below
        # yields nothing.
        print('curr: {}'.format(curr))
        print('root: {}'.format(root)) 
        print('curr is not root: {}'.format(curr is not root))

        while curr is not root:
            yield curr[KEY]
            curr = curr[NEXT]

a = Foo()
print a['bar']
# barbar

print a.keys()
# []   (debug output omitted; 'bar' is stored in the dict but was never linked into the order list)

Notice that you can avoid this problem by making Foo a subclass of collections.MutableMapping and delegating most of its behavior to an OrderedDict attribute:

import collections
try:  # Python 3.3+; the alias in ``collections`` was removed in 3.10
    from collections.abc import MutableMapping
except ImportError:  # Python 2
    from collections import MutableMapping

dictclass = collections.OrderedDict


class Foo(MutableMapping):
    """Mapping that fills in missing keys on first read, backed by ``dictclass``.

    All storage lives in the private ``_data`` attribute, so the backing
    OrderedDict never consults this class's ``__contains__`` while doing
    its own bookkeeping.
    """

    def __init__(self, *args, **kwargs):
        """Create the backing ``dictclass`` from the given arguments."""
        self._data = dictclass(*args, **kwargs)

    def __setitem__(self, key, value):
        """Store *value* under *key* in the backing mapping."""
        self._data[key] = value

    def __delitem__(self, key):
        """Remove *key* from the backing mapping; raises KeyError if absent."""
        del self._data[key]

    def __iter__(self):
        """Iterate over the stored keys in the backing mapping's order."""
        return iter(self._data)

    def __len__(self):
        """Return the number of stored keys."""
        return len(self._data)

    def __getitem__(self, key):
        """Return the stored value for *key*, generating and storing it if absent."""
        try:
            return self._data[key]
        except KeyError:
            # Not stored yet -- fall through and generate the value below.
            pass

        data = key*2
        self[key] = data
        return data

    def __contains__(self, whatever):
        """Return True if *whatever* is stored, or if ``'bar' in whatever`` holds."""
        # Ask the backing mapping directly: ``self`` is not a dict, so calling
        # dictclass.__contains__(self, ...) would raise TypeError.
        return whatever in self._data or 'bar' in whatever

which yields

a = Foo()
print a['bar']
# barbar

print a.keys()
# ['bar']

even with __contains__ defined.