Python: 尝试创建一个包含有限MRU条目的字典

Question

我正在尝试创建一个 dict，这个字典只包含有限数量的最近使用（MRU）条目，目的是为了缓存我通过 ctypes 调用的一个耗时的 C 函数的输出。以下是我的代码：

from collections import OrderedDict

class MRUDict(OrderedDict):

    def __init__(self, capacity = 64):
        super().__init__()
        self.__checkAndSetCapacity(capacity)

    def capacity(self):
        return self.__capacity

    def setCapacity(self, capacity):
        self.__checkAndSetCapacity(capacity)
        for i in range(len(self) - capacity):
            self.__evict() # will execute only if len > capacity

    def __getitem__(self, key):
        value = super().__getitem__(key)
        # if above raises IndexError, next line won't execute
        print("Moving key {} to last i.e. MRU position".format(key))
        super().move_to_end(key)
        return value

    def __setitem__(self, key, value):
        if key in self:
            super().move_to_end(key)
        else: # new key
            if len(self) == self.__capacity:
                self.__evict()
        super().__setitem__(key, value)

    def __evict(self):
        key, value = self.popitem(last = False) # pop first i.e. oldest item
        print("Capacity exceeded. Evicting ({}, {})".format(key, value))

    def __checkAndSetCapacity(self, capacity):
        if not isinstance(capacity, int):
            raise TypeError("Capacity should be an int.")
        if capacity == 0:
            raise ValueError("Capacity should not be zero.")
        self.__capacity = capacity

... 这是测试代码：

def printkeys(d):
    print("Current keys in order:", tuple(d)) # here d means d.keys()
    print()

from mrudict import MRUDict
print("Creating MRUDict with capacity 5.")
d = MRUDict(5)
print("Adding keys 0 to 7 with values:")
for i in range(8): d[i] = i + 0.1
printkeys(d)

print("Calling str on object:")
print(d) # test of default __repr__ (since probably __str__ is the same here)
printkeys(d)

print("Accessing existing key 4:")
print(4, d[4]) # test of __getitem__
printkeys(d)

try:
    print("Accessing non-existing key 20:")
    print(20, d[20]) # test of __getitem__
except:
    print("Caught exception: key does not exist.")
printkeys(d)

print("Updating value of existing key 6:")
d[6] = 6.6 # test of __setitem__ with existing key
printkeys(d)

print("Adding new key, value pair:")
d[10] = 10.1 # test of __setitem__ with non-existing key
printkeys(d)

print("Testing for presence of key 3:")
print(3 in d)
printkeys(d)

print("Trying to loop over the items:")
for k in d: print(k, d[k])
printkeys(d)

print("Trying to loop over the items:")
for k, v in d.items(): print(k, v)
printkeys(d)

从输出结果来看，我在实现 __getitem__ 函数时似乎有点天真，因为无论是 __repr__ 还是 for ... in（我猜这两个会调用 __iter__ 然后再调用 __getitem__），都会导致第一个条目被移动到最后，作为最近使用的条目，但接下来就无法继续了，因为迭代器现在指向最后一个元素，没有“下一个”元素了。我不太确定该怎么解决这个问题。我是否应该重新实现 __iter__？

我不太确定如何区分用户调用 __getitem__ 和内部调用同一个函数。当然，有一个变通办法是让用户使用 find() 方法，这样可以实现移动到最后的功能，但我真的希望能够使用常规的语法 d[k]。

请给我一些建议，如何解决这个问题。谢谢！

ctypes caching data structure dictionary algorithm design performance tuning mru function optimization

Python: 尝试创建一个包含有限MRU条目的字典

1 个回答

撰写回答