modern python dictionaries
0

本文是pycon 2017上的演讲, 主要涉及python 字典的实现,以及如何实现一个更加高效的字典, 主要改进的地方就是将key的hash值进行保存,当rehash的时候不用重新计算所有的key值,可以大大提高效率

视频地址: YouTube

import array
import collections
import itertools

FREE = -1
DUMMY = -2

class Dict(collections.MutableMapping):
    'Space efficient dictionary with fast iteration and cheap resizes'

    @staticmethod
    def _gen_probes(hashvalue, mask):
        'Same sequence of probes used in the current dictionary design'
        PERTURB_SHIFT = 5
        if hashvalue < 0:
            hashvalue = -hashvalue
        i = hashvalue & mask

        yield i

        perturb = hashvalue
        while True:
            i = (5 * i + perturb + 1) & 0xFFFFFFFFFFFFFFFF
            yield i & mask
            perturb >>= PERTURB_SHIFT

    def _lookup(self, key, hashvalue):
        'Same lookup logic as currently used in real dicts'
        assert self.filled < len(self.indices)    # At least one open slot
        freeslot = None

        for i in self._gen_probes(hashvalue, len(self.indices)-1):
            index = self.indices[i]
            if index == FREE:
                return (FREE, i) if freeslot is None else (DUMMY, freeslot)
            elif index == DUMMY:
                if freeslot is None:
                    freeslot = i
            elif (self.keylist[index] is key or self.hashlist[index] == hashvalue and self.keylist[index] == key):
                return (index, i)

    @staticmethod
    def _make_index(n):
        'New sequence of indices using the smallest possible datatype'
        if n <= 2 ** 7: return array.array('b', [FREE]) * n    # signed char
        if n <= 2 ** 15: return array.array('h', [FREE]) * n   # signed short
        if n <= 2 ** 31: return array.array('l', [FREE]) * n   # signed long
        return [FREE] * n

    def _resize(self, n):
        '''
        Reindex the existing hash/key/value entries
        Entries do not get moved, they only get new indices
        No calls are made to hash() or __eq__().
        :return:
        '''
        n = 2 ** n.bit_length()
        self.indices = self._make_index(n)
        for index, hashvalue in enumerate(self.hashlist):
            for i in Dict._gen_probes(hashvalue, n - 1):
                if self.indices[i] == FREE:
                    break
            self.indices[i] = index
        self.filled = self.used

    def clear(self):
        self.indices = self._make_index(8)
        self.hashlist = []
        self.keylist = []
        self.valuelist = []
        self.used = 0
        self.filled = 0

    def __getitem__(self, key):
        hashvalue = hash(key)
        index, i = self._lookup(key, hashvalue)
        if index < 0:
            raise KeyError(key)
        return self.valuelist[index]

    def __setitem__(self, key, value):
        hashvalue = hash(key)
        index, i = self._lookup(key, hashvalue)
        if index < 0:
            self.indices[i] = self.used
            self.hashlist.append(hashvalue)
            self.keylist.append(key)
            self.valuelist.append(value)
            self.used += 1
            if index == FREE:
                self.filled += 1
                if self.filled * 3 > len(self.indices) * 2:
                    self._resize(4 * len(self))

        else:
            self.valuelist[index] = value

    def __delitem__(self, key):
        hashvalue = hash(key)
        index, i = self._lookup(key, hashvalue)
        if index < 0:
            raise KeyError(key)

        self.indices[i] = DUMMY
        self.used -= 1
        # if needed swap with the lastmost entry to avoid leaving a "hole"
        if index != self.used:
            lasthash = self.hashlist[-1]
            lastkey = self.keylist[-1]
            lastvalue = self.valuelist[-1]
            lastindex, j = self._lookup(lastkey, lasthash)
            assert lastindex >= 0 and i != j
            self.indices[j] = index
            self.hashlist[index] = lasthash
            self.keylist[index] = lastkey
            self.valuelist[index] = lastvalue
        self.hashlist.pop()
        self.keylist.pop()
        self.valuelist.pop()

    def __init__(self, *args, **kwargs):
        if not hasattr(self, 'keylist'):
            self.clear()
        self.update(*args, **kwargs)

    def __len__(self):
        return self.used

    def __iter__(self):
        return iter(self.keylist)

    def iterkeys(self):
        return iter(self.keylist)

    def keys(self):
        return list(self.keylist)

    def itervalues(self):
        return iter(self.valuelist)

    def values(self):
        return list(self.valuelist)

    def iteritems(self):
        return itertools.izip(self.keylist, self.valuelist)

    def items(self):
        return zip(self.keylist, self.valuelist)

    def __contains__(self, key):
        index, i = self._lookup(key, hash(key))
        return index >= 0

    def get(self, key, default=None):
        index, i = self._lookup(key, hash(key))
        return self.valuelist[index] if index >= 0 else default

    def popitem(self):
        if not self.keylist:
            raise KeyError('popitem(): dictionary is empty')
        key = self.keylist[-1]
        value = self.valuelist[-1]
        del self[key]
        return key, value

    def __repr__(self):
        return 'Dict(%r)' % self.items()

    def show_structure(self):
        'Diagnostic method. not part of the api'

        print '=' * 50
        print self
        print 'Indices:', self.indices

        for i, row in enumerate(zip(self.hashlist, self.keylist, self.valuelist)):
            print i, row

        print '-' * 50

if __name__ == '__main__':
    d = Dict([('timmy', 'red'), ('barry', 'green'), ('guido', 'blue')])
    d.update({'data': 'test'})
    d.update({'data1': 'test1'})
    d.update({'data2': 'test2'})
    d.show_structure()
讨论数量: 0
(= ̄ω ̄=)··· 暂无内容!

  • 请注意单词拼写,以及中英文排版,参考此页
  • 支持 Markdown 格式, **粗体**、~~删除线~~、`单行代码`, 更多语法请见这里 Markdown 语法
  • 支持表情,使用方法请见 Emoji 自动补全来咯,可用的 Emoji 请见 :metal: :point_right: Emoji 列表 :star: :sparkles:
  • 上传图片, 支持拖拽和剪切板黏贴上传, 格式限制 - jpg, png, gif
  • 发布框支持本地存储功能,会在内容变更时保存,「提交」按钮点击时清空
  请勿发布不友善或者负能量的内容。与人为善,比聪明更重要!