python dict计时之谜

# The two score dicts compare equal, yet the same alignment call takes roughly
# three times longer with the dict that was built from the file.
scores1 = create_score_dict_from_file('lcs_scores.txt')
scores2 = create_score_dict(find_alp(s1, s2), match=1, mismatch=0, indel=0)
print scores1 == scores2  # True
alignment(s1, s2, scores1)  # gives right answer in about 12s
alignment(s1, s2, scores2)  # gives right answer in about 4s

import numpy as np
from time import time


def create_scores_from_file(score_file, sigma=0):
    """
    Creates a dict of the scores for each pair in an alphabet, as well as each
    indel (an amino acid, paired with '-'), which is scored -sigma.

    The first line of score_file is read as the whitespace-separated alphabet.
    On every following line the first field is skipped and the remaining
    fields are parsed as integers, giving one row of scores per line.

    Returns a dict keyed by 2-tuples of symbols.
    """
    f = open(score_file, 'r')
    alp = f.readline().strip().split()
    scores = []
    for line in f:
        # Drop the leading row label and keep only the numeric fields.
        scores.append(map(int, line.strip().split()[1:]))
    f.close()
    # Convert to a NumPy array so that the scores[c1, c2] indexing below works.
    # NOTE(review): the pair values stored in the dict are therefore elements
    # of this array (presumably NumPy integer scalars, not built-in ints).
    scores = np.array(scores)
    score_dict = {}
    for c1 in range(len(alp)):
        # Indel entries: a symbol paired with '-' in either position.
        score_dict[(alp[c1], '-')] = -sigma
        score_dict[('-', alp[c1])] = -sigma
        for c2 in range(len(alp)):
            score_dict[(alp[c1], alp[c2])] = scores[c1, c2]
    return score_dict


def score_matrix(alp=('A', 'C', 'G', 'T'), match=1, mismatch=0, indel=0):
    """
    Builds a score dict with the same key layout as create_scores_from_file,
    but directly from the given constants: `match` for a symbol paired with
    itself, `mismatch` for two different positions in `alp`, and `indel` for
    a symbol paired with '-'.
    """
    score_dict = {}
    for c1 in range(len(alp)):
        score_dict[(alp[c1], '-')] = indel
        score_dict[('-', alp[c1])] = indel
        for c2 in range(len(alp)):
            score_dict[(alp[c1], alp[c2])] = match if c1 == c2 else mismatch
    return score_dict


def use_dict_in_function(n, d):
    """
    Sums every value of dict `d`, repeated `n` times, prints the elapsed time
    measured with time(), and returns the accumulated sum.
    """
    start = time()
    count = 0
    for i in xrange(n):
        for k in d.keys():
            # Each lookup value is added to the running total; this addition
            # is the work being timed.
            count += d[k]
    print "Time: ", time() - start
    return count


def timing_test():
    """
    Builds the score dict both ways (from 'lcs_scores.txt' and from constants),
    prints diagnostics comparing the two dicts, then times the same summation
    loop over each of them.
    """
    alp = tuple('A C D E F G H I K L M N P Q R S T V W Y'.split())
    scores1 = create_scores_from_file('lcs_scores.txt')
    scores2 = score_matrix(alp, match=1, mismatch=0, indel=0)
    # Type and identity of each dict.
    print type(scores1), id(scores1)
    print type(scores2), id(scores2)
    # Full textual representation of each dict.
    print repr(scores1)
    print repr(scores2)
    # Type of the first element of the first key in each dict.
    print type(list(scores1)[0][0])
    print type(list(scores2)[0][0])
    # Equality of the dicts themselves and of their reprs.
    print scores1 == scores2
    print repr(scores1) == repr(scores2)
    n = 10000
    use_dict_in_function(n, scores1)
    use_dict_in_function(n, scores2)


if __name__ == "__main__":
    timing_test()

<type 'dict'> 140309927965024
<type 'dict'> 140309928036128
{('S', 'W'): 0, ('G', 'G'): 1, ('E', 'M'): 0, ('P', '-'): 0,... (440 key: values)
{('S', 'W'): 0, ('G', 'G'): 1, ('E', 'M'): 0, ('P', '-'): 0,... (440 key: values)
<type 'str'>
<type 'str'>
True
True
Time: 1.51075315475
Time: 0.352770090103

A C D E F G H I K L M N P Q R S T V W Y
A 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
C 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
D 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
E 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
F 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
G 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0
H 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0
I 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0
K 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0
L 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0
M 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0
N 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0
P 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0
Q 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0
R 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0
S 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0
T 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0
V 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0
W 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0
Y 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1

1条回答

网友

1楼 · 发布于 2024-06-08 00:18:34

您用的是哪个版本的 Python？另外，请打印每个 dict 的 repr()，以确认它们确实完全相同（而不只是比较结果相等）。我暂时猜不出原因。例如，您可能在使用 Python 2，其中一种情况下 char1 和 char2 是普通字符串，而另一种情况下它们是 Unicode 字符串。这样比较时两者相等，但 repr() 会显示出差别：

>>> d1 = {"a": 1}
>>> d2 = {u"a": 1}
>>> d1 == d2
True
>>> print repr(d1), repr(d2)
{'a': 1} {u'a': 1}

无论如何，在 CPython 中，绝对不存在任何记录对象来源的内部“元数据”。

编辑-要尝试的内容

问题修改得很好！这件事变得有趣起来了 :-) 我想请你试一试下面的做法。首先注释掉这一行：

    scores = np.array(scores)

然后更改此行：

            score_dict[(alp[c1], alp[c2])] = scores[c1, c2]

改为：

            score_dict[(alp[c1], alp[c2])] = scores[c1][c2]
                                                    ^^^^^^

这样修改之后，两种方法的耗时基本相同。我不是 numpy 专家，但我猜测，您的“from file”代码在 dict 的值中使用的是机器原生的 numpy 整数类型，而每次使用这些值时，把它们转换为 Python 整数都会带来大量开销。

也可能不是——但这是我目前的猜测，而且我坚持这个看法 ;-)

相关问题更多 >

编程相关推荐

热门问题

热门文章