Python优化

2 投票

7 回答

1290 浏览

提问于 2025-04-15 20:21

# Write every "word" of 1 to 4 characters drawn from chr(33)..chr(126) to
# wl4.txt, one word per line, and report how many words were written.
f = open('wl4.txt', 'w')
hh = 0  # running count of words written
######################################
for n in range(1,5):          # n is the length of the words being generated
    for l in range(33,127):   # l is the character code of the first letter
        if n==1:
            b = chr(l) + '\n'
            f.write(b)
            hh += 1
        elif n==2:
            for s0 in range(33, 127):
                b = chr(l) + chr(s0) + '\n'
                f.write(b)
                hh += 1
        elif n==3:
            for s0 in range(33, 127):
                for s1 in range(33, 127):
                    b = chr(l) + chr(s0) + chr(s1) + '\n'
                    f.write(b)
                    hh += 1
        elif n==4:
            for s0 in range(33, 127):
                for s1 in range(33, 127):
                    for s2 in range(33,127):
                        # The write must sit inside the innermost loop so
                        # that every (l, s0, s1, s2) combination is emitted.
                        b = chr(l) + chr(s0) + chr(s1) + chr(s2) + '\n'
                        f.write(b)
                        hh += 1
######################################
# Parenthesised single argument: prints the same text on Python 2 and 3.
print("We Made %d Words." % (hh))
######################################
f.close()

那么，有什么方法可以让它更快吗？

性能优化代码效率

7 个回答

当你需要进行很多重复操作时，可以从 itertools 这个包开始。

在这种情况下，你可能需要用到 product 函数。这个函数可以给你：

笛卡尔积，相当于一个嵌套的 for 循环

所以如果你想得到你正在创建的“单词”列表：

from itertools import product

# Build every word of 1 to 4 characters over chr(33)..chr(126) in memory,
# report the count, then write the words to wl4.txt separated by newlines.
chars = map(chr, xrange(33,127))  # create a list of characters
words = []                        # this will be the list of words

for length in xrange(1, 5):       # length is the length of the words created
    # product(chars, repeat=length) yields each tuple of `length` characters,
    # equivalent to `length` nested for-loops over chars.
    words.extend([''.join(x) for x in product(chars, repeat=length)])

# instead of keeping a separate counter, hh, we can use the len function
print("We Made %d Words." % (len(words)))

# The with-block closes the file even if building or writing the joined
# string raises (for example a MemoryError on the full character range).
with open('wl4.txt', 'w') as f:
    f.write('\n'.join(words))     # one word per line, no newline after the last

这样我们就得到了你的脚本输出的结果。而且因为 itertools 是用 C 语言实现的，所以它的运行速度也更快。

编辑：

根据 John Machin 的评论，关于内存使用的问题，这里有更新的代码，当我在整个 range(33, 127) 上运行时不会出现内存错误。

from itertools import product

# Stream every word of 1 to 4 characters over chr(33)..chr(126) straight to
# wl4.txt, one per line, without holding the whole word list in memory.
chars = map(chr, xrange(33,127))  # create a list of characters

num_words = 0                     # a counter (was hh in OPs code)
# The with-block closes f_words on every exit path, including errors.
with open('wl4.txt', 'w') as f_words:
    for length in xrange(1, 5):   # length is the length of the words created
        for char_tup in product(chars, repeat=length):
            f_words.write(''.join(char_tup) + '\n')
            num_words += 1

print("We Made %d Words." % (num_words))

在我的机器上，这段代码大约运行了 4 分钟（240 秒）。

回答于 2025-04-15 由 Python大师

分享举报

还有很多显著的改进空间。

下面这个脚本文件展示了这些改进，出于简洁考虑，只使用了大小为4的循环（这个循环占用了90%以上的时间）。

方法0：原作者的代码

方法1：John Kugleman的解决方案

方法2：在（1）的基础上，把部分字符串拼接移出内层循环

方法3：在（2）的基础上，把代码放进一个函数里——访问局部变量比访问全局变量快得多。任何脚本都可以这样做，而且很多脚本都应该这样做。

方法4：在（3）的基础上，先把字符串累积到列表中，再把它们连接起来一次性写入。注意，这样做占用的内存可能多得超乎你的想象。我的代码没有尝试对整个文件这样做，因为 (127 - 33) ** 4 约为 7800 万个字符串。在 32 位系统上，仅列表本身就需要 78 * 4 = 312 MB 内存（还不算列表末尾预留的未使用空间），再加上字符串对象占用的 78 * 28 = 2184 MB（sys.getsizeof("1234") 返回 28），以及连接结果占用的 78 * 5 = 390 MB。这会撑爆你的用户地址空间、超出你的 ulimit，或者触发其他某种限制。如果你有 1 GB 物理内存，其中 128 MB 被显卡驱动占用，但交换空间足够大，那么程序运行期间你有足够的时间去吃顿午饭（在某个特定的操作系统上，连晚饭也能一起吃了）。

方法5：在（4）的基础上，不要每次都去查找列表的 append 属性——那可是 7800 万次 :-)

下面是脚本文件：

import time, sys
# Benchmark driver: times one of several ways of writing all 4-character
# words over chr(33)..chr(ubound - 1) to wl4.txt, one word per line.
# Command line: <ubound> <which>, where <which> selects the method (0-5).
time_function = time.clock # Windows; time.time may be better on *x
ubound, which = map(int, sys.argv[1:3])
t0 = time_function()
if which == 0:
    ### original ###
    # The n == 1..3 branches are stubs: only the length-4 case is timed, but
    # the if/elif chain is still evaluated on each outer iteration.
    f = open('wl4.txt', 'w')
    hh = 0
    n = 4
    for l in range(33, ubound):
        if n == 1:
            pass
        elif n == 2:
            pass
        elif n == 3:
            pass
        elif n == 4:
            for s0 in range(33, ubound):
                for s1 in range(33, ubound):
                    for s2 in range(33,ubound):
                        b = chr(l) + chr(s0) + chr(s1) + chr(s2) + '\n'
                        f.write(b)
                        hh += 1
    f.close()
elif which == 1:
    ### John Kugleman ###
    # Build the character list once and loop over it directly, instead of
    # calling range() and chr() inside the loops.
    f = open('wl4.txt', 'w')
    chars = [chr(c) for c in range(33, ubound)]
    hh = 0
    for l in chars:
        for s0 in chars:
            for s1 in chars:
                for s2 in chars:
                    b = l + s0 + s1 + s2 + '\n'
                    f.write(b)
                    hh += 1
    f.close()
elif which == 2:
    ### JohnK, saving + ###
    # Same as method 1, but each prefix is concatenated once per outer
    # iteration (b0, b1) rather than rebuilt in the innermost loop.
    f = open('wl4.txt', 'w')
    chars = [chr(c) for c in range(33, ubound)]
    hh = 0
    for L in chars: # "L" as in "Legible" ;-)
        for s0 in chars:
            b0 = L + s0
            for s1 in chars:
                b1 = b0 + s1
                for s2 in chars:
                    b = b1 + s2 + '\n'
                    f.write(b)
                    hh += 1
    f.close()
elif which == 3:
    ### JohnK,  saving +, function ###
    def which3func():
        """Method 2 wrapped in a function so loop variables are locals.

        Returns the number of words written to wl4.txt.
        """
        f = open('wl4.txt', 'w')
        chars = [chr(c) for c in range(33, ubound)]
        nwords = 0
        for L in chars:
            for s0 in chars:
                b0 = L + s0
                for s1 in chars:
                    b1 = b0 + s1
                    for s2 in chars:
                        b = b1 + s2 + '\n'
                        f.write(b)
                        nwords += 1
        f.close()
        return nwords
    hh = which3func()
elif which == 4:
    ### JohnK, saving +, function, linesep.join() ###
    def which4func():
        """Method 3, but words are collected per first character and joined.

        One f.write() call is made per first character instead of one per
        word. Returns the number of words written to wl4.txt.
        """
        f = open('wl4.txt', 'w')
        chars = [chr(c) for c in range(33, ubound)]
        nwords = 0
        for L in chars:
            accum = []  # all words starting with L
            for s0 in chars:
                b0 = L + s0
                for s1 in chars:
                    b1 = b0 + s1
                    for s2 in chars:
                        accum.append(b1 + s2)
            # Count before appending the empty sentinel string below.
            nwords += len(accum)
            accum.append("") # so that we get a final newline
            f.write('\n'.join(accum))
        f.close()
        return nwords
    hh = which4func()
elif which == 5:
    ### JohnK, saving +, function, linesep.join(), avoid method lookup in loop ###
    def which5func():
        """Method 4 with accum.append bound to a local name outside the loops.

        Returns the number of words written to wl4.txt.
        """
        f = open('wl4.txt', 'w')
        chars = [chr(c) for c in range(33, ubound)]
        nwords = 0
        for L in chars:
            # Look up the bound method once per first character, not per word.
            accum = []; accum_append = accum.append
            for s0 in chars:
                b0 = L + s0
                for s1 in chars:
                    b1 = b0 + s1
                    for s2 in chars:
                        accum_append(b1 + s2)
            nwords += len(accum)
            accum_append("") # so that we get a final newline
            f.write('\n'.join(accum))
        f.close()
        return nwords
    hh = which5func()
else:
    # Unknown method number: no words were produced, so there is nothing to
    # report. Stop here rather than reach the summary line with hh undefined.
    print("Bzzzzzzt!!!")
    sys.exit(1)
t1 = time_function()
# Parenthesised single argument: prints the same text on Python 2 and 3.
print("Method %d made %d words in %.1f seconds" % (which, hh, t1 - t0))

以下是一些结果：

C:\junk\so>for %w in (0 1 2 3 4 5) do \python26\python wl4.py 127 %w

C:\junk\so>\python26\python wl4.py 127 0
Method 0 made 78074896 words in 352.3 seconds

C:\junk\so>\python26\python wl4.py 127 1
Method 1 made 78074896 words in 183.9 seconds

C:\junk\so>\python26\python wl4.py 127 2
Method 2 made 78074896 words in 157.9 seconds

C:\junk\so>\python26\python wl4.py 127 3
Method 3 made 78074896 words in 126.0 seconds

C:\junk\so>\python26\python wl4.py 127 4
Method 4 made 78074896 words in 68.3 seconds

C:\junk\so>\python26\python wl4.py 127 5
Method 5 made 78074896 words in 60.5 seconds

根据原作者的问题更新

"""当我尝试添加for循环时，我在accum_append上得到了内存错误..这是怎么回事？？"""

我不知道问题出在哪里；隔着这么远，我看不到你的代码。猜测：如果你想生成长度为 5 的单词，你可能把 accum 的初始化和写入部分放错了位置，导致 accum 不断增长，超出了你系统的内存容量（这一点我希望前面已经解释清楚了）。

"""现在方法5是最快的，但它只能生成长度为4的单词..我该如何设置我想要的长度呢？ :)"""

你有两个选择：（1）继续使用嵌套的for循环（2）查看那些不使用嵌套for循环的答案，长度可以动态指定。

方法4和5通过使用accum获得了速度提升，但这样做的方式是根据确切的内存使用量量身定制的。

下面还有3种方法。101是tgray的方法，没有额外的内存使用。201是Paul Hankin的方法（加上一些写入文件的代码），同样没有额外的内存使用。这两种方法的速度差不多，接近方法3的速度。它们都允许动态指定所需的长度。

方法102是tgray的方法，使用固定的1Mb缓冲区——它试图通过减少对f.write()的调用次数来节省时间……你可能想尝试不同的缓冲区大小。如果你愿意，可以创建一个独立的202方法。注意，tgray的方法使用了itertools.product，你需要Python 2.6，而Paul Hankin的方法使用了生成器表达式，这种方法已经存在一段时间了。

elif which == 101:
    ### tgray, memory-lite version
    def which101func():
        """Write all words of `length` characters to wl4.txt, one per line.

        Words are drawn from chr(33)..chr(ubound - 1) and written as they are
        generated, so no word list is kept in memory.

        Returns the number of words written.
        """
        # Imported locally so the function does not rely on a module-level
        # import of product; itertools.product requires Python 2.6 or later.
        from itertools import product
        f = open('wl4.txt', 'w')
        f_write = f.write  # bind the method once, outside the loop
        nwords = 0
        chars = map(chr, xrange(33, ubound))  # create a list of characters
        length = 4 #### length is a variable
        for x in product(chars, repeat=length):
            f_write(''.join(x) + '\n')
            nwords += 1
        f.close()
        return nwords
    hh = which101func()
elif which == 102:
    ### tgray, memory-lite version, buffered
    def which102func():
        """Write all words of `length` characters to wl4.txt, in batches.

        Words are drawn from chr(33)..chr(ubound - 1). Lines are collected in
        a list and written with a single f.write() call once the list holds
        about buffer_size_bytes worth of text, which reduces the number of
        write calls.

        Returns the number of words written.
        """
        # Imported locally so the function does not rely on a module-level
        # import of product; itertools.product requires Python 2.6 or later.
        from itertools import product
        f = open('wl4.txt', 'w')
        f_write = f.write
        nwords = 0
        chars = map(chr, xrange(33, ubound))  # create a list of characters
        length = 4 #### length is a variable
        buffer_size_bytes = 1024 * 1024
        # Each buffered line is `length` characters plus the newline.
        buffer_size_words = buffer_size_bytes // (length + 1)
        words_in_buffer = 0
        buffer = []; buffer_append = buffer.append
        for x in product(chars, repeat=length):
            words_in_buffer += 1
            buffer_append(''.join(x) + '\n')
            if words_in_buffer >= buffer_size_words:
                f_write(''.join(buffer))
                nwords += words_in_buffer
                words_in_buffer = 0
                # Empty the list in place so buffer_append stays bound to it.
                del buffer[:]
        # Flush whatever is left after the last full batch.
        if buffer:
            f_write(''.join(buffer))
            nwords += words_in_buffer
        f.close()
        return nwords
    hh = which102func()
elif which == 201:
    ### Paul Hankin (needed output-to-file code added)
    def AllWords(n, CHARS=[chr(i) for i in xrange(33, ubound)]):
        """Return an iterable of all words of length n.

        For n == 1 the CHARS list itself is returned; for larger n the result
        is a generator that appends each character in CHARS to each word of
        length n - 1. The default CHARS list is built once, when the function
        is defined, and the recursive call always uses that default.
        """
        if n != 1:
            shorter = AllWords(n - 1)
            return (prefix + ch for prefix in shorter for ch in CHARS)
        return CHARS
    def which201func():
        """Write every word from AllWords(4) to wl4.txt, one per line.

        Returns the number of words written.
        """
        out = open('wl4.txt', 'w')
        emit = out.write  # bound once so the loop avoids the attribute lookup
        count = 0
        for word in AllWords(4):
            emit(word + '\n')
            count += 1
        out.close()
        return count
    hh = which201func()

回答于 2025-04-15 由 Python大师

分享举报

你可以一次性创建一个从33到127的数字范围，保存起来。这样就不用每次都重新创建，这样能把运行时间减少一半，在我的电脑上效果明显。

# Build the list of characters chr(33)..chr(126) once, up front.
chars = [chr(c) for c in range(33, 127)]

...

# Iterate over the prebuilt characters directly; no range() or chr() calls
# are made inside the loops.
# NOTE(review): l, f and hh are not defined in this excerpt; presumably they
# come from the code elided by "..." - confirm.
for s0 in chars:
    for s1 in chars:
        for s2 in chars:
            b = l + s0 + s1 + s2 + '\n'
            f.write(b)
            hh += 1

回答于 2025-04-15 由 Python大师

分享举报

Python优化

7 个回答

撰写回答