Python字符串格式化：“%”是否比“format”函数更有效？

import time def timing(f, n, show, *args): if show: print f.__name__ + ":\t", r = range(n/10) t1 = time.clock() for i in r: f(*args); f(*args); f(*args); f(*args); f(*args); f(*args); f(*args); f(*args); f(*args); f(*args) t2 = time.clock() timing = round(t2-t1, 3) if show: print timing return timing #Class class values(object): def __init__(self, a, b, c="", d=""): self.a = a self.b = b self.c = c self.d = d def test_plus(a, b): return a + "-" + b def test_percent(a, b): return "%s-%s" % (a, b) def test_join(a, b): return ''.join([a, '-', b]) def test_format(a, b): return "{}-{}".format(a, b) def test_formatC(val): return "{0.a}-{0.b}".format(val) def test_plus_long(a, b, c, d): return a + "-" + b + "-" + c + "-" + d def test_percent_long(a, b, c, d): return "%s-%s-%s-%s" % (a, b, c, d) def test_join_long(a, b, c, d): return ''.join([a, '-', b, '-', c, '-', d]) def test_format_long(a, b, c, d): return "{0}-{1}-{2}-{3}".format(a, b, c, d) def test_formatC_long(val): return "{0.a}-{0.b}-{0.c}-{0.d}".format(val) def test_plus_long2(a, b, c, d): return a + "-" + b + "-" + c + "-" + d + "-" + a + "-" + b + "-" + c + "-" + d def test_percent_long2(a, b, c, d): return "%s-%s-%s-%s-%s-%s-%s-%s" % (a, b, c, d, a, b, c, d) def test_join_long2(a, b, c, d): return ''.join([a, '-', b, '-', c, '-', d, '-', a, '-', b, '-', c, '-', d]) def test_format_long2(a, b, c, d): return "{0}-{1}-{2}-{3}-{0}-{1}-{2}-{3}".format(a, b, c, d) def test_formatC_long2(val): return "{0.a}-{0.b}-{0.c}-{0.d}-{0.a}-{0.b}-{0.c}-{0.d}".format(val) def test_plus_superlong(lst): string = "" for i in lst: string += str(i) return string def test_join_superlong(lst): return "".join([str(i) for i in lst]) def mean(numbers): return float(sum(numbers)) / max(len(numbers), 1) nb_times = int(1e6) n = xrange(5) lst_numbers = xrange(1000) from collections import defaultdict metrics = defaultdict(list) list_functions = [ test_plus, test_percent, test_join, test_format, test_formatC, test_plus_long, test_percent_long, test_join_long, test_format_long, test_formatC_long, test_plus_long2, test_percent_long2, test_join_long2, test_format_long2, test_formatC_long2, # test_plus_superlong, test_join_superlong, ] val = values("123", "456", "789", "0ab") for i in n: for f in list_functions: print ".", name = f.__name__ if "formatC" in name: t = timing(f, nb_times, False, val) elif '_long' in name: t = timing(f, nb_times, False, "123", "456", "789", "0ab") elif '_superlong' in name: t = timing(f, nb_times, False, lst_numbers) else: t = timing(f, nb_times, False, "123", "456") metrics[name].append(t) #Get Average print "\n===AVERAGE OF TIMINGS===" for f in list_functions: name = f.__name__ timings = metrics[name] print "{:>20}:\t{:0.5f}".format(name, mean(timings))

1条回答

网友

1楼 · 发布于 2024-06-02 07:51:49

是的，%字符串格式比.format方法快
很有可能（这可能有更好的解释），因为%是一种语法符号（因此执行速度很快），而.format至少涉及一个额外的方法调用
因为属性值访问还涉及一个额外的方法调用，即。__getattr__

我使用各种格式化方法的timeit运行了一个稍微好一点的分析（在Python 3.6.0上），其结果如下（用BeautifulTable打印得很漂亮）

+-----------------+-------+-------+-------+-------+-------+--------+
| Type \ num_vars |   1   |   2   |   5   |  10   |  50   |  250   |
+-----------------+-------+-------+-------+-------+-------+--------+
|    f_str_str    | 0.306 | 0.064 | 0.106 | 0.183 | 0.737 | 3.422  |
+-----------------+-------+-------+-------+-------+-------+--------+
|    f_str_int    | 0.295 | 0.174 | 0.385 | 0.686 | 3.378 | 16.399 |
+-----------------+-------+-------+-------+-------+-------+--------+
|   concat_str    | 0.012 | 0.053 | 0.156 | 0.31  | 1.707 | 16.762 |
+-----------------+-------+-------+-------+-------+-------+--------+
|    pct_s_str    | 0.056 | 0.178 | 0.275 | 0.469 | 1.872 | 9.191  |
+-----------------+-------+-------+-------+-------+-------+--------+
|    pct_s_int    | 0.128 | 0.208 | 0.343 | 0.605 | 2.483 | 13.24  |
+-----------------+-------+-------+-------+-------+-------+--------+
| dot_format_str  | 0.418 | 0.217 | 0.343 | 0.58  | 2.241 | 11.163 |
+-----------------+-------+-------+-------+-------+-------+--------+
| dot_format_int  | 0.416 | 0.277 | 0.476 | 0.811 | 3.378 | 17.829 |
+-----------------+-------+-------+-------+-------+-------+--------+
| dot_format2_str | 0.433 | 0.242 | 0.416 | 0.675 | 3.152 | 16.783 |
+-----------------+-------+-------+-------+-------+-------+--------+
| dot_format2_int | 0.428 | 0.298 | 0.541 | 0.933 | 4.444 | 24.767 |
+-----------------+-------+-------+-------+-------+-------+--------+

后面的_str&；_int表示对各个值类型执行的操作。

请注意单个变量的concat_str结果本质上只是字符串本身，因此不应该真正考虑它。

我得出结果的准备-

from timeit import timeit
from beautifultable import BeautifulTable  # pip install beautifultable

times = {}

for num_vars in (1, 2, 5, 10, 50, 250):
    f_str = "f'{" + '}{'.join([f'x{i}' for i in range(num_vars)]) + "}'"
    # "f'{x0}{x1}"
    concat = '+'.join([f'x{i}' for i in range(num_vars)])
    # 'x0+x1'
    pct_s = '"' + '%s'*num_vars + '" % (' + ','.join([f'x{i}' for i in range(num_vars)]) + ')'
    # '"%s%s" % (x0,x1)'
    dot_format = '"' + '{}'*num_vars + '".format(' + ','.join([f'x{i}' for i in range(num_vars)]) + ')'
    # '"{}{}".format(x0,x1)'
    dot_format2 = '"{' + '}{'.join([f'{i}' for i in range(num_vars)]) + '}".format(' + ','.join([f'x{i}' for i in range(num_vars)]) + ')'
    # '"{0}{1}".format(x0,x1)'

    vars = ','.join([f'x{i}' for i in range(num_vars)])
    vals_str = tuple(map(str, range(num_vars)))
    setup_str = f'{vars} = {vals_str}'
    # "x0,x1 = ('0', '1')"
    vals_int = tuple(range(num_vars))
    setup_int = f'{vars} = {vals_int}'
    # 'x0,x1 = (0, 1)'

    times[num_vars] = {
        'f_str_str': timeit(f_str, setup_str),
        'f_str_int': timeit(f_str, setup_int),
        'concat_str': timeit(concat, setup_str),
        # 'concat_int': timeit(concat, setup_int), # this will be summation, not concat
        'pct_s_str': timeit(pct_s, setup_str),
        'pct_s_int': timeit(pct_s, setup_int),
        'dot_format_str': timeit(dot_format, setup_str),
        'dot_format_int': timeit(dot_format, setup_int),
        'dot_format2_str': timeit(dot_format2, setup_str),
        'dot_format2_int': timeit(dot_format2, setup_int),
    }

table = BeautifulTable()
table.column_headers = ['Type \ num_vars'] + list(map(str, times.keys()))
# Order is preserved, so I didn't worry much
for key in ('f_str_str', 'f_str_int', 'concat_str', 'pct_s_str', 'pct_s_int', 'dot_format_str', 'dot_format_int', 'dot_format2_str', 'dot_format2_int'):
    table.append_row([key] + [times[num_vars][key] for num_vars in (1, 2, 5, 10, 50, 250)])
print(table)

我不能超过num_vars=250，因为一些最大参数（255）限制是timeit。

tl；dr-Python字符串格式化性能：f-strings是最快和更优雅的，但有时（由于某些implementation restrictions&；仅限于Py3.6+），您可能需要使用其他格式化选项。

相关问题更多 >

编程相关推荐

热门问题

热门文章