擅长:Python、MySQL、Java
<p>一种快速计算累积中位数(即 pandas 的 expanding().median())的方法</p>
<pre><code>In [1]: import timeit
In [2]: setup = """import bisect
...: import pandas as pd
...: def cummedian():
...:     l = []
...:     info = [0, True]
...:     def inner(n):
...:         bisect.insort(l, n)
...:         info[0] += 1
...:         info[1] = not info[1]
...:         median = info[0] // 2
...:         if info[1]:
...:             return (l[median] + l[median - 1]) / 2
...:         else:
...:             return l[median]
...:     return inner
...: df = pd.DataFrame({'a': range(20)})"""
In [3]: timeit.timeit("df['cummedian'] = df['a'].apply(cummedian())",setup=setup,number=100000)
Out[3]: 27.11604686321956
In [4]: timeit.timeit("df['expanding'] = df['a'].expanding().median()",setup=setup,number=100000)
Out[4]: 48.457676260100335
In [5]: 48.4576/27.116
Out[5]: 1.7870482372031273
</code></pre>