Python Pandas Kaufman自适应移动平均(KAMA)--- 在Pandas或Cython中递归计算
我正在尝试在Python的Pandas库中创建一个函数,用于计算Kaufman自适应移动平均线(KAMA),或者使用Cython(我之前在R和Rcpp中已经做过这个)。我在递归计算方面遇到了一些问题,特别是关于filt1的部分。
filt1[i] = ( filt1[i-1] + SC[i]*(price[i]-filt1[i-1]) )
我期望KAMA系列应该有以下特点:
(i) 开头有NA值,长度为n=10
(ii) 在2010年1月19日处有一个初始的“原始”均值1142.393,此处取的是收盘价(Close)的均值
(iii) 之后的KAMA值是根据递归公式filt1[i]计算得出的。
所以:
KAMA
2010-01-04 NA
2010-01-05 NA
2010-01-06 NA
2010-01-07 NA
2010-01-08 NA
2010-01-11 NA
2010-01-12 NA
2010-01-13 NA
2010-01-14 NA
2010-01-15 NA
2010-01-19 1142.393
2010-01-20 1142.367
2010-01-21 1142.244
2010-01-22 1140.212
2010-01-25 1138.683
2010-01-26 1136.517
最后得到的结果应该是这样的:
2013-12-24 1791.114
2013-12-26 1802.816
2013-12-27 1814.759
2013-12-30 1822.844
2013-12-31 1830.523
我已经开始着手实现,目前的代码如下:
#%%
# Include this line for NEW WINDOW(S) for figures
%pylab qt4
# start with getting some data to test on
import datetime
import tradingWithPython as twp # main toolkit functions
import tradingWithPython.lib.yahooFinance as yf # yahoo finance module
import tradingWithPython.lib.backtest as backtest
from tradingWithPython.lib.extra import ProgressBar # import progress bar
#The python module (talib) that I will be using to calculate the technical
#indicators is a wrapper around the open source TA-Lib.
import talib
import numpy as np
import pandas.io.data as web
import pandas.stats.moments
import pandas as pd
# Console display settings for the interactive session.
# The commented-out calls are alternatives the author left disabled.
#pd.set_option('html', False) # how to display data - DEFAULT is True
#pd.set_option('display.height', int(1e7))
# Effectively unlimited row count, so head(100)/tail() output is not elided.
pd.set_option('display.max_rows', int(1e7))
#pd.set_option('display.max_columns', int(1e7))
# Wide console so the columns are not wrapped onto several lines.
pd.set_option('display.width', 3000)
#%%
#%%
def KAMA(x, n=10, pow1=2, pow2=30):
    """Return the Kaufman smoothing constant (SC) for a price series.

    Despite its name, this version stops before the recursive KAMA filter:
    it returns the squared smoothing constant that the recursion
    ``filt1[i] = filt1[i-1] + SC[i] * (price[i] - filt1[i-1])`` consumes.

    Parameters
    ----------
    x : pandas.Series
        Prices (for example daily closes).
    n : int
        Look-back window of the efficiency ratio.
    pow1, pow2 : int or float
        Periods of the fast and slow smoothing limits; each is turned into
        a weight ``2 / (period + 1)``.

    Returns
    -------
    pandas.Series
        Series named ``"SC"`` on the index of ``x``. The first ``n`` entries
        are NaN because the window is not yet full.
    """
    # Everything is kept in locals: the function neither needs nor writes
    # columns into a module-level frame.
    abs_diff = x.diff().abs()

    # Efficiency ratio = |net move over n bars| / sum of |bar-to-bar moves|.
    # The numerator must be absolute, otherwise falling prices give a
    # negative ratio and a wrong smoothing constant.
    er_num = x.diff(n).abs()
    er_den = abs_diff.rolling(n).sum()
    er = er_num / er_den

    fast = 2.0 / (pow1 + 1.0)
    slow = 2.0 / (pow2 + 1.0)
    sc = (er * (fast - slow) + slow) ** 2.0
    return sc.rename("SC")
#%%
#%%
# Download daily S&P 500 index data (^GSPC) from Yahoo Finance for
# 2010-01-01 .. 2013-12-31 into the DataFrame `d`.
start = datetime.datetime(2010,1,1)
end = datetime.datetime(2013,12,31)
ticker = "^GSPC"
d=web.DataReader(ticker,'yahoo',start,end)
# Summary of the downloaded frame; the output recorded by the author follows.
d.info()
#<class 'pandas.core.frame.DataFrame'>
#DatetimeIndex: 1006 entries, 2010-01-04 00:00:00 to 2013-12-31 00:00:00
#Data columns (total 6 columns):
#Open 1006 non-null float64
#High 1006 non-null float64
#Low 1006 non-null float64
#Close 1006 non-null float64
#Volume 1006 non-null int64
#Adj Close 1006 non-null float64
type(d)
#pandas.core.frame.DataFrame
# Quick look at the first and last rows (interactive display only).
d.head()
d.tail()
#%%
#%%
# Calculate KAMA on the closing prices
# ------------------------------------
kama = KAMA(d.Close, n=10, pow1=2, pow2=30)
type(kama)
# Recorded by the author as pandas.core.frame.DataFrame.
# NOTE(review): KAMA() above returns the single 'SC' column, so this is
# presumably a pandas Series - confirm.
# Inspect the leading (NaN warm-up) and trailing values interactively.
kama.head(100)
kama.tail(10)
#%%
#%%
# Put the close and the indicator side by side and plot each in its own panel.
# NOTE: the KAMA() defined in the question returns the smoothing constant SC,
# so the column labelled 'KAMA' holds SC values at this stage.
df = pd.DataFrame({'price':d.Close, 'KAMA':KAMA(d.Close, n=10, pow1=2, pow2=30) })
df.plot(subplots=True)
#%%
请问我该如何在Pandas中或者使用Cython来计算filt1[i],并将结果作为一个Pandas数据框输出呢?非常感谢。
3 个回答
0
你可以使用 talib.KAMA 函数,但其结果与 TradingView 的结果并不完全一致。下面是一个自行实现的版本:
import talib
import pandas as pd
def calc_kama(src, length, fastend=0.666, slowend=0.0645, df_column_name="KAMA"):
    """Compute Kaufman's Adaptive Moving Average of a price series.

    Parameters
    ----------
    src : pandas.Series or sequence of float
        Prices; a Series keeps its index in the result.
    length : int
        Window of the efficiency ratio and of the seeding simple average.
    fastend, slowend : float
        Smoothing weights used when the efficiency ratio is 1 and 0.
    df_column_name : str
        Name given to the returned Series.

    Returns
    -------
    pandas.Series
        KAMA rounded to 2 decimals. The first ``length`` entries are NaN;
        entry ``length`` is seeded from the simple average of the preceding
        ``length`` prices, and later entries follow the recursion
        ``kama[i] = kama[i-1] + sc[i] * (price[i] - kama[i-1])``.
    """
    prices = pd.Series(src, dtype="float64")
    size = len(prices)

    # Noise: rolling sum of absolute one-bar changes. diff() leaves the
    # first change undefined instead of wrapping the last price around.
    noise = prices.diff().abs().rolling(length).sum().fillna(1.0).values
    # Signal: absolute net change over the window (0 while undefined).
    signal = prices.diff(length).abs().fillna(0.0).values

    # Efficiency ratio, defined as 1 where there was no movement at all;
    # the division is only evaluated where the denominator is non-zero.
    efratio = np.ones(size)
    moving = noise != 0
    efratio[moving] = signal[moving] / noise[moving]
    efratio = np.round(efratio, 2)

    smoothing = (efratio * (fastend - slowend) + slowend) ** 2

    # Seed candidate for bar i: the SMA of the `length` bars before it.
    seed = prices.rolling(length).mean().shift().values
    price_values = prices.values

    kama = np.full(size, np.nan)
    started = False
    for i in range(size):
        if started:
            prev = kama[i - 1]
        elif seed[i] == seed[i]:  # first bar with a defined (non-NaN) seed
            prev = seed[i]
            started = True
        else:
            continue  # still warming up; the entry stays NaN
        kama[i] = prev + smoothing[i] * (price_values[i] - prev)

    return pd.Series(kama, index=prices.index, name=df_column_name).round(2)
######################
0
你需要确认这个方法确实能给你想要的结果,不过作为一个起点,这里有一个比较快速的Cython版本。也许有更擅长Cython的人能发现一些明显的改进之处。下面是一个简单的速度对比:
In [448]: %timeit df['kama_python'] = filt_python(df['sc'].values, df['price'].values)
10 loops, best of 3: 22.5 ms per loop
In [449]: %timeit df['kama_cython'] = filt_cython(df['sc'].values, df['price'].values)
1000 loops, best of 3: 300 µs per loop
在编写这个代码时,我最开始是用纯Python写的(见下文),然后不断添加类型注解(在pandas的文档中有个很好的例子),直到它变得足够快。感谢@Jeff指出了检查np.nan的正确方法(v != v)。
%%cython
cimport numpy as np
import numpy as np
import pandas as pd
cpdef np.ndarray[double] filt_cython(np.ndarray[double] sc, np.ndarray[double] price):
    """Run the recursive KAMA filter over smoothing constants and prices.

    sc    -- 1-D float64 array of smoothing constants; NaN marks bars with
             no defined constant.
    price -- 1-D float64 array of prices, aligned with ``sc``.

    Returns a float64 array of the same length. Bars whose ``sc`` is NaN are
    NaN; the first bar with a defined ``sc`` is seeded with its own price;
    later bars use
    ``answer[i] = answer[i-1] + sc[i] * (price[i] - answer[i-1])``.
    """
    cdef Py_ssize_t n = sc.size
    cdef np.ndarray[double] answer = np.empty(n)
    cdef Py_ssize_t i
    # Typed flag and a C-level NaN keep the loop body free of Python-object
    # truth tests and attribute lookups.
    cdef bint seeded = False
    cdef double nan_value = np.nan
    for i in range(n):
        if sc[i] != sc[i]:  # NaN is the only value not equal to itself
            answer[i] = nan_value
        elif not seeded:
            answer[i] = price[i]
            seeded = True
        else:
            answer[i] = answer[i-1] + sc[i] * (price[i] - answer[i-1])
    return answer
这是我最开始用的纯Python版本。
def filt_python(sc, price):
    """Pure-Python reference implementation of the recursive KAMA filter.

    ``sc`` and ``price`` are aligned 1-D arrays of smoothing constants and
    prices. The result is a float array of the same length in which:

    * index 0 and every position whose ``sc`` is null are NaN;
    * the first remaining position is seeded with its own price;
    * each later position is
      ``previous + sc[i] * (price[i] - previous)``.
    """
    count = sc.size
    answer = np.zeros(count)
    seeded = False
    for idx in range(count):
        # Position 0 is always left undefined, whatever sc[0] holds.
        if idx == 0 or pd.isnull(sc[idx]):
            answer[idx] = np.nan
            continue
        if not seeded:
            answer[idx] = price[idx]
            seeded = True
            continue
        previous = answer[idx - 1]
        answer[idx] = previous + sc[idx] * (price[idx] - previous)
    return answer
4
谢谢大家的回复。有效果!
#%%
def KAMA(price, n=10, pow1=2, pow2=30):
    """Compute Kaufman's Adaptive Moving Average (KAMA) of a price series.

    Parameters
    ----------
    price : pandas.Series
        Prices (for example daily closes); any index type is accepted.
    n : int
        Look-back window of the efficiency ratio.
    pow1, pow2 : int or float
        Periods of the fast and slow smoothing limits; each is turned into
        a weight ``2 / (period + 1)``.

    Returns
    -------
    numpy.ndarray
        One float per input row. The first ``n`` values are NaN; the first
        bar with a defined smoothing constant is seeded with its own price,
        and later bars follow
        ``kama[i] = kama[i-1] + sc[i] * (price[i] - kama[i-1])``.

    Notes
    -----
    A window with no price movement at all gives an efficiency ratio of
    0/0 = NaN; that bar is NaN and, through the recursion, so is every
    later bar.
    """
    abs_diff = price.diff().abs()
    er_num = price.diff(n).abs()
    er_den = abs_diff.rolling(n).sum()
    er = er_num / er_den

    fast = 2.0 / (pow1 + 1.0)
    slow = 2.0 / (pow2 + 1.0)

    # Plain arrays make every access below positional; indexing a Series
    # with an integer is label-based and breaks on non-default indexes.
    sc = ((er * (fast - slow) + slow) ** 2.0).values
    prices = price.values

    answer = np.full(len(sc), np.nan)
    seeded = False
    for i in range(len(sc)):
        if sc[i] != sc[i]:  # NaN smoothing constant: leave the bar as NaN
            continue
        if not seeded:
            answer[i] = prices[i]
            seeded = True
        else:
            answer[i] = answer[i - 1] + sc[i] * (prices[i] - answer[i - 1])
    return answer
#%%
#%%
# Calculate KAMA on the closing prices with the final KAMA() defined above,
# which returns a NumPy array, then display it interactively.
# ---------------
kama = KAMA(d.Close, n=10, pow1=2, pow2=30)
kama
#%%