import numpy
from scipy.stats import beta
from scipy.stats import norm
def binomial_hpdr(n, N, pct, a=1, b=1, n_pbins=1e3):
"""
Function computes the posterior mode along with the upper and lower bounds of the
**Highest Posterior Density Region**.
Parameters
----------
n: number of successes
N: sample size
pct: the size of the confidence interval (between 0 and 1)
a: the alpha hyper-parameter for the Beta distribution used as a prior (Default=1)
b: the beta hyper-parameter for the Beta distribution used as a prior (Default=1)
n_pbins: the number of bins to segment the p_range into (Default=1e3)
Returns
-------
A tuple that contains the mode as well as the lower and upper bounds of the interval
(mode, lower, upper)
"""
# fixed random variable object for posterior Beta distribution
rv = beta(n+a, N-n+b)
# determine the mode and standard deviation of the posterior
stdev = rv.stats('v')**0.5
mode = (n+a-1.)/(N+a+b-2.)
# compute the number of sigma that corresponds to this confidence
# this is used to set the rough range of possible success probabilities
n_sigma = numpy.ceil(norm.ppf( (1+pct)/2. ))+1
# set the min and max values for success probability
max_p = mode + n_sigma * stdev
if max_p > 1:
max_p = 1.
min_p = mode - n_sigma * stdev
if min_p > 1:
min_p = 1.
# make the range of success probabilities
p_range = numpy.linspace(min_p, max_p, n_pbins+1)
# construct the probability mass function over the given range
if mode > 0.5:
sf = rv.sf(p_range)
pmf = sf[:-1] - sf[1:]
else:
cdf = rv.cdf(p_range)
pmf = cdf[1:] - cdf[:-1]
# find the upper and lower bounds of the interval
sorted_idxs = numpy.argsort( pmf )[::-1]
cumsum = numpy.cumsum( numpy.sort(pmf)[::-1] )
j = numpy.argmin( numpy.abs(cumsum - pct) )
upper = p_range[ (sorted_idxs[:j+1]).max()+1 ]
lower = p_range[ (sorted_idxs[:j+1]).min() ]
return (mode, lower, upper)
虽然scipy.stats模块有一个计算等尾置信度的方法,但它缺少一个计算最高密度区间的类似方法。下面是一个使用scipy和numpy中的方法来完成它的粗略方法。
此解决方案还假设您希望使用Beta发行版作为先前版本。超参数
a
和b
被设置为1,因此默认的优先级是0和1之间的均匀分布。注意,因为这里还没有贴出^{} 让你用各种方法得到二项置信区间。不过,它只做对称间隔。
如果你有选择的话,我会说R(或者另一个统计数据包)可能会更好地为你服务。也就是说,如果你只需要二项置信区间,你可能不需要整个库。下面是我最天真的javascript翻译中的函数。
相关问题 更多 >
编程相关推荐