特征选择与FDR（错误发现率）控制。

feature-selection-fdr的Python项目详细描述


>>> import numpy as np
>>> from fs_fdr import barber_candes_selection, knockoff_features_construction, utils
>>>
>>> from sklearn.ensemble import GradientBoostingClassifier
>>> #######################################################################################
>>> #### Simulate data to check the performance of the methods.
>>> #######################################################################################
>>>
>>>
>>> type = "classification"
>>> # First simulate some data
>>> n, p, p1 = 1000, 50, 20
>>> rho = 0.
>>> mean = 0.
>>> sd = 1.
>>> error_std = 1.
>>> r = ["uniform", .0, .5]
>>> x = mean + sd * np.random.normal(0., 1., size=(n, p))
>>> true_w = np.random.uniform(r[1], r[2], size=(p1, 1))
>>> negate = np.random.binomial(n=1, p=.5, size=(p1, 1))
>>> negate[np.where(negate==0.), :] = -1
>>> true_w = true_w * negate
>>> true_index = np.random.choice(np.arange(p), size = p1, replace=False)
>>> true_index = np.sort(true_index)
>>> xbeta = np.dot(x[:, true_index], true_w)
>>> pr = 1/(1+np.exp(-xbeta))
>>> t = (pr > .5) + 0.
>>>
>>> q = .1
>>> # Step 2: Create knockoff features using the knockoff_method library
>>>
>>> # Set parameters
>>>
>>> selection_method = "knockoff-MX"
>>> optimization = ["ASDP", "selfblocks", 50, 50]
>>>
>>> VI_stat = "Diff"
>>>
>>>
>>> myknockoff = knockoff_features_construction.Knockoff(x, selection_method, optimization)
>>> knockoff_attrs = myknockoff.knockoff_features()
>>> x, x_tilda = knockoff_attrs.X, knockoff_attrs.X_tilde
>>>
>>>
>>>
>>> modeling = {"model":"gradient boosting", "params":"classification fi"}
>>>
>>>
>>> data = [x, x_tilda, t]
>>> knockoff_selection = barber_candes_selection.BarberCandesSelection(data, modeling, selection_method,q=q, VI_stat=VI_stat).selection()
>>>
>>> S_knock = knockoff_selection.S
>>> FDR_UB = knockoff_selection.FDR_UB
>>>
>>>
>>>
>>> fdr_knock = 100*utils.FDR(S_knock, true_index)
>>> power_knock = 100*utils.power(S_knock, true_index)
>>> fnp_knock = 100*utils.FNP(S_knock, true_index, p)
>>> print('------------Knockoff ({})-------------'.format(modeling["model"]))
>>> print("Empirical FDR: " + str(100*np.round(FDR_UB, 2)) + "%")
>>> print("FDR:  " +str(fdr_knock) + "%")
>>> print("power:  "+str(power_knock) + "%")
>>> print("FNP:  "+str(fnp_knock) + "%")
>>>
>>>
>>> ##########DSS
>>>
>>>
>>> modeling = {"model":"gradient boosting", "params":"classification fi"}
>>> selection_method = "DSS"
>>> data = [x, t]
>>> split_type = ["sampling", 5, 5]
>>> prob = .7
>>> DSS_selection = barber_candes_selection.BarberCandesSelection(data, modeling, selection_method,q=q).selection()
>>>
>>> S_dss = DSS_selection.S
>>> FDR_UB = DSS_selection.FDR_UB
>>>
>>>
>>>
>>> fdr_dss = 100*utils.FDR(S_dss, true_index)
>>> power_dss = 100*utils.power(S_dss, true_index)
>>> fnp_dss = 100*utils.FNP(S_dss, true_index, p)
>>> print('------------DSS ({})-------------'.format(modeling["model"]))
>>> print("Empirical FDR: " + str(100*np.round(FDR_UB, 2)) + "%")
>>> print("FDR:  " +str(fdr_dss) + "%")
>>> print("power:  "+str(power_dss) + "%")
>>> print("FNP:  "+str(fnp_dss) + "%")
>>>
>>>
>>>
>>> ############### SVM
>>>
>>> modeling = {"model": "not specified", "params":"given"}
>>>
>>> from sklearn.svm import SVC
>>>
>>> svm = SVC(C=1., kernel="linear")
>>> svm_w = svm.fit(np.hstack((x, x_tilda)), t.ravel()).coef_
>>>
>>> selection_method = "knockoff"
>>> knockoff_selection = barber_candes_selection.BarberCandesSelection(modeling=modeling, selection_method=selection_method, w = svm_w).selection()
>>>
>>> S_knock = knockoff_selection.S
>>> FDR_UB = knockoff_selection.FDR_UB
>>> print("empirical FDR: " + str(100*np.round(FDR_UB, 2)))
>>>
>>> fdr_knock = 100*utils.FDR(S_knock, true_index)
>>> power_knock = 100*utils.power(S_knock, true_index)
>>> fnp_knock = 100*utils.FNP(S_knock, true_index, p)
>>> print('------------Knockoff ({})-------------'.format(modeling["model"]))
>>> print("Empirical FDR: " + str(100*np.round(FDR_UB, 2)) + "%")
>>> print("FDR:  " +str(fdr_knock) + "%")
>>> print("power:  "+str(power_knock) + "%")
>>> print("FNP:  "+str(fnp_knock) + "%")

欢迎加入QQ群-->: 979659372 Python中文网_新手群

推荐PyPI第三方库


热门话题
内部嵌套滚动视图的java Recyclerview?   返回空值的java MapPartitionFunction   java无法访问apache模块源代码中的系统定义的环境变量   java需要帮助确定用户输入的最大值和最小值   java检查文件是否存在,如果存在,则创建一个编号更高的新文件   针对许多不正确XML文档的XSD的XML验证在Java中不报告任何错误?   java有没有一种方法可以将json字符串甚至JSONObject发送给POSTMAN,而不在每个双引号前加斜杠?   java如何获得频域中两个音频信号的比率?   java 安卓 studio理解错误   java插入到部分填充的数组中   java使用流<String>创建流<CustomType>?   java开放式按键应用程序   java获取zxing生成的图像路径   使用shibboleth sso的java授权   java在运行时更改bean类名   javajooq如何从表中选择min'id'   java通过Eclipse web项目运行hadoop mapreduce字数   java同步方法与非同步方法同时执行   方法每次返回一个不同的加密字符数组   java获取ArrayList的每个元素中出现的特定字符的频率<String>