使用Seaborn和Statsmodels在一个图中显示数据和模型预测

import numpy as np
import pandas as pd
import seaborn as sns
import statsmodels.formula.api as smf
import patsy
import itertools
import matplotlib.pyplot as plt

# Fix the seed so the simulated data set is reproducible.
np.random.seed(12345)

# make a data frame with one continuous and two categorical variables:
df = pd.DataFrame({'x1': np.random.normal(size=100),
                   'x2': np.tile(np.array(['a', 'b']), 50),
                   'x3': np.repeat(np.array(['c', 'd']), 50)})

# create a design matrix using patsy:
X = patsy.dmatrix('x1 * x2 * x3', df)

# some random beta weights (one per design-matrix column):
betas = np.random.normal(size=X.shape[1])

# create the response variable as the noisy linear combination of predictors:
df['y'] = np.inner(X, betas) + np.random.normal(size=100)

# create a new dataframe of predictions, using pandas' expand grid:
def expand_grid(data_dict):
    """
    A port of R's expand.grid function for use with Pandas dataframes.

    Builds the Cartesian product of the value sequences in ``data_dict``
    and returns it as a DataFrame with one row per combination and one
    column per dictionary key.

    from http://pandas.pydata.org/pandas-docs/stable/cookbook.html?highlight=expand%20grid
    """
    rows = itertools.product(*data_dict.values())
    return pd.DataFrame.from_records(rows, columns=data_dict.keys())


# build a new matrix with expand grid:
preds = expand_grid(
    {'x1': np.linspace(df['x1'].min(), df['x1'].max(), 2),
     'x2': ['a', 'b'],
     'x3': ['c', 'd']})
# NOTE(review): `fit` is not defined before this point in the visible text;
# presumably it is a fitted statsmodels result -- confirm.
preds['yhat'] = fit.predict(preds)

  x3        x1 x2      yhat
0  c -2.370232  a -1.555902
1  c -2.370232  b -2.307295
2  c  3.248944  a -1.555902
3  c  3.248944  b -2.307295
4  d -2.370232  a -1.609652
5  d -2.370232  b -2.837075
6  d  3.248944  a -1.609652
7  d  3.248944  b -2.837075

# plot using seaborn:
sns.set_style('white')
sns.set_context('talk')
# `height` is the current name of FacetGrid's former `size` parameter.
g = sns.FacetGrid(merged, hue='x2', col='x3', height=5)

# use the `map` method to add stuff to the facetgrid axes:
g.map(plt.plot, "x1", "yhat")
g.map(plt.scatter, "x1", "y")
g.add_legend()
g.fig.subplots_adjust(wspace=0.3)
sns.despine(offset=10)

# Fit an OLS model that uses only the two categorical predictors.
fit = smf.ols('y ~ x2 * x3', df).fit()
print(fit.summary())

# Predict once for every combination of the categorical levels.
preds = expand_grid(
    {'x2': ['a', 'b'],
     'x3': ['c', 'd']})
preds['yhat'] = fit.predict(preds)
print(preds)

# append to df (DataFrame.append was removed in pandas 2.0, so use concat):
merged = pd.concat([df, preds])

1条回答

网友

1楼 · 发布于 2024-05-16 17:37:24

正如我在评论中提到的，我有两种方式来考虑这样做。

第一种是定义一个先进行拟合、再绘图的函数，并将其传递给 FacetGrid.map：

import pandas as pd
import seaborn as sns
# Load the example dataset named "tips" through seaborn.
tips = sns.load_dataset("tips")

def plot_good_tip(day, total_bill, **kws):
    """Draw the "expected" tip per day as a point plot.

    The expected tip is 20% of the mean of ``total_bill`` within each
    ``day`` group. It is drawn with ``sns.pointplot`` using a dashed line
    and diamond markers.

    Parameters
    ----------
    day
        Grouping keys for ``total_bill``. The resulting column is read back
        as ``expected_tip.day``, so this presumably needs to be a Series
        named "day" -- verify against the caller.
    total_bill
        Values to average per group; must provide ``.groupby``.
    **kws
        Accepted so that extra keyword arguments from a caller do not
        raise; they are not used.
    """
    expected_tip = (total_bill.groupby(day)
                              .mean()
                              .apply(lambda x: x * .2)
                              .reset_index(name="tip"))
    # Pass x/y by keyword: recent seaborn releases no longer accept them
    # as positional arguments.
    sns.pointplot(x=expected_tip.day, y=expected_tip.tip,
                  linestyles=["--"], markers=["D"])

# One facet per value of "sex"; `height` is the current name of the former
# `size` parameter of FacetGrid.
g = sns.FacetGrid(tips, col="sex", height=5)
# Observed tips per day.
g.map(sns.pointplot, "day", "tip")
# Overlay the model: plot_good_tip derives the expected tip from total_bill.
g.map(plot_good_tip, "day", "total_bill")
# The second map call plots from "total_bill", so set the labels explicitly.
g.set_axis_labels("day", "tip")

enter image description here

第二个是计算预测值，然后将它们与一个附加变量合并到数据框中，该变量标识什么是数据和什么是模型：

# Model prediction: 20% of the mean total bill for each (day, sex) group.
tip_predict = (
    tips.groupby(["day", "sex"])["total_bill"]
    .mean()
    .mul(.2)
    .reset_index(name="tip")
)

# Stack observed tips and predictions; the outer index level, named "kind",
# marks each row as "data" or "model" and becomes a column after reset_index.
tip_all = pd.concat(
    {"data": tips[["day", "sex", "tip"]], "model": tip_predict},
    names=["kind"],
).reset_index()

# `factorplot` was renamed to `catplot`; x/y/hue are passed by keyword because
# recent seaborn releases no longer accept them positionally.
sns.catplot(x="day", y="tip", hue="kind", data=tip_all, col="sex",
            kind="point", linestyles=["-", "--"], markers=["o", "D"])

enter image description here

相关问题更多 >

编程相关推荐

热门问题

热门文章