如何在中心对齐的堆叠柱状图中实现间距

0 投票
1 回答
877 浏览
提问于 2025-04-18 04:38

我正在使用 matplotlib,以及别人编写的一个堆叠柱状图程序来绘制堆叠柱状图。

我的图表:

x轴有8个收入分布,每个柱子对应一个分布。

y轴表示每个收入分布中人的百分比。类型-a的人是第一个堆叠,类型-b的人是第二个堆叠,类型-c的人是第三个堆叠。

我的柱状图是居中的,我正在想办法调整柱子的间距,这样图表看起来更好,标签也更容易阅读。有什么建议或者需要澄清的地方吗?

这个程序叫做stackedBarGraph.py,代码看起来是这样的,其中widths是一个包含8个值的数组,每个值对应一个柱子的宽度。

如果你需要更多信息,请告诉我(我尽量保持所有内容相关)。谢谢!

完整代码(希望不是太难读):

   from __future__ import division
from pylab import * 
import seaborn as sns
import pandas as pd
import numpy as np
from stackedbars import StackedBarGrapher

# Load the records and keep only the four columns that are plotted.
data = csv2rec('coa.csv', delimiter=',')

df = pd.DataFrame(dict(grant=data['totalgrantaid'],
                       stud=data['studenteffort'],
                       par=data['parentcontim'],
                       income=data['parentincomeim']))

#organize the data to graph
income_brackets = [(0, 25000), (25000, 50000), (50000, 75000), (75000, 100000), (100000, 150000), (150000,200000), (200000,250000), (250000,300000)]
levels = ('grant', 'stud', 'par')   # stacking order, bottom to top

# Select the rows of every bracket once; the medians and the bar widths below
# both reuse these selections instead of re-running the same query.
# NOTE(review): both bounds are exclusive, so a row whose income is exactly on
# a bracket edge (e.g. 25000) falls into no bracket -- confirm this is intended.
bracket_rows = [df.query('income > {} and income < {}'.format(lower, upper))
                for lower, upper in income_brackets]

# median of each stacked quantity inside each bracket
source = dict((key, [median(rows[key]) for rows in bracket_rows])
              for key in levels)

#set the widths
# Each bar is as wide as its bracket's share of the non-null 'grant' values,
# scaled by 10, so the widths add up to at most 10 x-axis units.
bracket_counts = [rows['grant'].count() for rows in bracket_rows]
total = df.query('income > {} and income < {}'.format(
    income_brackets[0][0], income_brackets[-1][1]))['grant'].count()

#graph specifications
# total is a plain scalar here, so no positional [0] lookup on a
# label-indexed Series is needed to get a number out of the division.
d_widths = [count / (total / 10.0) for count in bracket_counts]
d_colors = ['r','g','b']
d_labels = ('<25000', '25000-\n50000', '50000-\n75000', '75000-\n100000', '100000-\n150000', '150000-\n200000', '200000-\n250000', '250000-\n300000')
# one row per bracket, one column per stacked level
d = np.array([[source[k][i] for k in levels]
              for i in range(len(income_brackets))])

#the graph
fig = plt.figure()
ax1 = fig.add_subplot(111)
mygraph = StackedBarGrapher()
mygraph.stackedBarPlot(ax1, d, d_colors,
                       edgeCols=['#000000'] * len(d_colors),
                       widths=d_widths,
                       showFirst=len(income_brackets),
                       xLabels=d_labels,
                       scale=True)

堆叠柱状图程序:

    def stackedBarPlot(self,
                       ax,                                 # axes to plot onto
                       data,                               # data to plot
                       cols,                               # colors for each level
                       xLabels = None,                     # bar specific labels
                       yTicks = 6.,                        # information used for making y ticks ["none", <int> or [[tick_pos1, tick_pos2, ... ],[tick_label_1, tick_label2, ...]]
                       edgeCols=None,                      # colors for edges
                       showFirst=-1,                       # only plot the first <showFirst> bars
                       scale=False,                        # scale bars to same height
                       widths=None,                        # set widths for each bar
                       heights=None,                       # set heights for each bar
                       ylabel='',                          # label for x axis
                       xlabel=''                          # label for y axis
                       ):

#------------------------------------------------------------------------------
# data fixeratering

        # make sure this makes sense
        if showFirst != -1:
            showFirst = np.min([showFirst, np.shape(data)[0]])
            data_copy = np.copy(data[:showFirst]).transpose().astype('float')
            data_shape = np.shape(data_copy)
            if heights is not None:
                heights = heights[:showFirst]
            if widths is not None:
                widths = widths[:showFirst]
            showFirst = -1
        else:
            data_copy = np.copy(data).transpose()
        data_shape = np.shape(data_copy)

        # determine the number of bars and corresponding levels from the shape of the data
        num_bars = data_shape[1]
        levels = data_shape[0]

        if widths is None:
            widths = np.array([1] * num_bars)
            x = np.arange(num_bars)
        else:
            x = [0]
            for i in range(1, len(widths)):
                x.append(x[i-1] + (widths[i-1] + widths[i])/2)


        # stack the data --
        # replace the value in each level by the cumulative sum of all preceding levels
        data_stack = np.reshape([float(i) for i in np.ravel(np.cumsum(data_copy, axis=0))], data_shape)

        # scale the data is needed
        if scale:
            data_copy /= data_stack[levels-1]
            data_stack /= data_stack[levels-1]
            if heights is not None:
                print "WARNING: setting scale and heights does not make sense."
                heights = None
        elif heights is not None:
            data_copy /= data_stack[levels-1]
            data_stack /= data_stack[levels-1]
            for i in np.arange(num_bars):
                data_copy[:,i] *= heights[i]
                data_stack[:,i] *= heights[i]

#------------------------------------------------------------------------------
# ticks

        if yTicks is not "none":
            # it is either a set of ticks or the number of auto ticks to make
            real_ticks = True
            try:
                k = len(yTicks[1])
            except:
                real_ticks = False

            if not real_ticks:
                yTicks = float(yTicks)
                if scale:
                    # make the ticks line up to 100 %
                    y_ticks_at = np.arange(yTicks)/(yTicks-1)
                    y_tick_labels = np.array(["%0.0f"%(i * 100) for i in y_ticks_at])
                else:
                    # space the ticks along the y axis
                    y_ticks_at = np.arange(yTicks)/(yTicks-1)*np.max(data_stack)
                    y_tick_labels = np.array([str(i) for i in y_ticks_at])
                yTicks=(y_ticks_at, y_tick_labels)

#------------------------------------------------------------------------------
# plot

        if edgeCols is None:
            edgeCols = ["none"]*len(cols)

        # bars
        ax.bar(x,
               data_stack[0],
               color=cols[0],alpha=0.7,
               edgecolor=edgeCols[0],
               width=widths,
               linewidth=0.5,
               align='center'
               )
        for i in np.arange(1,levels):
            ax.bar(x,
                   data_copy[i],
                   bottom=data_stack[i-1],
                   color=cols[i],alpha=0.7,
                   edgecolor=edgeCols[i],
                   width=widths,
                   linewidth=0.5,
                   align='center'
                   )

        # borders
        ax.spines["top"].set_visible(False)
        ax.spines["right"].set_visible(False)
        ax.spines["bottom"].set_visible(False)
        ax.spines["left"].set_visible(False)

        # make ticks if necessary
        if yTicks is not "none":
            ax.tick_params(axis='y', which='both', labelsize=8, direction="out")
            ax.yaxis.tick_left()
            plt.yticks(yTicks[0], yTicks[1])
        else:
            plt.yticks([], [])

        if xLabels is not None:
            ax.tick_params(axis='x', which='both', labelsize=8, direction="out")
            ax.xaxis.tick_bottom()
            plt.xticks(x, xLabels, rotation='horizontal')
        else:
            plt.xticks([], [])

        # limits
        ax.set_xlim(-1.*widths[0]/2., np.sum(widths)-0.5)
        ax.set_ylim(0, np.max(data_stack))

        # labels
        if xlabel != '':
            ax.xlabel(xlabel)
        if ylabel != '':
            ax.ylabel(ylabel)

目前的样子

1 个回答

0

好的,感谢大家的建议(还有Bill教我如何有效使用列表推导式)。

我成功地修改了程序,达到了我想要的效果(我觉得是这样)。我在程序的以下部分添加了一个新变量,叫做axspacing:

def stackedBarPlot(self,
                   ax,                                 # axes to plot onto
                   data,                               # data to plot
                   cols,                               # colors for each level
                   xLabels = None,                     # bar specific labels
                   yTicks = 6.,                        # information used for making y ticks ["none", <int> or [[tick_pos1, tick_pos2, ... ],[tick_label_1, tick_label2, ...]]
                   edgeCols=None,                      # colors for edges
                   showFirst=-1,                       # only plot the first <showFirst> bars
                   scale=False,                        # scale bars to same height
                   widths=None,                        # set widths for each bar
                   heights=None,                       # set heights for each bar
                   ylabel='',                          # label for x axis
                   xlabel='',                          # label for y axis
                   xaxlim=None,
                   axspacing=0,
                   ):

.

    if widths is None:
        widths = np.array([1] * num_bars)
        x = np.arange(num_bars)
    else:
        x = [0]
        for i in range(1, len(widths)):
            x.append(x[i-1] + (widths[i-1] + widths[i])/2 + axspacing)

.

    # limits
    #ax.set_xlim(-1.*widths[0]/2., np.sum(widths)-0.5)
    ax.set_ylim(0, np.max(data_stack))
    if xaxlim is None:
        ax.set_xlim(-1.*widths[0]/2., np.sum(widths)-0.5 + num_bars * axspacing)
    else:
        ax.set_xlim(xaxlim)

撰写回答