如何将pandas groupby结果转换为合并词典？

import pandas as pd data = {'Country': ['United States', 'United States', 'United States', 'India', 'India', 'Australia', 'Australia', 'Australia', 'Australia'], 'State': ['New York', 'California', 'California', 'Karnataka', 'Punjab', 'Victoria', 'Victoria', 'Victoria', 'Victoria'], 'Team': ['Globetrotters', 'Lakers', 'Lakers', 'Beasts', 'Steelers', 'United', 'United', 'United', 'United'], 'Player': ['Dudley', 'James', 'Davis', 'Kumar', 'Singh', 'Smith', 'Smith', 'Smith', 'Jones'], 'Status': ['Normal', 'NBA Legend', 'NBA', 'Normal', 'Normal', 'Normal', 'Normal', 'NBA', 'Normal']} df = pd.DataFrame(data) columns = ['Country', 'State', 'Team', 'Player', 'Status'] groupings = df.groupby(columns).size() print(groupings)

Country State Team Player Status Australia Victoria United Jones Normal 1 Smith NBA 1 Normal 2 India Karnataka Beasts Kumar Normal 1 Punjab Steelers Singh Normal 1 United States California Lakers Davis NBA 1 James NBA Legend 1 New York Globetrotters Dudley Normal 1

[{'Country': 'Australia', 'State': 'Victoria', 'Team': 'United', 'Jones': 1, 'Smith': 3, 'Normal': 3, 'NBA': 1, 'Total': 4}, {'Country': 'India', 'State': 'Karnataka', 'Team': 'Beasts', 'Kumar': 1, 'Normal': 1, 'Total': 1}, {'Country': 'India', 'State': 'Punjab', 'Team': 'Stealers', 'Singh': 1, 'Normal': 1, 'Total': 1}, {'Country': 'United States', 'State': 'California', 'Team': 'Lakers', 'Davis': 1, 'James': 1, 'NBA': 1,'NBA Legend': 1, 'Total': 2}, {'Country': 'United States', 'State': 'New York', 'Team': 'Globetrotters', 'Dudley': 1, 'Normal': 1, 'Total': 1}]

2条回答

网友

1楼 · 编辑于 2024-04-26 06:11:41

尝试使用reindex、repeat、reset_index、get_dummies、groupby和to_dict添加此代码：

groupings = groupings.reset_index()
groupings.columns = [i if i != 0 else 'Total' for i in groupings.columns]
groupings = groupings.reindex(groupings.index.repeat(groupings['Total'])).reset_index(drop=True)
groupings['Total'] = 1
print(pd.get_dummies(groupings, columns=['Player', 'Status'], prefix='', prefix_sep='').groupby(['Country', 'State', 'Team'], as_index=False).sum().to_dict('records'))

输出：

[{'Country': 'Australia', 'State': 'Victoria', 'Team': 'United', 'Total': 4, 'Davis': 0, 'Dudley': 0, 'James': 0, 'Jones': 1, 'Kumar': 0, 'Singh': 0, 'Smith': 3, 'NBA': 1, 'NBA Legend': 0, 'Normal': 3}, {'Country': 'India', 'State': 'Karnataka', 'Team': 'Beasts', 'Total': 1, 'Davis': 0, 'Dudley': 0, 'James': 0, 'Jones': 0, 'Kumar': 1, 'Singh': 0, 'Smith': 0, 'NBA': 0, 'NBA Legend': 0, 'Normal': 1}, {'Country': 'India', 'State': 'Punjab', 'Team': 'Steelers', 'Total': 1, 'Davis': 0, 'Dudley': 0, 'James': 0, 'Jones': 0, 'Kumar': 0, 'Singh': 1, 'Smith': 0, 'NBA': 0, 'NBA Legend': 0, 'Normal': 1}, {'Country': 'United States', 'State': 'California', 'Team': 'Lakers', 'Total': 2, 'Davis': 1, 'Dudley': 0, 'James': 1, 'Jones': 0, 'Kumar': 0, 'Singh': 0, 'Smith': 0, 'NBA': 1, 'NBA Legend': 1, 'Normal': 0}, {'Country': 'United States', 'State': 'New York', 'Team': 'Globetrotters', 'Total': 1, 'Davis': 0, 'Dudley': 1, 'James': 0, 'Jones': 0, 'Kumar': 0, 'Singh': 0, 'Smith': 0, 'NBA': 0, 'NBA Legend': 0, 'Normal': 1}]

网友

2楼 · 编辑于 2024-04-26 06:11:41

首先，我稍微改变一下您的分组：

from collections import Counter
import pandas as pd

def list_counts(l):
    return dict(Counter(l))

df = pd.DataFrame(data)

groupings = df.groupby(
    [
        "Country",
        "State",
        "Team",
    ],
    as_index=False,
).agg(Total=("Player", "size"), Players=("Player", list), Status=("Status", list))

然后我使用计数器计算玩家和状态：

groupings["Players"] = groupings["Players"].apply(list_counts)
groupings["Status"] = groupings["Status"].apply(list_counts)

然后，我将数据帧转换为一个dict，并从dict中删除播放器和状态级别。同时，我将Total移动到dict的末尾

res = groupings.to_dict(orient="records")

for r in res:
    players = r["Players"]
    status = r["Status"]
    total = r["Total"]
    r.pop("Total")
    r.pop("Players")
    r.pop("Status")
    for i in players:
        r[i] = players[i]
    for i in status:
        r[i] = status[i]
    r["Total"] = total

输出：

[{'Country': 'Australia',
  'State': 'Victoria',
  'Team': 'United',
  'Smith': 2,
  'Jones': 1,
  'Normal': 3,
  'Total': 3},
 {'Country': 'India',
  'State': 'Karnataka',
  'Team': 'Beasts',
  'Kumar': 1,
  'Normal': 1,
  'Total': 1},
 {'Country': 'India',
  'State': 'Punjab',
  'Team': 'Steelers',
  'Singh': 1,
  'Normal': 1,
  'Total': 1},
 {'Country': 'United States',
  'State': 'California',
  'Team': 'Lakers',
  'James': 1,
  'Davis': 1,
  'NBA Legend': 1,
  'NBA': 1,
  'Total': 2},
 {'Country': 'United States',
  'State': 'New York',
  'Team': 'Globetrotters',
  'Dudley': 1,
  'Normal': 1,
  'Total': 1}]

替代解决方案-更简洁但原理相同。按要求输出，但结构略有不同：

from collections import Counter


def list_counts(l):
    return dict(Counter(l))


df = pd.DataFrame(data)
res = (
    df.groupby(
        [
            "Country",
            "State",
            "Team",
        ],
        as_index=False,
    )
    .agg(
        Total=("Player", "size"),
        Players=("Player", list_counts),
        Status=("Status", list_counts),
    )
    .to_dict(orient="records")
)

相关问题更多 >

编程相关推荐

热门问题

热门文章