Python迭代每个子行组并应用函数

2024-04-26 11:18:03 发布

您现在位置:Python中文网/ 问答频道 /正文


Tags: python
1条回答
网友
1楼 · 发布于 2024-04-26 11:18:03

假设您有三个列对,那么您可以应用以下内容,对于更多的列对,则相应地调整脚本。我想给你一个解决问题的方法,这可能不是最有效的方法,但它提供了一个起点。你知道吗

import pandas as pd
import numpy as np
ls = [
      ['A', 1, 'A1', 9],
      ['A', 1, 'A1', 6],
      ['A', 1, 'A1', 3],
      ['A', 2, 'A2', 7],
      ['A', 3, 'A3', 9],
      ['B', 1, 'B1', 7],
      ['B', 1, 'B1', 3],
      ['B', 2, 'B2', 7],
      ['B', 2, 'B2', 8],
      ['C', 1, 'C1', 9],

      ]

#convert to dataframe
df = pd.DataFrame(ls, columns = ["Main_Group", "Sub_Group", "Concat_GRP_Name", "X_1"]) 

#get count and sum of concatenated groups
df_sum = df.groupby('Concat_GRP_Name')['X_1'].agg(['sum','count']).reset_index()

#print in permutations formula to calculate different permutation combos   
import itertools as it
perms = it.permutations(df_sum.Concat_GRP_Name)


def combute_combinations(df, colname, main_group_series):
    l = []
    import itertools as it
    perms = it.permutations(df[colname])

    # Provides sorted list of unique values in the Series
    unique_groups = np.unique(main_group_series)

    for perm_pairs in perms:
        #take in only the first three pairs of permuations and make sure
        #the first column starts with A, secon with B, and third with C
        if all([main_group in perm_pairs[ind] for ind, main_group in enumerate(unique_groups)]):
            l.append([perm_pairs[ind] for ind in range(unique_groups.shape[0])])
    return l

t = combute_combinations(df_sum, 'Concat_GRP_Name', df['Main_Group'])

#convert to dataframe and drop duplicate pairs
df2 = pd.DataFrame(t, columns = ["Item1", 'Item2', 'Item3']) .drop_duplicates()

#do a join between the dataframe that contains the sums and counts for the concat_grp_name to bring in the counts for 
#each column from df2, since there are three columns: we must apply this three times
merged = df2.merge(df_sum[['sum', 'count', 'Concat_GRP_Name']], left_on=['Item1'], right_on=['Concat_GRP_Name'], how='inner')\
.drop(['Concat_GRP_Name'], axis = 1)\
.rename({'sum':'item1_sum'}, axis=1)\
.rename({'count':'item1_count'}, axis=1)

merged2 = merged.merge(df_sum[['sum', 'count', 'Concat_GRP_Name']], left_on=['Item2'], right_on=['Concat_GRP_Name'], how='inner')\
.drop(['Concat_GRP_Name'], axis = 1)\
.rename({'sum':'item2_sum'}, axis=1)\
.rename({'count':'item2_count'}, axis=1)

merged3 = merged2.merge(df_sum[['sum', 'count', 'Concat_GRP_Name']], left_on=['Item3'], right_on=['Concat_GRP_Name'], how='inner')\
.drop(['Concat_GRP_Name'], axis = 1)\
.rename({'sum':'item3_sum'}, axis=1)\
.rename({'count':'item3_count'}, axis=1)

#get the sum of all of the item_sum cols
merged3['sums']= merged3[['item3_sum', 'item2_sum', 'item1_sum']].sum(axis = 1)

#get sum of all the item_count cols
merged3['counts']= merged3[['item3_count', 'item2_count', 'item1_count']].sum(axis = 1)

#find the average
merged3['LOOPED_AVG'] = merged3['sums'] / merged3['counts']

#remove irrelavent fields
merged3 = merged3.drop(['item3_count', 'item2_count', 'item1_count', 'item3_sum', 'item2_sum', 'item1_sum', 'counts', 'sums' ], axis = 1)

相关问题 更多 >