
2024-04-20 01:41:10 发布

您现在位置:Python中文网/ 问答频道 /正文






import pandas as pd
import numpy as np
# Sample roster: one row per person (column A) with three categorical
# attributes (columns B, C and D).
test = pd.DataFrame(
    {
        'A': ['alice', 'bob', 'george', 'michael', 'john', 'peter', 'paul', 'mary'],
        'B': ['one', 'one', 'two', 'three', 'two', 'two', 'one', 'three'],
        'C': ['dog', 'cat', 'dog', 'cat', 'dog', 'cat', 'dog', 'cat'],
        'D': ['boy', 'girl', 'boy', 'girl', 'boy', 'girl', 'boy', 'girl'],
    }
)

# Module-level accumulators; gr1 and gr1_names are filled by test_check1.
gr1 = []
gr2 = []
gr3 = []
gr1_names = []
def test_check1(x):
    """Greedily collect rows of x whose A/B/C/D values are all new to gr1.

    Rows are visited in order. A row is accepted only when none of its
    'A', 'B', 'C' or 'D' values already appears in the module-level list
    gr1; an accepted row has all of its values appended to gr1 and its
    'A' value appended to gr1_names. Nothing is returned.
    """
    checked_columns = ('A', 'B', 'C', 'D')
    for _, record in x.iterrows():
        # Skip the row as soon as one of its values has been seen before.
        if any(record[column] in gr1 for column in checked_columns):
            continue
        gr1.extend(record)  # remember every value now taken by this group
        gr1_names.append(record['A'])  # remember who was accepted




以下是一些我认为对我今天的工作很有帮助的链接: How to get all possible combinations of a list’s elements?

Get unique combinations of elements from a python list

Randomly reassign participants to groups such that participants originally from same group don't end up in same group(这个感觉很接近,但我不知道如何把它改成我需要的形式)

How to generate lists from a specification of element combinations


group id    foo bar faz
       1    3   2   5
       2    3   2   5
       3    3   1   5
       4    4   1   5

Tags: of 数据 in 分类 not one cat row
1楼 · 发布于 2024-04-20 01:41:10


import pandas as pd
import random
import math
import itertools

def n_per_group(n, n_groups):
    """Return the size of each group when splitting n people into n_groups.

    The sizes differ by at most one and always sum to n. The smaller
    groups come first and the groups that absorb the remainder come last.

    Args:
        n: total number of people to distribute.
        n_groups: number of groups to create.

    Returns:
        A list of n_groups integers, one size per group.

    Raises:
        ZeroDivisionError: if n_groups is 0.
    """
    # The remainder must be taken modulo the number of groups (not modulo
    # the group size); exactly `extra` groups receive one additional person.
    base_size, extra = divmod(n, n_groups)
    n_small = n_groups - extra
    return [base_size if k < n_small else base_size + 1 for k in range(n_groups)]

def assign_groups(n, n_groups):
    """Build the initial list of group labels for the people.

    Group k (labels run over range(n_groups)) is repeated as many times as
    the k-th size returned by n_per_group, so the labels come out in
    contiguous blocks. No shuffling happens in this function.
    """
    sizes = n_per_group(n, n_groups)
    groups = []
    for size, label in zip(sizes, range(n_groups)):
        groups.extend([label] * size)
    return groups

def group_diff(df, g1, g2):
    """Score how different groups g1 and g2 are.

    Every column except 'A' and 'group' is summed within each group; the
    score is the sum of the absolute per-column differences of those totals.
    """
    # Boolean mask selecting the columns that take part in the score.
    scored_columns = ~df.columns.isin(('A', 'group'))
    totals_g1 = df.loc[df['group'] == g1, scored_columns].sum()
    totals_g2 = df.loc[df['group'] == g2, scored_columns].sum()
    return (totals_g1 - totals_g2).abs().sum()

def swap_groups(df, row1, row2):
    """Exchange the 'group' values of rows row1 and row2.

    The frame is modified in place and the same object is returned.
    """
    # Read both labels before writing either one.
    group_of_first, group_of_second = df.loc[row1, 'group'], df.loc[row2, 'group']
    df.loc[row1, 'group'] = group_of_second
    df.loc[row2, 'group'] = group_of_first
    return df

def row_to_group(df, row):
    """Look up the 'group' value stored for the row labelled `row`."""
    group_label = df.loc[row, 'group']
    return group_label

def swap_and_score(df, row1, row2):
    """Swap the groups of two rows only if that brings their groups closer.

    The between-group score of the two rows' groups is computed before and
    after exchanging the rows' group labels. If the swap strictly lowers
    the score, the swapped frame is kept; otherwise the swap is undone.

    Args:
        df: frame with a 'group' column plus the columns scored by
            group_diff. It is modified in place by swap_groups.
        row1: index label of the first row.
        row2: index label of the second row.

    Returns:
        The same frame, with the two rows swapped or left as they were.
    """
    g1 = row_to_group(df, row1)
    g2 = row_to_group(df, row2)
    score_before = group_diff(df, g1, g2)
    df = swap_groups(df, row1, row2)
    score_after = group_diff(df, g1, g2)
    if score_before > score_after:
        return df
    # No improvement: swapping the same two rows again restores the
    # original assignment.
    return swap_groups(df, row1, row2)

def pairwise_scores(df):
    """Return the group_diff score for every unordered pair of groups.

    Group labels are taken to be 0 .. n_groups - 1, where n_groups is read
    from module scope. Pairs are visited in the order (0, 1), (0, 2), ...,
    (n_groups - 2, n_groups - 1).

    Args:
        df: frame with a 'group' column, as expected by group_diff.

    Returns:
        A list with one score per pair (i, j) with i < j.
    """
    return [
        group_diff(df, i, j)
        for i in range(n_groups)
        for j in range(i + 1, n_groups)
    ]

# Driver script: one-hot encode the categorical columns, hand out initial
# groups, then repeatedly try random pairwise swaps to balance the groups.
# NOTE(review): `df`, `n` and `n_groups` are not defined in the visible part
# of this file; presumably df is the input frame, n its number of rows and
# n_groups the desired number of groups -- confirm before running.

# one hot encode and copy
df_dum = pd.get_dummies(df, columns=['B', 'C', 'D']).copy(deep=True)

#drop extra cols as needed

# Initial assignment: one group label per row, in row order.
groups = assign_groups(n, n_groups)
df_dum['group'] = groups

# iterate
for _ in range(5000):
    # random.choices samples with replacement, so both picks may be the same
    # row. The picks are used as .loc labels, which assumes the index labels
    # are 0 .. n-1 -- TODO confirm.
    rows = random.choices(list(range(n)),k=2)
    df_dum = swap_and_score(df_dum,rows[0],rows[1])


# Copy the final assignment back next to the initial one for comparison.
df['group'] = df_dum.group
df['orig_groups'] = groups

# NOTE(review): the loop below walks over every pair (i, j) of groups, but the
# body always selects groups 3 and 0 and never uses a or b afterwards; it
# looks like i and j were intended and an output line is missing -- confirm.
for i in range(n_groups):
        for j in range(i+1,n_groups):
            a = df_dum.loc[df_dum['group']==3, ~df_dum.columns.isin(('A','group'))].sum()
            b = df_dum.loc[df_dum['group']==0, ~df_dum.columns.isin(('A','group'))].sum()


相关问题 更多 >