如何编写与以下R代码相当的Python/Pandas代码?

2024-06-07 05:44:35 发布

您现在位置:Python中文网/ 问答频道 /正文

在一个项目中,我正尝试将以下R代码转换为Python,但在为R中使用的summarize和mutate命令编写等效代码时遇到了困难:

  # Label each row "no" when first_coup > DAY and "yes" otherwise, then
  # collapse the data to one row per (household_key, WEEK_NO, coup_start).
  users <- users %>% 
  mutate(coup_start=ifelse(first_coup>DAY,"no","yes")) %>%
  group_by(household_key,WEEK_NO,coup_start) %>% 
  summarize(weekly_spend=sum(SALES_VALUE),
            dummy=1) # constant 1 on every summarised row; accumulated below to number the rows

# Split the summarised rows on the coup_start label.
users_before <- filter(users,coup_start=="no")
users_after <- filter(users,coup_start=="yes")

# Per household: running row count minus the household's largest count, so
# trip is 0 on the household's last "no" row and negative on the earlier ones.
# The helper columns dummy and cum_dummy are dropped afterwards.
users_before <- users_before %>%
  group_by(household_key) %>%
  mutate(cum_dummy=cumsum(dummy),
         trip=cum_dummy-max(cum_dummy)) %>%
  select(-dummy,-cum_dummy)

# Per household: running row count starting at 0 on the first "yes" row.
users_after <- users_after %>%
  group_by(household_key) %>%
  mutate(trip=cumsum(dummy)-1) %>%
  select(-dummy)

我尝试了以下方法:

# Join the transactions to the coupon users on household_key.
users = transaction_data.merge(coupon_users,on='household_key')
# NOTE(review): the R code labels first_coup > DAY as "no" and treats "no" as
# the "before" group; here the same condition yields 1 while users_before
# selects 0 below, so the two groups look swapped relative to the R code.
users['coup_start']=  np.where((users['first_coup'] > users['DAY_x']), 1, 0)
# NOTE(review): dummy is added to every merged row; the R code adds it only
# after group_by/summarize, and that aggregation step has no counterpart here.
users['dummy'] = 1

# Split on the coup_start flag.
# NOTE(review): both frames are slices of users; the column assignments below
# are presumably what trigger the reported copy-of-a-slice warning.
users_before = users[users['coup_start']==0]
users_after = users[users['coup_start']==1]

# Per household: running count of dummy, then shifted by the household maximum.
users_before['cum_dummy'] = users_before.groupby(['household_key'])['dummy'].cumsum()
users_before['trip'] = users_before.groupby(['household_key'])['cum_dummy'].transform(lambda x: x - x.max())

# NOTE(review): bare cumsum is an R function, not a name defined in this
# Python snippet; the pandas spelling is the Series method x.cumsum().
users_after['trip'] = users_after.groupby(['household_key'])['dummy'].transform(lambda x: cumsum(x) - 1)

但是我遇到了多个问题:transform(lambda x: cumsum(x) - 1)抛出了一个错误。在此之前的两次groupby和transform尝试显示以下警告:试图在DataFrame切片的副本上设置值。请尝试改用 .loc[row_indexer,col_indexer] = value。请参阅文档中的注意事项:https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy  """启动IPython内核的入口点

我还觉得最初没有正确插入dummy=1。如何用Python实现R中的mutate/summarize函数?

编辑

我已尝试使用apply函数执行累积和运算

# Helper applied per household group: cumulative sum of the group minus 1.
# NOTE(review): bare cumsum is not defined in this snippet, which matches the
# reported NameError; the pandas spelling is the Series method x.cumsum().
def thisop(x): return(cumsum(x)-1 )
users_after['trip']=users_after.groupby(['household_key'])['dummy'].apply(thisop)

但错误 NameError: name 'cumsum' is not defined 仍然存在。


Tags: key, by, group, coup, start, users, dummy, groupby
2条回答

在python中使用相同的语法如何:

from datar.all import f, mutate, if_else, summarize, filter, group_by, select, sum, cumsum, max

# Label each row "no" when first_coup > DAY and "yes" otherwise, then
# summarise to one row per (household_key, WEEK_NO, coup_start).
users = users >> \
  mutate(coup_start=if_else(f.first_coup>f.DAY,"no","yes")) >> \
  group_by(f.household_key,f.WEEK_NO,f.coup_start) >> \
  summarize(weekly_spend=sum(f.SALES_VALUE),
            dummy=1) # constant 1 on every summarised row; accumulated below

# Split the summarised rows on the coup_start label.
users_before = filter(users,f.coup_start=="no")
users_after = filter(users,f.coup_start=="yes")

# Per household: running count of dummy minus the household's largest count,
# then drop the helper columns dummy and cum_dummy.
users_before = users_before >> \
  group_by(f.household_key) >> \
  mutate(cum_dummy=cumsum(f.dummy),
         trip=f.cum_dummy-max(f.cum_dummy)) >> \
  select(~f.dummy,~f.cum_dummy)

# Per household: running count of dummy minus 1, then drop dummy.
users_after = users_after >> \
  group_by(f.household_key) >> \
  mutate(trip=cumsum(f.dummy)-1) >> \
  select(~f.dummy)

我是datar包的作者。如果您有任何问题,请随时提交issue。

您在把R代码改写为Python时更改了一些变量名和取值(例如DAY变成了DAY_x)。
以下代码沿用R代码中的变量名和取值:

# Label each row "no" when first_coup > DAY and "yes" otherwise, then
# aggregate to one row per (household_key, WEEK_NO, coup_start) holding the
# summed SALES_VALUE plus a constant dummy column used for counting below.
users = (
    # np.where takes the condition and both labels as its three arguments.
    users.assign(coup_start=np.where(users.first_coup > users.DAY, 'no', 'yes'))
    # as_index=False keeps the grouping keys as ordinary columns, like the
    # table returned by dplyr's summarize(); groupby also sorts by the keys,
    # so rows within a household come out ordered by WEEK_NO.
    .groupby(['household_key', 'WEEK_NO', 'coup_start'], as_index=False)
    # Named aggregation: output column = (input column name, aggregation).
    .agg(weekly_spend=('SALES_VALUE', 'sum'))
    .assign(dummy=1)
)

# Split the summarised rows on the coup_start label.
users_before = users.query('coup_start == "no"')
users_after = users.query('coup_start == "yes"')

# Per household: running row count minus the household's largest count, so
# trip is 0 on the household's last "no" row and negative on the earlier ones.
users_before = (
    users_before.assign(
        trip=users_before.groupby('household_key').dummy
        .transform(lambda x: x.cumsum() - x.cumsum().max())
    )
    .drop(columns='dummy')
)

# Per household: running row count starting at 0 on the first "yes" row.
# GroupBy.cumsum() already returns a Series aligned with the original rows,
# so no transform() call is needed.
users_after = (
    users_after.assign(
        trip=users_after.groupby('household_key').dummy.cumsum() - 1
    )
    .drop(columns='dummy')
)

相关问题 更多 >