使用循环在Polars中创建带有条件语句的列
如何使用for循环来简化这个函数,特别是针对超级促销(比如电视、YouTube、广告横幅)在when-then表达式中的应用?
def add_promo_means(data: pl.DataFrame) -> pl.DataFrame:
return data.with_columns(
[
pl.when(pl.col("is_TV_superpromo") == 1)
.then(pl.col(f"mean_promo_quantity_with_TV_0_{week_offset}_promo_weeks"))
.when(pl.col("is_YouTube_superpromo") == 1)
.then(pl.col(f"mean_promo_quantity_with_YouTube_0_{week_offset}_promo_weeks"))
.when(pl.col("is_Banners_superpromo") == 1)
.then(pl.col(f"mean_promo_quantity_with_Banners_0_{week_offset}_promo_weeks"))
.otherwise(pl.col(f"mean_promo_quantity_without_superpromo_0_{week_offset}_promo_weeks"))
.alias(f"mean_promo_sales_0_{week_offset}_promo_weeks")
for week_offset in range(1, 4)
]
)
我尝试过类似的做法
def add_promo_means(data: pl.DataFrame) -> pl.DataFrame:
return data.with_columns(
[
pl.when(pl.col("is_TV_super_promo") == 1)
.then(pl.col(f"promo_sales_from_{superpromo}"))
.otherwise(pl.col(f"promo_sales"))
.alias(f"mean_promo_sales")
for superpromo in ['TV', 'YouTube', 'Banners' ]
]
)
但是没有成功
2 个回答
2
如果你有很多可能的渠道,比如 Banners
、YouTube
、TV
等等,下面的写法会更简洁。
def mean_promo_expr(week_offset: int) -> pl.Expr:
expr = pl.col(f"mean_promo_quantity_without_superpromo_0_{week_offset}_promo_weeks")
for channel in ["Banners", "YouTube", "TV"]:
expr = (
pl.when(pl.col(f"is_{channel}_superpromo") == 1)
.then(f"mean_promo_quantity_with_{channel}_0_{week_offset}_promo_weeks")
.otherwise(expr)
)
return expr
你可以这样使用它。
def add_promo_means(data: pl.DataFrame) -> pl.DataFrame:
return data.with_columns(
mean_promo_expr(week_offset)
.alias(f"mean_promo_sales_0_{week_offset}_promo_weeks")
for week_offset in range(1, 4)
)
另外,你也可以使用 pl.coalesce
。
(
df
.with_columns(
pl.coalesce(
*[
pl.when(pl.col(f"is_{channel}_superpromo") == 1)
.then(f"mean_promo_quantity_with_{channel}_0_{week_offset}_promo_weeks")
for channel in ["A", "B", "C"]
],
f"mean_promo_quantity_with_{channel}_0_{week_offset}_promo_weeks"
).alias(f"mean_promo_sales_0_{week_offset}_promo_weeks")
for week_offset in range(1, 4)
)
)
2
你可以使用 functools.reduce
来创建一种函数式编程风格的表达式:
from functools import reduce
promos = ['TV', 'YouTube', 'Banners']
columns = (
(
pl.col(f"is_{promo}_superpromo"),
pl.col(f"mean_promo_quantity_with_{promo}_0_{week_offset}_promo_weeks")
)
for promo in promos
)
c = [(
reduce(lambda x, y: x.when(y[0] == 1).then(y[1]), columns, pl.when(False).then(None))
.otherwise(pl.col(
f"mean_promo_quantity_without_superpromo_0_{week_offset}_promo_weeks"
))
.alias(f"mean_promo_sales_0_{week_offset}_promo_weeks")
) for week_offset in range(1, 4)]