对 DataFrame 的列进行矢量化的分段(阶梯)函数计算

from bisect import bisect

import numpy as np
import pandas as pd

# Quality labels ordered from the most negative deviation to the most positive.
quality_levels = ['WayTooLow', 'TooLow', 'OK', 'TooHigh', 'WayTooHigh']

# Note: every band boundary is pushed outwards by a small epsilon so that a
# difference lying exactly on a boundary is assigned the 'better' score.
eps = 0.000001


def get_quality(measured_value, real_value):
    """Classify how far a measurement deviates from the real value.

    The tolerated deviation depends on the magnitude of ``real_value``:
    fixed tolerances are used up to 10, 20 and 50, and tolerances
    proportional to ``real_value`` (25 % / 50 %) above that.

    Args:
        measured_value: The measured number.
        real_value: The reference (ground truth) number.

    Returns:
        One of the strings in ``quality_levels``.
    """
    diff = measured_value - real_value

    # (inner, outer) tolerance for the range that real_value falls into.
    if real_value <= 10.0:
        lo, hi = 2.0, 4.0
    elif real_value <= 20.0:
        lo, hi = 6.0, 14.0
    elif real_value <= 50.0:
        lo, hi = 20.0, 45.0
    else:
        lo, hi = 0.25 * real_value, 0.5 * real_value

    # bisect returns how many thresholds are <= diff, which is exactly the
    # index of the matching entry in quality_levels.
    i = bisect([-hi - eps, -lo - eps, lo + eps, hi + eps], diff)
    return quality_levels[i]


if __name__ == "__main__":
    N = 100000
    df = pd.DataFrame({'ground_truth': np.random.randint(0, 100, N),
                       'measured': np.random.randint(0, 100, N)})
    df['quality'] = df.apply(
        lambda row: get_quality(row['measured'], row['ground_truth']),
        axis=1,
    )
    print(df.head())
    # The column created above is 'quality' (there is no 'quality2').
    print(df.quality.value_counts())

    # Example output (the input is random, so numbers vary between runs):
    #    ground_truth  measured     quality
    # 0            51         1   WayTooLow
    # 1             7        25  WayTooHigh
    # 2            38        95  WayTooHigh
    # 3            76        32   WayTooLow
    # 4             0        18  WayTooHigh
    # OK            30035
    # WayTooHigh    24257
    # WayTooLow     18998
    # TooLow        14593
    # TooHigh       12117

1条回答

网友

1楼 · 发布于 2024-05-14 06:29:57

可以使用 np.select 以矢量化的方式实现:

import numpy as np

# Quality labels ordered from the most negative deviation to the most positive.
quality_levels = ['WayTooLow', 'TooLow', 'OK', 'TooHigh', 'WayTooHigh']


def get_quality_vectorized(df, eps=0.000001):
    """Add a 'quality' column to ``df`` using vectorized comparisons.

    Args:
        df: DataFrame with numeric 'ground_truth' and 'measured' columns.
            It is modified in place.
        eps: Small margin by which every band boundary is pushed outwards,
            so that a difference lying exactly on a boundary is assigned
            the better of the two neighbouring levels.

    Returns:
        The same ``df`` object, now carrying the 'quality' column.
    """
    # Pick the inner (lo) and outer (hi) tolerance per row, matching the
    # 4 sets of boundaries. np.select takes the first matching condition,
    # so the trailing True acts as the "ground_truth > 50" case.
    gt = df['ground_truth']
    conds = [gt <= 10, gt <= 20, gt <= 50, True]
    lo = np.select(conds, [2, 6, 20, 0.25 * gt])
    hi = np.select(conds, [4, 14, 45, 0.5 * gt])

    # Four ascending thresholds separate the 5 quality levels.
    diff = df['measured'] - df['ground_truth']
    quality_conds = [
        diff < -hi - eps,
        diff < -lo - eps,
        diff < lo + eps,
        diff < hi + eps,
    ]
    # The top level is supplied as a string default instead of a catch-all
    # True condition: np.select's implicit default of 0 has to share a dtype
    # with the string choices, which newer NumPy releases reject.
    df['quality'] = np.select(
        quality_conds, quality_levels[:-1], default=quality_levels[-1]
    )
    return df

相关问题更多 >

编程相关推荐

热门问题

热门文章