闭式解

2024-06-08 23:30:45 发布

您现在位置:Python中文网/ 问答频道 /正文

假设我有一个Pandas系列s,它的值和1,并且它的值都大于或等于0。我需要从所有值中减去一个常数,这样新序列的和就等于0.6。问题是,当我减去这个常数时,值永远不会小于零。你知道吗

在数学公式中,假设我有一系列的x,我想找到k

enter image description here

MCVE公司

import pandas as pd
import numpy as np
from string import ascii_uppercase

np.random.seed([3, 141592653])
s = np.power(
    1000, pd.Series(
        np.random.rand(10),
        list(ascii_uppercase[:10])
    )
).pipe(lambda s: s / s.sum())

s

A    0.001352
B    0.163135
C    0.088365
D    0.010904
E    0.007615
F    0.407947
G    0.005856
H    0.198381
I    0.027455
J    0.088989
dtype: float64

总和是1

s.sum()

0.99999999999999989

我试过的

我可以使用Scipy的optimize模块中的Newton方法

from scipy.optimize import newton

def f(k):
    return s.sub(k).clip(0).sum() - .6

找到这个函数的根将为我提供所需的k

initial_guess = .1
k = newton(f, x0=initial_guess)

然后从s中减去这个

new_s = s.sub(k).clip(0)
new_s

A    0.000000
B    0.093772
C    0.019002
D    0.000000
E    0.000000
F    0.338583
G    0.000000
H    0.129017
I    0.000000
J    0.019626
dtype: float64

新的总数是

new_s.sum()

0.60000000000000009

问题

我们能不借助解算器找到k吗?你知道吗


Tags: fromimportnewasnpascii常数newton
3条回答

一个精确解,只需要一个排序,然后在O(n)中(好吧,更少:我们只需要与将变为零的值的数量一样多的循环):

我们尽可能将最小值变为零,然后在其余值之间共享剩余的多余值:

l = [0.001352, 0.163135, 0.088365, 0.010904, 0.007615, 0.407947,
     0.005856, 0.198381, 0.027455, 0.088989]

initial_sum = sum(l)
target_sum = 0.6

# number of values not yet turned to zero
non_zero = len(l)
# already substracted by substracting the constant where possible
substracted = 0

# what we want to substract to each value
constant = 0

for v in sorted(l):
    if initial_sum - substracted - non_zero * (v - constant) >= target_sum:
        substracted += non_zero * (v - constant)
        constant = v
        non_zero -= 1
    else:
        constant += (initial_sum - substracted - target_sum) / non_zero
        break

l = [v - constant if v > constant else 0 for v in l]

print(l)
print(sum(l))
# [0, 0.09377160000000001, 0.019001600000000007, 0, 0, 0.3385836, 0, 0.1290176, 0, 0.019625600000000007]
# 0.6

我没料到newton会占上风。但在大型阵列上,确实如此。你知道吗

numba.njit

Thierry'sAnswer启发
在具有numbas jit的排序数组上使用循环

import numpy as np
from numba import njit


@njit
def find_k_numba(a, t):
    a = np.sort(a)
    m = len(a)
    s = a.sum()
    to_remove = s - t

    if to_remove <= 0:
        k = 0
    else:
        for i, x in enumerate(a):
            k = to_remove / (m - i)
            if k < x:
                break
            else:
                to_remove -= x
    return k

numpy

灵感来自Paul'sAnswer
抬重物的小家伙。你知道吗

import numpy as np

def find_k_numpy(a, t):
    a = np.sort(a)
    m = len(a)
    s = a.sum()
    to_remove = s - t

    if to_remove <= 0:
        k = 0
    else:
        c = a.cumsum()
        n = np.arange(m)[::-1]
        b = n * a + c
        i = np.searchsorted(b, to_remove)
        k = a[i] + (to_remove - b[i]) / (m - i)
    return k

scipy.optimize.newton

我的牛顿法

import numpy as np
from scipy.optimize import newton


def find_k_newton(a, t):
    s = a.sum()
    if s <= t:
        k = 0
    else:
        def f(k_):
            return np.clip(a - k_, 0, a.max()).sum() - t

        k = newton(f, (s - t) / len(a))

    return k

计时赛

import pandas as pd
from timeit import timeit


res = pd.DataFrame(
    np.nan, [10, 30, 100, 300, 1000, 3000, 10000, 30000],
    'find_k_newton find_k_numpy find_k_numba'.split()
)

for i in res.index:
    a = np.random.rand(i)
    t = a.sum() * .6
    for j in res.columns:
        stmt = f'{j}(a, t)'
        setp = f'from __main__ import {j}, a, t'
        res.at[i, j] = timeit(stmt, setp, number=200)

结果

res.plot(loglog=True)

enter image description here

res.div(res.min(1), 0)

       find_k_newton  find_k_numpy  find_k_numba
10         57.265421     17.552150      1.000000
30         29.221947      9.420263      1.000000
100        16.920463      5.294890      1.000000
300        10.761341      3.037060      1.000000
1000        1.455159      1.033066      1.000000
3000        1.000000      2.076484      2.550152
10000       1.000000      3.798906      4.421955
30000       1.000000      5.551422      6.784594

更新:三种不同的实现-有趣的是,最不复杂的可扩展性最好。你知道吗

import numpy as np

def f_sort(A, target=0.6):
    B = np.sort(A)
    C = np.cumsum(np.r_[B[0], np.diff(B)] * np.arange(N, 0, -1))
    idx = np.searchsorted(C, 1 - target)
    return B[idx] + (1 - target - C[idx]) / (N-idx)

def f_partition(A, target=0.6):
    target, l = 1 - target, len(A)
    while len(A) > 1:
        m = len(A) // 2
        A = np.partition(A, m-1)
        ls = A[:m].sum()
        if ls + A[m-1] * (l-m) > target:
            A = A[:m]
        else:
            l -= m
            target -= ls
            A = A[m:]
    return target / l            

def f_direct(A, target=0.6):
    target = 1 - target
    while True:
        gt = A > target / len(A)
        if np.all(gt):
            return target / len(A)
        target -= A[~gt].sum()
        A = A[gt]

N = 10
A = np.random.random(N)
A /= A.sum()

print(f_sort(A), np.clip(A-f_sort(A), 0, None).sum())
print(f_partition(A), np.clip(A-f_partition(A), 0, None).sum())
print(f_direct(A), np.clip(A-f_direct(A), 0, None).sum())

from timeit import timeit
kwds = dict(globals=globals(), number=1000)

N = 100000
A = np.random.random(N)
A /= A.sum()

print(timeit('f_sort(A)', **kwds))
print(timeit('f_partition(A)', **kwds))
print(timeit('f_direct(A)', **kwds))

运行示例:

0.04813686999999732 0.5999999999999999
0.048136869999997306 0.6000000000000001
0.048136869999997306 0.6000000000000001
8.38109541599988
2.1064437470049597
1.2743922089866828

相关问题 更多 >