基于阵列输入的矢量化算法

from scipy import sparse
from scipy.sparse import coo_matrix


def labels_to_sparse(input_):
    """Build a single-row COO matrix holding the scores of the given labels.

    Args:
        input_: A 3-tuple ``(all_, labels_, scores_)`` where ``all_`` is the
            full sequence of known labels (one matrix column per entry),
            ``labels_`` are the labels present in this row and ``scores_``
            are their scores, aligned with ``labels_`` by position.
            Labels must be hashable.

    Returns:
        A ``coo_matrix`` of shape ``(1, len(all_))``. Column ``j`` holds the
        score of ``all_[j]`` if that label occurs in ``labels_`` and 0
        otherwise; every column is stored explicitly.

    Raises:
        ValueError: If a label in ``labels_`` does not occur in ``all_``.
        IndexError: If ``scores_`` is shorter than ``labels_``.
    """
    all_, labels_, scores_ = input_
    width = len(all_)

    # Map each label to its column once, instead of a linear ``index`` scan
    # per label. ``setdefault`` keeps the first occurrence, which is what
    # ``list.index`` would return for a duplicated label.
    column_of = {}
    for col, label in enumerate(all_):
        column_of.setdefault(label, col)

    vals = [0] * width
    for i, label in enumerate(labels_):
        try:
            col = column_of[label]
        except KeyError:
            # Keep the exception type callers would have seen from list.index.
            raise ValueError(f"{label!r} is not in list") from None
        vals[col] = scores_[i]

    rows = [0] * width
    cols = range(width)
    # State the shape explicitly rather than letting it be inferred from the
    # largest row/column index.
    return coo_matrix((vals, (rows, cols)), shape=(1, width))


df['sparse_row'] = df.apply(
    lambda row: labels_to_sparse((all_labels, row['labels'], row['scores'])),
    axis=1,
)
df

# Sort the label vocabulary so that np.searchsorted can locate each label.
all_labels = np.sort(all_labels)
n = len(df)

# Number of labels carried by each row of the frame.
lens = [len(row_labels) for row_labels in df['labels']]

# Flatten the per-row label lists and score lists into two 1-D arrays.
l_ar = np.concatenate(df['labels'].to_list())
d = np.concatenate(df['scores'].to_list())

# R: row number of every flattened entry (row k repeated lens[k] times).
# C: position of every flattened label within the sorted vocabulary.
R = np.arange(n).repeat(lens)
C = np.searchsorted(all_labels, l_ar)

# One sparse row per frame row, one column per known label.
my_result = coo_matrix(
    (d, (R, C)),
    shape=(n, len(all_labels)),
)

2条回答

网友
1楼 · 编辑于 2024-04-23 07:56:19

这是一个基于 `np.searchsorted` 的方法：
n = len(df)

# Number of labels carried by each row of the frame.
lens = [len(row_labels) for row_labels in df['labels']]

# Flatten the per-row label lists and score lists into two 1-D arrays.
l_ar = np.concatenate(df['labels'])
d = np.concatenate(df['scores'])

# R: row number of every flattened entry (row k repeated lens[k] times).
# C: column of every flattened label; assumes all_labels is already sorted
#    (otherwise searchsorted needs its `sorter` argument).
R = np.arange(n).repeat(lens)
C = np.searchsorted(all_labels, l_ar)

# Dense result: start from zeros and scatter the scores into place.
out = np.zeros((n, len(all_labels)), dtype=d.dtype)
out[R, C] = d
注意：如果 `all_labels` 没有排序，我们需要在调用 `searchsorted` 时传入 `sorter` 参数。
要获得稀疏矩阵输出，例如 `coo_matrix`：
from scipy.sparse import csr_matrix, coo_matrix

# Expects d (values), R (row indices), C (column indices), n (row count) and
# all_labels to be defined already; builds the matrix in COO format from the
# (value, (row, column)) triplets.
out_sparse = coo_matrix(
    (d, (R, C)),
    shape=(n, len(all_labels)),
)

网友
2楼 · 编辑于 2024-04-23 07:56:19

这里有几个你可以尝试的替代方法。
方法1 - 用列表推导式和 `reindex`
重新构造你的 DataFrame
from string import ascii_lowercase

# The label vocabulary: the 26 lowercase ASCII letters, one column each.
all_labels = list(ascii_lowercase)

# Turn every row into a {label: score} dict. Each row is unpacked
# positionally, so df is assumed to have exactly two columns in the order
# (labels, scores).
my_result = pd.DataFrame(
    [dict(zip(labels, scores)) for _, (labels, scores) in df.iterrows()]
)

# Expand to one column per known label, fill the absent ones with 0 and
# return the underlying NumPy array.
my_result = my_result.reindex(columns=all_labels).fillna(0).values
方法2 - `for` 循环，使用 `loc`
更新值
# Dense frame of zeros: one row per row of df, one column per known label.
my_result = pd.DataFrame(
    np.zeros((len(df), len(all_labels))),
    columns=all_labels,
)

# Address rows by position rather than by df's index label. my_result has a
# default 0..n-1 index, so using df's label would write to the wrong row (or
# append a new one) whenever df does not have the default index, e.g. after
# filtering. Each row is unpacked positionally, so df is assumed to have
# exactly two columns in the order (labels, scores).
for i, (_, (lab, val)) in enumerate(df.iterrows()):
    my_result.loc[i, lab] = val

# Hand back the underlying NumPy array.
my_result = my_result.values
两者应产生相同的输出。
[输出]
[[0.2 0.1 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. ] [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.7 0. 0. 0. 0. 0. 0. 0. 0. 0. ] [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.5 0. 0. 0. 0.3 0. 0. 0. 0. 0. 0. 0. 0.1 0. 0. 0. 0. ]]

方法1 - 用列表推导式和 `reindex`

方法2 - `for` 循环，使用 `loc`

相关问题更多 >

编程相关推荐

热门问题

热门文章