# Toy event log: one row per (user, product) interaction.
# user1 appears twice with product A, so that pair occurs two times.
frame = pd.DataFrame.from_records(
    [
        ('user1', 'A'),
        ('user1', 'A'),
        ('user2', 'A'),
        ('user3', 'B'),
    ],
    columns=['user_id', 'product_name'],
)
from scipy.sparse import csr_matrix
from pandas.api.types import CategoricalDtype
def incident_to_sparse_interaction_matrix(frame, user_column, item_column):
    """Build a sparse user x item interaction matrix from event rows.

    Each row of ``frame`` is treated as one (user, item) event with weight 1.
    Matrix rows follow the sorted unique values of ``frame[user_column]`` and
    matrix columns follow the sorted unique values of ``frame[item_column]``.
    Repeated (user, item) pairs are summed by the sparse constructor, so each
    cell holds the number of events for that pair.

    Args:
        frame: pandas DataFrame holding the events. It is not modified.
        user_column: Name of the column whose values index the matrix rows.
        item_column: Name of the column whose values index the matrix columns.

    Returns:
        A ``scipy.sparse.csr_matrix`` of shape ``(n_users, n_items)`` with
        int64 entries.
    """
    # Imported locally because numpy is not imported at file level here.
    import numpy as np

    # Ordered categorical dtypes give every distinct user/item a stable
    # integer code (its position in the sorted list of unique values).
    users = CategoricalDtype(sorted(frame[user_column].unique()), ordered=True)
    items = CategoricalDtype(sorted(frame[item_column].unique()), ordered=True)

    row = frame[user_column].astype(users).cat.codes
    col = frame[item_column].astype(items).cat.codes

    # One unit of weight per event. Kept as a local array instead of writing
    # a 'score' column into `frame`, which would mutate the caller's data and
    # clobber any existing 'score' column.
    data = np.ones(len(frame), dtype=np.int64)

    return csr_matrix(
        (data, (row, col)),
        shape=(users.categories.size, items.categories.size),
    )
# Build the user x product matrix for the demo frame, then print it as a
# dense array (only sensible for small matrices).
collab_sparse = incident_to_sparse_interaction_matrix(
    frame,
    user_column='user_id',
    item_column='product_name',
)
print(collab_sparse.toarray())
你可以做:
已有的答案不适用于大型的交互记录 DataFrame,因此建议将其存储为稀疏矩阵,可以按如下方式操作:
将稀疏矩阵转换为稠密矩阵的方法如下所示:
相关问题 更多 >
编程相关推荐