ValueError:0不在python的列表中

from sklearn.metrics.pairwise import cosine_similarity def rank_candidates(question, candidates, embeddings, dim=300): """ question: a string candidates: a list of strings (candidates) which we want to rank embeddings: some embeddings dim: dimension of the current embeddings result: a list of pairs (initial position in the list, question) """ cosi_dic={} most_candidates=[] q_vec=question_to_vec(question,embeddings,dim) for i in candidates: can_vec=question_to_vec(i,embeddings,dim) cosi_dic[cosine_similarity(can_vec.reshape(1,-1), q_vec.reshape(1,-1))[0][0]]=i for i in (list(reversed(sorted(cosi_dic.keys(),)))): most_candidates.append((candidates.index(cosi_dic[i]),cosi_dic[i])) return most_candidates

def question_to_vec(question, embeddings, dim=300): """ question: a string embeddings: dict where the key is a word and a value is its' embedding dim: size of the representation result: vector representation for the question """ v=np.zeros(dim) all_vectors=[] question=question.split() for i in question: if i in embeddings: all_vectors.append(embeddings[i]) if all_vectors: v=np.mean(all_vectors, axis=0) return v

1条回答

网友

1楼 · 发布于 2024-05-29 06:02:42

深入研究错误后，发现在处理函数中的数据时使用字典会替换具有相同余弦相似值的值。因此，函数应如下所示：

def rank_candidates(question, candidates, embeddings, dim=300):
    """
        question: a string
        candidates: a list of strings (candidates) which we want to rank
        embeddings: some embeddings
        dim: dimension of the current embeddings

        result: a list of pairs (initial position in the list, question)
    """
    #cosi_dic={}
    most_candidates=[]
    updated_most_candidates=[]
    q_vec=question_to_vec(question,wv_embeddings,300)
    for i in candidates:
 # print(type(i))
      can_vec=question_to_vec(i,wv_embeddings,300)

      #cosi_dic[cosine_similarity(can_vec.reshape(1,-1),  q_vec.reshape(1,-1))[0][0]]=i
      sim=cosine_similarity(can_vec.reshape(1,-1),  q_vec.reshape(1,-1))[0][0]
    #for i in (list(reversed(sorted(cosi_dic.keys(),)))):
      #most_candidates.append((candidates.index(cosi_dic[i]),cosi_dic[i]))
      most_candidates.append((sim,i))
    most_candidates.sort(key=lambda x: x[0],reverse=True)
    for i in most_candidates:
      updated_most_candidates.append((candidates.index(i[1]),i[1]))


    return updated_most_candidates

相关问题更多 >

编程相关推荐

热门问题

热门文章