擅长:python、mysql、java
<p>我在其他答案的基础上做了扩展,使其能够处理所提供的列表。下面是一个使用<code>fuzzywuzzy</code>的算法,对类似<code>vitamin e</code>的情况似乎也有效:</p>
<pre><code>def merge_scores(text, matches, match_func):
new_scores = []
for match in matches:
new_scores.append((match[0], (match[1] + match_func(match[0], text)) / 2))
return sorted(new_scores, key=lambda m:m[1], reverse=True)
def _top_two_tied(matches):
    """Return True when the two best-ranked matches share the same score."""
    return len(matches) > 1 and matches[0][1] == matches[1][1]


def get_best_match(text):
    """Return the best ``INGREDIENTS`` entry for ``text`` together with its score.

    Scorers are tried in stages, falling back whenever the result is
    ambiguous:

    1. ``fuzz.ratio``: accepted when the top score is at least 80 and is not
       tied with the runner-up.
    2. ``fuzz.token_set_ratio``: averaged with ``fuzz.ratio`` through
       ``merge_scores`` unless the fifth-ranked score is 100.
    3. ``fuzz.WRatio``: used when stage 2 still leaves the top two tied.

    Args:
        text: The string to look up.

    Returns:
        The first entry of the winning stage's result list (a
        ``(candidate, score)`` pair), or ``('', 0)`` when there are no
        candidates or the top two are still tied after the last stage.
    """
    fuzz_matches = process.extractBests(text, INGREDIENTS, limit=10, scorer=fuzz.ratio)
    if not fuzz_matches:
        # Nothing to rank, e.g. INGREDIENTS is empty.
        return '', 0
    if fuzz_matches[0][1] >= 80 and not _top_two_tied(fuzz_matches):
        # Confident, unambiguous plain-ratio hit: no fallback needed.
        return fuzz_matches[0]

    fuzz_matches = process.extractBests(text, INGREDIENTS, limit=10, scorer=fuzz.token_set_ratio)
    # Combine only if the top 5 aren't perfect matches. With fewer than five
    # candidates there cannot be five perfect matches, so combine as well.
    # NOTE(review): relies on extractBests returning results best-first.
    if len(fuzz_matches) < 5 or fuzz_matches[4][1] != 100:
        fuzz_matches = merge_scores(text, fuzz_matches, fuzz.ratio)

    if _top_two_tied(fuzz_matches):
        # Last resort: let the weighted ratio try to break the tie.
        fuzz_matches = process.extractBests(text, INGREDIENTS, limit=10, scorer=fuzz.WRatio)
        if _top_two_tied(fuzz_matches):
            return '', 0
    return fuzz_matches[0]
</code></pre>