与Python中提供的引用列表匹配的近似字符串

# provided list >> jobskill = ["scrum", "customer experience improvement", "python"] # long string >> jobtext = ["We are looking for Graduates in our Customer Experience department in Swindon, you will be responsible for improving customer experience and will also be working with the digital team. Send in your application by 31st December 2018", "If you are ScrumMaster at the top of your game with ability to communicate inspire and take people with you then there could not be a better time, we are the pioneer in digital relationship banking, and we are currently lacking talent in our Scrum team, if you are passionate about Scrum, apply to our Scrum team, knowledge with python is a plus!"] # write a function that returns most frequent approximate match >> mostfrequent(input = jobtext, lookup = jobskill) # desired_output: {"customer experience improvement", "scrum"}

1条回答

网友

1楼 · 发布于 2024-06-01 03:06:59

使用 fuzzywuzzy 进行模糊匹配:

from collections import defaultdict
from fuzzywuzzy import fuzz

# Skill phrases to look for (the lookup list).
jobskill = [
    "scrum",
    "customer experience improvement",
    "python",
]

# Free-text job adverts that are searched for the skills above.
jobtext = [
    (
        "We are looking for Graduates in our Customer Experience department in "
        "Swindon, you will be responsible for improving customer experience and "
        "will also be working with the digital team. Send in your application by "
        "31st December 2018"
    ),
    (
        "If you are ScrumMaster at the top of your game with ability to "
        "communicate inspire and take people with you then there could not be a "
        "better time, we are the pioneer in digital relationship banking, and we "
        "are currently lacking talent in our Scrum team, if you are passionate "
        "about Scrum, apply to our Scrum team, knowledge with python is a plus!"
    ),
]


def k_most_frequent(k, text, queries, threshold=70, *, scorer=None):
    """Return the k queries that approximately match the text most often.

    The text is flattened into one list of whitespace-separated words. For
    each query, every run of 1..n consecutive words (n being the number of
    words in the query) is scored against the query, and each run whose
    score is strictly greater than ``threshold`` counts as one occurrence.

    Args:
        k: Maximum number of queries to return.
        text: An iterable of strings, or a single string, to search in.
        queries: Iterable of phrases to look for. A phrase listed more than
            once accumulates the matches of every listing.
        threshold: Score a word run must exceed to count as a match.
        scorer: Callable ``scorer(query, candidate)`` returning a comparable
            similarity score. Defaults to ``fuzz.ratio`` from fuzzywuzzy.

    Returns:
        Up to ``k`` queries ordered by descending match count. Ties keep the
        order in which the queries were given, and queries without any match
        are still eligible (with a count of zero).
    """
    if scorer is None:
        # Looked up at call time so the default stays the file's fuzzywuzzy import.
        scorer = fuzz.ratio

    # A bare string must not be joined character by character.
    if isinstance(text, str):
        words = text.split()
    else:
        words = " ".join(text).split()

    frequency = {}
    for query in queries:
        matches = 0
        # Start at 1: a zero-width window only compares against "".
        for window in range(1, len(query.split()) + 1):
            # Stop where the window still fits; shorter trailing slices would
            # repeat comparisons already made by smaller windows.
            for start in range(len(words) - window + 1):
                candidate = " ".join(words[start : start + window])
                if scorer(query, candidate) > threshold:
                    matches += 1
        # Register the query even when nothing matched.
        frequency[query] = frequency.get(query, 0) + matches

    # sorted() is stable, so equal counts keep their first-seen order.
    return sorted(frequency, key=frequency.get, reverse=True)[:k]


# Print the two skills from jobskill that match the adverts in jobtext most often.
print(k_most_frequent(k=2, text=jobtext, queries=jobskill))

# output: ["customer experience improvement", "scrum"]

模糊使用

相关问题更多 >

编程相关推荐

热门问题

热门文章