# Greedy longest-phrase lookup: walk the search string word by word and, at
# each position, extend the phrase for as long as it is still contained in
# one of the uploaded strings.

upload = ['wish you happy birthday', 'take care', 'good night']
search_string = 'foo happy birthday take care bar good night good'
words = search_string.split()
len_word = len(words)


def findWordInUpload(search_str, upld):
    """Return the index of the first entry of *upld* that contains *search_str*.

    Matching is plain substring containment. Returns -1 when no entry
    contains the text, or when *search_str* is empty (an empty string is a
    substring of everything, which would otherwise report a meaningless
    match at index 0).
    """
    if not search_str:
        return -1
    for j, u in enumerate(upld):
        if search_str in u:
            return j
    return -1


def findLongestMatches(tokens, upld):
    """Map each longest matching run of *tokens* to its index in *upld*.

    Scans *tokens* left to right. At every position the phrase is grown one
    token at a time while ``findWordInUpload`` still finds it; the longest
    phrase found is recorded and the scan resumes right after it. Tokens
    that match nothing are skipped individually. If the same phrase is
    matched more than once, the later index overwrites the earlier one.

    Returns a dict ``{phrase: index_in_upld}`` in order of first insertion.
    """
    matches = {}
    total = len(tokens)
    i = 0
    while i < total:
        matched_len = 0
        matched_idx = -1
        for end in range(i + 1, total + 1):
            idx = findWordInUpload(" ".join(tokens[i:end]), upld)
            if idx == -1:
                break
            matched_len = end - i
            matched_idx = idx
        if matched_len:
            matches[" ".join(tokens[i:i + matched_len])] = matched_idx
            # Advance by the number of tokens actually matched. Tracking the
            # matched length explicitly keeps this correct even when the
            # phrase runs to the very end of the token list.
            i += matched_len
        else:
            i += 1
    return matches


result = findLongestMatches(words, upload)
print(result)
import itertools
# Brute-force lookup: build every ordered word combination of the query and
# record which uploaded string (by index) contains each one.

upload = ['wish you happy birthday', 'take care', 'good night']
string = 'happy birthday take'.split(' ')

# Candidate phrases of every length from 1 up to and including the full
# query, so that a query matching in its entirety is also found.
combine = [
    " ".join(perm)
    for size in range(1, len(string) + 1)
    for perm in itertools.permutations(string, size)
]
print(combine)

# Map each candidate that occurs as a substring of an upload entry to that
# entry's index; a later entry overwrites an earlier one for the same phrase.
result = {}
for count, entry in enumerate(upload):
    for phrase in combine:
        if phrase in entry:
            result[phrase] = count

# Collect single words that are already covered by a matched multi-word
# phrase, e.g. if 'happy birthday' matched, drop 'happy' and 'birthday'.
remove = []
for phrase in result:
    parts = phrase.split(" ")
    if len(parts) > 1:
        for part in parts:
            if part in result and part not in remove:
                remove.append(part)
print(remove)

for phrase in remove:
    del result[phrase]
print(result)
如果我理解正确的话,这里的一个关键问题是为你的单词组合找到尽可能长的匹配。因此,我将使用递归来检查在上传中可以找到多少个单词:
通过前面提到的搜索字符串,它会输出
{'happy birthday': 0, 'take care': 1, 'good night': 2, 'good': 2}
这就是你要找的吗?编辑:简化了整体的变量使用(去掉了第一版中的冗余)。
您可以使用 `itertools`。输出:
蛮力法:
首先,从源列表生成 n*(n-1) 个元素组合,并存储在二维数组中。
然后,在先前生成的二维数组中搜索所需字符串的子字符串匹配(或完整字符串匹配)。
与所需字符串匹配的生成字符串所在单元格的索引,即为预期的结果。
相关问题 更多 >
编程相关推荐