def tokenize(phrase):
    """Return comma-joined prefix tokens (length >= 2) for each word in phrase."""
    token = []
    words = phrase.split(' ')
    for word in words:
        for i in range(len(word)):
            if i == 0:
                continue
            w = word[i]
            if i == 1:
                # start a new prefix with the first two characters
                token += [word[0] + w]
                continue
            # extend the previous prefix by one character
            token += [token[-1] + w]
    return ",".join(token)
def tokenize_autocomplete(phrase):
    """Return every substring (length 1..len(word)) of each word in phrase."""
    a = []
    for word in phrase.split():
        j = 1
        while True:
            # collect all substrings of length j
            for i in range(len(word) - j + 1):
                a.append(word[i:i + j])
            if j == len(word):
                break
            j += 1
    return a
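For comparison, a small sketch showing the substrings this produces:

print(tokenize_autocomplete('hello'))
# -> ['h', 'e', 'l', 'l', 'o', 'he', 'el', 'll', 'lo', 'hel', 'ell', 'llo', 'hell', 'ello', 'hello']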
Build the index + documents (Search API) using the tokenized string:

from google.appengine.api import search

index = search.Index(name='item_autocomplete')
for item in items:  # item is an ndb model entity
    name = ','.join(tokenize_autocomplete(item.name))
    document = search.Document(
        doc_id=item.key.urlsafe(),
        fields=[search.TextField(name='name', value=name)])
    index.put(document)
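Querying the index then matches any stored substring token. A minimal sketch, assuming the index built above ('hel' is just an illustrative partial term):

results = index.search(search.Query(query_string='name:hel'))
for doc in results:
    print(doc.doc_id)  # urlsafe key of each matching item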
Same as @Desmond Lua's answer, but with a different tokenize function: it parses hello world into he, hel, hell, hello, wo, wor, worl, world. This is helpful for lightweight autocomplete.
As described in Full Text Search and LIKE statement, it is not possible, because the Search API implements full-text indexing.
Hope this helps!
Although the LIKE statement (partial match) is not supported in Full Text Search, you can work around it.
First, tokenize the data string into all possible substrings (hello = h, he, hel, lo, and so on).
Then build the index + documents (Search API) using the tokenized strings.
Finally, perform the search, and voila!
https://code.luasoftware.com/tutorials/google-app-engine/partial-search-on-gae-with-search-api/