def skipgram(corpus, window_size=3):
    """Split every sentence in *corpus* into rolling windows of tokens.

    Despite its name, this returns contiguous token windows (n-gram style
    chunks), not skip-grams: no tokens are ever skipped inside a window.

    Args:
        corpus: Iterable of sequences whose first element is a sentence
            string; the sentence is tokenized by whitespace splitting.
        window_size: Number of tokens per window. Must be at least 1.

    Returns:
        A list of token lists. A sentence with at most ``window_size``
        tokens contributes itself as one entry; a longer sentence
        contributes every contiguous window of ``window_size`` tokens,
        in order.

    Raises:
        ValueError: If ``window_size`` is smaller than 1.
    """
    # A non-positive size would only produce empty or misaligned slices.
    if window_size < 1:
        raise ValueError("window_size must be a positive integer")

    windows = []
    for sent in corpus:
        tokens = sent[0].split()
        if len(tokens) <= window_size:
            # Too short to slide over: keep the whole sentence as one chunk.
            windows.append(tokens)
            continue
        windows.extend(
            tokens[start:start + window_size]
            for start in range(len(tokens) - window_size + 1)
        )
    return windows
# Sample corpus: each entry is a one-element list holding a sentence string.
corpus = [["my name is John"], ["This PC is black"]]
# The result is not stored or printed here; it is only echoed when this
# snippet is run in an interactive session.
skipgram(corpus)
from nltk import ngrams
def generate_ngrams(sentences, window_size=3):
    """Lazily yield the n-grams of every sentence in *sentences*.

    Args:
        sentences: Iterable of sequences whose first element is a sentence
            string; the sentence is tokenized by whitespace splitting.
        window_size: The ``n`` passed to ``nltk.ngrams`` (tokens per n-gram).

    Yields:
        The items produced by ``nltk.ngrams`` for each sentence, in corpus
        order (tuples of tokens, per the sample output of this snippet).
    """
    for sentence in sentences:
        # Delegate the rolling-window logic to NLTK and stream its results.
        yield from ngrams(sentence[0].split(), window_size)
# Demo: print every 3-gram of the sample sentences, one per line.
sentences = [["my name is John"], ["This PC is black"]]
for c in generate_ngrams(sentences, 3):
    print(c)
# Output:
# ('my', 'name', 'is')
# ('name', 'is', 'John')
# ('This', 'PC', 'is')
# ('PC', 'is', 'black')
下面是一个简单的函数。
你其实并不需要真正的 skipgram,而是想要按窗口大小切分的片段(chunk),可以试试这个:[输出]:
如果需要滚动窗口,即 ngrams:[输出]:
与ngrams的NLTK类似:
[输出]:
如果你想要真正的 skipgram,请参阅:How to compute skipgrams in python?
[输出]:
试试这个!
相关问题 更多 >
编程相关推荐