# Naive average-words-per-sentence calculation: treat '.' as the only
# sentence terminator and whitespace as the only word separator.
# `filepath` must be defined before this snippet runs.
with open(filepath) as f:
    text = f.read()

sentences = text.split('.')

# Skip blank fragments: text.split('.') always yields a trailing piece after
# the final period, which would otherwise be counted as a sentence.
# split() with no argument collapses runs of whitespace (including newlines),
# so repeated spaces do not produce phantom empty "words".
wordcounts = [
    len(sentence.split())
    for sentence in sentences
    if sentence.strip()
]

# float() before dividing keeps the result fractional on Python 2 as well;
# fall back to 0.0 when the text contains no non-blank sentence.
if wordcounts:
    average_wordcount = float(sum(wordcounts)) / len(wordcounts)
else:
    average_wordcount = 0.0
def read_lines_from_file(file_name):
    """Lazily yield every line of a text file, stripped of whitespace.

    Args:
        file_name: Path of the file to open in read mode.

    Yields:
        Each line in file order with leading and trailing whitespace
        (including the newline) removed. Blank lines are yielded as
        empty strings.
    """
    with open(file_name, 'r') as source:
        # The generator expression keeps the read lazy: one line at a time.
        stripped_lines = (raw_line.strip() for raw_line in source)
        for stripped in stripped_lines:
            yield stripped
def average_words(sentences):
    """Return the mean number of whitespace-separated words per sentence.

    Args:
        sentences: Any iterable of strings (a list, or a one-shot generator);
            each item is treated as one sentence. It is consumed in a
            single pass.

    Returns:
        The average word count as a float. Empty strings count as
        zero-word sentences. Returns 0.0 when ``sentences`` is empty.
    """
    total_words = 0
    sentence_count = 0
    for sentence in sentences:
        # Count the words; appending the split list itself would make the
        # later summation fail with a TypeError.
        total_words += len(sentence.split())
        sentence_count += 1

    if sentence_count == 0:
        return 0.0

    # Convert before dividing so Python 2 does not truncate the quotient.
    return float(total_words) / sentence_count
# Print the average words per line of the file named by `file_name`.
# The parenthesised call form is valid on both Python 2 and Python 3,
# whereas the bare print statement is a SyntaxError on Python 3.
print(average_words(read_lines_from_file(file_name)))
朴素的方法:
更严谨的方法是:使用 nltk 按照目标语言的规则对文本进行分词(tokenize)。
这应该能帮你。但这是基本的东西,你至少应该自己尝试一下。
这段代码假设每个句子都在一个新行上。
如果不是这样,你可以修改代码,或者在问题中补充说明这一点——目前问题描述得还不够清楚。
相关问题 更多 >
编程相关推荐