计算文本文件中特定单词的出现次数，并打印其中出现次数最多的50个单词

# Question code, restored to one statement per line: it had been collapsed
# onto a single line, which is not valid Python.
# NOTE(review): the block nesting below is reconstructed from the statement
# order - confirm against the original post.
from collections import Counter

# Count every whitespace-separated token in the text file.
with open("./Text_file.txt", "r", encoding='utf8') as logfile:
    word_counts = Counter(logfile.read().split())

with open("./key_words.txt", "r", encoding='utf8') as word:
    lines = word.readlines()
    for line in lines:
        # NOTE: this looks up the literal string 'line' rather than the
        # variable `line`, and `count` is rebuilt on every iteration, so it
        # only ever holds a single value. That is the problem being asked
        # about, so the statement is intentionally left as posted.
        count = [word_counts.get('line')]
        lst = sorted (count)
        print (lst[:50])

3条回答

网友

1楼 · 编辑于 2024-04-19 21:55:47

一种选择

from collections import Counter

def count_keywords(text_path="./Text_file.txt",
                   keywords_path="./key_words.txt",
                   encoding="utf8"):
    """Count how often each keyword occurs in a text file.

    Args:
        text_path: File whose whitespace-separated words are counted.
        keywords_path: File listing the keywords, one per line.
        encoding: Text encoding used to read both files.

    Returns:
        A ``Counter`` mapping each keyword found in the text to its number
        of occurrences; keywords that never occur are absent.
    """
    # Read keywords
    with open(keywords_path, "r", encoding=encoding) as keyfile:
        # Use set of keywords (@MisterMiyagi comment) for O(1) membership.
        # Each line is stripped and blanks are dropped, so trailing spaces or
        # an empty last line cannot produce keywords that never match.
        keywords = {line.strip() for line in keyfile} - {""}

    # Process words
    cnts = Counter()
    with open(text_path, "r", encoding=encoding) as logfile:
        for line in logfile:
            # only count keywords; split() already ignores surrounding
            # whitespace, so no rstrip() or emptiness check is needed
            cnts.update(word for word in line.split() if word in keywords)
    return cnts


if __name__ == "__main__":
    # Use counter most_common to get most popular 50
    print(count_keywords().most_common(50))

使用计数器+正则表达式的替代方法

正则表达式用于将单词与标点符号分开，如句号、引号、逗号等

import re
from collections import Counter

# Compiled once, outside the loop: a "word" is a maximal run of ASCII letters,
# which separates words from punctuation such as periods, quotes and commas.
WORD_PATTERN = re.compile(r'[a-zA-Z]+', flags=re.A)


def count_keywords_regex(text_path="./Text_file.txt",
                         keywords_path="./key_words.txt",
                         encoding="utf8"):
    """Count keyword occurrences case-insensitively, ignoring punctuation.

    Args:
        text_path: File to scan; words are runs of ASCII letters.
        keywords_path: File listing the keywords, one per line.
        encoding: Text encoding used to read both files.

    Returns:
        A ``Counter`` mapping each lowercased keyword found in the text to
        its number of occurrences.
    """
    with open(keywords_path, "r", encoding=encoding) as keyfile:
        # A set (not a list) keeps each membership test O(1); keywords are
        # lowercased so they compare equal to the lowercased text words.
        keywords = {line.strip().lower() for line in keyfile}

    cnts = Counter()
    with open(text_path, "r", encoding=encoding) as logfile:
        for line in logfile:
            # use regex to separate words from punctuation, then lowercase
            # each match (matching first keeps the ASCII-only word rule)
            words = (match.lower() for match in WORD_PATTERN.findall(line))
            cnts.update(word for word in words if word in keywords)
    return cnts


if __name__ == "__main__":
    print(count_keywords_regex().most_common(50))

网友
2楼 · 编辑于 2024-04-19 21:55:47

这里的 word_counts.get('line') 在每次迭代中查找的都是字符串字面量 'line'，而不是变量 line 的值，这就是为什么结果列表只有一个值。以下是修改后的代码，用于得到关键词中出现次数最多的前 50 个单词：
from collections import Counter


def top_keywords(text_path="./Text_file.txt",
                 keywords_path="./key_words.txt",
                 limit=50):
    """Return the keywords that occur most often in a text file.

    Args:
        text_path: File whose whitespace-separated words are counted.
        keywords_path: File listing the keywords, one per line.
        limit: Maximum number of keywords to return.

    Returns:
        Up to ``limit`` keywords found in the text, ordered by decreasing
        count; keywords with equal counts keep their keyword-file order.
    """
    with open(text_path, "r", encoding='utf8') as logfile:
        word_counts = Counter(logfile.read().split())

    kwc = {}  # keyword counter
    with open(keywords_path, "r", encoding='utf8') as word:
        for line in word:
            # assuming each word is on a separate line; strip() removes the
            # '\n' character from the end of the line
            line = line.strip()
            if line in word_counts:
                # keyword is found in the text, so record its count
                kwc[line] = word_counts[line]

    # sort keywords in decreasing order of their count
    return sorted(kwc, key=kwc.get, reverse=True)[:limit]


if __name__ == "__main__":
    print(top_keywords())

网友
3楼 · 编辑于 2024-04-19 21:55:47

以下是您可以做的：

from collections import Counter

def most_frequent_keywords(text_path="./Text_file.txt",
                           keywords_path="./key_words.txt",
                           limit=50,
                           encoding="utf8"):
    """Return the keywords that occur most often in a text file.

    Args:
        text_path: File whose whitespace-separated words are counted.
        keywords_path: File with whitespace-separated keywords.
        limit: Maximum number of keywords to return.
        encoding: Text encoding used to read both files (given explicitly so
            the result does not depend on the platform default).

    Returns:
        Up to ``limit`` keywords found in the text, ordered by decreasing
        count; ties are ordered by word, descending.
    """
    with open(text_path, "r", encoding=encoding) as file, \
            open(keywords_path, "r", encoding=encoding) as word:
        # Store words from text file into a list; split() already strips
        # surrounding whitespace from every word.
        words1 = file.read().split()
        # Store words from key file into a set for O(1) membership tests.
        words2 = set(word.read().split())

    # Counter of every text word that is also a keyword.
    d = Counter(w1 for w1 in words1 if w1 in words2)

    # Rank by (count, word), highest first, then keep only the words.
    ranked = sorted(d.items(), key=lambda item: (item[1], item[0]), reverse=True)
    return [w for w, _ in ranked[:limit]]


if __name__ == "__main__":
    print(most_frequent_keywords())

相关问题更多 >

编程相关推荐

热门问题

热门文章