Python查找可能拼写错误的单词频率和字符串频率，并保存为txt文件或CSV

import os
from collections import Counter
import glob


def word_frequency(fileobj, words):
    """Build a Counter of the specified words found in fileobj.

    Each line of ``fileobj`` is split on whitespace and every resulting
    token that is a member of ``words`` is counted. Matching is exact and
    case-sensitive.

    Args:
        fileobj: Any iterable of text lines (for example an open text file).
        words: Collection of words to look for; a set gives O(1) lookups.

    Returns:
        A ``Counter`` with one entry per word in ``words``; words that never
        occur are present with a count of 0.
    """
    # Seed every requested word with 0 so absent words still show up in the
    # result instead of silently disappearing.
    ct = Counter(dict.fromkeys(words, 0))
    for line in fileobj:
        ct.update(word for word in line.split() if word in words)
    return ct


def count_words_in_dir(dirpath, words, action=None):
    """For each .txt file directly inside dirpath, count the specified words.

    Args:
        dirpath: Directory whose ``*.txt`` files are scanned (non-recursive).
        words: Collection of words handed to ``word_frequency``.
        action: Optional callable invoked as ``action(filepath, counter)``
            once per file; nothing is done with the counts when it is falsy.
    """
    for filepath in glob.iglob(os.path.join(dirpath, '*.txt')):
        with open(filepath) as f:
            ct = word_frequency(f, words)
        if action:
            action(filepath, ct)


def print_summary(filename, ct):
    """Print the file name, the sorted words and their counts.

    Args:
        filename: Label printed on the first line.
        ct: Mapping of word -> count, as produced by ``word_frequency``.
    """
    words = sorted(ct.keys())
    counts = [str(ct[k]) for k in words]
    print('{0}\n{1}\n{2}\n\n'.format(
        filename, ', '.join(words), ', '.join(counts)))


# NOTE: "policy payment" contains a space, so it can never equal a single
# whitespace-split token; with this token-based counter its count is always 0.
words = {'JUSTICE', "policy payment", "payment", "annuity", "CYNTHEA"}

if __name__ == "__main__":
    count_words_in_dir('./', words, action=print_summary)

1条回答

网友

1楼 · 发布于 2024-04-25 04:13:11

import sys
import os
from collections import Counter
import glob
# def count_words_in_dir(dirpath, words, action=None):
#     """For each .txt file in a dir, count the specified words"""
#     for filepath in glob.iglob(os.path.join(path, '*.txt')):
#         with open(filepath) as f:
#             data = f.read()
#             for key,val in words.items():
#                 print("key is " + key + "\n")
#                 ct = data.count(key)
#                 words[key] = ct
#             if action:
#                 action(filepath, ct)
# Keep a reference to the current stdout so the statements at the end of the
# script can put it back after the report has been written.
stdoutOrigin=sys.stdout 
# From here on everything written via print() lands in log.txt (relative to
# the current working directory); mode "w" truncates an existing file.
sys.stdout = open("log.txt", "w")
              
def count_words_in_dir(dirpath, words, action=None):
    """Count each key of ``words`` in every ``*.txt`` file inside ``dirpath``.

    For every matching file the whole content is read and, for each key,
    the number of non-overlapping substring occurrences (``str.count``) is
    stored back into ``words``. Substring matching means a key such as
    ``"foo"`` is also counted inside ``"foobar"``.

    Args:
        dirpath: Directory whose ``*.txt`` files are scanned (non-recursive).
        words: Dict mapping search string -> count. It is updated in place
            and its values are overwritten for each file, so after the call
            it holds the counts of the last file processed.
        action: Optional callable invoked as ``action(filepath, words)``
            after each file has been counted.
    """
    # Use the dirpath argument; the previous version joined the literal
    # string "path" and therefore ignored the directory the caller asked for.
    for filepath in glob.iglob(os.path.join(dirpath, '*.txt')):
        with open(filepath) as f:
            data = f.read()
        # Only existing keys are reassigned, so the dict does not change
        # size while it is being iterated.
        for key in words:
            words[key] = data.count(key)
        if action:
            action(filepath, words)


def print_summary(filepath, words):
    """Print ``filepath`` followed by one ``key:<TAB>count`` line per entry.

    Args:
        filepath: Label printed on the first line.
        words: Mapping of search string -> count; entries are emitted in
            ascending key order.
    """
    rows = ['{0}:\t{1}'.format(name, words[name]) for name in sorted(words)]
    # One print call: the label and every row are separated by newlines.
    print(filepath, *rows, sep='\n')




try:
    # First command-line argument; it is passed to count_words_in_dir as the
    # dirpath argument, so despite the name it denotes a directory.
    filepath = sys.argv[1]
    # Placeholder search terms to be replaced by the user. dict.fromkeys
    # collapses duplicate entries into a single key with count 0.
    keys = ["keyword",
    "keyword"]
    words = dict.fromkeys(keys,0)

    count_words_in_dir(filepath, words, action=print_summary)
finally:
    # Always close the log file and put the saved stdout back, even when the
    # argument is missing or a file cannot be read; otherwise the process
    # would keep printing into a dangling log handle.
    sys.stdout.close()
    sys.stdout=stdoutOrigin

相关问题更多 >

编程相关推荐

热门问题

热门文章