计算文件的字母数并创建直方图

def LetterCount(file_path):
    """Count how often each letter occurs in the text file at *file_path*.

    The file's contents (not the path string) are read and lower-cased, so
    the count is case-insensitive. Digits, punctuation and whitespace are
    ignored because only characters for which ``str.isalpha()`` is true are
    counted.

    Args:
        file_path: Path of the text file to read.

    Returns:
        A dict mapping each lower-case letter found in the file to its
        number of occurrences, in order of first appearance.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    # Local import: this snippet has no import block of its own.
    from collections import Counter

    with open(file_path, 'r') as f:
        text = f.read().lower()
    return dict(Counter(ch for ch in text if ch.isalpha()))


def histogram(file_path):
    """Print a star histogram of the letter counts of the file at *file_path*.

    One row is printed per letter, in alphabetical order, with three
    columns: the letter, its count, and a bar of ``*`` whose length equals
    the count. The header row uses the same column widths as the data rows
    so the columns line up.

    Args:
        file_path: Path of the text file to analyse.

    Returns:
        None. The histogram is written to standard output.
    """
    counts = LetterCount(file_path)
    # Keys are single characters, so the header label sets the first width.
    element_width = len('Element')
    # ``default=0`` keeps an empty file from raising on ``max()``.
    value_width = max(len(str(max(counts.values(), default=0))), len('Value'))
    row = '{0} {1} {2}'

    print("\nCreating a histogram from values: ")
    print(row.format('Element'.ljust(element_width),
                     'Value'.rjust(value_width),
                     'Histogram'))
    for letter in sorted(counts):
        n = counts[letter]
        print(row.format(letter.ljust(element_width),
                         str(n).rjust(value_width),
                         '*' * n))


file_path = '/myfolder/text.txt'

if __name__ == "__main__":
    print(LetterCount(file_path))
    # histogram() prints its own output and returns None, so it is called
    # directly rather than wrapped in print().
    histogram(file_path)

2条回答

网友

1楼 · 编辑于 2024-05-14 18:00:59

您可以使用一些标准库来让您的生活更轻松

import collections
import re

# Read the whole file into memory.
with open("./file.txt", 'r') as f:
    txt = f.read()

# Find all the ASCII alphabetic characters (either case).
letters = re.findall("[a-zA-Z]", txt)
# Count them case-insensitively: the histogram below only has rows for
# a-z, so upper-case letters must be folded into their lower-case row or
# they would be counted but never shown.
counts = collections.Counter(ch.lower() for ch in letters)

# Print the star histogram. A Counter returns 0 for a missing key, so a
# letter that never occurs simply gets an empty bar.
for i in 'abcdefghijklmnopqrstuvwxyz':
    print(f"{i} | {'*' * counts[i]}")

网友

2楼 · 编辑于 2024-05-14 18:00:59

因为我没有你的文件，也无法复制你的具体例子，所以我会分开回答这两个问题

首先，要把您的文件（这里用字符串列表表示）统计成字典形式的直方图，可以参考以下代码：

# Sample input standing in for the lines of a file.
list_of_sentences = [
    "this is my first code in python",
    "it's rainy today",
    "thanks",
]

# Map each alphabetic character to the number of times it has been seen.
m_dict = {}
for sentence in list_of_sentences:
    # Skip spaces, apostrophes and any other non-letter characters.
    for letter in filter(str.isalpha, sentence):
        m_dict[letter] = m_dict.get(letter, 0) + 1

print(m_dict)

输出：

{'t': 6, 'h': 3, 'i': 6, 's': 5, 'm': 1, 'y': 4, 'f': 1, 'r': 2, 'c': 1, 'o': 3, 'd': 2, 'e': 1, 'n': 4, 'p': 1, 'a': 3, 'k': 1}

上面的方法会遍历文件中的每个字母并计数。如果您想改为遍历 a 到 z（这种方式对大文件更高效，而且还会列出文件中没有出现的字母，其计数为 0），最好使用以下方法：

# Join the sentences once up front: the combined text is the same for every
# letter, so rebuilding it on each of the 26 iterations is wasted work.
all_text = ''.join(list_of_sentences)

# Record a count for every letter a-z, including letters that never occur
# (they are stored with a count of 0).
for code in range(ord('a'), ord('z') + 1):
    letter = chr(code)
    m_dict[letter] = all_text.count(letter)

输出：

{'t': 6, 'h': 3, 'i': 6, 's': 5, 'm': 1, 'y': 4, 'f': 1, 'r': 2, 'c': 1, 'o': 3, 'd': 2, 'e': 1, 'n': 4, 'p': 1, 'a': 3, 'k': 1, 'b': 0, 'g': 0, 'j': 0, 'l': 0, 'q': 0, 'u': 0, 'v': 0, 'w': 0, 'x': 0, 'z': 0}

现在我们已经得到了直方图（下面继续使用第一个直方图），接下来处理第二部分：按照您想要的格式把直方图打印出来：

def print_as_histogram(m_dict):
    """Print one ``key | ***`` row per entry of *m_dict*, in sorted key order.

    Args:
        m_dict: Mapping from a letter to its count; the count sets the
            number of ``*`` characters in that letter's bar.
    """
    ordered_keys = sorted(m_dict)
    for key in ordered_keys:
        bar = "*" * m_dict[key]
        print(f'{key} | {bar}')

# Print the histogram for the letter counts held in m_dict.
print_as_histogram(m_dict)

输出：

a | ***
c | *
d | **
e | *
f | *
h | ***
i | ******
k | *
m | *
n | ****
o | ***
p | *
r | **
s | *****
t | ******
y | ****

我对字母进行了排序，因为在我看来这样看起来更好

相关问题更多 >

编程相关推荐

热门问题

热门文章