Python 词汇索引(concordance)程序 - 按字母顺序排列
我正在尝试写一个程序,用来显示一个文件的词汇表。这个程序应该输出文件中独特的单词以及它们出现的频率,并且要按字母顺序排列。我现在写的代码有问题,但我不知道怎么解决。有没有什么建议?
顺便说一下,我对计算机编程一无所知!我上这个课是为了满足高中数学的要求。
# Question's script (Python 2 syntax: raw_input, print statement).
# It appears to map each lower-cased word to the list of line numbers on
# which the word was seen, then print the words in sorted order.
# NOTE(review): this copy of the snippet has no indentation, so the intended
# loop nesting cannot be read from the text alone.
f = open(raw_input("Enter a filename: "), "r")
myDict = {}
linenum = 0
for line in f:
line = line.strip()
line = line.lower()
line = line.split()
linenum += 1
for word in line:
word = word.strip()
word = word.lower()
if not word in myDict:
myDict[word] = []
myDict[word].append(linenum)
print "%-15s %-15s" %("Word", "Line Number")
for key in sorted(myDict):
# NOTE: myDict(key) calls the dict instead of indexing it, which raises
# TypeError; the lookup syntax is myDict[key]. The stored value is a list,
# which the %-15d (integer) format cannot render either.
print '%-15s: %-15d' % (key, myDict(key))
5 个回答
0
下面的代码演示了如何为一个文本文件生成按字母顺序排列的词汇表(单词及其出现次数)。
# Count how often each whitespace-separated word occurs in a file and print
# the words in sorted order, one "word count" pair per line.
f = input('Enter the input file name: ')

# Named ``counts`` rather than ``list`` so the builtin ``list`` is not shadowed.
counts = {}

# The context manager closes the file even if reading fails part-way.
with open(f, "r") as inputFile:
    for word in inputFile.read().split():
        counts[word] = counts.get(word, 0) + 1

for word in sorted(counts):
    print("{0} {1} ".format(word, counts[word]))
0
你为什么不直接用Counter呢?它就是为这个目的设计的:
In [8]: s = 'How many times does each word show up in this sentence word word show up up'
In [9]: words = s.split()
In [10]: Counter(words)
Out[10]: Counter({'up': 3, 'word': 3, 'show': 2, 'times': 1, 'sentence': 1, 'many': 1, 'does': 1, 'How': 1, 'each': 1, 'in': 1, 'this': 1})
注意:这个具体的解决方案不是我想出来的。它直接来自于Collections模块的Counter Python培训课程
0
这是我针对词汇索引(concordance)问题的解决方案...
https://github.com/jrgosalia/Python/blob/master/problem2_concordance.py
$ python --version
Python 3.5.1
library.py
def getLines(fileName):
    """Return the full text of the file named ``fileName``.

    A message is printed and an empty string is returned when ``fileName``
    is None or empty, does not exist, is not a regular file, or names an
    empty file.
    """
    # str() keeps the message printable for a None argument; plain string
    # concatenation with None would raise TypeError instead of reporting it.
    shownName = "<" + str(fileName) + ">"

    # ``not fileName`` covers both None and "" before the path is touched.
    if not fileName or not os.path.exists(fileName):
        print(shownName + " doesn't exists, try again!", end="\n\n")
        return ""

    if not os.path.isfile(fileName):
        print(shownName + " is not a file!", end="\n\n")
        return ""

    # The context manager closes the handle even if read() raises.
    with open(fileName, 'r') as handle:
        lines = handle.read()

    if not lines:
        print(shownName + " is an empty file!", end="\n\n")
    return lines
problem2_concordance.py
from library import getLines
# List of English punctuation symbols.
# Reference: taken from https://en.wikipedia.org/wiki/Punctuation_of_English
# NOTE(review): the previous comment said the apostrophe was excluded, but
# the list contains "'", so "don't" is split into "don" and "t". That
# behavior is kept here; confirm whether it is intended.
# "#" used to be written "\#", which is a two-character string (backslash
# plus hash) that never matched, so "#" was not being stripped.
punctuations = ["[", "]", "(", ")", "{", "}", "<", ">",
                ":", ";", ",", "`", "'", "\"", "-", ".",
                "|", "\\", "?", "/", "!", "_", "@",
                "#", "$", "%", "^", "&", "*", "+", "~", "="]


def stripPunctuation(data):
    """Return ``data`` with every symbol in ``punctuations`` replaced by a space."""
    # One translate() pass handles all symbols. maketrans needs single
    # character keys, which every entry of ``punctuations`` is.
    table = str.maketrans(dict.fromkeys(punctuations, " "))
    return data.translate(table)
def display(wordsDictionary):
    """Print the words in sorted order with their frequencies as a table.

    After every 20th word the output pauses until the user presses ENTER,
    then the column header is printed again before the listing continues.
    """
    rule = "-" * 42
    headerRow = "| %20s | %15s |" % ("WORDS".center(20), "FREQUENCY".center(15))

    def printHeader():
        # Column titles framed by a horizontal rule above and below.
        print(rule)
        print(headerRow)
        print(rule)

    printHeader()
    for position, word in enumerate(sorted(wordsDictionary), start=1):
        frequency = str(wordsDictionary.get(word)).center(15)
        print("| %-20s | %15s |" % (word, frequency))
        # Halt every 20 words (configurable)
        if position % 20 == 0:
            print("\n" * 2)
            input("PRESS ENTER TO CONTINUE ... ")
            print("\n" * 5)
            printHeader()
    print(rule)
    print("\n" * 2)
def prepareDictionary(words):
    """Count case-insensitive occurrences of each word.

    Returns a dict that maps the lower-cased word to the number of times it
    appears in ``words``.
    """
    tally = {}
    for rawWord in words:
        # Words are keyed by their lowercase form so "The" and "the" share a count.
        key = rawWord.lower()
        # A word not seen before starts from 0, so its first occurrence stores 1.
        tally[key] = tally.get(key, 0) + 1
    return tally
def main():
    """Prompt for a file name, count the file's words and show the table."""
    print("\n" * 10)
    print("Given a file name, program will find unique words and their occurences!", end="\n\n")
    input("Press ENTER to start execution ... \n")

    # Keep asking until getLines hands back non-empty text.
    text = ""
    while not text:
        fileName = input("Enter the file name (RELATIVE ONLY and NOT ABSOLUTE): ")
        print("\n\n")
        text = getLines(fileName)

    # stripPunctuation turns punctuation into spaces, so split() yields the words.
    tokens = stripPunctuation(text).split()

    # Build the word -> frequency mapping and print it.
    display(prepareDictionary(tokens))


"""
Starting point
"""
# Called unconditionally at module level, so the program starts as soon as
# this file is executed.
main()
注意:要运行上面的代码,你还需要library.py,这个文件也在同一个github仓库里。
1
你的缩进有问题。第二个循环在第一个循环外面,所以它只处理了最后一行。(你可以考虑使用4个空格,这样更容易看出缩进)。你打印的方式也不对,你打印的是行号,而不是单词数量。
myDict = {}
linenum = 0
for line in f:
line = line.strip()
line = line.lower()
line = line.split()
linenum += 1
for word in line:
word = word.strip()
word = word.lower()
if not word in myDict:
myDict[word] = []
myDict[word].append(linenum)
print "%-15s %5s %s" %("Word", 'Count', "Line Numbers")
for key in sorted(myDict):
print '%-15s %5d: %s' % (key, len(myDict[key]), myDict[key])
示例输出:
Word Count Line Numbers
- 1: [6]
a 4: [2, 2, 3, 7]
about 1: [6]
alphabetical 1: [4]
编辑 修正了代码中的错误
1
你需要用 myDict[key] 来从字典中获取数据。因为这实际上是一个列表,所以如果你想要计算频率(也就是数量),你需要用 len(myDict[key])。
f = "HELLO HELLO HELLO WHAT ARE YOU DOING"
myDict = {}
linenum = 0
for word in f.split():
if not word in myDict:
myDict[word] = []
myDict[word].append(linenum)
print "%-15s %-15s" %("Word", "Frequency")
for key in sorted(myDict):
print '%-15s: %-15d' % (key, len(myDict[key]))
结果是:
Word Frequency
ARE : 1
DOING : 1
HELLO : 3
WHAT : 1
YOU : 1