读取文本文件并查找关键词列表中的特定单词

# Count selected keywords across all text files named on the command line
# and write the combined totals to List_of_words.txt.
#
# Usage: python script.py file1.txt [file2.txt ...]
from sys import argv
from string import punctuation
from collections import Counter

keyWords = ['God', 'Nation', 'nation', 'USA', 'Creater', 'creater', 'Country', 'Almighty',
            'country', 'People', 'people', 'Liberty', 'liberty', 'America', 'Independence',
            'honor', 'brave', 'Freedom', 'freedom', 'Courage', 'courage', 'Proclamation',
            'proclamation', 'United States', 'Emancipation', 'emancipation', 'Constitution',
            'constitution', 'Government', 'Citizens', 'citizens']


def count_words(lines, phrases=()):
    """Return a Counter of the punctuation-stripped words found in lines.

    lines   -- iterable of text lines.
    phrases -- keywords to look for; only entries made of several words
               (e.g. 'United States') need special handling: they can never
               equal a single whitespace-delimited token, so they are counted
               as runs of consecutive tokens and stored under the phrase.
    """
    tokens = [word.strip(punctuation) for line in lines for word in line.split()]
    freq = Counter(tokens)
    for phrase in phrases:
        parts = phrase.split()
        if len(parts) < 2:
            continue  # single words are already in freq
        last_start = len(tokens) - len(parts)
        hits = sum(1 for i in range(last_start + 1)
                   if tokens[i:i + len(parts)] == parts)
        if hits:
            # Only store real hits so "phrase in freq" stays meaningful.
            freq[phrase] += hits
    return freq


def main(filenames):
    """Read every file in filenames and write the keyword totals to List_of_words.txt."""
    word_freq = Counter()

    for one_filename in filenames:
        print("Text file to import and read: " + one_filename)
        print("\nReading file...\n")

        # The with-block closes the file even if reading fails.
        with open(one_filename, 'r') as text_file:
            all_lines = text_file.readlines()

        print("I get: " + one_filename)
        print("\nFile read finished!")

        word_freq.update(count_words(all_lines, keyWords))

    # The output file is opened before anything is written to it, and only once.
    with open("List_of_words.txt", "w") as output_file:
        for word in keyWords:
            if word in word_freq:
                output_file.write("%s: %d\n" % (word, word_freq[word]))


if __name__ == "__main__":
    main(argv[1:])

import fileinput

# A line that begins with one of these markers is rewritten as the bare marker.
markers = (
    'Existing file that was read',
    'New file that was read',
)

# With inplace=True, whatever is printed inside the loop is written back to
# List_of_words.txt in place of the line that was just read.
for current in fileinput.input('List_of_words.txt', inplace=True):
    for marker in markers:
        if current.startswith(marker):
            print(marker)
            break
    else:
        # Every other line is kept, minus its surrounding whitespace.
        print(current.strip())

1条回答

网友

1楼 · 发布于 2024-04-26 15:08:14

这样你就可以在屏幕上看到结果了。

from sys import argv
from collections import Counter
from string import punctuation

# Expects exactly one command-line argument: the text file to analyse.
script, filename = argv

# Count every whitespace-separated word, ignoring leading/trailing punctuation.
# Iterating over the file object reads it line by line; the with-block closes
# the file even if reading fails.
with open(filename, 'r') as text_file:
    word_freq = Counter(
        word.strip(punctuation) for line in text_file for word in line.split()
    )

# Uncomment to dump every word together with its count:
# for word, count in word_freq.items():
#     print("%s %d" % (word, count))

# NOTE: every entry needs its own comma -- adjacent string literals are
# silently concatenated ('creater' 'Country' would become 'createrCountry').
key_words = ['God', 'Nation', 'nation', 'USA', 'Creater', 'creater',
             'Country', 'country', 'People', 'people', 'Liberty', 'liberty',
             'honor', 'brave', 'Freedom', 'freedom', 'Courage', 'courage']

for word in key_words:
    if word in word_freq:
        print("%s %d" % (word, word_freq[word]))

现在你必须把它保存在文件中。

要处理多个文件，请使用：

for filename in argv[1:]:
   # do your job: put the per-file work (open, count, report) here

编辑：

使用此代码（my_script.py）

from sys import argv

# argv[0] is the script itself; every remaining argument is a file name.
for filename in argv[1:]:
    # A single string argument prints the same text on Python 2 and Python 3.
    print("I get " + filename)

你可以运行脚本

python my_script.py file1.txt file2.txt file3.txt

然后得到

I get file1.txt 
I get file2.txt 
I get file3.txt

你可以用它来计算许多文件中的单词。

readlines() 会把所有行一次性读入内存，因此占用更多内存——对于非常大的文件，这可能会成为问题。

当前版本直接逐行遍历文件对象，Counter() 同样会统计所有行中的所有单词（可以自行测试验证），但占用的内存更少。
也就是说，使用 readlines() 会得到相同的 word_freq，只是会占用更多内存。

writelines(list_of_result) 不会在每一行后自动添加 “\n”，也不会在 “God: 3” 这样的结果中添加 “:”。

最好使用类似下面的写法：

# Write one "word: count" line for each keyword that occurs in the text.
# The with-block closes the file even if a write fails part-way through.
with open("List_of_words.txt", "w") as output_file:
    for word in key_words:
        if word in word_freq:
            output_file.write("%s: %d\n" % (word, word_freq[word]))

编辑：新版本——将结果追加到 List_of_words.txt 的末尾

# Count selected keywords in each text file named on the command line and
# append the counts for every file to the end of List_of_words.txt.
#
# Usage: python script.py file1.txt [file2.txt ...]
from sys import argv
from string import punctuation
from collections import Counter

keyWords = ['God', 'Nation', 'nation', 'USA', 'Creater', 'creater', 'Country', 'Almighty',
            'country', 'People', 'people', 'Liberty', 'liberty', 'America', 'Independence',
            'honor', 'brave', 'Freedom', 'freedom', 'Courage', 'courage', 'Proclamation',
            'proclamation', 'United States', 'Emancipation', 'emancipation', 'Constitution',
            'constitution', 'Government', 'Citizens', 'citizens']


def count_words(lines, phrases=()):
    """Return a Counter of the punctuation-stripped words found in lines.

    lines   -- iterable of text lines.
    phrases -- keywords to look for; only entries made of several words
               (e.g. 'United States') need special handling: they can never
               equal a single whitespace-delimited token, so they are counted
               as runs of consecutive tokens (line breaks do not interrupt a
               run) and stored under the phrase itself.
    """
    tokens = [word.strip(punctuation) for line in lines for word in line.split()]
    freq = Counter(tokens)
    for phrase in phrases:
        parts = phrase.split()
        if len(parts) < 2:
            continue  # single words are already in freq
        last_start = len(tokens) - len(parts)
        hits = sum(1 for i in range(last_start + 1)
                   if tokens[i:i + len(parts)] == parts)
        if hits:
            # Only store real hits so "phrase in freq" stays meaningful.
            freq[phrase] += hits
    return freq


def format_counts(word_freq, keywords):
    """Return one 'word: count\\n' line per keyword present in word_freq, in keyword order."""
    return ["%s: %d\n" % (word, word_freq[word])
            for word in keywords if word in word_freq]


def main(filenames):
    """Process each file in turn, appending its keyword counts to List_of_words.txt."""
    for one_filename in filenames:

        print("Text file to import and read: " + one_filename)
        print("\nReading file...\n")

        # The with-block closes the file even if reading fails.
        with open(one_filename, 'r') as text_file:
            all_lines = text_file.readlines()

        print("\nFile read finished!")

        word_freq = count_words(all_lines, keyWords)

        print("Append result to the end of file: List_of_words.txt")

        # Mode "a" keeps the results of the files processed earlier.
        with open("List_of_words.txt", "a") as output_file:
            output_file.writelines(format_counts(word_freq, keyWords))


if __name__ == "__main__":
    main(argv[1:])

编辑：将结果总和写入一个文件

# Count selected keywords across all text files named on the command line
# and write the summed totals to List_of_words.txt (overwriting it).
#
# Usage: python script.py file1.txt [file2.txt ...]
from sys import argv
from string import punctuation
from collections import Counter

keyWords = ['God', 'Nation', 'nation', 'USA', 'Creater', 'creater', 'Country', 'Almighty',
            'country', 'People', 'people', 'Liberty', 'liberty', 'America', 'Independence',
            'honor', 'brave', 'Freedom', 'freedom', 'Courage', 'courage', 'Proclamation',
            'proclamation', 'United States', 'Emancipation', 'emancipation', 'Constitution',
            'constitution', 'Government', 'Citizens', 'citizens']


def count_words(lines, phrases=()):
    """Return a Counter of the punctuation-stripped words found in lines.

    lines   -- iterable of text lines.
    phrases -- keywords to look for; only entries made of several words
               (e.g. 'United States') need special handling: they can never
               equal a single whitespace-delimited token, so they are counted
               as runs of consecutive tokens (line breaks do not interrupt a
               run) and stored under the phrase itself.
    """
    tokens = [word.strip(punctuation) for line in lines for word in line.split()]
    freq = Counter(tokens)
    for phrase in phrases:
        parts = phrase.split()
        if len(parts) < 2:
            continue  # single words are already in freq
        last_start = len(tokens) - len(parts)
        hits = sum(1 for i in range(last_start + 1)
                   if tokens[i:i + len(parts)] == parts)
        if hits:
            # Only store real hits so "phrase in freq" stays meaningful.
            freq[phrase] += hits
    return freq


def format_counts(word_freq, keywords):
    """Return one 'word: count\\n' line per keyword present in word_freq, in keyword order."""
    return ["%s: %d\n" % (word, word_freq[word])
            for word in keywords if word in word_freq]


def main(filenames):
    """Sum the keyword counts of all files and write them to List_of_words.txt."""
    word_freq = Counter()

    for one_filename in filenames:

        print("Text file to import and read: " + one_filename)
        print("\nReading file...\n")

        # The with-block closes the file even if reading fails.
        with open(one_filename, 'r') as text_file:
            all_lines = text_file.readlines()

        print("\nFile read finished!")

        # Counter.update() adds the new counts to the running totals.
        word_freq.update(count_words(all_lines, keyWords))

    print("Write sum of results: List_of_words.txt")

    with open("List_of_words.txt", "w") as output_file:
        output_file.writelines(format_counts(word_freq, keyWords))


if __name__ == "__main__":
    main(argv[1:])

相关问题更多 >

编程相关推荐

热门问题

热门文章