在网页上搜索单词

#!/usr/bin/python # -*- coding: utf-8 -*- # Tested Python version: 2.7.12 # # Run "./script.py [inputfile.txt] [outputfile.txt]" # # Exit codes: # 1 - Python version not tested # 2 - Wrong number command-line arguments # 3 - Input file, with this name, does not exist # 4 - Output file, with this name, already exists # 5 - Problem with input file # 6 - Problem with output file import os, sys import urllib2, re # Check python version req_version = (2, 7) if not sys.version_info[:2] == req_version: print '...' print 'Not tested Python version (2.7).' print 'Your Python version: ', sys.version_info[:2] print '...' sys.exit(1) # Check command-line arguments if len(sys.argv) < 3: print '...' print 'Missing command-line argument(s).' print 'Argument list:', str(sys.argv) print '...' sys.exit(2) # Check if files exist if not os.path.exists(sys.argv[1]): print '...' print 'Input file %s was not found.' % sys.argv[1] print '...' sys.exit(3) if os.path.exists(sys.argv[2]): print '---' print 'Output file %s already exists.' % sys.argv[2] print '---' sys.exit(4) # Read input file line by line, make a list of URL-s and write the # results to output file inputfile = sys.argv[1] outputfile = sys.argv[2] print '---' print 'Reading input file %s ..' % inputfile print '---' results = [] try: with open(inputfile, 'r') as in_f: for line in in_f: url = line.strip().split(',')[0] word = line.strip().split(',')[1] site = urllib2.urlopen(url).read() print 'Found "%s" on "%s" ->' % (word, url) # matches = re.search(word) # if re.search(word, url): # if len(matches) == 0: if site.find(word) != -1: print 'YES' results.append('.'.join(url, word + ' YES'))) else: print 'NO' results.append('.'.join(url, word + ' NO'))) except: print 'Error reading the file' sys.exit(5) #if not inputfile.closed: # inputfile.close() print '>>>' + inputfile + ' closed: ' + inputfile.closed print '...' print 'Writing results to output file %s ..' % outputfile print '...' try: with open(outputfile, 'w'): for item in results: outputfile.write((results) + '\n') print '>>>' + outputfile.read() except: print 'Error writing to file' sys.exit(6) #if not outputfile.closed: # outputfile.close() print '>>>' + outputfile + ' closed: ' + outputfile.closed print '' print '>>> End of script <<<' print ''

1条回答

网友

1楼 · 发布于 2024-04-25 20:11:59

代码中的错误是由于在results列表中附加了错误数量的参数。你知道吗

results.append(url, word + ' YES')

可以编写为附加url、单词和由,分隔的结论的连接字符串：

results.append(','.join((url, word, 'YES')))

奖金：

可以在代码中更改的内容

以下代码块：

url = line.strip().split(',')[0]
word = line.strip().split(',')[1]

可以重写为：

url, word = line.strip().split(',')

从拆分行保存两次

以下块可以作为上下文管理器隐式关闭句柄文件来删除。你知道吗

if not inputfile.closed:
     inputfile.close()
print '>>>' + inputfile + ' closed: ' + inputfile.closed

以及

if not outputfile.closed:
     outputfile.close()
print '>>>' + outputfile + ' closed: ' + outputfile.closed

最后，输出\u f没有写入到。这是一个潜在的AttributeError调用对string的写操作。你知道吗

可以在代码中更改的内容

相关问题更多 >

编程相关推荐

热门问题

热门文章