Python 3.2 文件I/O:如何根据预设大小限制生成和拆分文件?
我正在写一段代码,用来生成一组字符的所有排列组合。虽然我的代码在Python 2.7中能正常工作,但在Python 3.x中就不行了,因为字符串有很多变化。我想对代码进行调整,但我对Python还不太熟悉,希望你能给我一些帮助。=)
我的问题是,当Python生成你选择的单词列表时,输出文件的大小也会相应增加。我想知道怎么让这个脚本检查文件大小,如果达到预设的大小,就打开一个新文件,继续写入排列组合。
举个例子:
numeric_lowercase.000001
numeric_lowercase.000002
numeric_lowercase.000003
请记住,我看过网站上大部分的例子,但它们在Python 3.2中都不适用。
这是我目前在Python 3.2中能正常工作的代码:
import itertools
import subprocess
import os
from string import digits, ascii_lowercase, ascii_uppercase, punctuation
def clear_console():
    """Clear the terminal window using the platform's shell command.

    Runs ``cls`` when ``os.name`` is ``'nt'`` and ``clear`` otherwise.
    The command's exit status is ignored and nothing is returned.
    """
    shell_command = "cls" if os.name == 'nt' else "clear"
    # Run through the shell, exactly as the original platform-specific
    # definitions did.
    subprocess.call(shell_command, shell=True)
def generate_phone_numbers(area_code, filename='phones.txt',
                           start=2010000, stop=9999999):
    """Write one phone number per line for the given area code.

    Each line is ``area_code`` followed by a subscriber number taken from
    ``range(start, stop)`` and a newline.

    Args:
        area_code: String prepended to every generated number.
        filename: Path of the output file; it is overwritten if it exists.
        start: First subscriber number to emit (inclusive).
        stop: End of the subscriber range (exclusive).
    """
    # NOTE(review): ``stop`` is exclusive, so with the defaults 9999999 itself
    # is never written -- confirm whether that is intended.
    # The ``with`` block guarantees the file is flushed and closed even if
    # writing fails part-way through.
    with open(filename, 'w') as out:
        out.writelines(area_code + str(number) + '\n'
                       for number in range(start, stop))
def generate_wordlist(lst_chars, min_digit, max_digit, lst_name):
    """Write every word over ``lst_chars`` with a length in the given range.

    For each length from ``min_digit`` to ``max_digit`` (both inclusive) the
    full Cartesian product of ``lst_chars`` is written, one word per line,
    in the order produced by ``itertools.product``.

    Args:
        lst_chars: Iterable of single-character strings to combine.
        min_digit: Shortest word length to generate.
        max_digit: Longest word length to generate.
        lst_name: Path of the output file; it is overwritten if it exists.
    """
    # The ``with`` block guarantees the file is flushed and closed even if
    # generation is interrupted.
    with open(lst_name, 'w') as out:
        for word_length in range(min_digit, max_digit + 1):
            # Stream the words lazily; the product is never held in memory.
            out.writelines(
                ''.join(letters) + '\n'
                for letters in itertools.product(lst_chars, repeat=word_length)
            )
# Interactive menu: ask which list to build, gather its parameters, and call
# the matching generator.

# Word-list options: menu number -> (character set, output file, label used
# in the progress message).
WORDLIST_OPTIONS = {
    2: (digits,
        'numeric.txt',
        'numbers'),
    3: (digits + ascii_lowercase,
        'numeric_lowercase.txt',
        'numbers & lowercase'),
    4: (digits + ascii_lowercase + ascii_uppercase,
        'numeric_lowercase_uppercase.txt',
        'numbers, lowercase & uppercase'),
    # Option 5 previously used ``punctuation`` alone, contradicting both its
    # menu label and its file name.
    5: (digits + ascii_lowercase + ascii_uppercase + punctuation,
        'numeric_lowercase_uppercase_punctuation.txt',
        'numbers, lowercase, uppercase & punctuation'),
}

print('')
print(' wgen - Menu')
choice = 0
while choice not in range(1, 6):
    clear_console()
    answer = input('''
1. Phone numbers for a given area code.
2. Numbers.
3. Numbers + Lowercase.
4. Numbers + Lowercase + Uppercase.
5. Numbers + Lowercase + Uppercase + Punctuation.
Enter Option: ''')
    try:
        choice = int(answer)
    except ValueError:
        # Non-numeric input: show the menu again instead of crashing.
        choice = 0
print('')

if choice == 1:
    area_code = input('''
Please enter Area Code: ''')
    area_code = str(area_code).strip()
    if len(area_code) == 3:
        print('')
        print(' Generating phone numbers for area code ' + area_code + '.')
        print(' Please wait...')
        generate_phone_numbers(area_code)
    else:
        # Previously an invalid area code was ignored without any feedback.
        print(' Area code must be exactly 3 characters; nothing generated.')
elif choice in WORDLIST_OPTIONS:
    chars, lst_name, label = WORDLIST_OPTIONS[choice]
    min_digit = int(input(' What is the minimum size of the word? '))
    print('')
    max_digit = int(input(' What is the maximum size of the word? '))
    print('')
    print(' Generating ' + label + ' between ' + str(min_digit) + ' and '
          + str(max_digit) + ' digits.')
    print(' Please wait...')
    generate_wordlist(chars, min_digit, max_digit, lst_name)
2 个回答
2
只需要稍微配置一下,你就可以使用标准库中内置的RotatingFileHandler来记录日志。
import logging
from logging.handlers import RotatingFileHandler
# Dedicated logger used purely as a size-limited, rotating output file.
log = logging.getLogger('myprog.stufflogger')
log.propagate = False  # ensure that we don't mess with other logging
# Without an explicit level the logger inherits the root logger's level
# (WARNING by default), which would silently drop the log.info() call below.
log.setLevel(logging.INFO)

# configure RotatingFileHandler
# backupCount must be non-zero: with the default of 0 the handler never rolls
# over and the base file grows without limit. Rolled-over files are renamed
# base_file_name.txt.1, .2, ...; files beyond backupCount are deleted, so
# raise it if every chunk must be kept.
handler = RotatingFileHandler('base_file_name.txt', maxBytes=1024*1024*20,
                              backupCount=99)
handler.setFormatter(logging.Formatter('%(message)s'))
handler.terminator = ''  # default is new line
log.addHandler(handler)

# you can now use any of the log methods to add the values
log.info('stuff')
2
我觉得最好的办法是写一个类,让它像文件一样工作,但能分块处理。你的程序就像往普通文件里写东西一样,往这个对象里写。
下面的实现不会把字符串拆分开(如果你调用 f.write("this is a test"),整条内容保证会写入同一个文件),只有在当前文件已经超过限制时才会开始一个新文件,所以每个文件的大小会比分块大小稍微大一些。这种行为全部在 write() 方法里实现,如果需要的话可以进行修改。
class chunkyfile(object):
    """File-like writer that spreads its output over numbered chunk files.

    Chunk files are named ``filename + zero-padded chunk number + extension``
    and are opened lazily on the first write. A single ``write()`` call is
    never split: a new chunk is started only when the current file's position
    already exceeds ``chunksize`` before the write, so a chunk can end up
    somewhat larger than ``chunksize``.

    Args:
        filename: Prefix shared by all chunk file names.
        chunksize: File position above which the next write starts a new
            chunk.
        mode: Mode passed to ``open()`` for every chunk.
        encoding: Encoding passed to ``open()`` for every chunk.
        extension: Suffix appended after the chunk number; a leading dot is
            added if it is missing.
        start: Number given to the first chunk.
        digits: Minimum width of the zero-padded chunk number.
    """

    def __init__(self, filename, chunksize=1000000, mode="w", encoding=None,
                 extension="", start=0, digits=6):
        self.filename = filename
        self.chunksize = chunksize
        self.chunkno = start
        self.file = None
        self.mode = mode
        self.encoding = encoding
        self.digits = digits
        # Normalise so that both "txt" and ".txt" yield ".txt"; an empty
        # extension stays empty.
        if extension and not extension.startswith("."):
            extension = "." + extension
        self.extension = extension
        self.softspace = 0  # for use with print

    def _nextfile(self):
        """Close the current chunk, if any, and open the next numbered one."""
        if self.file is not None:
            self.file.close()
            self.file = None
        chunk_name = (self.filename +
                      str(self.chunkno).rjust(self.digits, "0") +
                      self.extension)
        self.file = open(chunk_name, mode=self.mode, encoding=self.encoding)
        self.chunkno += 1

    def write(self, text):
        """Write ``text`` to the current chunk, rolling over first if needed."""
        # The size check happens before the write, so ``text`` always lands
        # in one file in its entirety.
        if self.file is not None and self.file.tell() > self.chunksize:
            self.close()
        if self.file is None:
            self._nextfile()
        self.file.write(text)

    # convenience method, equivalent to print(... file=f)
    # requires Python 3.x or from __future__ import print_function in Py2
    def print(self, *objects, sep=" ", end="\n", flush=False):
        """Print ``objects`` into the chunked output, like ``print(file=f)``."""
        # Inside the method body ``print`` resolves to the built-in, not to
        # this method. Flushing is done by hand because the built-in only
        # accepts ``flush=`` from Python 3.3 onwards.
        print(*objects, sep=sep, end=end, file=self)
        if flush:
            self.flush()

    def writelines(self, lines):
        """Write each item of ``lines`` through ``write()``."""
        # do it a line at a time in case we need to split
        for line in lines:
            self.write(line)

    def flush(self):
        """Flush the current chunk file, if one is open."""
        if self.file is not None:
            self.file.flush()

    def close(self):
        """Close the current chunk file, if one is open."""
        if self.file is not None:
            self.file.close()
            self.file = None

    # support "with" statement
    def __enter__(self):
        return self

    def __exit__(self, e, value, tb):
        self.close()
# Demo: push several short messages through a chunkyfile with a 10-unit
# chunk size.
messages = (
    "FINALLY ROBOTIC BEINGS RULE THE WORLD",
    "The humans are dead",
    "The humans are dead",
    "We used poisonous gasses",
    "And we poisoned their asses",
)
with chunkyfile(r"C:\test", 10, extension="txt", encoding="utf8") as f:
    for message in messages:
        f.write(message)