解析文本、查找各章节，并将每一章写入单独的文件

import re

# Read the book once; the context manager closes the file even on error.
with open('dracula.txt', 'r') as infile:
    book = infile.readlines()

# The table of contents sits in a fixed slice of the file. Strip before
# filtering so whitespace-only lines are dropped too; otherwise split()
# below would return an empty list and indexing [-1] would raise.
toc_text_lines = [line.strip() for line in book[74:185] if line.strip()]

# A chapter entry in the table of contents ends with its page number.
chaptitles = [text for text in toc_text_lines if text.split()[-1].isdigit()]
print(chaptitles)

# In the body, a chapter heading is a line such as 'CHAPTER I.' that is
# immediately followed by a blank line.
heading_pattern = re.compile(r'CHAPTER\s+[IVXLCDM]+\.?$')

# Scan only the text after the table of contents so that the contents
# entries themselves are never mistaken for chapter headings.
body = book[185:]
chapters = {}   # heading -> list of lines belonging to that chapter
current = None  # heading of the chapter currently being collected
for index, line in enumerate(body):
    stripped = line.strip()
    # Peek at the next line instead of popping it, so no line is lost.
    next_is_blank = index + 1 < len(body) and not body[index + 1].strip()
    if heading_pattern.match(stripped) and next_is_blank:
        # Drop the trailing period so the file is not named 'CHAPTER I..txt'.
        current = stripped.rstrip('.')
        chapters[current] = []
    if current is not None:
        chapters[current].append(line)

# Write each chapter to its own file, named after its heading.
for heading, chapter_lines in chapters.items():
    with open("{}.txt".format(heading), 'w') as outfile:
        outfile.writelines(chapter_lines)

1条回答

网友

1楼 · 发布于 2024-05-29 03:25:35

我认为使用生成器（generator）会对你有帮助：假如某本电子书太大、无法一次全部读入内存，你现在的做法就会遇到问题。

你可以构造一条数据处理管道：首先在文件系统中查找文件（例如 ebook.txt）——要记住，所有函数都应尽可能通用；拿到文件名之后打开文件，每次产生一行；最后逐行扫描，查找“CHAPTER I”、“CHAPTER II”等章节标题。

import os
import re
import fnmatch

def find_files(pattern, path):
    """
    Lazily yield the full path of every file under *path* whose name
    matches the shell-style wildcard *pattern* (e.g. 'dracula*').

    The directory tree is walked recursively with os.walk, so matches
    in sub-directories are included. Taking a pattern avoids hardcoding
    a single file name such as 'dracula.txt'.
    """
    for dirpath, _subdirs, filenames in os.walk(path):
        matching = fnmatch.filter(filenames, pattern)
        yield from (os.path.join(dirpath, fname) for fname in matching)

def file_opener(filenames):
    """
    Open a sequence of filenames one at a time and yield each open
    text-mode file object.

    Only names ending in '.txt' are opened; any other name is skipped.
    Each file is closed as soon as the consumer asks for the next one,
    and also if the consumer stops iterating early or an error occurs,
    because the handle is managed by a ``with`` block.
    """
    for filename in filenames:
        # Skip non-text files outright. Falling through here would either
        # hit an unbound variable or re-yield the previous, closed handle.
        if not filename.endswith('.txt'):
            continue
        with open(filename, 'rt') as f:
            yield f

def chain_generators(iterators):
    """
    Flatten a sequence of iterables into a single stream.

    Every item of the first iterable is yielded, then every item of
    the second, and so on, in order.
    """
    for source in iterators:
        # Delegate to the inner iterable until it is exhausted.
        yield from source

def grep(pattern, lines):
    """
    Yield only those lines in which the regular expression *pattern*
    is found anywhere (re.search semantics), e.g. 'CHAPTER I.'.
    """
    regex = re.compile(pattern)
    yield from (candidate for candidate in lines if regex.search(candidate))

# A simple way to use these functions together

# Build the pipeline lazily: nothing is read until the final loop runs.
filenames = find_files('dracula*', 'Path/to/files')
files = file_opener(filenames)
lines = chain_generators(files)
# Escape the period: in a regular expression a bare '.' matches any
# character, so 'CHAPTER I.' would also match 'CHAPTER II', 'CHAPTER IV', ...
each_line = grep(r'CHAPTER I\.', lines)
for match in each_line:
    print(match)

您可以在这些实现的基础上继续扩展，以完成您想做的事情。

如果这对您有帮助，请告诉我。

相关问题更多 >

编程相关推荐

热门问题

热门文章