Zipfile不压缩Python中的一些SVG文件

2024-05-15 22:10:02 发布

您现在位置:Python中文网/ 问答频道 /正文

我一直在编写一个脚本,把epub中的LaTeX代码转换为SVG图像。思路是:先把epub解压到一个临时目录,找到其中的LaTeX代码并生成SVG,把代码替换为指向SVG图像的链接,最后重新压缩所有内容。

除了最后的压缩步骤之外,一切正常。它压缩了除我新建的SVG之外的所有内容(我检查过,这些SVG确实位于临时解压的epub的Images文件夹中)。下面是一个最小的工作示例:

import zipfile
import os
import shutil

def create_minimal_uncompressed_epub(directory):
    """Build the unpacked skeleton of a minimal EPUB under ``directory``.

    An existing ``directory`` is removed first (removal errors are ignored),
    then the tree is recreated with a ``mimetype`` file, a
    ``META-INF/container.xml`` and an ``OEBPS`` folder holding the package
    document, both tables of contents, one content page and a stylesheet.
    """
    def emit(text, *relative_parts):
        # Write ``text`` to the file located at directory/relative_parts.
        with open(os.path.join(directory, *relative_parts), 'w') as handle:
            handle.write(text)

    # Drop any previous tree before recreating the target folder.
    if os.path.exists(directory):
        shutil.rmtree(directory, ignore_errors=True)
    os.makedirs(directory)

    emit('application/epub+zip', 'mimetype')

    for subfolder in ('META-INF', 'OEBPS'):
        os.makedirs(os.path.join(directory, subfolder))

    # Points the reading system at the package document inside OEBPS.
    container_markup = (
        '<?xml version="1.0"?>'
        '<container version="1.0" xmlns="urn:oasis:names:'
        'tc:opendocument:xmlns:container">'
        '<rootfiles>'
        '<rootfile full-path="OEBPS/content.opf" media-type='
        '"application/oebps-package+xml"/>'
        '</rootfiles>'
        '</container>'
    )
    emit(container_markup, 'META-INF', 'container.xml')

    # Package document: metadata, manifest of bundled files and the spine.
    package_markup = (
        '<?xml version="1.0" encoding="UTF-8" ?><package xmlns='
        '"http://www.idpf.org/2007/opf" xmlns:dc="http://purl.o'
        'rg/dc/elements/1.1/" unique-identifier="db-id" version'
        '="3.0"><metadata><dc:title id="t1">Title</dc:title><dc'
        ':identifier id="db-id">isbn</dc:identifier><meta   pro'
        'perty="dcterms:modified">2014-03-27T09:14:09Z</meta><d'
        'c:language>en</dc:language></metadata><manifest><item '
        'id="toc" properties="nav" href="toc.xhtml" media-type='
        '"application/xhtml+xml" /><item id="ncx" href="toc.ncx'
        '" media-type="application/x-dtbncx+xml" /><item id="te'
        'mplate_css" href="template.css" media-type="text/css" '
        '/><item id="hello" href="1_hello.xhtml" media-type="ap'
        'plication/xhtml+xml" /></manifest><spine toc="ncx"><it'
        'emref idref="hello" /></spine></package>'
    )
    emit(package_markup, 'OEBPS', 'content.opf')

    nav_markup = (
        '<?xml version="1.0" encoding="utf-8"?><html '
        'xmlns="http://www.w3.org/1999/xhtml" xmlns:epub="htt'
        'p://www.idpf.org/2007/ops"><head><title>toc.xhtml</t'
        'itle><link href="template.css" rel="stylesheet" type'
        '="text/css" /></head><body><nav id="toc" epub:type="'
        'toc"><h1 class="frontmatter">Table of Contents</h1><'
        'ol class="contents"><li><a href="1_hello.xhtml">Hell'
        'o</a></li></ol></nav></body></html>'
    )
    emit(nav_markup, 'OEBPS', 'toc.xhtml')

    ncx_markup = (
        '<?xml version="1.0" encoding="UTF-8" ?><ncx version="2005'
        '-1" xml:lang="en" xmlns="http://www.daisy.org/z3986/2005/'
        'ncx/"><head><meta name="dtb:uid" content="isbn"/><meta na'
        'me="dtb:depth" content="1"/></head><docTitle><text></text'
        '></docTitle><navMap><navPoint id="hello" playOrder="1"><n'
        'avLabel><text>cover</text></navLabel><content src="1_hell'
        'o.xhtml" /></navPoint></navMap></ncx>'
    )
    emit(ncx_markup, 'OEBPS', 'toc.ncx')

    page_markup = (
        '<?xml version="1.0" encoding="utf-8"?><html xmlns="http://www.w3.or'
        'g/1999/xhtml" xmlns:epub="http://www.idpf.org/2007/ops"><head><titl'
        'e>1_hello.xhtml</title><link href="template.css" rel="stylesheet" t'
        'ype="text/css" /></head><body><h1>Hello World!</h1></body></html> '
    )
    emit(page_markup, 'OEBPS', '1_hello.xhtml')

    emit('h1 {text-align: center;}', 'OEBPS', 'template.css')

def recursive_zip(zipf, directory, folder=None):
    """Add every file below ``directory`` to the open archive ``zipf``.

    Args:
        zipf: A ``zipfile.ZipFile`` opened for writing.
        directory: Path on disk whose contents are added recursively.
        folder: Archive path prefix for the added entries. When ``None``
            (the default) entries are stored relative to the archive root.

    Files are written with ``zipfile.ZIP_DEFLATED``. Directory entries that
    are neither regular files nor directories are skipped.
    """
    nodes = os.listdir(directory)
    # Debug trace of what was found; the call form runs on Python 2 and 3.
    print(nodes)
    for item in nodes:
        source_path = os.path.join(directory, item)
        # os.path.join rejects None, so only prefix when a folder was given.
        archive_path = item if folder is None else os.path.join(folder, item)
        if os.path.isfile(source_path):
            zipf.write(source_path, archive_path, zipfile.ZIP_DEFLATED)
        elif os.path.isdir(source_path):
            recursive_zip(zipf, source_path, archive_path)

def create_svg():
    """Return the placeholder SVG payload used by this example."""
    placeholder = 'code here\n'
    return placeholder

# Driver: build the unpacked EPUB tree, drop one generated SVG into OEBPS,
# then pack the whole tree into MINIMAL_EPUB.
TEMP_DIR = 'minimal_temp_dir'
SVG_FILENAME = 'minimal_svg_filename.svg'
MINIMAL_EPUB = 'minimal_epub.epub'
create_minimal_uncompressed_epub(TEMP_DIR)
# NOTE: this SVG ends up in the archive, but the content.opf written by
# create_minimal_uncompressed_epub has no manifest <item> for it.
with open(os.path.join(TEMP_DIR, 'OEBPS', SVG_FILENAME), 'w') as svgfile:
    svgfile.write(create_svg())
try:
    # The context manager closes the archive even if a write fails, so a
    # partially written file is never left open.
    with zipfile.ZipFile(MINIMAL_EPUB, 'w') as ZIPF:
        # mimetype goes in first and is stored uncompressed.
        ZIPF.write(os.path.join(TEMP_DIR, 'mimetype'), 'mimetype', zipfile.ZIP_STORED)
        for item in os.listdir(TEMP_DIR):
            if os.path.isdir(os.path.join(TEMP_DIR, item)):
                recursive_zip(ZIPF, os.path.join(TEMP_DIR, item), item)
except (IOError, OSError) as err:
    # Only file-system failures are reported here; programming errors
    # propagate instead of being silently swallowed.
    print('\nError compressing file')
    print(err)

函数recursive_zip实际上找到了每一个文件(注意其中的“print nodes”)。我不知道为什么svg文件会丢失,运行时没有任何报错。svg文件确实在临时文件夹中,但我用Sigil打开最终压缩好的文件时,它却不在其中。


Tags: path id data os xml epub item css
1条回答
网友
1楼 · 发布于 2024-05-15 22:10:02

我终于知道发生了什么。文件content.opf中有一个名为manifest的标签,里面列出了出版物包含的文件(包括图像)。根据International Digital Publishing Forum的规范:

The required manifest must provide a list of all the files that are part of the publication (e.g. Content Documents, style sheets, image files, any embedded font files, any included schemas).

因此,SVG文件实际上已经被压缩并包含在zip文件中;但是把它重命名为.epub并用Sigil打开后,SVG图像不会显示,因为它们没有被列在content.opf的manifest中。

相关问题 更多 >