使用Python 2.7.6将多个网页打印为PDF

0 投票

1 回答

1207 浏览

提问于 2025-04-18 14:12

下面这个脚本的一部分原本是用来把单个网页打印成PDF文件并保存到本地的。现在我想让它把多个网页分别打印成各自对应的PDF文件。

但是当我运行这个脚本时，它只生成了一个文件。到底哪里出了问题呢？

from pyPdf import PdfFileWriter, PdfFileReader
import StringIO
from reportlab.pdfgen import canvas
from reportlab.lib.pagesizes import letter
from xhtml2pdf import pisa
from PyQt4.QtCore import *
from PyQt4.QtGui import * 
from PyQt4.QtWebKit import *
import os, sys


# Pages to render, mapping each URL to the base name of its output PDF.
# NOTE: the first entry used to be 'http://www.google', which has no
# top-level domain and therefore cannot be loaded.
LN = {'http://www.google.com':'Google',
      'http://www.smh.com.au/text/':'SMH',
      'http://www.ap.org/products-services/text':'AP'}

# Scratch file that QPrinter writes to. It is overwritten for every page, so
# the path is defined once rather than on each iteration.
tem_pdf = "c:\\tem_pdf.pdf"

# Qt expects exactly one QApplication per process. Building a new one on
# every pass of the loop is what kept the script from getting past the first
# page, so it is created once here and its event loop is re-entered per URL.
app = QApplication(sys.argv)

for url, file_name in LN.iteritems():

    web = QWebView()
    printer = QPrinter()
    #setting format
    printer.setPageSize(QPrinter.A4)
    printer.setOrientation(QPrinter.Landscape)#Landscape / Portrait
    printer.setOutputFormat(QPrinter.PdfFormat)
    #export file
    printer.setOutputFileName(tem_pdf)

    def convertIt():
        """Print the loaded page to the scratch PDF and leave the event loop."""
        # `web` and `printer` are looked up when the slot fires, which is
        # always inside the app.exec_() call of the current iteration.
        web.print_(printer)
        QApplication.exit()

    # Connect before starting the load so the signal cannot be missed.
    QObject.connect(web, SIGNAL("loadFinished(bool)"), convertIt)
    #Read the URL given
    web.load(QUrl(url))
    # Blocks until convertIt() calls QApplication.exit().
    app.exec_()
    # The original had a bare `sys.exit` here, an expression with no effect.
    # It is dropped rather than called, because calling it would terminate
    # the script after the first page.

    packet = StringIO.StringIO()
    # create a new PDF with Reportlab
    can = canvas.Canvas(packet, pagesize=letter)
    can.setFont("Helvetica", 9)
    can.save()

    #move to the beginning of the StringIO buffer
    packet.seek(0)
    new_pdf = PdfFileReader(packet)

    final_file = 'c:\\' + file_name + '.pdf'

    # read the existing PDF; the handle is closed when the block ends so the
    # scratch file is free to be overwritten by the next page.
    with open(tem_pdf, "rb") as existing_stream:
        existing_pdf = PdfFileReader(existing_stream)
        pages = existing_pdf.getNumPages()
        output = PdfFileWriter()
        # add the "watermark" (which is the new pdf) on the existing page
        for x in range(0,pages):
            page = existing_pdf.getPage(x)
            page.mergePage(new_pdf.getPage(0))
            output.addPage(page)

        # finally, write "output" to a real file. This stays inside the
        # outer `with` because the reader may still pull page data from the
        # scratch file while the writer serialises it.
        with open(final_file, "wb") as outputStream:
            output.write(outputStream)

    print('%s is ready.' % final_file)

脚本优化自动化处理 pdf生成网页打印

1 个回答

发现使用 pdfkit 可以满足多个PDF生成的需求，但这并不是对上面问题的回答。

更多选项可以在这里找到: http://madalgo.au.dk/~jakobt/wkhtmltoxdoc/wkhtmltopdf_0.10.0_rc2-doc.html

import pdfkit

# Pages to render, mapping each URL to the base name of its output PDF.
LN = {
    'http://www.google.com': 'Google',
    'http://www.smh.com.au/text/': 'SMH',
    'http://www.ap.org/products-services/text': 'AP',
}

for url, file_name in LN.iteritems():
    # Options handed to pdfkit for this page; 'quiet' is passed with an
    # empty value.
    options = {'quiet': ''}

    # Each page is written to its own PDF on the C: drive.
    target_pdf = 'c:\\' + file_name + '.pdf'
    pdfkit.from_url(url, target_pdf, options=options)

回答于 2025-04-18 由 Python大师

分享举报

使用Python 2.7.6将多个网页打印为PDF

1 个回答

撰写回答