使用Python在PDF中查找、替换和调整图像
我有一段 Python 代码，可以在 PDF 文件中找到并替换图片，但在调整新图片的大小时遇到了困难。目前这段代码会先找到旧图片，然后按旧图片的大小和位置放置新图片。我希望能够自由修改新图片的高度和宽度，同时仍把它放在旧图片原来的位置上。我的目标是批量替换多个 PDF 文件中的同一个 logo。谢谢大家的建议。
from pikepdf import Pdf, PdfImage, Name
from PIL import Image
import zlib
import os
# Folder that is scanned for the PDF files to modify
input_folder = r"C:\input folder"
# Folder that receives the modified PDF files
output_folder = r"C:\output folder"
# Image file used as the replacement image
replacement_image_path = r"C:\new image to replace"
def replace_images_in_pdf(input_pdf_path, output_pdf_path, image_path):
    """Overwrite the first image found in a PDF with the image at *image_path*.

    Pages are scanned in order. On the first page that contains any image,
    the first image listed is overwritten in place and scanning stops; all
    other images are left alone. The replacement is resampled to the pixel
    dimensions of the image it replaces. Only the image object is rewritten;
    the page content is not modified by this function.

    Args:
        input_pdf_path: Path of the PDF to read.
        output_pdf_path: Path the modified PDF is saved to. It may be the
            same as *input_pdf_path* because the file is opened with
            ``allow_overwriting_input=True``.
        image_path: Path of the replacement image (any format Pillow opens).
    """
    # Context managers release both files even if a step below raises.
    with Pdf.open(input_pdf_path, allow_overwriting_input=True) as pdf:
        with Image.open(image_path) as source_image:
            # The stream is declared as 8-bit /DeviceRGB below, so the pixel
            # buffer must hold exactly three bytes per pixel. Without this
            # conversion an RGBA, palette or greyscale logo is corrupted.
            replacement_image = source_image.convert("RGB")

        for page in pdf.pages:
            image_keys = list(page.images.keys())
            if not image_keys:
                continue  # no image on this page; try the next one

            pdf_image = PdfImage(page.images[image_keys[0]])
            raw_image = pdf_image.obj
            # Dimensions come from the image dictionary, so the original
            # image never has to be decoded just to learn its size.
            width, height = pdf_image.width, pdf_image.height

            resized = replacement_image.resize((width, height))
            raw_image.write(
                zlib.compress(resized.tobytes()),
                filter=Name("/FlateDecode"),
            )
            raw_image.ColorSpace = Name("/DeviceRGB")
            # Keep the bit depth consistent with the 8-bit RGB data written.
            raw_image.BitsPerComponent = 8
            raw_image.Width, raw_image.Height = width, height
            # NOTE(review): any /SMask or /Decode entry of the original image
            # is left in place - confirm that suits the logos being replaced.
            break  # only the first image is replaced

        pdf.save(output_pdf_path)
def process_folder(input_folder, output_folder, replacement_image_path):
    """Run ``replace_images_in_pdf`` on every PDF directly inside a folder.

    Args:
        input_folder: Folder whose entries are scanned (not recursive).
            Entries whose name ends in ``.pdf``, case-insensitively, are
            processed; everything else is skipped.
        output_folder: Folder the modified PDFs are written to, under the
            same file names. It is created if it does not exist.
        replacement_image_path: Path of the replacement image passed on to
            ``replace_images_in_pdf``.
    """
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(output_folder, exist_ok=True)

    for input_file in os.listdir(input_folder):
        if not input_file.lower().endswith('.pdf'):
            continue
        input_pdf_path = os.path.join(input_folder, input_file)
        output_pdf_path = os.path.join(output_folder, input_file)
        replace_images_in_pdf(input_pdf_path, output_pdf_path, replacement_image_path)
# Start the batch replacement with the module-level paths
process_folder(
    input_folder=input_folder,
    output_folder=output_folder,
    replacement_image_path=replacement_image_path,
)
1 个回答
1
借鉴 KJ 的想法，使用 PyMuPDF：先用涂黑（redact）注释标记并移除原图像，再把图像重新插入到新的矩形区域，从而调整它的显示大小：
# Assumption: we already know which image we want to deal with.
# For example:
import fitz  # PyMuPDF

doc = fitz.open("input.pdf")
page = doc[0]
# List the images on the page. Output shown in the original session:
#   [(3, 0, 1280, 720, 8, 'DeviceRGB', '', 'Img3', 'DCTDecode')]
print(page.get_images())
# We have an image at xref 3.
# Remove it and re-insert it in a different rectangle, 90° rotated.
img_bbox = page.get_image_rects(3)[0]  # old display location
img = doc.extract_image(3)  # extract image
# Mark the image area for removal. The original session echoed:
#   'Redact' annotation on page 0 of landscape.pdf
page.add_redact_annot(img_bbox)
# Remove the image
page.apply_redactions(images=fitz.PDF_REDACT_IMAGE_REMOVE)
# Re-insert in the new rectangle, rotated 90° anti-clockwise
page.insert_image((100, 100, 400, 500), stream=img["image"], rotate=90)
# Save at max compression to a new file
doc.ez_save("output.pdf")