稳定扩散管始终输出512*512的图像,无论输入分辨率如何
我正在制作一个修复图像的应用程序,结果快要达到了,但有个问题就是无论我输入什么分辨率,最后输出的图像都是512*512的大小。我是在CPU上运行这个程序的,使用的是经过onnx转换的、适合AMD的稳定扩散版本。
以下是我认为相关的代码:
class CustomDiffuser:
def __init__(self, provider:Literal['CPUExecutionProvider', 'DmlExecutionProvider']='CPUExecutionProvider'):
self.pipe_text2image = None
self.pipe_inpaint = None
self.image = None
self.sam = None
self.provider = provider
def load_model_for_inpainting(
self,
path: str = '../stable_diffusion_onnx_inpainting',
safety_checker=None
):
self.pipe_inpaint = OnnxStableDiffusionInpaintPipeline.from_pretrained(path, provider=self.provider, revision='onnx', safety_checker=safety_checker)
def inpaint_with_prompt(
self,
image: cv2.typing.MatLike | Image.Image,
mask: cv2.typing.MatLike | Image.Image,
height: int,
width: int,
prompt: str = '',
negative: str = '',
steps: int = 10,
cfg: float = 7.5,
noise: float = 0.75
):
pipe = self.pipe_inpaint
image = image.resize((width, height))
mask = mask.resize((width, height))
output_image = pipe(
prompt,
image,
mask,
#strength=noise,
guidance_scale=cfg
)
return output_image
diffuser = CustomDiffuser('CPUExecutionProvider')
diffuser.load_model_for_inpainting('C:/path/to/repository/stable_diffusion_onnx_inpainting')
output = diffuser.inpaint_with_prompt(
Image.open(image_path),
Image.fromarray(headless_selfie_mask.astype(np.uint8)),
576, #height first
384,
'a picture of a man dressed in a darth vader costume, full body shot, front view, light saber',
''
)