How do I keep audio and video in sync when building clip-recording software with Python?

0 votes
0 answers
17 views
Asked 2025-04-12 08:18
import cv2
import numpy as np
import pyautogui
import pyaudio
import wave
import os
from moviepy.editor import VideoClip, AudioFileClip
from collections import deque
from keyboard import is_pressed
from datetime import datetime

# Constants
CLIP_DURATION = 120  # Total duration of the clip in seconds
FPS = 30  # Frames per second
SHORTCUT_KEY = "ctrl+shift+c"  # Keyboard shortcut to start/stop recording
AUDIO_CHANNELS = 2  # Stereo audio
AUDIO_SAMPLE_RATE = 44100  # Sample rate in Hz
AUDIO_BUFFER_SIZE = 2048  # Adjust buffer size based on audio distortion

def on_shortcut_press():
    # Unused helper: nothing registers this callback, and save_clip() needs the
    # frame/audio buffers from main(), so calling it without arguments would raise
    # a TypeError. The shortcut is actually checked inside the main loop below.
    if is_pressed(SHORTCUT_KEY):
        save_clip()
        print(f"Recording stopped at {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")

def save_clip(frames, audio_frames, start_time):
    print("Saving clip...")
    
    if len(frames) < FPS * 2:  # Check if at least 2 seconds of frames are available
        print("Insufficient frames collected. Aborting clip save.")
        return

    # A PyAudio instance is only needed here to look up the sample width for the
    # WAV header; there is no need to open a second input stream while the
    # capture stream in main() is still running.
    audio = pyaudio.PyAudio()

    # Write frames to video (clamp the index so t == duration cannot read past the end)
    frame_list = list(frames)
    video_clip = VideoClip(make_frame=lambda t: frame_list[min(int(t * FPS), len(frame_list) - 1)],
                           duration=len(frame_list) / FPS)
    video_name = f"clip_{start_time.year}-{start_time.month}-{start_time.day}_{start_time.hour}-{start_time.minute}-{start_time.second}.mp4"
    video_clip.write_videofile(video_name, fps=FPS, codec='libx264')

    # Write audio to file
    audio_filename = f"audio_{start_time.year}-{start_time.month}-{start_time.day}_{start_time.hour}-{start_time.minute}-{start_time.second}.wav"
    with wave.open(audio_filename, "wb") as wavefile:
        wavefile.setnchannels(AUDIO_CHANNELS)
        wavefile.setsampwidth(audio.get_sample_size(pyaudio.paInt16))
        wavefile.setframerate(AUDIO_SAMPLE_RATE)
        audio_frames_data = b''.join(chunk for chunk, _timestamp in audio_frames)
        wavefile.writeframes(audio_frames_data)

    # Combine audio with video
    final_video_clip = video_clip.set_audio(AudioFileClip(audio_filename))
    final_clip_name = f"final_clip_{start_time.year}-{start_time.month}-{start_time.day}_{start_time.hour}-{start_time.minute}-{start_time.second}.mp4"
    final_video_clip.write_videofile(final_clip_name, codec='libx264', audio_codec='aac', fps=FPS)
    print(f"Final clip saved as {final_clip_name}")

    # Clean up the intermediate files and the temporary PyAudio instance
    os.remove(video_name)
    os.remove(audio_filename)
    audio.terminate()

def main(audio_stream):
    print("Starting main loop...")
    frames = deque(maxlen=CLIP_DURATION * FPS)
    audio_frames = deque(maxlen=int(CLIP_DURATION * AUDIO_SAMPLE_RATE / AUDIO_BUFFER_SIZE))
    start_time = datetime.now()  # Reference time for the start of recording

    while True:
        # Capture video frame (pyautogui already returns RGB, which is what
        # moviepy expects, so no BGR/RGB conversion is needed here)
        frame = np.array(pyautogui.screenshot())

        # Capture audio frame
        audio_data = audio_stream.read(AUDIO_BUFFER_SIZE, exception_on_overflow=False)

        # Timestamps (currently informational only; nothing downstream uses them)
        frame_timestamp = datetime.now() - start_time
        audio_timestamp = len(audio_frames) / (AUDIO_SAMPLE_RATE / AUDIO_BUFFER_SIZE)

        # Append frames to queues
        frames.append(frame)
        audio_frames.append((audio_data, audio_timestamp))

        # Check if it's time to save the clip
        if (datetime.now() - start_time).total_seconds() >= CLIP_DURATION or is_pressed(SHORTCUT_KEY):
            save_clip(frames, audio_frames, start_time)
            # Clear the buffers for the next clip and restart the capture stream
            frames.clear()
            audio_frames.clear()
            start_time = datetime.now()
            # Close the old stream before opening a fresh one ("audio" here is
            # the global PyAudio instance created in the __main__ block)
            audio_stream.stop_stream()
            audio_stream.close()
            audio_stream = audio.open(format=pyaudio.paInt16,
                                      channels=AUDIO_CHANNELS,
                                      rate=AUDIO_SAMPLE_RATE,
                                      input=True,
                                      frames_per_buffer=AUDIO_BUFFER_SIZE)

if __name__ == "__main__":
    audio = pyaudio.PyAudio()
    audio_stream = audio.open(format=pyaudio.paInt16,
                              channels=AUDIO_CHANNELS,
                              rate=AUDIO_SAMPLE_RATE,
                              input=True,
                              frames_per_buffer=AUDIO_BUFFER_SIZE)
    main(audio_stream)

The above is my current code. It runs fine, but the video and audio are out of sync: the video runs much faster than the audio. Any help would be appreciated.
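One likely source of this kind of drift is that pyautogui.screenshot() rarely keeps up with 30 FPS, so the loop collects fewer than FPS frames per second, yet write_videofile plays them back at a fixed 30 FPS, which makes the video run faster than real time and therefore faster than the audio. Below is a minimal sketch for measuring the real capture rate and writing a test clip at that measured rate instead; measured_fps, capture_times and rate_check.mp4 are illustrative names, not part of the original code.

import time

import numpy as np
import pyautogui
from moviepy.editor import VideoClip

FPS = 30  # the rate the original code assumes

# Capture for a few seconds and time each screenshot to see the real rate.
frames = []
capture_times = []
t_end = time.time() + 5  # sample for roughly five seconds
while time.time() < t_end:
    capture_times.append(time.time())
    frames.append(np.array(pyautogui.screenshot()))

measured_fps = len(frames) / (capture_times[-1] - capture_times[0])
print(f"Requested FPS: {FPS}, actually captured: {measured_fps:.1f} FPS")

# Write the sample at the measured rate so its duration matches wall-clock time.
clip = VideoClip(
    make_frame=lambda t: frames[min(int(t * measured_fps), len(frames) - 1)],
    duration=len(frames) / measured_fps,
)
clip.write_videofile("rate_check.mp4", fps=max(1, int(round(measured_fps))), codec="libx264")

If measured_fps comes out well below 30, feeding that measured rate (or per-frame timestamps) into the clip-writing step usually removes most of the drift.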

I am trying to build a clip-recording tool and hoped it would just work. It does run, but the audio and video are out of sync. I know that if I change the buffer size to anything other than 2048 the sound becomes badly distorted, but beyond that I really don't know what else to try.
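On the buffer-size point: because the audio is read in the same loop as the screen capture, any screenshot that takes longer than one buffer (2048 samples is roughly 46 ms at 44100 Hz) leaves the input stream unread, and with exception_on_overflow=False the overflowed samples can be silently dropped, which shows up as crackling or gaps. One way around this is PyAudio's callback mode, where the library delivers each full buffer from its own thread regardless of what the video loop is doing. A rough sketch of that pattern under the same constants; audio_chunks, chunks_lock and audio_callback are illustrative names, not part of the original code.

import threading
import time

import pyaudio

AUDIO_CHANNELS = 2
AUDIO_SAMPLE_RATE = 44100
AUDIO_BUFFER_SIZE = 2048

audio_chunks = []              # filled from PyAudio's callback thread
chunks_lock = threading.Lock()

def audio_callback(in_data, frame_count, time_info, status):
    # PyAudio calls this from its own thread for every full buffer,
    # no matter how long the video loop spends on a screenshot.
    with chunks_lock:
        audio_chunks.append(in_data)
    return (None, pyaudio.paContinue)

pa = pyaudio.PyAudio()
stream = pa.open(format=pyaudio.paInt16,
                 channels=AUDIO_CHANNELS,
                 rate=AUDIO_SAMPLE_RATE,
                 input=True,
                 frames_per_buffer=AUDIO_BUFFER_SIZE,
                 stream_callback=audio_callback)
stream.start_stream()

# The screen-capture loop would run here; for this sketch just record 5 seconds.
time.sleep(5)

stream.stop_stream()
stream.close()
pa.terminate()

with chunks_lock:
    recorded = b"".join(audio_chunks)  # raw int16 PCM, ready for wave.writeframes()

With this pattern the main loop only drains audio_chunks when it saves a clip, and len(audio_chunks) * AUDIO_BUFFER_SIZE / AUDIO_SAMPLE_RATE gives the elapsed audio time in seconds, which can serve as the reference clock when trimming the video to match.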

0 Answers

No answers yet
