How can I quantify the difference between frames using optical flow estimation?

Posted 2024-04-23 16:32:23


Below is code that computes dense optical flow for a stabilized video (no camera movement) and saves the output as a set of frames:

import cv2 as cv
import numpy as np

# The video feed is read in as a VideoCapture object
cap = cv.VideoCapture("2_stable_video.avi")

# ret = a boolean return value from getting the frame, first_frame = the first frame in the entire video sequence
ret, first_frame = cap.read()

# Converts frame to grayscale because we only need the luminance channel for estimating motion - less computationally expensive

prev_gray = cv.cvtColor(first_frame, cv.COLOR_BGR2GRAY)

# Creates an image filled with zero intensities with the same dimensions as the frame
mask = np.zeros_like(first_frame)

# Sets image saturation to maximum
mask[..., 1] = 255

count = 0
while(cap.isOpened()):
    # ret = a boolean return value from getting the frame, frame = the current frame being projected in the video
    ret, frame = cap.read()

    # Stop when the video ends and no more frames are returned
    if not ret:
        break

    # Opens a new window and displays the input frame
    cv.imshow("input", frame)

    # Converts each frame to grayscale - we previously only converted the first frame to grayscale
    gray = cv.cvtColor(frame, cv.COLOR_BGR2GRAY)
    # Calculates dense optical flow by Farneback method
    flow = cv.calcOpticalFlowFarneback(prev_gray, gray, None, 0.5, 3, 15, 3, 5, 1.2, 0)

    # Computes the magnitude and angle of the 2D vectors
    magnitude, angle = cv.cartToPolar(flow[..., 0], flow[..., 1])

    # Sets image hue according to the optical flow direction
    mask[..., 0] = angle * 180 / np.pi / 2

    # Sets image value according to the optical flow magnitude (normalized)
    mask[..., 2] = cv.normalize(magnitude, None, 0, 255, cv.NORM_MINMAX)

    # Converts HSV to RGB (BGR) color representation
    rgb = cv.cvtColor(mask, cv.COLOR_HSV2BGR)

    # Opens a new window and displays the output frame
    cv.imshow("dense optical flow", rgb[40:150,120:220])
    cv.imwrite("frames_modified_2/%d.png" % count, rgb[40:150,120:220])
    count +=1

    # Updates previous frame
    prev_gray = gray

    # Frames are displayed with a 1 millisecond delay; the program breaks out of the while loop when the user presses the 'q' key
    if cv.waitKey(1) & 0xFF == ord('q'):
        break

cap.release()
cv.destroyAllWindows()

Can anyone suggest how to quantify the difference between frames, i.e. estimate the velocity/speed of the motion?
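
For example, one possible way to collapse the flow field into a single number per frame (a minimal sketch, not part of the code above; frame_motion_stats, the fps default, and metres_per_pixel are my own placeholders) would be:

import numpy as np

def frame_motion_stats(flow, fps=30.0, metres_per_pixel=None):
    # Per-pixel displacement magnitude in pixels per frame
    magnitude = np.sqrt(flow[..., 0]**2 + flow[..., 1]**2)
    mean_px_per_frame = float(np.mean(magnitude))
    # A speed estimate needs the frame rate and a real-world scale (both assumptions here)
    speed = mean_px_per_frame * fps * metres_per_pixel if metres_per_pixel else None
    return mean_px_per_frame, speed

# Inside the while loop above, after `flow` is computed:
# motion_per_frame.append(frame_motion_stats(flow, fps=cap.get(cv.CAP_PROP_FPS))[0])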


1 answer
User
#1 · Posted 2024-04-23 16:32:23

Here is an example of getting the pixel translation magnitude from .bsq frames. You can modify the code to take a video file as input instead (a sketch of that adaptation follows the code below). You are probably most interested in the get_translation() function. For example:

[image]

A graph displaying the pixel translation from frame to frame:

[image]

Code:

import numpy as np
import argparse
import os
import cv2
from matplotlib import pyplot as plt
from matplotlib import cm
import time
import random

# Usage: python translate_analyzer.py -p <filename.bsq>

# Automatic brightness and contrast optimization with optional histogram clipping
def automatic_brightness_and_contrast(image, clip_hist_percent=25):
    if len(image.shape) == 3:
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    else:
        gray = image

    # Calculate grayscale histogram
    hist = cv2.calcHist([gray],[0],None,[256],[0,256])
    hist_size = len(hist)

    # Calculate cumulative distribution from the histogram
    accumulator = []
    accumulator.append(float(hist[0]))
    for index in range(1, hist_size):
        accumulator.append(accumulator[index -1] + float(hist[index]))

    # Locate points to clip
    maximum = accumulator[-1]
    clip_hist_percent *= (maximum/100.0)
    clip_hist_percent /= 2.0

    # Locate left cut
    minimum_gray = 0
    while accumulator[minimum_gray] < clip_hist_percent:
        minimum_gray += 1

    # Locate right cut
    maximum_gray = hist_size -1
    while accumulator[maximum_gray] >= (maximum - clip_hist_percent):
        maximum_gray -= 1

    # Calculate alpha and beta values
    alpha = 255 / (maximum_gray - minimum_gray)
    beta = -minimum_gray * alpha

    auto_result = cv2.convertScaleAbs(image, alpha=alpha, beta=beta)
    return (auto_result, alpha, beta)

# Draw flow
def draw_flow(img, flow, step=30):
    h, w = img.shape[:2]
    y, x = np.mgrid[step/2:h:step, step/2:w:step].reshape(2,-1).astype(int)
    fx, fy = flow[y,x].T
    lines = np.vstack([x, y, x+fx, y+fy]).T.reshape(-1, 2, 2)
    lines = np.int32(lines + 0.5)
    vis = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
    cv2.polylines(vis, lines, 1, (36, 255, 12))
    for (x1, y1), (_x2, _y2) in lines:
        cv2.circle(vis, (x1, y1), 2, (36, 255, 12), -1)
    return vis

# Return translation value
def get_translation(img, flow, step=30):
    return (np.median(flow[:,:,0].T), flow[:, :, 0].T)

# Get file path
ap = argparse.ArgumentParser()
ap.add_argument("-p", "--path", help="Path to the .bsq file")
args = vars(ap.parse_args())

if not args['path']:
    print('Usage: python translate_analyzer.py -p <directory>')
    exit(1)

# Extract file name
bsq_fname = os.path.split(args['path'])[-1]

if '.bsq' not in bsq_fname:
    print('ERROR: Invalid bsq file. Select correct file.')
    exit(1)

width = 640
height = 512
frame_count = int(os.path.getsize(args['path'])/(2*height*width))
x,y,w,h = 0,0,100,512

# Simulates calibrated frames to display on video frame
data_file = np.fromfile(args['path'], dtype=np.uint16, count=-1)
data_file = data_file.reshape((width, height, frame_count), order='F')
data_file = np.rot90(data_file)

print(bsq_fname)
fname = bsq_fname.split()[0]
prev = data_file[:,:,0].copy()
prev //= 64
prev = automatic_brightness_and_contrast(prev)[0]
prev = prev[y:y+h, x:x+w]

translation_data = []
frame_direction = []
start = time.time()
for index in range(1, frame_count):
    data = data_file[:,:,index].copy()
    data //= 64
    data = automatic_brightness_and_contrast(data)[0]
    data = data[y:y+h, x:x+w]

    flow = cv2.calcOpticalFlowFarneback(prev=prev, next=data, flow=None, pyr_scale=0.5, levels=2, winsize=80, iterations=2, poly_n=7, poly_sigma=4.5, flags=0)
    translation, pixel_direction = get_translation(data, flow)
    prev = data

    cv2.imshow('flow', draw_flow(data, flow))
    cv2.waitKey(1)

    translation_data.append(translation)
    frame_direction = pixel_direction

end = time.time()
print('Time:', end - start)

plt.figure()
plt.title(bsq_fname)
plt.xlabel("Frames")
plt.ylabel("Magnitude")
plt.plot(translation_data)

plt.figure()
plt.title("Pixel Direction")
plt.xlabel("Width")
plt.ylabel("Height")
plt.imshow(frame_direction.T)
plt.colorbar(orientation='vertical')
plt.show()
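
As mentioned above, the code can be modified to take a video file instead of a .bsq stack. Below is a minimal sketch of that adaptation (not part of the original answer); it reuses the get_translation() function defined above, and "stable_video.avi" is a placeholder file name:

cap = cv2.VideoCapture("stable_video.avi")   # placeholder file name
ok, first = cap.read()
prev_gray = cv2.cvtColor(first, cv2.COLOR_BGR2GRAY)

translations = []
while True:
    ok, frame = cap.read()
    if not ok:
        break
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    flow = cv2.calcOpticalFlowFarneback(prev=prev_gray, next=gray, flow=None, pyr_scale=0.5, levels=2, winsize=80, iterations=2, poly_n=7, poly_sigma=4.5, flags=0)
    # Median horizontal displacement for this frame pair, as in get_translation() above
    translations.append(get_translation(gray, flow)[0])
    prev_gray = gray
cap.release()

plt.plot(translations)
plt.xlabel("Frames")
plt.ylabel("Median x-translation (pixels)")
plt.show()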
