在目标检测和后续图像处理的流程中,warpAffine 是 OpenCV 提供的强大工具,常用于仿射变换操作。对于 YOLO 等目标检测模型,warpAffine 不仅适合于实现图片的 Letterbox(加黑边缩放) 操作,也可以用于在识别目标后,裁剪出检测区域并进行规范化变换、旋转等操作。
通过调整仿射变换矩阵,我们可以在统一的 API 下完成多个常见的图像处理需求。这种方式特别适合于与 CUDA 加速实现的 Letterbox 代码无缝结合,从而实现不同的功能。
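设裁剪框的左上角为 $(x_1, y_1)$,右下角为 $(x_2, y_2)$。先将图片的裁剪位置的起点移动到图片的 $(0,0)$ 点,对应的仿射变换矩阵为 $T = \begin{bmatrix} 1 & 0 & -x_1 \\ 0 & 1 & -y_1 \\ 0 & 0 & 1 \end{bmatrix}$。
然后将裁剪区域缩放到目标大小,对应的缩放矩阵为 $S = \begin{bmatrix} s_x & 0 & 0 \\ 0 & s_y & 0 \\ 0 & 0 & 1 \end{bmatrix}$,其中 $s_x = \dfrac{dstw}{x_2 - x_1}$,$s_y = \dfrac{dsth}{y_2 - y_1}$,dsth 和 dstw 为目标图片的大小。最终的仿射变换矩阵为
$M = S \cdot T = \begin{bmatrix} s_x & 0 & -x_1 s_x \\ 0 & s_y & -y_1 s_y \\ 0 & 0 & 1 \end{bmatrix}$。如果只裁剪不缩放,则其中 $s_x = s_y = 1$,设置目标图片大小的宽高为框的宽高。M 就是最终的仿射变换矩阵。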
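- 中心点平移到原点
设原图的宽高为 $(w, h)$,中心点为 $(c_x, c_y)$,其中:$c_x = \dfrac{w}{2}$,$c_y = \dfrac{h}{2}$。对应的平移矩阵为 $T_1 = \begin{bmatrix} 1 & 0 & -c_x \\ 0 & 1 & -c_y \\ 0 & 0 & 1 \end{bmatrix}$。
- 旋转
顺时针旋转 $\theta$ 角度,相当于逆时针旋转 $-\theta$。对应的旋转矩阵为 $R = \begin{bmatrix} \cos\theta & \sin\theta & 0 \\ -\sin\theta & \cos\theta & 0 \\ 0 & 0 & 1 \end{bmatrix}$。
- 旋转后向图片中心移动
图片旋转后,宽高会变化,新宽度 $w'$ 和高度 $h'$ 为:$w' = |w\cos\theta| + |h\sin\theta|$,$h' = |w\sin\theta| + |h\cos\theta|$。再将旋转后的中心平移到新图片的中心 $\left(\dfrac{w'}{2}, \dfrac{h'}{2}\right)$,对应的平移矩阵为 $T_2 = \begin{bmatrix} 1 & 0 & w'/2 \\ 0 & 1 & h'/2 \\ 0 & 0 & 1 \end{bmatrix}$。
- 最终变换矩阵计算
$M = T_2 \cdot R \cdot T_1 = \begin{bmatrix} \cos\theta & \sin\theta & -c_x\cos\theta - c_y\sin\theta + w'/2 \\ -\sin\theta & \cos\theta & c_x\sin\theta - c_y\cos\theta + h'/2 \\ 0 & 0 & 1 \end{bmatrix}$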
python 程序
裁剪后 按照短边resize图片,保持宽高比
import cv2
import numpy as np
from enum import Enum
from typing import List, Tuple, Union
from pydantic import BaseModel, field_validator
class NormType(Enum):
    """Selects which normalization formula is applied to pixel values."""

    NoneType = 0   # leave pixel values unchanged
    MeanStd = 1    # (pixel * alpha - mean) / std
    AlphaBeta = 2  # pixel * alpha + beta
class ChannelType(Enum):
    """Selects whether the channel order is reversed before normalization."""

    NoneType = 0  # keep the channel order as-is
    SwapRB = 1    # reverse the last axis (swap the R and B channels)
class Norm(BaseModel):
    """Normalization settings consumed by ``ImageTransformer.normilization``.

    Attributes:
        mean: Per-channel mean, exactly 3 values (used by ``NormType.MeanStd``).
        std: Per-channel standard deviation, exactly 3 values (``MeanStd``).
        alpha: Multiplicative factor applied to the pixel values.
        beta: Additive offset (used by ``NormType.AlphaBeta``).
        norm_type: Which normalization formula to apply.
        channel_type: Whether to reverse the channel order first.
    """

    mean: List[float] = [0.0, 0.0, 0.0]
    std: List[float] = [1.0, 1.0, 1.0]
    alpha: float = 1 / 255.0
    beta: float = 0.0
    norm_type: NormType = NormType.NoneType
    channel_type: ChannelType = ChannelType.NoneType

    @field_validator("mean", "std", mode="before")
    @classmethod
    def validate_length(cls, value, info):
        """Ensure mean and std have exactly 3 elements.

        Raises:
            ValueError: If ``value`` does not contain exactly 3 elements.
        """
        if len(value) != 3:
            # pydantic v2 hands validators a ValidationInfo object; the name of
            # the field being validated is ``field_name`` (it has no ``alias``).
            raise ValueError(f"{info.field_name} must have exactly 3 elements.")
        return value

    @staticmethod
    def mean_std(mean: List[float], std: List[float], channel_type: ChannelType = ChannelType.NoneType) -> "Norm":
        """Build a ``Norm`` that applies ``(pixel * alpha - mean) / std``."""
        return Norm(mean=mean, std=std, norm_type=NormType.MeanStd, channel_type=channel_type)

    @staticmethod
    def alpha_beta(alpha: float, beta: float, channel_type: ChannelType = ChannelType.NoneType) -> "Norm":
        """Build a ``Norm`` that applies ``pixel * alpha + beta``."""
        return Norm(alpha=alpha, beta=beta, norm_type=NormType.AlphaBeta, channel_type=channel_type)

    @staticmethod
    def none() -> "Norm":
        """Build a ``Norm`` with every field at its default (no normalization)."""
        return Norm()

    def __repr__(self):
        return (
            f"Norm(mean={self.mean}, std={self.std}, alpha={self.alpha}, beta={self.beta}, "
            f"norm_type={self.norm_type}, channel_type={self.channel_type})"
        )
class WarpaffineMatrix:
    """A 2x3 affine matrix paired with the output size it should be rendered at.

    Attributes:
        matrix: 2x3 array mapping source pixel coordinates to destination
            pixel coordinates.
        target: Output size as ``(width, height)``.
    """

    def __init__(self, matrix: np.ndarray, target: Tuple[int, int]) -> None:
        self.matrix = matrix
        self.target = target

    def __repr__(self):
        return f"WarpaffineMatrix(matrix={self.matrix}, target={self.target})"

    def invert(self) -> np.ndarray:
        """Get the inverse of the affine transformation matrix.

        Returns:
            The 2x3 inverse, obtained by extending the matrix to 3x3 with the
            row ``[0, 0, 1]``, inverting, and dropping the last row.
        """
        homogeneous = np.vstack([self.matrix, [0, 0, 1]])
        return np.linalg.inv(homogeneous)[:2]

    @staticmethod
    def letter_box_matrix(src: Tuple[int, int], target: Tuple[int, int]) -> "WarpaffineMatrix":
        """Build a letterbox matrix: uniform scale plus centering offsets.

        Args:
            src: Source size ``(width, height)``.
            target: Destination size ``(width, height)``.

        Returns:
            Matrix that scales by the smaller of the two axis ratios and
            centers the scaled image inside ``target``.
        """
        scale = min(target[0] / src[0], target[1] / src[1])
        offset_x = (target[0] - scale * src[0]) * 0.5
        offset_y = (target[1] - scale * src[1]) * 0.5
        matrix = np.array([
            [scale, 0, offset_x],
            [0, scale, offset_y],
        ], dtype=np.float32)
        return WarpaffineMatrix(matrix=matrix, target=target)

    @staticmethod
    def resize_matrix(src: Tuple[int, int], target: Union[Tuple[int, int], int]) -> "WarpaffineMatrix":
        """Build a resize matrix.

        Args:
            src: Source size ``(width, height)``.
            target: If a tuple ``(width, height)``, each axis is scaled
                independently. If an int, the shorter edge is scaled to that
                length and the aspect ratio is kept.

        Returns:
            The scaling matrix and the resulting output size.

        Raises:
            ValueError: If ``target`` is neither a tuple nor an int.
        """
        if isinstance(target, tuple):
            scale_x = target[0] / src[0]
            scale_y = target[1] / src[1]
            matrix = np.array([
                [scale_x, 0, 0],
                [0, scale_y, 0],
            ], dtype=np.float32)
            new_target = target
        elif isinstance(target, int):
            scale = target / min(src)
            new_width = int(src[0] * scale)
            new_height = int(src[1] * scale)
            matrix = np.array([
                [scale, 0, 0],
                [0, scale, 0],
            ], dtype=np.float32)
            new_target = (new_width, new_height)
        else:
            raise ValueError("Target must be either a tuple (width, height) or an integer for the shorter edge.")
        return WarpaffineMatrix(matrix=matrix, target=new_target)

    @staticmethod
    def crop_resize_matrix(
        start_point: Tuple[int, int],
        end_point: Tuple[int, int],
        target: Union[Tuple[int, int], int, None] = None
    ) -> "WarpaffineMatrix":
        """Build a matrix that crops a box and optionally resizes it.

        Args:
            start_point (Tuple[int, int]): Top-left (x, y).
            end_point (Tuple[int, int]): Bottom-right (x, y).
            target (Union[Tuple[int, int], int, None]):
                If tuple, resize the crop to exactly ``(width, height)``.
                If int, resize so the shorter edge has that length, keeping
                the aspect ratio.
                If None, crop only (output size equals the box size).

        Returns:
            WarpaffineMatrix: Affine transformation matrix and target dimensions.

        Raises:
            ValueError: If the box has non-positive width or height, or if
                ``target`` is not a tuple, an int, or None.
        """
        crop_width = end_point[0] - start_point[0]
        crop_height = end_point[1] - start_point[1]
        # An empty or inverted box would divide by zero or produce negative scales.
        if crop_width <= 0 or crop_height <= 0:
            raise ValueError("end_point must be strictly to the right of and below start_point.")

        if target is None:
            target_width, target_height = crop_width, crop_height
        elif isinstance(target, tuple):
            target_width, target_height = target
        elif isinstance(target, int):
            scale = target / min(crop_width, crop_height)
            target_width = int(crop_width * scale)
            target_height = int(crop_height * scale)
        else:
            raise ValueError("Target must be a tuple (width, height), an integer, or None.")

        scale_x = target_width / crop_width
        scale_y = target_height / crop_height
        # Scale * Translate(-start): the box origin lands on (0, 0).
        matrix = np.array([
            [scale_x, 0, -start_point[0] * scale_x],
            [0, scale_y, -start_point[1] * scale_y],
        ], dtype=np.float32)
        return WarpaffineMatrix(matrix=matrix, target=(target_width, target_height))

    @staticmethod
    def flip_matrix(horizontal: bool, vertical: bool, src: Tuple[int, int]) -> "WarpaffineMatrix":
        """Build a mirroring matrix (horizontal and/or vertical flip).

        Args:
            horizontal: Mirror left-right when True.
            vertical: Mirror top-bottom when True.
            src: Source size ``(width, height)``; also used as the output size.

        Returns:
            Matrix mapping pixel index ``x`` to ``(width - 1) - x`` and/or
            ``y`` to ``(height - 1) - y``.
        """
        scale_x = -1 if horizontal else 1
        scale_y = -1 if vertical else 1
        # Pixel indices run 0..size-1, so the mirror of x is (size - 1) - x.
        # Translating by the full size would push column/row 0 out of the
        # output and leave a one-pixel border on the opposite edge.
        translate_x = src[0] - 1 if horizontal else 0
        translate_y = src[1] - 1 if vertical else 0
        matrix = np.array([[scale_x, 0, translate_x], [0, scale_y, translate_y]], dtype=np.float32)
        return WarpaffineMatrix(matrix=matrix, target=src)

    @staticmethod
    def rotate_matrix(src: Tuple[int, int], angle: float) -> "WarpaffineMatrix":
        """Build a rotation matrix whose output canvas holds the whole image.

        The source center is moved to the origin, the rotation
        ``[[cos, sin], [-sin, cos]]`` is applied, and the result is moved to
        the center of the enlarged canvas.

        Args:
            src: Source size ``(width, height)``.
            angle: Rotation angle in degrees.
                NOTE(review): the original description calls positive angles
                clockwise, but with the image y-axis pointing down this linear
                part matches cv2.getRotationMatrix2D, whose positive angles
                are counter-clockwise on screen - confirm the intended sign.

        Returns:
            WarpaffineMatrix: Rotation matrix and the enlarged output size.
        """
        # Convert degrees to radians.
        theta = np.deg2rad(angle)
        cos_theta = np.cos(theta)
        sin_theta = np.sin(theta)

        width, height = src
        center_x, center_y = width / 2, height / 2

        # Bounding box of the rotated rectangle (truncated to whole pixels).
        new_width = int(abs(width * cos_theta) + abs(height * sin_theta))
        new_height = int(abs(width * sin_theta) + abs(height * cos_theta))
        new_center_x, new_center_y = new_width / 2, new_height / 2

        # Translate(new_center) * Rotate * Translate(-center), written out.
        i2d0 = cos_theta
        i2d1 = sin_theta
        i2d2 = -cos_theta * center_x - sin_theta * center_y + new_center_x
        i2d3 = -sin_theta
        i2d4 = cos_theta
        i2d5 = sin_theta * center_x - cos_theta * center_y + new_center_y

        matrix = np.array([[i2d0, i2d1, i2d2], [i2d3, i2d4, i2d5]], dtype=np.float32)
        return WarpaffineMatrix(matrix=matrix, target=(new_width, new_height))
class ImageTransformer:
    """Stateless helpers that warp and normalize images."""

    @staticmethod
    def normilization(image: np.ndarray, norm: Norm) -> np.ndarray:
        """Convert ``image`` to float32 and apply the normalization in ``norm``.

        Args:
            image: Input image; the last axis is treated as the channel axis.
            norm: Normalization settings.

        Returns:
            The float32 image, with channels reversed when ``norm.channel_type``
            is ``SwapRB`` and values normalized according to ``norm.norm_type``
            (left unscaled for ``NormType.NoneType``).
        """
        image = image.astype(np.float32)
        if norm.channel_type == ChannelType.SwapRB:
            image = image[..., ::-1]  # Swap RGB to BGR or vice versa

        if norm.norm_type == NormType.MeanStd:
            mean = np.array(norm.mean, dtype=np.float32)
            std = np.array(norm.std, dtype=np.float32)
            image = (image * norm.alpha - mean) / std
        elif norm.norm_type == NormType.AlphaBeta:
            image = image * norm.alpha + norm.beta
        return image

    @staticmethod
    def transform(
        image: np.ndarray,
        warpaffine_matrix: WarpaffineMatrix,
        border_value: Tuple[int, int, int] = (114, 114, 114)
    ) -> np.ndarray:
        """Warp ``image`` with the given affine matrix.

        Args:
            image: Source image.
            warpaffine_matrix: Matrix and output size ``(width, height)``.
            border_value: Color used for output pixels that fall outside the
                source image.

        Returns:
            The warped image, sized ``warpaffine_matrix.target``.

        Raises:
            ValueError: If ``image`` or ``warpaffine_matrix`` is None.
        """
        if image is None or warpaffine_matrix is None:
            raise ValueError("Input image and warpaffine_matrix cannot be None.")

        # NOTE(review): the argument list of this call was truncated in the
        # source; it is reconstructed from this function's parameters using
        # bilinear interpolation and a constant border - confirm.
        transformed_image = cv2.warpAffine(
            image,
            warpaffine_matrix.matrix,
            warpaffine_matrix.target,
            flags=cv2.INTER_LINEAR,
            borderMode=cv2.BORDER_CONSTANT,
            borderValue=border_value,
        )
        return transformed_image
if __name__ == "__main__":
    # Demo: run every matrix factory on one image and save each result.
    image_path = "test.jpg"
    image = cv2.imread(image_path)
    # cv2.imread returns None instead of raising when the file cannot be read.
    if image is None:
        raise FileNotFoundError(f"Cannot read image: {image_path}")
    h, w, _ = image.shape

    # Letterbox to 640x640.
    letter_box_matrix = WarpaffineMatrix.letter_box_matrix((w, h), (640, 640))
    letter_box_image = ImageTransformer.transform(image, letter_box_matrix)
    cv2.imwrite("letter_box_image.jpg", letter_box_image)

    # Plain resize to 640x640 (aspect ratio not kept).
    resize_matrix = WarpaffineMatrix.resize_matrix((w, h), (640, 640))
    resize_image = ImageTransformer.transform(image, resize_matrix)
    cv2.imwrite("resize_image.jpg", resize_image)

    # Crop only.
    crop_matrix = WarpaffineMatrix.crop_resize_matrix((561, 397), (687, 530))
    crop_image = ImageTransformer.transform(image, crop_matrix)
    cv2.imwrite("crop_image.jpg", crop_image)

    # Crop, then resize to a fixed size.
    crop_resize_matrix = WarpaffineMatrix.crop_resize_matrix((561, 397), (687, 530), (224, 224))
    crop_resize_image = ImageTransformer.transform(image, crop_resize_matrix)
    cv2.imwrite("crop_resize_image.jpg", crop_resize_image)

    # Crop, then resize by the shorter edge, keeping the aspect ratio.
    crop_resize_short_matrix = WarpaffineMatrix.crop_resize_matrix((561, 397), (687, 530), 224)
    crop_resize_short_image = ImageTransformer.transform(image, crop_resize_short_matrix)
    cv2.imwrite("crop_resize_short_image.jpg", crop_resize_short_image)

    # flip_matrix
    flip_matrix = WarpaffineMatrix.flip_matrix(False, True, (w, h))
    flip_image = ImageTransformer.transform(image, flip_matrix)
    cv2.imwrite("flip_image_v.jpg", flip_image)

    flip_matrix = WarpaffineMatrix.flip_matrix(True, False, (w, h))
    flip_image = ImageTransformer.transform(image, flip_matrix)
    cv2.imwrite("flip_image_h.jpg", flip_image)

    flip_matrix = WarpaffineMatrix.flip_matrix(True, True, (w, h))
    flip_image = ImageTransformer.transform(image, flip_matrix)
    cv2.imwrite("flip_image_hv.jpg", flip_image)

    # Rotate by 30 degrees on an enlarged canvas.
    rotate_matrix = WarpaffineMatrix.rotate_matrix((w, h), 30)
    rotate_image = ImageTransformer.transform(image, rotate_matrix)
    cv2.imwrite("rotate_image_30.jpg", rotate_image)
cuda 程序
- 仿射变换矩阵 Resize
// Forward and inverse 2x3 matrices for a uniform resize with no translation.
// Both axes use the smaller of the two per-axis ratios, so the aspect ratio
// is preserved and the image stays anchored at the origin.
struct ResizeMatrix
{
    float i2d[6];  // image to dst(network), 2x3 matrix
    float d2i[6];  // dst to image, 2x3 matrix

    // from / to: element 0 drives the x scale, element 1 the y scale.
    void compute(const std::tuple<int, int> &from, const std::tuple<int, int> &to)
    {
        float scale_x = std::get<0>(to) / (float)std::get<0>(from);
        float scale_y = std::get<1>(to) / (float)std::get<1>(from);
        float scale = std::min(scale_x, scale_y);

        // resize
        i2d[0] = scale;
        i2d[1] = 0;
        i2d[2] = 0;
        i2d[3] = 0;
        i2d[4] = scale;
        i2d[5] = 0;

        // Closed-form inverse of the 2x3 affine matrix; a zero determinant
        // yields an all-zero linear part instead of dividing by zero.
        double D = i2d[0] * i2d[4] - i2d[1] * i2d[3];
        D = D != 0. ? double(1.) / D : double(0.);
        double A11 = i2d[4] * D, A22 = i2d[0] * D, A12 = -i2d[1] * D, A21 = -i2d[3] * D;
        double b1 = -A11 * i2d[2] - A12 * i2d[5];
        double b2 = -A21 * i2d[2] - A22 * i2d[5];

        d2i[0] = A11;
        d2i[1] = A12;
        d2i[2] = b1;
        d2i[3] = A21;
        d2i[4] = A22;
        d2i[5] = b2;
    }
};
- 仿射变换矩阵 裁剪后 resize(crop + resize)
// Forward and inverse 2x3 matrices for "crop a box, then resize it to `to`".
struct ResizeCropMatrix
{
    float i2d[6];  // image to dst(network), 2x3 matrix
    float d2i[6];  // dst to image, 2x3 matrix

    // Translate the box origin to (0, 0), then scale:
    // 1 0 -x       sx 0  -x*sx
    // 0 1 -y  ->   0  sy -y*sy
    // 0 0 1        0  0  1
    //
    // to:    destination size (x extent, y extent)
    // start: top-left corner of the crop box (x, y)
    // end:   bottom-right corner of the crop box (x, y)
    void compute(const std::tuple<int, int> &to,
                 const std::tuple<int, int> &start, const std::tuple<int, int> &end)
    {
        int start_x = std::get<0>(start);
        int start_y = std::get<1>(start);
        int end_x = std::get<0>(end);
        int end_y = std::get<1>(end);
        int dst_w = std::get<0>(to);
        int dst_h = std::get<1>(to);

        // i2d maps image -> dst, so the scale is dst size over crop size.
        // (The reversed ratio, crop / dst, belongs to the inverse d2i.)
        float scale_x = 1.0f * dst_w / (end_x - start_x);
        float scale_y = 1.0f * dst_h / (end_y - start_y);

        i2d[0] = scale_x;
        i2d[1] = 0;
        i2d[2] = -start_x * scale_x;
        i2d[3] = 0;
        i2d[4] = scale_y;
        i2d[5] = -start_y * scale_y;

        // Closed-form inverse of the 2x3 affine matrix; a zero determinant
        // yields an all-zero linear part instead of dividing by zero.
        double D = i2d[0] * i2d[4] - i2d[1] * i2d[3];
        D = D != 0. ? double(1.) / D : double(0.);
        double A11 = i2d[4] * D, A22 = i2d[0] * D, A12 = -i2d[1] * D, A21 = -i2d[3] * D;
        double b1 = -A11 * i2d[2] - A12 * i2d[5];
        double b2 = -A21 * i2d[2] - A22 * i2d[5];

        d2i[0] = A11;
        d2i[1] = A12;
        d2i[2] = b1;
        d2i[3] = A21;
        d2i[4] = A22;
        d2i[5] = b2;
    }
};
- 仿射变换矩阵 letter box
// Forward and inverse 2x3 matrices for a letterbox: uniform scale by the
// smaller per-axis ratio, with the scaled image centered in the destination.
struct AffineMatrix
{
    float i2d[6];  // image to dst(network), 2x3 matrix
    float d2i[6];  // dst to image, 2x3 matrix

    // from / to: element 0 is the x extent, element 1 the y extent.
    void compute(const std::tuple<int, int> &from, const std::tuple<int, int> &to)
    {
        float scale_x = std::get<0>(to) / (float)std::get<0>(from);
        float scale_y = std::get<1>(to) / (float)std::get<1>(from);
        float scale = std::min(scale_x, scale_y);

        // letter box
        // Offsets center the scaled image in the destination; the trailing
        // "+ scale * 0.5 - 0.5" term is presumably a half-pixel (pixel-center)
        // alignment correction - confirm.
        i2d[0] = scale;
        i2d[1] = 0;
        i2d[2] = -scale * std::get<0>(from) * 0.5 + std::get<0>(to) * 0.5 + scale * 0.5 - 0.5;
        i2d[3] = 0;
        i2d[4] = scale;
        i2d[5] = -scale * std::get<1>(from) * 0.5 + std::get<1>(to) * 0.5 + scale * 0.5 - 0.5;

        // Closed-form inverse of the 2x3 affine matrix; a zero determinant
        // yields an all-zero linear part instead of dividing by zero.
        double D = i2d[0] * i2d[4] - i2d[1] * i2d[3];
        D = D != 0. ? double(1.) / D : double(0.);
        double A11 = i2d[4] * D, A22 = i2d[0] * D, A12 = -i2d[1] * D, A21 = -i2d[3] * D;
        double b1 = -A11 * i2d[2] - A12 * i2d[5];
        double b2 = -A21 * i2d[2] - A22 * i2d[5];

        d2i[0] = A11;
        d2i[1] = A12;
        d2i[2] = b1;
        d2i[3] = A21;
        d2i[4] = A22;
        d2i[5] = b2;
    }
};
- 根据仿射变换矩阵变换图片的cuda程序
// Warps a 3-channel interleaved uint8 image into a planar float image using
// bilinear interpolation. One thread produces one destination pixel.
//
// src:                    source pixels, 3 interleaved bytes per pixel
// src_line_size:          offset in bytes between consecutive source rows
// src_width, src_height:  source bounds used for the range checks
// dst:                    output, three consecutive planes of
//                         dst_width * dst_height floats
// const_value_st:         fill value for samples outside the source
// warp_affine_matrix_2_3: 6 floats (row-major 2x3) mapping a destination
//                         pixel (dx, dy) to source coordinates
static __global__ void warp_affine_bilinear_and_normalize_plane_kernel(
    uint8_t *src, int src_line_size, int src_width, int src_height, float *dst, int dst_width,
    int dst_height, uint8_t const_value_st, float *warp_affine_matrix_2_3)
{
    int dx = blockDim.x * blockIdx.x + threadIdx.x;
    int dy = blockDim.y * blockIdx.y + threadIdx.y;
    if (dx >= dst_width || dy >= dst_height) return;

    float m_x1 = warp_affine_matrix_2_3[0];
    float m_y1 = warp_affine_matrix_2_3[1];
    float m_z1 = warp_affine_matrix_2_3[2];
    float m_x2 = warp_affine_matrix_2_3[3];
    float m_y2 = warp_affine_matrix_2_3[4];
    float m_z2 = warp_affine_matrix_2_3[5];

    // Source position sampled for this destination pixel.
    float src_x = m_x1 * dx + m_y1 * dy + m_z1;
    float src_y = m_x2 * dx + m_y2 * dy + m_z2;
    float c0, c1, c2;

    if (src_x <= -1 || src_x >= src_width || src_y <= -1 || src_y >= src_height)
    {
        // out of range
        c0 = const_value_st;
        c1 = const_value_st;
        c2 = const_value_st;
    }
    else
    {
        int y_low = floorf(src_y);
        int x_low = floorf(src_x);
        int y_high = y_low + 1;
        int x_high = x_low + 1;

        uint8_t const_value[] = {const_value_st, const_value_st, const_value_st};
        float ly = src_y - y_low;
        float lx = src_x - x_low;
        float hy = 1 - ly;
        float hx = 1 - lx;
        float w1 = hy * hx, w2 = hy * lx, w3 = ly * hx, w4 = ly * lx;

        // Neighbours that fall outside the source keep pointing at the fill value.
        uint8_t *v1 = const_value;
        uint8_t *v2 = const_value;
        uint8_t *v3 = const_value;
        uint8_t *v4 = const_value;
        if (y_low >= 0)
        {
            if (x_low >= 0) v1 = src + y_low * src_line_size + x_low * 3;
            if (x_high < src_width) v2 = src + y_low * src_line_size + x_high * 3;
        }

        if (y_high < src_height)
        {
            if (x_low >= 0) v3 = src + y_high * src_line_size + x_low * 3;
            if (x_high < src_width) v4 = src + y_high * src_line_size + x_high * 3;
        }

        // same to opencv
        c0 = floorf(w1 * v1[0] + w2 * v2[0] + w3 * v3[0] + w4 * v4[0] + 0.5f);
        c1 = floorf(w1 * v1[1] + w2 * v2[1] + w3 * v3[1] + w4 * v4[1] + 0.5f);
        c2 = floorf(w1 * v1[2] + w2 * v2[2] + w3 * v3[2] + w4 * v4[2] + 0.5f);
    }

    // Planar output: channel 0 plane, then channel 1, then channel 2.
    int area = dst_width * dst_height;
    float *pdst_c0 = dst + dy * dst_width + dx;
    float *pdst_c1 = pdst_c0 + area;
    float *pdst_c2 = pdst_c1 + area;
    *pdst_c0 = c0;
    *pdst_c1 = c1;
    *pdst_c2 = c2;
}