CUDA图像处理 | 模板匹配
程序员文章站
2022-04-01 09:44:05
...
模板匹配
OpenCV中的模板匹配
匹配方法
-
cv::TM_SQDIFF
:该方法使用平方差进行匹配,因此最佳的匹配结果在结果为0处,值越大匹配结果越差。 -
cv::TM_CCORR
:相关性匹配方法,该方法使用源图像与模板图像的卷积结果进行匹配,因此,最佳匹配位置在值最大处,值越小匹配结果越差。 -
cv::TM_CCORR_NORMED
:归一化的相关性匹配方法,与相关性匹配方法类似,最佳匹配位置也是在值最大处。 -
cv::TM_CCOEFF
:相关性系数匹配方法 ,该方法使用源图像与其均值的差、模板与其均值的差二者之间的相关性进行匹配,最佳匹配结果在值等于1处,最差匹配结果在值等于-1处,值等于0直接表示二者不相关。 -
cv::TM_CCOEFF_NORMED
:归一化的相关性系数匹配方法,正值表示匹配的结果较好,负值则表示匹配的效果较差,也是值越大,匹配效果也好。
代码实现
int main() {
Mat image_source = imread("E:\\JZCHEN\\test\\lena.jpg", IMREAD_GRAYSCALE);
Mat image_template = imread("E:\\JZCHEN\\test\\template.jpg", IMREAD_GRAYSCALE);
Mat image_matched;
//模板匹配
/*
void cv::matchTemplate(
cv::InputArray image, // 用于搜索的输入图像, 8U 或 32F, 大小 WxH
cv::InputArray templ, // 用于匹配的模板,和image类型相同, 大小 wxh
cv::OutputArray result, // 匹配结果图像, 类型 32F, 大小 (W-w+1)x(H-h+1)
int method // 用于比较的方法
);
*/
cv::matchTemplate(image_source, image_template, image_matched, cv::TM_CCOEFF_NORMED);
double minVal, maxVal;
cv::Point minLoc, maxLoc;
//寻找最佳匹配位置
// 这里注意匹配结果图像与原图像之间的大小关系
cv::minMaxLoc(image_matched, &minVal, &maxVal, &minLoc, &maxLoc);
cv::Mat image_color;
cv::cvtColor(image_source, image_color, CV_GRAY2BGR); // 将灰度图转成BGR格式
cv::circle(image_color,
cv::Point(maxLoc.x + image_template.cols / 2, maxLoc.y + image_template.rows / 2),
20,
cv::Scalar(0, 0, 255),
2,
8,
0);
cv::imshow("source image", image_source);
cv::imshow("match result", image_matched);
cv::imshow("target", image_color);
cv::waitKey(0);
return 0;
}
C++实现模板匹配
归一化的相关性系数匹配方法 实现
double MatchTemplateCPU(BYTE *pSearchImage,
BYTE *pTemplate,
int nSearchImageWidth,
int nSearchImageHeight,
int nTemplateWidth,
int nTemplateHeight,
int &nMatchPointX,
int &nMatchPointY
)
{
float *m_pCorrMatrix = NULL;
m_pCorrMatrix = new float[(nSearchImageHeight - nTemplateHeight)*(nSearchImageWidth - nTemplateWidth)];
memset(m_pCorrMatrix, 0, (nSearchImageHeight - nTemplateHeight)*(nSearchImageWidth - nTemplateWidth)*sizeof(BYTE));
// 模板匹配过程
double dMaxR = 0.1;
int i, j, m, n;
int nVectX = 0;
int nVectY = 0;
float fSigmaT, fSigmaS, fSigmaST;
for (i = 0; i < (nSearchImageHeight - nTemplateHeight); i++) {
for (j = 0; j < (nSearchImageWidth - nTemplateWidth); j++) {
fSigmaT = 0;
fSigmaS = 0;
fSigmaST = 0;
for (m = 0; m < nTemplateHeight; m++) {
for (n = 0; n < nTemplateWidth;n++) {
fSigmaT = fSigmaT + (*(pTemplate + m*nTemplateWidth + n))*(*(pTemplate + m*nTemplateWidth + n));
fSigmaS = fSigmaS + (*(pSearchImage + (i+m)*nSearchImageWidth + (j+n)))*(*(pSearchImage + (m+i)*nSearchImageWidth + (j + n)));
fSigmaST = fSigmaST + (*(pSearchImage + (i + m)*nSearchImageWidth + (j + n)))*(*(pTemplate + m*nTemplateWidth + n));
}
}
float tmp = fSigmaST / sqrt(fSigmaS*fSigmaS + fSigmaT*fSigmaT);
*(m_pCorrMatrix + i*(nSearchImageWidth - nTemplateWidth) + j) = tmp;
if (*(m_pCorrMatrix + i*(nSearchImageWidth - nTemplateWidth) + j) > dMaxR) {
dMaxR = *(m_pCorrMatrix + i*(nSearchImageWidth - nTemplateWidth) + j);
nVectX = i;
nVectY = j;
}
}
}
nMatchPointX = nVectX;
nMatchPointY = nVectY;
if (m_pCorrMatrix != NULL)
delete m_pCorrMatrix;
return 0;
}
GPU实现模板匹配
extern "C"
static int iDivUp(int a, int b)
{
return (a % b != 0) ? (a / b + 1) : (a / b);
}
__global__ void CudaMatrixMul_kernal(BYTE *pSrc,
float *pSelfMul,
int nHeight,
int nWidth)
{
const int row = blockDim.y*blockIdx.y + threadIdx.y;
const int col = blockDim.x*blockIdx.x + threadIdx.x;
const int point = row*nWidth + col;
float tmp;
if (row < nHeight&&col < nWidth) {
tmp = pSrc[point];
tmp *= tmp;
pSelfMul[point] = tmp;
}
}
void CudaMatrixSelfMul(
BYTE *pSrc,
float *pSelfMul,
int nHeight,
int nWidth
) {
cudaMemset(pSelfMul, 0, nHeight*nWidth * sizeof(float));
dim3 threads(16, 16);
dim3 grid(iDivUp(nWidth, threads.x), iDivUp(nHeight, threads.y));
CudaMatrixMul_kernal << <grid, threads >> > (pSrc, pSelfMul, nHeight, nWidth);
}
__global__ static void
CorrMatrix_kernel(
BYTE *pTemplate,
BYTE *pSearchImage,
float *pMulTT,
float *pMulSS,
float *pMulST,
float *pCorrMatrix,
int nTemplateHeight,
int nTemplateWidth,
int nSearchImageHeight,
int nSearchImageWidth
) {
const int y = blockDim.y *blockIdx.y + threadIdx.y;
const int x = blockDim.x *blockIdx.x + threadIdx.x;
float fSigmaTT = 0;
float fSigmaSS = 0;
float fSigmaST = 0;
float fSigmaS = 0;
float fSigmaT = 0;
//
if (y >= 0 && y <= (nSearchImageHeight - nTemplateHeight + 1) &&
x >= 0 && x <= (nSearchImageWidth - nTemplateWidth + 1)) {
for (int m = 0; m < nTemplateHeight; m++) {
for (int n = 0; n < nTemplateWidth; n++) {
fSigmaTT = fSigmaTT + pMulTT[m*nTemplateWidth + n];
fSigmaSS = fSigmaSS + pMulSS[(y + m)*nSearchImageWidth + (x + n)];
fSigmaS = pSearchImage[((y + m)*nSearchImageWidth + (x + n))];
fSigmaT = pTemplate[m*nTemplateWidth + n];
fSigmaST = fSigmaST + fSigmaS*fSigmaT;
}
}
pCorrMatrix[y*(nSearchImageWidth - nTemplateWidth) + x] =
fSigmaST / sqrt(fSigmaSS*fSigmaSS + fSigmaTT*fSigmaTT);
}
}
void CorrMatrix(BYTE *pTemplate,
BYTE* pSearchImage,
float *pMulTT,
float* pMulSS,
float *pMulST,
float *pCorrMatrix,
int nTemplatHeigt,
int nTemplatWidth,
int nSearchImageHeight,
int nSearchImageWidth)
{
// 计算相关性矩阵
dim3 threads(16, 16);
dim3 grid(iDivUp(nSearchImageWidth - nTemplatWidth, threads.x), iDivUp(nSearchImageHeight, threads.y));
// 启动计算相关性矩阵的核函数
CorrMatrix_kernel << <grid, threads >> > (pTemplate, pSearchImage,
pMulTT, pMulSS, pMulST, pCorrMatrix, nTemplatHeigt, nTemplatWidth,
nSearchImageHeight, nSearchImageWidth);
}
void FindMaxCorrMatrixPoint(
float *pCorrMatrix,
int nWidth,
int nHeight,
double dMaxR,
int &nVectY,
int &nVectX
) {
// 寻找最新点
for (int i = 0; i < nHeight; i++) {
for (int j = 0; j < nWidth; j++) {
if (*(pCorrMatrix + i*nWidth + j) > dMaxR) {
dMaxR = *(pCorrMatrix + i*nWidth + j);
nVectY = i;
nVectX = j;
}
}
}
}
extern "C"
double cudaMatchTemplate(
BYTE *pSrc,BYTE *pTemplate,
int nWidth,int nHeight,
int nTemplateWidth,int nTemplateHeight,
int &nMatchPointX,int nMatchPointY
) {
// 图像空间初始化
BYTE *d_pSearchImage = NULL;
BYTE *d_pTemplate = NULL;
float *d_pMulTT = NULL;
float *d_pMulSS = NULL;
float *d_pMulST = NULL;
float *d_pCorrMatrix = NULL;
float *m_CorrMatrix = NULL;
cudaMalloc((void**)&d_pSearchImage, nWidth*nHeight*sizeof(BYTE));
cudaMalloc((void**)&d_pTemplate, nTemplateHeight*nTemplateWidth*sizeof(BYTE));
cudaMalloc((void**)&d_pMulTT, nTemplateHeight*nTemplateWidth * sizeof(float));
cudaMalloc((void**)&d_pMulSS, nWidth*nHeight * sizeof(float));
cudaMalloc((void**)&d_pMulST, nTemplateWidth*nTemplateHeight * sizeof(float));
cudaMalloc((void**)&d_pCorrMatrix, (nWidth - nTemplateWidth)*(nHeight - nTemplateHeight)*sizeof(float));
cudaMemset(d_pSearchImage, 0, nWidth*nHeight * sizeof(BYTE));
cudaMemset(d_pTemplate, 0, nTemplateHeight*nTemplateWidth * sizeof(BYTE));
cudaMemset(d_pMulTT, 0, nTemplateHeight*nTemplateWidth * sizeof(float));
cudaMemset(d_pMulSS, 0, nTemplateHeight*nTemplateWidth * sizeof(float));
cudaMemset(d_pMulST, 0, nTemplateWidth*nTemplateHeight * sizeof(float));
cudaMemset(d_pCorrMatrix, 0, (nWidth - nTemplateWidth)*(nHeight - nTemplateHeight) * sizeof(float));
cudaMemcpy(d_pSearchImage, pSrc, nWidth*nHeight * sizeof(BYTE), cudaMemcpyHostToDevice);
cudaMemcpy(d_pTemplate, pTemplate, nTemplateWidth*nTemplateHeight * sizeof(BYTE), cudaMemcpyHostToDevice);
// 模板匹配过程
double dMaxR = 0.1;
int i, j;
int nVectX = 0;
int nVectY = 0;
// 模板元素自己点乘
CudaMatrixSelfMul(d_pTemplate, d_pMulTT,nTemplateHeight,nTemplateWidth);
// 搜素图元素自己点乘
CudaMatrixSelfMul(d_pSearchImage, d_pMulSS, nHeight, nWidth);
// 计算相关性矩阵
CorrMatrix(d_pTemplate, d_pSearchImage, d_pMulTT, d_pMulSS, d_pMulST,
d_pCorrMatrix,nTemplateHeight, nTemplateWidth, nHeight, nWidth);
float *m_pCorrMatrix = new float[(nWidth - nTemplateWidth)*(nHeight - nTemplateHeight)];
cudaMemcpy(m_pCorrMatrix, d_pCorrMatrix, (nWidth - nTemplateWidth)*(nHeight - nTemplateHeight),cudaMemcpyDeviceToHost);
// 寻找最佳匹配点
FindMaxCorrMatrixPoint(m_pCorrMatrix, nWidth - nTemplateWidth, nHeight - nTemplateHeight,dMaxR, nVectY, nVectX);
nMatchPointX = nVectX;
nMatchPointY = nVectY;
return 0;
}
上一篇: 强化飞机界面