欢迎您访问程序员文章站本站旨在为大家提供分享程序员计算机编程知识!
您现在的位置是: 首页

CUDA图像处理 | 模板匹配

程序员文章站 2022-04-01 09:44:05
...

模板匹配

OpenCV中的模板匹配

该部分转载自 【OpenCV3】模板匹配——cv::matchTemplate()详解

匹配方法

  • cv::TM_SQDIFF:该方法使用平方差进行匹配,因此最佳的匹配结果在结果为0处,值越大匹配结果越差。
  • cv::TM_CCORR:相关性匹配方法,该方法使用源图像与模板图像的卷积结果进行匹配,因此,最佳匹配位置在值最大处,值越小匹配结果越差。
  • cv::TM_CCORR_NORMED:归一化的相关性匹配方法,与相关性匹配方法类似,最佳匹配位置也是在值最大处。
  • cv::TM_CCOEFF:相关性系数匹配方法 ,该方法使用源图像与其均值的差、模板与其均值的差二者之间的相关性进行匹配,最佳匹配结果在值等于1处,最差匹配结果在值等于-1处,值等于0直接表示二者不相关。
  • cv::TM_CCOEFF_NORMED:归一化的相关性系数匹配方法,正值表示匹配的结果较好,负值则表示匹配的效果较差,也是值越大,匹配效果也好。

代码实现

int main() {
    Mat image_source = imread("E:\\JZCHEN\\test\\lena.jpg", IMREAD_GRAYSCALE);
    Mat image_template = imread("E:\\JZCHEN\\test\\template.jpg", IMREAD_GRAYSCALE);
    Mat image_matched;
    //模板匹配
    /*
        void cv::matchTemplate(
            cv::InputArray image, // 用于搜索的输入图像, 8U 或 32F, 大小 WxH
            cv::InputArray templ, // 用于匹配的模板,和image类型相同, 大小 wxh
            cv::OutputArray result, // 匹配结果图像, 类型 32F, 大小 (W-w+1)x(H-h+1)
            int method // 用于比较的方法
        );
    */
    cv::matchTemplate(image_source, image_template, image_matched, cv::TM_CCOEFF_NORMED);

    double minVal, maxVal;
    cv::Point minLoc, maxLoc;

    //寻找最佳匹配位置
    // 这里注意匹配结果图像与原图像之间的大小关系
    cv::minMaxLoc(image_matched, &minVal, &maxVal, &minLoc, &maxLoc);

    cv::Mat image_color;
    cv::cvtColor(image_source, image_color, CV_GRAY2BGR); // 将灰度图转成BGR格式
    cv::circle(image_color,
        cv::Point(maxLoc.x + image_template.cols / 2, maxLoc.y + image_template.rows / 2),
        20,
        cv::Scalar(0, 0, 255),
        2,
        8,
        0);

    cv::imshow("source image", image_source);
    cv::imshow("match result", image_matched);
    cv::imshow("target", image_color);
    cv::waitKey(0);

    return 0;
}

C++实现模板匹配

归一化的相关性系数匹配方法 实现

double MatchTemplateCPU(BYTE *pSearchImage,
    BYTE *pTemplate,
    int nSearchImageWidth,
    int nSearchImageHeight,
    int nTemplateWidth,
    int nTemplateHeight,
    int &nMatchPointX,
    int &nMatchPointY
)
{
    float *m_pCorrMatrix = NULL;
    m_pCorrMatrix = new float[(nSearchImageHeight - nTemplateHeight)*(nSearchImageWidth - nTemplateWidth)];
    memset(m_pCorrMatrix, 0, (nSearchImageHeight - nTemplateHeight)*(nSearchImageWidth - nTemplateWidth)*sizeof(BYTE));

    // 模板匹配过程
    double dMaxR = 0.1;
    int i, j, m, n;
    int nVectX = 0;
    int nVectY = 0;
    float fSigmaT, fSigmaS, fSigmaST;
    for (i = 0; i < (nSearchImageHeight - nTemplateHeight); i++) {
        for (j = 0; j < (nSearchImageWidth - nTemplateWidth); j++) {
            fSigmaT = 0;
            fSigmaS = 0;
            fSigmaST = 0;
            for (m = 0; m < nTemplateHeight; m++) {
                for (n = 0; n < nTemplateWidth;n++) {
                    fSigmaT = fSigmaT + (*(pTemplate + m*nTemplateWidth + n))*(*(pTemplate + m*nTemplateWidth + n));
                    fSigmaS = fSigmaS + (*(pSearchImage + (i+m)*nSearchImageWidth + (j+n)))*(*(pSearchImage + (m+i)*nSearchImageWidth + (j + n)));
                    fSigmaST = fSigmaST + (*(pSearchImage + (i + m)*nSearchImageWidth + (j + n)))*(*(pTemplate + m*nTemplateWidth + n));
                }
            }
            float tmp = fSigmaST / sqrt(fSigmaS*fSigmaS + fSigmaT*fSigmaT);
            *(m_pCorrMatrix + i*(nSearchImageWidth - nTemplateWidth) + j) = tmp;
            if (*(m_pCorrMatrix + i*(nSearchImageWidth - nTemplateWidth) + j) > dMaxR) {
                dMaxR = *(m_pCorrMatrix + i*(nSearchImageWidth - nTemplateWidth) + j);
                nVectX = i;
                nVectY = j;
            }
        }
    }
    nMatchPointX = nVectX;
    nMatchPointY = nVectY;

    if (m_pCorrMatrix != NULL)
        delete m_pCorrMatrix;
    return 0;
}

GPU实现模板匹配

extern "C"
static int iDivUp(int a, int b)
{
    return (a % b != 0) ? (a / b + 1) : (a / b);
}

__global__ void CudaMatrixMul_kernal(BYTE *pSrc,
                                float *pSelfMul,
                                int nHeight,
                                int nWidth) 
{

    const int row = blockDim.y*blockIdx.y + threadIdx.y;
    const int col = blockDim.x*blockIdx.x + threadIdx.x;

    const int point = row*nWidth + col;

    float tmp;
    if (row < nHeight&&col < nWidth) {
        tmp = pSrc[point];
        tmp *= tmp;
        pSelfMul[point] = tmp;
    }
}

void CudaMatrixSelfMul(
            BYTE *pSrc,
            float *pSelfMul,
            int nHeight,
            int nWidth
) {
    cudaMemset(pSelfMul, 0, nHeight*nWidth * sizeof(float));

    dim3 threads(16, 16);
    dim3 grid(iDivUp(nWidth, threads.x), iDivUp(nHeight, threads.y));

    CudaMatrixMul_kernal << <grid, threads >> > (pSrc, pSelfMul, nHeight, nWidth);

}


__global__ static void
CorrMatrix_kernel(
                BYTE *pTemplate,
                BYTE *pSearchImage,
                float *pMulTT,
                float *pMulSS,
                float *pMulST,
                float *pCorrMatrix,
                int nTemplateHeight,
                int nTemplateWidth,
                int nSearchImageHeight,
                int nSearchImageWidth
) {
    const int y = blockDim.y *blockIdx.y + threadIdx.y;
    const int x = blockDim.x *blockIdx.x + threadIdx.x;

    float fSigmaTT = 0;
    float fSigmaSS = 0;
    float fSigmaST = 0;
    float fSigmaS = 0;
    float fSigmaT = 0;

    // 
    if (y >= 0 && y <= (nSearchImageHeight - nTemplateHeight + 1) &&
        x >= 0 && x <= (nSearchImageWidth - nTemplateWidth + 1)) {

        for (int m = 0; m < nTemplateHeight; m++) {

            for (int n = 0; n < nTemplateWidth; n++) {

                fSigmaTT = fSigmaTT + pMulTT[m*nTemplateWidth + n];
                fSigmaSS = fSigmaSS + pMulSS[(y + m)*nSearchImageWidth + (x + n)];
                fSigmaS = pSearchImage[((y + m)*nSearchImageWidth + (x + n))];
                fSigmaT = pTemplate[m*nTemplateWidth + n];
                fSigmaST = fSigmaST + fSigmaS*fSigmaT;
            }
        }

        pCorrMatrix[y*(nSearchImageWidth - nTemplateWidth) + x] =
            fSigmaST / sqrt(fSigmaSS*fSigmaSS + fSigmaTT*fSigmaTT);
    }
}


void CorrMatrix(BYTE *pTemplate, 
    BYTE* pSearchImage,
    float *pMulTT, 
    float* pMulSS, 
    float *pMulST, 
    float *pCorrMatrix,
    int nTemplatHeigt, 
    int nTemplatWidth, 
    int nSearchImageHeight, 
    int nSearchImageWidth)
{

    // 计算相关性矩阵
    dim3 threads(16, 16);
    dim3 grid(iDivUp(nSearchImageWidth - nTemplatWidth, threads.x), iDivUp(nSearchImageHeight, threads.y));

    // 启动计算相关性矩阵的核函数
    CorrMatrix_kernel << <grid, threads >> > (pTemplate, pSearchImage,
        pMulTT, pMulSS, pMulST, pCorrMatrix, nTemplatHeigt, nTemplatWidth,
        nSearchImageHeight, nSearchImageWidth);
}




void FindMaxCorrMatrixPoint(
        float *pCorrMatrix,
        int nWidth,
        int nHeight,
        double dMaxR,
        int &nVectY,
        int &nVectX
) {

    // 寻找最新点
    for (int i = 0; i < nHeight; i++) {
        for (int j = 0; j < nWidth; j++) {
            if (*(pCorrMatrix + i*nWidth + j) > dMaxR) {
                dMaxR = *(pCorrMatrix + i*nWidth + j);
                nVectY = i;
                nVectX = j;
            }
        }
    }
}


extern "C"
double cudaMatchTemplate(
    BYTE *pSrc,BYTE *pTemplate,
    int nWidth,int nHeight,
    int nTemplateWidth,int nTemplateHeight,
    int &nMatchPointX,int nMatchPointY
) {
    // 图像空间初始化
    BYTE *d_pSearchImage = NULL;
    BYTE *d_pTemplate = NULL;

    float *d_pMulTT = NULL;
    float *d_pMulSS = NULL;
    float *d_pMulST = NULL;
    float *d_pCorrMatrix = NULL;
    float *m_CorrMatrix = NULL;

    cudaMalloc((void**)&d_pSearchImage, nWidth*nHeight*sizeof(BYTE));
    cudaMalloc((void**)&d_pTemplate, nTemplateHeight*nTemplateWidth*sizeof(BYTE));

    cudaMalloc((void**)&d_pMulTT, nTemplateHeight*nTemplateWidth * sizeof(float));
    cudaMalloc((void**)&d_pMulSS, nWidth*nHeight * sizeof(float));
    cudaMalloc((void**)&d_pMulST, nTemplateWidth*nTemplateHeight * sizeof(float));

    cudaMalloc((void**)&d_pCorrMatrix, (nWidth - nTemplateWidth)*(nHeight - nTemplateHeight)*sizeof(float));

    cudaMemset(d_pSearchImage, 0, nWidth*nHeight * sizeof(BYTE));
    cudaMemset(d_pTemplate, 0, nTemplateHeight*nTemplateWidth * sizeof(BYTE));
    cudaMemset(d_pMulTT, 0, nTemplateHeight*nTemplateWidth * sizeof(float));
    cudaMemset(d_pMulSS, 0, nTemplateHeight*nTemplateWidth * sizeof(float));
    cudaMemset(d_pMulST, 0, nTemplateWidth*nTemplateHeight * sizeof(float));
    cudaMemset(d_pCorrMatrix, 0, (nWidth - nTemplateWidth)*(nHeight - nTemplateHeight) * sizeof(float));

    cudaMemcpy(d_pSearchImage, pSrc, nWidth*nHeight * sizeof(BYTE), cudaMemcpyHostToDevice);
    cudaMemcpy(d_pTemplate, pTemplate, nTemplateWidth*nTemplateHeight * sizeof(BYTE), cudaMemcpyHostToDevice);



    // 模板匹配过程
    double dMaxR = 0.1;
    int i, j;
    int nVectX = 0;
    int nVectY = 0;
    // 模板元素自己点乘
    CudaMatrixSelfMul(d_pTemplate, d_pMulTT,nTemplateHeight,nTemplateWidth);

    // 搜素图元素自己点乘
    CudaMatrixSelfMul(d_pSearchImage, d_pMulSS, nHeight, nWidth);

    // 计算相关性矩阵
    CorrMatrix(d_pTemplate, d_pSearchImage, d_pMulTT, d_pMulSS, d_pMulST,
        d_pCorrMatrix,nTemplateHeight, nTemplateWidth, nHeight, nWidth);

    float *m_pCorrMatrix = new float[(nWidth - nTemplateWidth)*(nHeight - nTemplateHeight)];
    cudaMemcpy(m_pCorrMatrix, d_pCorrMatrix, (nWidth - nTemplateWidth)*(nHeight - nTemplateHeight),cudaMemcpyDeviceToHost);

    //  寻找最佳匹配点
    FindMaxCorrMatrixPoint(m_pCorrMatrix, nWidth - nTemplateWidth, nHeight - nTemplateHeight,dMaxR, nVectY, nVectX);

    nMatchPointX = nVectX;
    nMatchPointY = nVectY;

    return 0;

}