基于高斯混合模型和卡尔曼滤波的多目标追踪方法（C++实现）

程序员文章站 2022-04-18 10:22:02

...

最近学习了一下多目标跟踪，看了看MathWorks的关于Motion-Based Multiple Object Tracking的Documention。
实现硬件条件：
VS2015+opencv3.10
opencv2.4.9需要修改几处，需要的可留下邮箱，小博立马回复
下面贴出我的车辆追踪效果图吧，当然大家可以用来适用于别的场景了。其中红点代表车的质心，绿线代表轨迹和轮廓。
基于高斯混合模型和卡尔曼滤波的多目标追踪方法（C++实现）

官网链接：http://cn.mathworks.com/help/vision/examples/motion-based-multiple-object-tracking.html?s_tid=gn_loc_drop
经过小博两天晚上的加工，终于将这篇论文用C++和opencv给实现出来了。这种方法用于静止背景下的多目标检测与跟踪。效果非常不错，特别适用于无人机上。

程序可以分为两部分，1.每一帧检测运动objects；
2.实时的将检测得到的区域匹配到相同一个物体；检测部分，用的是基于高斯混合模型的背景剪除法；

   参考链接：http://blog.pluskid.org/?p=39

所谓单高斯模型，就是用多维高斯分布概率来进行模式分类，其中μ用训练样本均值代替，Σ用样本方差代替，X为d维的样本向量。通过高斯概率公式就可以得出类别C属于正（负）样本的概率。而混合高斯模型（GMM）就是数据从多个高斯分布中产生的。每个GMM由K个高斯分布线性叠加而 P（x）=Σp(k)*p(x|k) 相当于对各个高斯分布进行加权（权系数越大，那么这个数据属于这个高斯分布的可能性越大）
而在实际过程中，我们是在已知数据的前提下，对GMM进行参数估计，具体在这里即为图片训练一个合适的GMM模型。那么在前景检测中，我们会取静止背景（约50帧图像）来进行GMM参数估计，进行背景建模。分类域值官网取得0.7，经验取值0.7-0.75可调。这一步将会分离前景和背景，输出为前景二值掩码。
然后进行形态学运算，并通过函数返回运动区域的centroids和bboxes，完成前景检测部分。

跟踪部分，用的是卡尔曼滤波。卡尔曼是一个线性估计算法，可以建立帧间bboxs的关系。

跟踪分为5种状态： 1，新目标出现 2，目标匹配 3，目标遮挡 4，目标分离 5，目标消失。

卡尔曼原理在这儿我就不贴了，网上很多。

状态方程： X(k+1)=A(K+1,K)X(K)+w(K) 其中 X（k）=[x(k),y(k),w(k),h(k),v(k)], x,y,w,h，分别表示bboxs的横纵坐标，长，宽。

观测方程： Z(k)=H(k)X(k)+v(k) w(k), v(k)，不相关的高斯白噪声。

定义好了观测方程与状态方程之后就可以用卡尔曼滤波器实现运动目标的跟踪，步骤如下：

1）计算运动目标的特征信息（运动质心，以及外接矩形）。

2)用得到的特征信息初始化卡尔曼滤波器（开始时可以初始为0）。

3）用卡尔曼滤波器对下一帧中对应的目标区域进行预测，当下一帧到来时，在预测区域内进行目标匹配。

4）如果匹配成功，则更新卡尔曼滤波器

在匹配的过程中，使用的是匈牙利匹配算法，匈牙利算法在这里有很好的介绍：http://blog.csdn.net/pi9nc/article/details/11848327

匈牙利匹配算法在此处是将新一帧图片中检测到的运动物体匹配到对应的轨迹。匹配过程是通过最小化卡尔曼预测得到的质心与检测到的质心之间的欧氏距离之和实现的

具体可以分为两步：

1，计算损失矩阵，大小为[M N]，其中，M是轨迹数目，N是检测到的运动物体数目。

2，求解损失矩阵

主要思路就是这么多，下面贴上C++的demo，大家可以跑一跑。

#include <iostream>  
#include <opencv2/opencv.hpp>  
#include <opencv2/video/background_segm.hpp>  
using namespace std;
using namespace cv;

#define drawCross( img, center, color, d )\
line(img, Point(center.x - d, center.y - d), Point(center.x + d, center.y + d), color, 2, CV_AA, 0);\
line(img, Point(center.x + d, center.y - d), Point(center.x - d, center.y + d), color, 2, CV_AA, 0);

vector<Point> mousev, kalmanv;


cv::KalmanFilter KF;
cv::Mat_<float> measurement(2, 1);
Mat_<float> state(4, 1); // (x, y, Vx, Vy)  
int incr = 0;
string num2str(int i)
{
    stringstream ss;
    ss << i;
    return ss.str();
}

void initKalman(float x, float y)
{
    // Instantate Kalman Filter with  
    // 4 dynamic parameters and 2 measurement parameters,  
    // where my measurement is: 2D location of object,  
    // and dynamic is: 2D location and 2D velocity.  
    KF.init(4, 2, 0);

    measurement = Mat_<float>::zeros(2, 1);
    measurement.at<float>(0, 0) = x;
    measurement.at<float>(0, 0) = y;


    KF.statePre.setTo(0);
    KF.statePre.at<float>(0, 0) = x;
    KF.statePre.at<float>(1, 0) = y;

    KF.statePost.setTo(0);
    KF.statePost.at<float>(0, 0) = x;
    KF.statePost.at<float>(1, 0) = y;

    setIdentity(KF.transitionMatrix);
    setIdentity(KF.measurementMatrix);
    setIdentity(KF.processNoiseCov, Scalar::all(.005)); //adjust this for faster convergence - but higher noise  
    setIdentity(KF.measurementNoiseCov, Scalar::all(1e-1));
    setIdentity(KF.errorCovPost, Scalar::all(.1));
}

Point kalmanPredict()
{
    Mat prediction = KF.predict();
    Point predictPt(prediction.at<float>(0), prediction.at<float>(1));

    KF.statePre.copyTo(KF.statePost);
    KF.errorCovPre.copyTo(KF.errorCovPost);

    return predictPt;
}

Point kalmanCorrect(float x, float y)
{
    measurement(0) = x;
    measurement(1) = y;
    Mat estimated = KF.correct(measurement);
    Point statePt(estimated.at<float>(0), estimated.at<float>(1));
    return statePt;
}

int main()
{
    Mat frame, thresh_frame;
    vector<Mat> channels;
    VideoCapture capture("1.avi");
    if (!capture.isOpened())
        cout << "hello";
    vector<Vec4i> hierarchy;
    vector<vector<Point> > contours;

    // cv::Mat frame;  
    cv::Mat back,img;
    cv::Mat fore;
    cv::Ptr<BackgroundSubtractorMOG2> bg = createBackgroundSubtractorMOG2();
    //cv::BackgroundSubtractorMOG2 bg;
    // bg->setNMixtures(3);
    // bg->apply(img, mask);
    //bg.nmixtures = 3;//nmixtures  
    //bg.bShadowDetection = false;  
    int incr = 0;
    int track = 0;
    bool update_bg_model = true;
    //capture.open("1.avi");

    if (!capture.isOpened())
        cerr << "Problem opening video source" << endl;


    mousev.clear();
    kalmanv.clear();

    initKalman(0, 0);

    while ((char)waitKey(1) != 'q' && capture.grab())
    {

        Point s, p;

        capture.retrieve(frame);
        //bg(img, fgmask, update_bg_model ? -1 : 0);
        //bg->operator ()(frame, fore);
        bg->apply(frame, fore, update_bg_model ? -1 : 0);
        bg->getBackgroundImage(back);
        erode(fore, fore, Mat());
        erode(fore, fore, Mat());
        dilate(fore, fore, Mat());
        dilate(fore, fore, Mat());
        dilate(fore, fore, Mat());
        dilate(fore, fore, Mat());
        dilate(fore, fore, Mat());
        dilate(fore, fore, Mat());
        dilate(fore, fore, Mat());

        cv::normalize(fore, fore, 0, 1., cv::NORM_MINMAX);
        cv::threshold(fore, fore, .5, 1., CV_THRESH_BINARY);


        split(frame, channels);
        add(channels[0], channels[1], channels[1]);
        subtract(channels[2], channels[1], channels[2]);
        threshold(channels[2], thresh_frame, 50, 255, CV_THRESH_BINARY);
        medianBlur(thresh_frame, thresh_frame, 5);

        //       imshow("Red", channels[1]);  
        findContours(fore, contours, hierarchy, CV_RETR_EXTERNAL, CV_CHAIN_APPROX_SIMPLE, Point(0, 0));
        vector<vector<Point> > contours_poly(contours.size());
        vector<Rect> boundRect(contours.size());

        Mat drawing = Mat::zeros(thresh_frame.size(), CV_8UC1);
        for (size_t i = 0; i < contours.size(); i++)
        {
            //          cout << contourArea(contours[i]) << endl;  
            if (contourArea(contours[i]) > 500)
                drawContours(drawing, contours, i, Scalar::all(255), CV_FILLED, 8, vector<Vec4i>(), 0, Point());
        }
        thresh_frame = drawing;

        findContours(fore, contours, hierarchy, CV_RETR_EXTERNAL, CV_CHAIN_APPROX_SIMPLE, Point(0, 0));

        drawing = Mat::zeros(thresh_frame.size(), CV_8UC1);
        for (size_t i = 0; i < contours.size(); i++)
        {
            //          cout << contourArea(contours[i]) << endl;  
            if (contourArea(contours[i]) > 3000)
                drawContours(drawing, contours, i, Scalar::all(255), CV_FILLED, 8, vector<Vec4i>(), 0, Point());
        }
        thresh_frame = drawing;

        // Get the moments  
        vector<Moments> mu(contours.size());
        for (size_t i = 0; i < contours.size(); i++)
        {
            mu[i] = moments(contours[i], false);
        }

        //  Get the mass centers:  
        vector<Point2f> mc(contours.size());
        for (size_t i = 0; i < contours.size(); i++)

        {
            mc[i] = Point2f(mu[i].m10 / mu[i].m00, mu[i].m01 / mu[i].m00);


            /*
            for(size_t i = 0; i < mc.size(); i++)
            {

            //       drawCross(frame, mc[i], Scalar(255, 0, 0), 5);
            //measurement(0) = mc[i].x;
            //measurement(1) = mc[i].y;


            //        line(frame, p, s, Scalar(255,255,0), 1);

            //          if (measurement(1) <= 130 && measurement(1) >= 120) {
            //            incr++;
            //         cout << "Conter " << incr << " Loation " << measurement(1) << endl;
            //   }
            }*/
        }


        for (size_t i = 0; i < contours.size(); i++)
        {
            approxPolyDP(Mat(contours[i]), contours_poly[i], 3, true);
            boundRect[i] = boundingRect(Mat(contours_poly[i]));

        }

        p = kalmanPredict();
        //        cout << "kalman prediction: " << p.x << " " << p.y << endl;  
        mousev.push_back(p);
        string text;
        for (size_t i = 0; i < contours.size(); i++)
        {
            if (contourArea(contours[i]) > 1000)
            {
                rectangle(frame, boundRect[i].tl(), boundRect[i].br(), Scalar(0, 255, 0), 2, 8, 0);
                Point pt = Point(boundRect[i].x, boundRect[i].y+boundRect[i].height+15);
                Point pt1 = Point(boundRect[i].x, boundRect[i].y - 10);
                Point center = Point(boundRect[i].x + (boundRect[i].width / 2), boundRect[i].y + (boundRect[i].height / 2));
                cv::circle(frame, center, 8, Scalar(0, 0, 255), -1, 1, 0);
                Scalar color = CV_RGB(255, 0, 0);
                text = num2str(boundRect[i].width) + "*" + num2str(boundRect[i].height);
                putText(frame,"car", pt, CV_FONT_HERSHEY_DUPLEX, 1.0f, color);
                putText(frame,text, pt1, CV_FONT_HERSHEY_DUPLEX, 1.0f, color);

                s = kalmanCorrect(center.x, center.y);
                drawCross(frame, s, Scalar(255, 255, 255), 5);

                if (s.y <= 130 && p.y > 130 && s.x > 15)
                {
                    incr++;
                    cout << "Counter " << incr << endl;
                }


                for (int i = mousev.size() - 20; i < mousev.size() - 1; i++)
                {
                    line(frame, mousev[i], mousev[i + 1], Scalar(0, 255, 0), 1);
                }

            }
        }


        imshow("Video", frame);
    //  imshow("Red", channels[2]);
    //  imshow("Binary", thresh_frame);
    }
    return 0;
}

上一篇： Python数据特征分析－对比分析

下一篇： Spring Data Mongondb在使用Aggregation时碰到的问题