A Brief Walkthrough of the MOSSE Tracker Source Code
Source Code
The source is as follows:
```cpp
// This file is part of the OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
//
// [1] David S. Bolme et al. "Visual Object Tracking using Adaptive Correlation Filters"
//     http://www.cs.colostate.edu/~draper/papers/bolme_cvpr10.pdf
//
// credits:
// Kun-Hsin Chen: for initial c++ code
// Cracki: for the idea of only converting the used patch to gray
//

#include "opencv2/tracking.hpp"

namespace cv {
namespace tracking {

struct DummyModel : TrackerModel
{
    virtual void modelUpdateImpl() {}
    virtual void modelEstimationImpl( const std::vector<Mat>& ) {}
};

const double eps = 0.00001;      // for normalization
const double rate = 0.2;         // learning rate
const double psrThreshold = 5.7; // no detection, if PSR is smaller than this

struct MosseImpl : TrackerMOSSE
{
protected:
    Point2d center; // center of the bounding box
    Size size;      // size of the bounding box
    Mat hanWin;
    Mat G;          // goal
    Mat H, A, B;    // state

    // Element-wise division of complex numbers in src1 and src2
    Mat divDFTs( const Mat &src1, const Mat &src2 ) const
    {
        Mat c1[2], c2[2], a1, a2, s1, s2, denom, re, im;

        // split into re and im per src
        cv::split(src1, c1);
        cv::split(src2, c2);

        // (Re2*Re2 + Im2*Im2) = denom
        // denom is the same for both channels
        cv::multiply(c2[0], c2[0], s1);
        cv::multiply(c2[1], c2[1], s2);
        cv::add(s1, s2, denom);

        // (Re1*Re2 + Im1*Im2)/(Re2*Re2 + Im2*Im2) = Re
        cv::multiply(c1[0], c2[0], a1);
        cv::multiply(c1[1], c2[1], a2);
        cv::divide(a1 + a2, denom, re, 1.0);

        // (Im1*Re2 - Re1*Im2)/(Re2*Re2 + Im2*Im2) = Im
        cv::multiply(c1[1], c2[0], a1);
        cv::multiply(c1[0], c2[1], a2);
        cv::divide(a1 + a2, denom, im, -1.0);

        // Merge Re and Im back into a complex matrix
        Mat dst, chn[] = {re, im};
        cv::merge(chn, 2, dst);
        return dst;
    }

    void preProcess( Mat &window ) const
    {
        window.convertTo(window, CV_32F);
        log(window + 1.0f, window);

        // normalize
        Scalar mean, StdDev;
        meanStdDev(window, mean, StdDev);
        window = (window - mean[0]) / (StdDev[0] + eps);

        // cosine (Hanning) window weighting
        window = window.mul(hanWin);
    }

    // compute the displacement of the response peak, return the PSR
    double correlate( const Mat &image_sub, Point &delta_xy ) const
    {
        Mat IMAGE_SUB, RESPONSE, response;

        // filter in dft space
        dft(image_sub, IMAGE_SUB, DFT_COMPLEX_OUTPUT);
        mulSpectrums(IMAGE_SUB, H, RESPONSE, 0, true);
        idft(RESPONSE, response, DFT_SCALE | DFT_REAL_OUTPUT);

        // update center position
        double maxVal;
        Point maxLoc;
        minMaxLoc(response, 0, &maxVal, 0, &maxLoc);
        delta_xy.x = maxLoc.x - int(response.size().width / 2);
        delta_xy.y = maxLoc.y - int(response.size().height / 2);

        // normalize response
        Scalar mean, std;
        meanStdDev(response, mean, std);
        return (maxVal - mean[0]) / (std[0] + eps); // PSR
    }

    Mat randWarp( const Mat& a ) const
    {
        cv::RNG rng(8031965);

        // random rotation
        double C = 0.1;
        double ang = rng.uniform(-C, C);
        double c = cos(ang), s = sin(ang);

        // affine warp matrix
        Mat_<float> W(2, 3);
        W << c + rng.uniform(-C, C), -s + rng.uniform(-C, C), 0,
             s + rng.uniform(-C, C),  c + rng.uniform(-C, C), 0;

        // random translation
        Mat_<float> center_warp(2, 1);
        center_warp << a.cols / 2, a.rows / 2;
        W.col(2) = center_warp - (W.colRange(0, 2)) * center_warp;

        Mat warped;
        warpAffine(a, warped, W, a.size(), BORDER_REFLECT);
        return warped;
    }

    virtual bool initImpl( const Mat& image, const Rect2d& boundingBox )
    {
        model = makePtr<DummyModel>();

        Mat img;
        if (image.channels() == 1)
            img = image;
        else
            cvtColor(image, img, COLOR_BGR2GRAY);

        int w = getOptimalDFTSize(int(boundingBox.width));
        int h = getOptimalDFTSize(int(boundingBox.height));

        // Get the center position
        int x1 = int(floor((2 * boundingBox.x + boundingBox.width - w) / 2));
        int y1 = int(floor((2 * boundingBox.y + boundingBox.height - h) / 2));
        center.x = x1 + (w) / 2;
        center.y = y1 + (h) / 2;
        size.width = w;
        size.height = h;

        Mat window;
        getRectSubPix(img, size, center, window);
        createHanningWindow(hanWin, size, CV_32F);

        // goal
        Mat g = Mat::zeros(size, CV_32F);
        g.at<float>(h / 2, w / 2) = 1;
        GaussianBlur(g, g, Size(-1, -1), 2.0);
        double maxVal;
        minMaxLoc(g, 0, &maxVal);
        g = g / maxVal;
        dft(g, G, DFT_COMPLEX_OUTPUT);

        // initial A, B and H
        A = Mat::zeros(G.size(), G.type());
        B = Mat::zeros(G.size(), G.type());
        for (int i = 0; i < 8; i++)
        {
            Mat window_warp = randWarp(window);
            preProcess(window_warp);

            Mat WINDOW_WARP, A_i, B_i;
            dft(window_warp, WINDOW_WARP, DFT_COMPLEX_OUTPUT);
            mulSpectrums(G, WINDOW_WARP, A_i, 0, true);
            mulSpectrums(WINDOW_WARP, WINDOW_WARP, B_i, 0, true);
            A += A_i;
            B += B_i;
        }
        H = divDFTs(A, B);
        return true;
    }

    virtual bool updateImpl( const Mat& image, Rect2d& boundingBox )
    {
        if (H.empty()) // not initialized
            return false;

        Mat image_sub;
        getRectSubPix(image, size, center, image_sub);

        if (image_sub.channels() != 1)
            cvtColor(image_sub, image_sub, COLOR_BGR2GRAY);
        preProcess(image_sub);

        Point delta_xy;
        double PSR = correlate(image_sub, delta_xy);
        if (PSR < psrThreshold)
            return false;

        // update location
        center.x += delta_xy.x;
        center.y += delta_xy.y;

        Mat img_sub_new;
        getRectSubPix(image, size, center, img_sub_new);
        if (img_sub_new.channels() != 1)
            cvtColor(img_sub_new, img_sub_new, COLOR_BGR2GRAY);
        preProcess(img_sub_new);

        // new state for A and B
        Mat F, A_new, B_new;
        dft(img_sub_new, F, DFT_COMPLEX_OUTPUT);
        mulSpectrums(G, F, A_new, 0, true);
        mulSpectrums(F, F, B_new, 0, true);

        // update A, B, and H
        A = A * (1 - rate) + A_new * rate;
        B = B * (1 - rate) + B_new * rate;
        H = divDFTs(A, B);

        // return tracked rect
        double x = center.x, y = center.y;
        int w = size.width, h = size.height;
        boundingBox = Rect2d(Point2d(x - 0.5 * w, y - 0.5 * h), Point2d(x + 0.5 * w, y + 0.5 * h));
        return true;
    }

public:
    MosseImpl() { isInit = 0; }

    // dummy implementation.
    virtual void read( const FileNode& ) {}
    virtual void write( FileStorage& ) const {}

}; // MosseImpl

} // tracking

Ptr<TrackerMOSSE> TrackerMOSSE::create()
{
    return makePtr<tracking::MosseImpl>();
}

} // cv
```
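Before diving into the individual functions, here is a minimal usage sketch (not part of the file above; it assumes the OpenCV 3.4-era contrib tracking API, where the class is `cv::TrackerMOSSE` — in OpenCV 4.5+ it lives under `cv::legacy` — and uses the default webcam as a placeholder video source):

```cpp
#include <opencv2/core.hpp>
#include <opencv2/videoio.hpp>
#include <opencv2/highgui.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/tracking.hpp>

int main()
{
    cv::VideoCapture cap(0);                        // placeholder source: default camera
    cv::Mat frame;
    if (!cap.read(frame)) return -1;

    cv::Rect2d box = cv::selectROI("MOSSE", frame); // draw the initial target box by hand
    cv::Ptr<cv::TrackerMOSSE> tracker = cv::TrackerMOSSE::create();
    tracker->init(frame, box);                      // calls initImpl

    while (cap.read(frame))
    {
        if (tracker->update(frame, box))            // calls updateImpl; false when PSR < 5.7
            cv::rectangle(frame, cv::Rect(box), cv::Scalar(0, 255, 0), 2);
        cv::imshow("MOSSE", frame);
        if (cv::waitKey(1) == 27) break;            // ESC quits
    }
    return 0;
}
```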
What Each Function Does
1: divDFTs
This function performs element-wise division of two complex-valued Mats, i.e.:
$$(src_1/src_2).re=\frac{src_1.re\cdot src_2.re+src_1.im\cdot src_2.im}{src_2.re^2+src_2.im^2}\\(src_1/src_2).im=\frac{src_1.im\cdot src_2.re-src_1.re\cdot src_2.im}{src_2.re^2+src_2.im^2}$$
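As a quick sanity check, here is a tiny standalone snippet (not part of the OpenCV source) that applies this textbook element-wise complex division to a single value:

```cpp
#include <opencv2/core.hpp>
#include <cstdio>

int main()
{
    // src1 = 1+2i, src2 = 3+4i  =>  src1/src2 = (11+2i)/25 = 0.44 + 0.08i
    cv::Mat src1 = (cv::Mat_<cv::Vec2f>(1, 1) << cv::Vec2f(1.f, 2.f));
    cv::Mat src2 = (cv::Mat_<cv::Vec2f>(1, 1) << cv::Vec2f(3.f, 4.f));

    cv::Mat c1[2], c2[2];
    cv::split(src1, c1);   // c1[0] = Re1, c1[1] = Im1
    cv::split(src2, c2);   // c2[0] = Re2, c2[1] = Im2

    cv::Mat denom = c2[0].mul(c2[0]) + c2[1].mul(c2[1]);          // Re2^2 + Im2^2
    cv::Mat re = (c1[0].mul(c2[0]) + c1[1].mul(c2[1])) / denom;   // textbook real part
    cv::Mat im = (c1[1].mul(c2[0]) - c1[0].mul(c2[1])) / denom;   // textbook imaginary part

    std::printf("%.2f + %.2fi\n", re.at<float>(0), im.at<float>(0)); // prints 0.44 + 0.08i
    return 0;
}
```

One subtlety: in the OpenCV code the `-1.0` scale in the final `cv::divide` negates the whole sum `Im1*Re2 + Re1*Im2`, so for a real-valued `src2` the function returns the conjugate of the textbook quotient. In this tracker `src2` is always `B = F ⊙ F*`, which is real, so `H` comes out as `conj(A/B)`; the `conjB=true` flag of `mulSpectrums` inside `correlate` conjugates it back, matching the paper's `G = F ⊙ H*`.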
2: preProcess
This function preprocesses the image patch. The MOSSE paper explains:

"One issue with the FFT convolution algorithm is that the image and the filter are mapped to the topological structure of a torus. In other words, it connects the left edge of the image to the right edge, and the top to the bottom. During convolution, the images rotate through the toroidal space instead of translating as they would in the spatial domain. Artificially connecting the boundaries of the image introduces an artifact which effects the correlation output.

This effect is reduced by following the preprocessing steps outlined in [3]. First, the pixel values are transformed using a log function which helps with low contrast lighting situations. The pixel values are normalized to have a mean value of 0.0 and a norm of 1.0. Finally, the image is multiplied by a cosine window which gradually reduces the pixel values near the edge to zero. This also has the benefit that it puts more emphasis near the center of the target."
In short: the raw pixel values are first passed through a log function, which helps in low-contrast lighting conditions; the values are then normalized to have a mean of 0.0 and a norm of 1.0; finally, the patch is multiplied by a cosine window that gradually tapers the pixel values near the edges toward zero, which also has the benefit of emphasizing the region near the target center.
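In formula form, preProcess maps a patch $I$ to (with $\mu$, $\sigma$ the mean and standard deviation of the log-image, $\varepsilon = 10^{-5}$, and $h$ the Hanning window):

$$w(x,y)=\frac{\log\bigl(I(x,y)+1\bigr)-\mu}{\sigma+\varepsilon}\cdot h(x,y)$$

Note that the code normalizes to unit standard deviation rather than the unit norm stated in the paper; the two differ only by a constant scale factor, which affects neither the peak location nor the PSR.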
3: correlate
This function correlates the current frame's patch with the filter template H obtained from the previous frame (a multiplication in the Fourier domain), takes the inverse DFT to get the response map, finds the response peak, reports the peak's offset from the patch center (the target's displacement), and returns the peak-to-sidelobe ratio (PSR); a low PSR means the target could not be reliably detected.
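In the paper, the PSR is computed over the sidelobe region, i.e., the response map excluding an 11×11 window around the peak $g_{\max}$:

$$\mathrm{PSR}=\frac{g_{\max}-\mu_{sl}}{\sigma_{sl}}$$

This implementation instead takes $\mu$ and $\sigma$ over the entire response map, which is cheaper; tracking is declared lost when the PSR falls below psrThreshold = 5.7 (the paper reports that a PSR dropping to around 7.0 indicates occlusion or tracking failure).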
4: randWarp
This function applies a small random affine warp (a rotation plus per-element jitter) to the image patch, as detailed below.
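Concretely, with $\theta$ and each $\epsilon_i$ drawn from $U(-0.1, 0.1)$, the warp matrix is:

$$W=\begin{bmatrix}\cos\theta+\epsilon_1 & -\sin\theta+\epsilon_2 & t_x\\ \sin\theta+\epsilon_3 & \cos\theta+\epsilon_4 & t_y\end{bmatrix},\qquad \begin{pmatrix}t_x\\ t_y\end{pmatrix}=p_c-M\,p_c$$

where $p_c$ is the patch center and $M$ is the left 2×2 block of $W$; choosing the translation this way keeps the patch center fixed under the warp. Also note that `rng` is constructed inside randWarp with the fixed seed 8031965, so as written every call produces the same "random" warp, and the eight training patches in initImpl are effectively identical.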
5: initImpl
This function initializes A, B, and H. It first converts the image to grayscale and crops a patch centered on the bounding box (padded to the optimal DFT size). The desired output G is built by placing a 1 at the patch center, applying a Gaussian blur, normalizing the peak to 1, and taking the DFT. A and B are then initialized by plugging eight randomly warped copies of the patch into the formulas below; during initialization the learning rate η is effectively 1.
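The referenced initialization formulas, from the paper (here with $N = 8$ warped patches $F_i$, $\odot$ denoting element-wise multiplication and $*$ complex conjugation, i.e., the conjB=true flag of mulSpectrums):

$$A=\sum_{i=1}^{N}G\odot F_i^{*},\qquad B=\sum_{i=1}^{N}F_i\odot F_i^{*},\qquad H^{*}=\frac{A}{B}$$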
6: updateImpl
This function first finds the response peak and moves the center there, then refreshes A, B, and H with the running-average update below.
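That is, the update from the paper, with learning rate $\eta$ = rate = 0.2:

$$A_i=\eta\,(G\odot F_i^{*})+(1-\eta)A_{i-1},\qquad B_i=\eta\,(F_i\odot F_i^{*})+(1-\eta)B_{i-1},\qquad H_i^{*}=\frac{A_i}{B_i}$$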
Original Paper
http://www.cs.colostate.edu/~draper/papers/bolme_cvpr10.pdf