A Brief Walkthrough of the MOSSE Tracker Source Code
Source Code
The source is as follows:
```cpp
// This file is part of the OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
//
// [1] David S. Bolme et al. "Visual Object Tracking using Adaptive Correlation Filters"
//     http://www.cs.colostate.edu/~draper/papers/bolme_cvpr10.pdf
//
// credits:
// Kun-Hsin Chen: for initial c++ code
// Cracki: for the idea of only converting the used patch to gray
//

#include "opencv2/tracking.hpp"

namespace cv {
namespace tracking {

struct DummyModel : TrackerModel
{
    virtual void modelUpdateImpl() {}
    virtual void modelEstimationImpl( const std::vector<Mat>& ) {}
};

const double eps = 0.00001;      // for normalization
const double rate = 0.2;         // learning rate
const double psrThreshold = 5.7; // no detection, if PSR is smaller than this

struct MosseImpl : TrackerMOSSE
{
protected:
    Point2d center; // center of the bounding box
    Size size;      // size of the bounding box
    Mat hanWin;
    Mat G;          // goal
    Mat H, A, B;    // state

    // Element-wise division of complex numbers in src1 and src2
    Mat divDFTs( const Mat &src1, const Mat &src2 ) const
    {
        Mat c1[2], c2[2], a1, a2, s1, s2, denom, re, im;

        // split into re and im per src
        cv::split(src1, c1);
        cv::split(src2, c2);

        // (Re2*Re2 + Im2*Im2) = denom
        // denom is the same for both channels
        cv::multiply(c2[0], c2[0], s1);
        cv::multiply(c2[1], c2[1], s2);
        cv::add(s1, s2, denom);

        // (Re1*Re2 + Im1*Im2)/(Re2*Re2 + Im2*Im2) = Re
        cv::multiply(c1[0], c2[0], a1);
        cv::multiply(c1[1], c2[1], a2);
        cv::divide(a1 + a2, denom, re, 1.0);

        // (Im1*Re2 - Re1*Im2)/(Re2*Re2 + Im2*Im2) = Im
        cv::multiply(c1[1], c2[0], a1);
        cv::multiply(c1[0], c2[1], a2);
        cv::divide(a1 + a2, denom, im, -1.0);

        // Merge Re and Im back into a complex matrix
        Mat dst, chn[] = {re, im};
        cv::merge(chn, 2, dst);
        return dst;
    }

    void preProcess( Mat &window ) const
    {
        window.convertTo(window, CV_32F);
        log(window + 1.0f, window);

        // normalize
        Scalar mean, StdDev;
        meanStdDev(window, mean, StdDev);
        window = (window - mean[0]) / (StdDev[0] + eps);

        // cosine (Hanning) window weighting
        window = window.mul(hanWin);
    }

    // compute the displacement of the response peak, return the PSR
    double correlate( const Mat &image_sub, Point &delta_xy ) const
    {
        Mat IMAGE_SUB, RESPONSE, response;

        // filter in dft space
        dft(image_sub, IMAGE_SUB, DFT_COMPLEX_OUTPUT);
        mulSpectrums(IMAGE_SUB, H, RESPONSE, 0, true);
        idft(RESPONSE, response, DFT_SCALE | DFT_REAL_OUTPUT);

        // update center position
        double maxVal;
        Point maxLoc;
        minMaxLoc(response, 0, &maxVal, 0, &maxLoc);
        delta_xy.x = maxLoc.x - int(response.size().width / 2);
        delta_xy.y = maxLoc.y - int(response.size().height / 2);

        // normalize response
        Scalar mean, std;
        meanStdDev(response, mean, std);
        return (maxVal - mean[0]) / (std[0] + eps); // PSR
    }

    Mat randWarp( const Mat& a ) const
    {
        cv::RNG rng(8031965);

        // random rotation
        double C = 0.1;
        double ang = rng.uniform(-C, C);
        double c = cos(ang), s = sin(ang);

        // affine warp matrix
        Mat_<float> W(2, 3);
        W << c + rng.uniform(-C, C), -s + rng.uniform(-C, C), 0,
             s + rng.uniform(-C, C),  c + rng.uniform(-C, C), 0;

        // random translation
        Mat_<float> center_warp(2, 1);
        center_warp << a.cols / 2, a.rows / 2;
        W.col(2) = center_warp - (W.colRange(0, 2)) * center_warp;

        Mat warped;
        warpAffine(a, warped, W, a.size(), BORDER_REFLECT);
        return warped;
    }

    virtual bool initImpl( const Mat& image, const Rect2d& boundingBox )
    {
        model = makePtr<DummyModel>();

        Mat img;
        if (image.channels() == 1)
            img = image;
        else
            cvtColor(image, img, COLOR_BGR2GRAY);

        int w = getOptimalDFTSize(int(boundingBox.width));
        int h = getOptimalDFTSize(int(boundingBox.height));

        // Get the center position
        int x1 = int(floor((2 * boundingBox.x + boundingBox.width - w) / 2));
        int y1 = int(floor((2 * boundingBox.y + boundingBox.height - h) / 2));
        center.x = x1 + (w) / 2;
        center.y = y1 + (h) / 2;
        size.width = w;
        size.height = h;

        Mat window;
        getRectSubPix(img, size, center, window);
        createHanningWindow(hanWin, size, CV_32F);

        // goal
        Mat g = Mat::zeros(size, CV_32F);
        g.at<float>(h / 2, w / 2) = 1;
        GaussianBlur(g, g, Size(-1, -1), 2.0);
        double maxVal;
        minMaxLoc(g, 0, &maxVal);
        g = g / maxVal;
        dft(g, G, DFT_COMPLEX_OUTPUT);

        // initial A, B and H
        A = Mat::zeros(G.size(), G.type());
        B = Mat::zeros(G.size(), G.type());
        for (int i = 0; i < 8; i++)
        {
            Mat window_warp = randWarp(window);
            preProcess(window_warp);

            Mat WINDOW_WARP, A_i, B_i;
            dft(window_warp, WINDOW_WARP, DFT_COMPLEX_OUTPUT);
            mulSpectrums(G, WINDOW_WARP, A_i, 0, true);
            mulSpectrums(WINDOW_WARP, WINDOW_WARP, B_i, 0, true);
            A += A_i;
            B += B_i;
        }
        H = divDFTs(A, B);
        return true;
    }

    virtual bool updateImpl( const Mat& image, Rect2d& boundingBox )
    {
        if (H.empty()) // not initialized
            return false;

        Mat image_sub;
        getRectSubPix(image, size, center, image_sub);

        if (image_sub.channels() != 1)
            cvtColor(image_sub, image_sub, COLOR_BGR2GRAY);
        preProcess(image_sub);

        Point delta_xy;
        double PSR = correlate(image_sub, delta_xy);
        if (PSR < psrThreshold)
            return false;

        // update location
        center.x += delta_xy.x;
        center.y += delta_xy.y;

        Mat img_sub_new;
        getRectSubPix(image, size, center, img_sub_new);
        if (img_sub_new.channels() != 1)
            cvtColor(img_sub_new, img_sub_new, COLOR_BGR2GRAY);
        preProcess(img_sub_new);

        // new state for A and B
        Mat F, A_new, B_new;
        dft(img_sub_new, F, DFT_COMPLEX_OUTPUT);
        mulSpectrums(G, F, A_new, 0, true);
        mulSpectrums(F, F, B_new, 0, true);

        // update A, B, and H
        A = A * (1 - rate) + A_new * rate;
        B = B * (1 - rate) + B_new * rate;
        H = divDFTs(A, B);

        // return tracked rect
        double x = center.x, y = center.y;
        int w = size.width, h = size.height;
        boundingBox = Rect2d(Point2d(x - 0.5 * w, y - 0.5 * h), Point2d(x + 0.5 * w, y + 0.5 * h));
        return true;
    }

public:
    MosseImpl() { isInit = 0; }

    // dummy implementation.
    virtual void read( const FileNode& ) {}
    virtual void write( FileStorage& ) const {}

}; // MosseImpl

} // tracking

Ptr<TrackerMOSSE> TrackerMOSSE::create()
{
    return makePtr<tracking::MosseImpl>();
}

} // cv
```
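Before diving into the individual functions, here is a minimal usage sketch (not part of the file above; it assumes the OpenCV 3.4-era contrib tracking API, where the class is `cv::TrackerMOSSE` — in OpenCV 4.5+ it lives under `cv::legacy` — and uses the default webcam as a placeholder video source):

```cpp
#include <opencv2/core.hpp>
#include <opencv2/videoio.hpp>
#include <opencv2/highgui.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/tracking.hpp>

int main()
{
    cv::VideoCapture cap(0);                        // placeholder source: default camera
    cv::Mat frame;
    if (!cap.read(frame)) return -1;

    cv::Rect2d box = cv::selectROI("MOSSE", frame); // draw the initial target box by hand
    cv::Ptr<cv::TrackerMOSSE> tracker = cv::TrackerMOSSE::create();
    tracker->init(frame, box);                      // calls initImpl

    while (cap.read(frame))
    {
        if (tracker->update(frame, box))            // calls updateImpl; false when PSR < 5.7
            cv::rectangle(frame, cv::Rect(box), cv::Scalar(0, 255, 0), 2);
        cv::imshow("MOSSE", frame);
        if (cv::waitKey(1) == 27) break;            // ESC quits
    }
    return 0;
}
```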
What Each Function Does
1: divDFTs
This function performs element-wise division of two complex-valued Mats, i.e.:
$$(src_1/src_2).re=\frac{src_1.re\cdot src_2.re+src_1.im\cdot src_2.im}{src_2.re^2+src_2.im^2}\\(src_1/src_2).im=\frac{src_1.im\cdot src_2.re-src_1.re\cdot src_2.im}{src_2.re^2+src_2.im^2}$$
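As a quick sanity check, here is a tiny standalone snippet (not part of the OpenCV source) that applies this textbook element-wise complex division to a single value:

```cpp
#include <opencv2/core.hpp>
#include <cstdio>

int main()
{
    // src1 = 1+2i, src2 = 3+4i  =>  src1/src2 = (11+2i)/25 = 0.44 + 0.08i
    cv::Mat src1 = (cv::Mat_<cv::Vec2f>(1, 1) << cv::Vec2f(1.f, 2.f));
    cv::Mat src2 = (cv::Mat_<cv::Vec2f>(1, 1) << cv::Vec2f(3.f, 4.f));

    cv::Mat c1[2], c2[2];
    cv::split(src1, c1);   // c1[0] = Re1, c1[1] = Im1
    cv::split(src2, c2);   // c2[0] = Re2, c2[1] = Im2

    cv::Mat denom = c2[0].mul(c2[0]) + c2[1].mul(c2[1]);          // Re2^2 + Im2^2
    cv::Mat re = (c1[0].mul(c2[0]) + c1[1].mul(c2[1])) / denom;   // textbook real part
    cv::Mat im = (c1[1].mul(c2[0]) - c1[0].mul(c2[1])) / denom;   // textbook imaginary part

    std::printf("%.2f + %.2fi\n", re.at<float>(0), im.at<float>(0)); // prints 0.44 + 0.08i
    return 0;
}
```

One subtlety: in the OpenCV code the `-1.0` scale in the final `cv::divide` negates the whole sum `Im1*Re2 + Re1*Im2`, so for a real-valued `src2` the function returns the conjugate of the textbook quotient. In this tracker `src2` is always `B = F ⊙ F*`, which is real, so `H` comes out as `conj(A/B)`; the `conjB=true` flag of `mulSpectrums` inside `correlate` conjugates it back, matching the paper's `G = F ⊙ H*`.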
2: preProcess
This function preprocesses the image patch. The MOSSE paper explains:

"One issue with the FFT convolution algorithm is that the image and the filter are mapped to the topological structure of a torus. In other words, it connects the left edge of the image to the right edge, and the top to the bottom. During convolution, the images rotate through the toroidal space instead of translating as they would in the spatial domain. Artificially connecting the boundaries of the image introduces an artifact which effects the correlation output.

This effect is reduced by following the preprocessing steps outlined in [3]. First, the pixel values are transformed using a log function which helps with low contrast lighting situations. The pixel values are normalized to have a mean value of 0.0 and a norm of 1.0. Finally, the image is multiplied by a cosine window which gradually reduces the pixel values near the edge to zero. This also has the benefit that it puts more emphasis near the center of the target."
In short: the raw pixel values are first passed through a log function, which helps in low-contrast lighting conditions; the values are then normalized to have a mean of 0.0 and a norm of 1.0; finally, the patch is multiplied by a cosine window that gradually tapers the pixel values near the edges toward zero, which also has the benefit of emphasizing the region near the target center.
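In formula form, preProcess maps a patch $I$ to (with $\mu$, $\sigma$ the mean and standard deviation of the log-image, $\varepsilon = 10^{-5}$, and $h$ the Hanning window):

$$w(x,y)=\frac{\log\bigl(I(x,y)+1\bigr)-\mu}{\sigma+\varepsilon}\cdot h(x,y)$$

Note that the code normalizes to unit standard deviation rather than the unit norm stated in the paper; the two differ only by a constant scale factor, which affects neither the peak location nor the PSR.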
3: correlate
This function correlates the current frame's patch with the filter template H obtained from the previous frame (a multiplication in the Fourier domain), takes the inverse DFT to get the response map, finds the response peak, reports the peak's offset from the patch center (the target's displacement), and returns the peak-to-sidelobe ratio (PSR); a low PSR means the target could not be reliably detected.
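In the paper, the PSR is computed over the sidelobe region, i.e., the response map excluding an 11×11 window around the peak $g_{\max}$:

$$\mathrm{PSR}=\frac{g_{\max}-\mu_{sl}}{\sigma_{sl}}$$

This implementation instead takes $\mu$ and $\sigma$ over the entire response map, which is cheaper; tracking is declared lost when the PSR falls below psrThreshold = 5.7 (the paper reports that a PSR dropping to around 7.0 indicates occlusion or tracking failure).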
4: randWarp
This function applies a small random affine warp (a rotation plus per-element jitter) to the image patch, as detailed below.
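Concretely, with $\theta$ and each $\epsilon_i$ drawn from $U(-0.1, 0.1)$, the warp matrix is:

$$W=\begin{bmatrix}\cos\theta+\epsilon_1 & -\sin\theta+\epsilon_2 & t_x\\ \sin\theta+\epsilon_3 & \cos\theta+\epsilon_4 & t_y\end{bmatrix},\qquad \begin{pmatrix}t_x\\ t_y\end{pmatrix}=p_c-M\,p_c$$

where $p_c$ is the patch center and $M$ is the left 2×2 block of $W$; choosing the translation this way keeps the patch center fixed under the warp. Also note that `rng` is constructed inside randWarp with the fixed seed 8031965, so as written every call produces the same "random" warp, and the eight training patches in initImpl are effectively identical.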
5: initImpl
This function initializes A, B, and H. It first converts the image to grayscale and crops a patch centered on the bounding box (padded to the optimal DFT size). The desired output G is built by placing a 1 at the patch center, applying a Gaussian blur, normalizing the peak to 1, and taking the DFT. A and B are then initialized by plugging eight randomly warped copies of the patch into the formulas below; during initialization the learning rate η is effectively 1.
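The referenced initialization formulas, from the paper (here with $N = 8$ warped patches $F_i$, $\odot$ denoting element-wise multiplication and $*$ complex conjugation, i.e., the conjB=true flag of mulSpectrums):

$$A=\sum_{i=1}^{N}G\odot F_i^{*},\qquad B=\sum_{i=1}^{N}F_i\odot F_i^{*},\qquad H^{*}=\frac{A}{B}$$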
6: updateImpl
This function first finds the response peak and moves the center there, then refreshes A, B, and H with the running-average update below.
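That is, the update from the paper, with learning rate $\eta$ = rate = 0.2:

$$A_i=\eta\,(G\odot F_i^{*})+(1-\eta)A_{i-1},\qquad B_i=\eta\,(F_i\odot F_i^{*})+(1-\eta)B_{i-1},\qquad H_i^{*}=\frac{A_i}{B_i}$$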
Original Paper
http://www.cs.colostate.edu/~draper/papers/bolme_cvpr10.pdf