I want to convert a BGR cv::Mat to gray using this formula: Gray = B OR G OR R (a pixel-wise operation). I tried this:
cv::Mat diff_channels[3];
cv
just wanted to share my results
Size: akarsakov Humam Helfawi Miki OpenCV (not same operation)
[10 x 10] 0.00733416 1.03216 1.15244 0.044005
[100 x 100] 0.078231 0.0816536 0.185799 0.043516
[1000 x 1000] 7.81039 5.89764 40.7481 3.49253
[1280 x 720] 7.61432 5.31824 8.74916 1.70397
[1920 x 1080] 16.0256 12.8186 9.32367 3.6045
[4096 x 3112] 97.7365 72.6287 49.3452 22.9545
[10000 x 10000] 763.509 575.718 402.729 197.01
EDIT: I have a new laptop and tested the code again on it and on the older one. It seems the differences in the results depend on the different build configurations of OpenCV.
1.Intel(R)_Core(TM)_i5-3230M_CPU_@_2.60GHz
Size: akarsakov Humam Helfawi Miki OpenCV (not same operation)
[10 x 10] 0.00276318 0.0627636 0.445661 0.0351318
[100 x 100] 0.0303949 0.0734216 0.0457898 0.0663162
[1000 x 1000] 3.01186 5.30727 2.11699 3.05805
[1280 x 720] 2.59975 4.91806 1.82014 2.69528
[1920 x 1080] 5.97478 11.5406 3.56213 5.52556
[4096 x 3112] 37.3076 64.1728 22.4575 35.0398
[10000 x 10000] 284.249 510.332 175.626 268.652
2.Intel(R)_Core(TM)2_Duo_CPU_____T6500__@_2.10GHz
Size: akarsakov Humam Helfawi Miki OpenCV (not same operation)
[10 x 10] 0.00586751 0.107571 24.1966 1.50844
[100 x 100] 0.0704101 0.154511 0.308044 0.119306
[1000 x 1000] 7.00825 11.072 3.44912 5.25778
[1280 x 720] 6.63322 9.88529 3.91999 5.0177
[1920 x 1080] 14.6199 21.8047 7.19357 10.9551
[4096 x 3112] 85.8226 133.165 42.4392 64.2184
[10000 x 10000] 675.604 1050.19 334.334 507.87
You can use cv::ParallelLoopBody
with cv::parallel_for_
to use OpenCV Concurrency API:
// Loop body for cv::parallel_for_ that turns a 3-channel 8-bit image into a
// single-channel one by OR-ing the three channel bytes of every pixel.
//
// The Range handed to operator() is a span of row indices [start, end).
// Every row is addressed through Mat::ptr, so images whose rows are separated
// by padding (for example an ROI of a larger image) are processed correctly,
// not only continuous buffers.
// dst must already be allocated with at least as many rows and columns as src.
class ParallelBGRtoGrayOR : public ParallelLoopBody
{
    const Mat3b src;   // input image header
    mutable Mat1b dst; // output image header; mutable because operator() is const

public:
    // Keeps headers for the input image and the pre-allocated output image.
    ParallelBGRtoGrayOR(const Mat3b& _src, Mat1b& _dst) : ParallelLoopBody(), src(_src), dst(_dst) {}

    // Converts the rows in [range.start, range.end).
    virtual void operator()(const Range& range) const
    {
        const int cols = src.cols;
        for (int row = range.start; row < range.end; ++row)
        {
            // Look up the start of each row instead of assuming that the next
            // row immediately follows the previous one in memory.
            const uchar* yS = src.ptr<uchar>(row);
            uchar* yD = dst.ptr<uchar>(row);
            for (int col = 0; col < cols; ++col, ++yD, yS += 3)
            {
                *yD = yS[0] | yS[1] | yS[2];
                //*yD = std::max(yS[0], std::max(yS[1], yS[2]));
            }
        }
    }
};
void cvtBgrToGray_OR_Miki(const Mat3b& src, Mat1b& dst)
{
dst.create(src.rows, src.cols);
parallel_for_(Range(0, src.rows), ParallelBGRtoGrayOR(src, dst), -1);
}
Test
Testing with your and @akarsakov method, I got (time in ms):
Size: akarsakov Humam Helfawi Miki OpenCV (not same operation)
[10 x 10] 0.00109963 0.0711094 2.60722 0.0934685
[100 x 100] 0.0106298 0.0373874 0.0461844 0.0395867
[1000 x 1000] 1.1799 3.30622 0.747382 1.61646
[1280 x 720] 1.07324 2.91585 0.520858 0.9893
[1920 x 1080] 2.31252 6.87818 1.11502 1.94011
[4096 x 3112] 14.3454 42.0125 6.79644 12.0754
[10000 x 10000] 115.575 321.145 61.1544 93.8846
Considerations
@akarsakov method (working smartly on raw data) is in general the better approach, since it's very fast and easier to write. Using the ParallelLoopBody
has some advantage only with large images (at least on my pc).
I assumed source image to be continuous. This check should be done in practice.
Testing code
You can evaluate the results on your pc using this code:
#include <opencv2/opencv.hpp>
#include <iostream>
using namespace std;
using namespace cv;
// Loop body for cv::parallel_for_ that turns a 3-channel 8-bit image into a
// single-channel one by OR-ing the three channel bytes of every pixel.
//
// The Range handed to operator() is a span of row indices [start, end).
// Every row is addressed through Mat::ptr, so images whose rows are separated
// by padding (for example an ROI of a larger image) are processed correctly,
// not only continuous buffers.
// dst must already be allocated with at least as many rows and columns as src.
class ParallelBGRtoGrayOR : public ParallelLoopBody
{
    const Mat3b src;   // input image header
    mutable Mat1b dst; // output image header; mutable because operator() is const

public:
    // Keeps headers for the input image and the pre-allocated output image.
    ParallelBGRtoGrayOR(const Mat3b& _src, Mat1b& _dst) : ParallelLoopBody(), src(_src), dst(_dst) {}

    // Converts the rows in [range.start, range.end).
    virtual void operator()(const Range& range) const
    {
        const int cols = src.cols;
        for (int row = range.start; row < range.end; ++row)
        {
            // Look up the start of each row instead of assuming that the next
            // row immediately follows the previous one in memory.
            const uchar* yS = src.ptr<uchar>(row);
            uchar* yD = dst.ptr<uchar>(row);
            for (int col = 0; col < cols; ++col, ++yD, yS += 3)
            {
                *yD = yS[0] | yS[1] | yS[2];
                //*yD = std::max(yS[0], std::max(yS[1], yS[2]));
            }
        }
    }
};
void cvtBgrToGray_OR_Miki(const Mat3b& src, Mat1b& dst)
{
dst.create(src.rows, src.cols);
parallel_for_(Range(0, src.rows), ParallelBGRtoGrayOR(src, dst), -1);
}
// credits to @akarsakov
void cvtBgrToGray_OR_akarsakov(const Mat3b& src, Mat1b& dst)
{
int rows = src.rows, cols = src.cols;
dst.create(src.size());
if (src.isContinuous() && dst.isContinuous())
{
cols = rows * cols;
rows = 1;
}
for (int row = 0; row < rows; row++)
{
const uchar* src_ptr = src.ptr<uchar>(row);
uchar* dst_ptr = dst.ptr<uchar>(row);
for (int col = 0; col < cols; col++)
{
dst_ptr[col] = src_ptr[0] | src_ptr[1] | src_ptr[2];
//dst_ptr[col] = std::max(src_ptr[0], std::max(src_ptr[1], src_ptr[2]));
src_ptr += 3;
}
}
}
// credits to @Humam_Helfawi
void cvtBgrToGray_OR_Humam_Helfawi(const Mat3b& src, Mat1b& dst)
{
cv::Mat channels[3];
cv::split(src, channels);
dst = channels[0] | channels[1] | channels[2];
}
int main()
{
vector<Size> sizes{ Size(10, 10), Size(100, 100), Size(1000, 1000), Size(1280, 720), Size(1920, 1080), Size(4096, 3112), Size(10000, 10000) };
cout << "Size: \t\takarsakov \tHumam Helfawi \tMiki \tOpenCV (not same operation)" << endl;
for (int is = 0; is < sizes.size(); ++is)
{
Size sz = sizes[is];
cout << sz << "\t";
Mat3b img(sz);
randu(img, Scalar(0, 0, 0), Scalar(255, 255, 255));
Mat1b gray_akarsakov;
Mat1b gray_Miki;
Mat1b gray_Humam;
Mat1b grayOpenCV;
double tic = double(getTickCount());
cvtBgrToGray_OR_akarsakov(img, gray_akarsakov);
double toc = (double(getTickCount()) - tic) * 1000. / getTickFrequency();
cout << toc << " \t";
tic = double(getTickCount());
cvtBgrToGray_OR_Humam_Helfawi(img, gray_Humam);
toc = (double(getTickCount()) - tic) * 1000. / getTickFrequency();
cout << toc << " \t";
tic = double(getTickCount());
cvtBgrToGray_OR_Miki(img, gray_Miki);
toc = (double(getTickCount()) - tic) * 1000. / getTickFrequency();
cout << toc << " \t";
tic = double(getTickCount());
cvtColor(img, grayOpenCV, COLOR_BGR2GRAY);
toc = (double(getTickCount()) - tic) * 1000. / getTickFrequency();
cout << toc << endl;
}
getchar();
return 0;
}
OpenCV doesn't contain any suitable built-in function to process separate channels in such a way. If you want maximum performance, you may implement this procedure yourself. I suggest something like this:
// Fills dst (allocated here as CV_8UC1 with the size of src) with the bitwise
// OR of the three channel bytes of each pixel of src.
// src must be of type CV_8UC3; this is enforced with CV_Assert.
void calcOrChannels(const cv::Mat& src, cv::Mat& dst)
{
    CV_Assert(src.type() == CV_8UC3);

    int lineCount = src.rows;
    int lineWidth = src.cols;
    dst.create(src.size(), CV_8UC1);

    // With no padding between rows in either matrix, the whole image can be
    // processed as a single long row.
    if (src.isContinuous() && dst.isContinuous())
    {
        lineWidth *= lineCount;
        lineCount = 1;
    }

    for (int y = 0; y < lineCount; ++y)
    {
        const uchar* pixel = src.ptr<uchar>(y);
        uchar* gray = dst.ptr<uchar>(y);
        // Advance three input bytes for every output byte.
        for (int x = 0; x < lineWidth; ++x, pixel += 3)
        {
            gray[x] = pixel[0] | pixel[1] | pixel[2]; // std::max(pixel[0], std::max(pixel[1], pixel[2]))
        }
    }
}
Please note that you need to test the performance of this function on your hardware, since it loses the benefits of the SIMD instructions and parallelism which are implemented (or may be implemented later) in OpenCV. But this procedure uses less additional memory and fewer arithmetic operations. I guess that it would work faster on most systems (especially embedded ones). It also depends on the sizes of your matrices.
Timings on my system (Core i7-4790):
| Matrix size | OpenCV (ms) | My (ms) |
|:-----------:|------------:|---------|
| 1280*720 | 4 | 1 |
| 1920*1080 | 8 | 2 |
| 4096*3112 | 41 | 17 |