I am using OpenCV to prepare images from an iPhone camera for OCR, and I have been having trouble getting results good enough for an accurate OCR scan. Here is the code I am using:
JAVA CODE: A long time has passed since this question was asked, but I have rewritten this code from C++ to Java in case someone needs it (I needed it for developing an app in Android Studio).
/**
 * Binarizes a bitmap in place and returns it.
 *
 * The bitmap is loaded into a Mat and scaled by 1/255. The result of
 * {@code CalcBlockMeanVariance} (block size 21) is inverted (1 - value) and
 * added to the alpha-stripped image; the sum is thresholded at 0.85 and
 * scaled back by 255 before being written into the bitmap that was passed in.
 *
 * @param bitmap source image; it is overwritten with the binarized result
 * @return the same {@code bitmap} instance, now holding the result
 */
public Bitmap Thresholding(Bitmap bitmap)
{
    // Load the pixels and rescale them by 1/255.
    Mat source = new Mat();
    Utils.bitmapToMat(bitmap, source);
    source.convertTo(source, CvType.CV_32FC1, 1.0 / 255.0);

    // Must run before the alpha channel is stripped: the helper applies its
    // own 4-channel colour conversion to the data it receives.
    Mat result = CalcBlockMeanVariance(source, 21);

    // Drop the fourth channel so the image matches the helper's 3-channel output.
    Imgproc.cvtColor(source, source, Imgproc.COLOR_BGRA2BGR);

    // result = source + (1 - result)
    Core.subtract(new MatOfDouble(1.0), result, result);
    Core.add(source, result, result);

    // Binarize, then scale back by 255 for the 8-bit bitmap.
    Imgproc.threshold(result, result, 0.85, 1, Imgproc.THRESH_BINARY);
    result.convertTo(result, CvType.CV_8UC1, 255.0);

    Utils.matToBitmap(result, bitmap);
    return bitmap;
}
/**
 * Produces a full-size image in which blocks with noticeable variation have
 * been filled in by inpainting from their surroundings.
 *
 * Steps:
 * 1. For each {@code blockSide} x {@code blockSide} block, compute mean and
 *    standard deviation (first channel only). Blocks whose standard deviation
 *    exceeds 0.01 store their mean in a small map; all others store 0.
 * 2. Threshold that map at 0.02 to obtain the inpainting mask.
 * 3. Shrink the input to the map's size, convert it to 8-bit (x255) and
 *    inpaint it where the mask is set.
 * 4. Resize the inpainted image back to the input size and scale by 1/255.
 *
 * Because of the x255 scaling, {@code Img} is expected to hold values in
 * [0, 1]. The 4-channel colour conversion below means {@code Img} must have
 * four channels (presumably RGBA as delivered by {@code Utils.bitmapToMat}
 * - confirm against the caller).
 *
 * @param Img       input image, four channels, values scaled to [0, 1]
 * @param blockSide side length of the square analysis blocks, in pixels
 * @return 3-channel CV_32F image of the same size as {@code Img}, with the
 *         channels in the same order as the first three channels of {@code Img}
 */
public Mat CalcBlockMeanVariance (Mat Img, int blockSide)
{
    Mat I = new Mat();
    Img.convertTo(I, CvType.CV_32FC1);

    // One cell per block; cells never written by the loop stay 0.
    Mat ResMat = Mat.zeros(Img.rows() / blockSide, Img.cols() / blockSide, CvType.CV_32FC1);
    MatOfDouble mean = new MatOfDouble();
    MatOfDouble stddev = new MatOfDouble();

    for (int i = 0; i < Img.rows() - blockSide; i += blockSide)
    {
        for (int j = 0; j < Img.cols() - blockSide; j += blockSide)
        {
            Mat patch = new Mat(I, new Rect(j, i, blockSide, blockSide));
            Core.meanStdDev(patch, mean, stddev);
            // Keep the block mean only where the block shows variation.
            double blockValue = (stddev.get(0, 0)[0] > 0.01) ? mean.get(0, 0)[0] : 0;
            ResMat.put(i / blockSide, j / blockSide, blockValue);
        }
    }

    Mat smallImg = new Mat();
    Imgproc.resize(I, smallImg, ResMat.size());

    // Non-zero mask cells mark the blocks to be inpainted.
    Mat inpaintmask = new Mat();
    Imgproc.threshold(ResMat, inpaintmask, 0.02, 1.0, Imgproc.THRESH_BINARY);

    // Drop the fourth channel WITHOUT swapping R and B, so the returned image
    // keeps the channel order of the input and can be combined with it
    // channel-by-channel by the caller.
    Imgproc.cvtColor(smallImg, smallImg, Imgproc.COLOR_RGBA2RGB);
    smallImg.convertTo(smallImg, CvType.CV_8UC1, 255.0);
    inpaintmask.convertTo(inpaintmask, CvType.CV_8UC1);

    Mat inpainted = new Mat();
    Photo.inpaint(smallImg, inpaintmask, inpainted, 5, Photo.INPAINT_TELEA);

    // Back to the input resolution and to the [0, 1] float range.
    Imgproc.resize(inpainted, ResMat, Img.size());
    ResMat.convertTo(ResMat, CvType.CV_32FC1, 1.0 / 255.0);
    return ResMat;
}
The lighting is almost uniform, and the foreground is easily distinguished from the background, so I think thresholding directly (using OTSU) is good enough for OCR. (The result is almost the same as @Andrey's answer in the text regions.)
OpenCV 3 Code in Python:
#!/usr/bin/python3
# 2018.01.17 16:41:20 CST
import cv2
import numpy as np
# Load the input image; cv2.imread returns None when the file cannot be read.
img = cv2.imread("ocr.jpg")
if img is None:
    raise FileNotFoundError("Could not read image file: ocr.jpg")

# Convert the loaded image to grayscale before thresholding.
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

# With THRESH_OTSU the threshold is chosen automatically; the 127 passed here
# is ignored and the value actually used is returned as `th`.
th, threshed = cv2.threshold(gray, 127, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)
print(th)
cv2.imwrite("res.png", threshed)
Here is my result:
Here is the code:
#include <iostream>
#include <vector>
#include <stdio.h>
#include <stdarg.h>
#include "opencv2/opencv.hpp"
#include "fstream"
#include "iostream"
using namespace std;
using namespace cv;
//-----------------------------------------------------------------------------------------------------
//
//-----------------------------------------------------------------------------------------------------
/// @brief Builds a full-size image in which blocks showing noticeable
///        variation are filled in by inpainting from their surroundings.
///
/// For each block the mean and standard deviation of the first channel are
/// computed. Blocks whose standard deviation exceeds 0.01 store their mean in
/// a small per-block map, all others store 0. That map, thresholded at 0.02,
/// is the inpainting mask applied to a down-scaled copy of the image. The
/// inpainted result is resized to the input size and scaled by 1/255.
///
/// @param Img       Input image. Its values are multiplied by 255 before being
///                  converted to 8-bit, so it is expected to be scaled to
///                  [0, 1]. Must be at least blockSide pixels in each dimension.
/// @param Res       Output: CV_32F image of the same size as Img.
/// @param blockSide Block size in pixels; only the integer part is used.
/// @throws cv::Exception if blockSide is below 1 or Img is empty or smaller
///         than one block.
void CalcBlockMeanVariance(Mat& Img,Mat& Res,float blockSide=21) // blockSide - the parameter (set greater for larger font on image)
{
    // Sizes, loop steps and indices are all integral; convert once instead of
    // relying on implicit float -> int narrowing at every use.
    const int side = static_cast<int>(blockSide);
    CV_Assert(side > 0);
    // A smaller image would yield an empty block map and fail inside resize().
    CV_Assert(!Img.empty() && Img.rows >= side && Img.cols >= side);

    Mat I;
    Img.convertTo(I, CV_32FC1);

    // One cell per block; cells never written by the loop stay 0.
    Res = Mat::zeros(Img.rows / side, Img.cols / side, CV_32FC1);

    Scalar m, s;
    for (int i = 0; i < Img.rows - side; i += side)
    {
        for (int j = 0; j < Img.cols - side; j += side)
        {
            // Patches are (side + 1) wide, i.e. they overlap the next block by
            // one pixel; the loop bounds keep them inside the image.
            const Mat patch = I(Range(i, i + side + 1), Range(j, j + side + 1));
            cv::meanStdDev(patch, m, s);
            // Thresholding parameter (set smaller for lower contrast image)
            Res.at<float>(i / side, j / side) =
                (s[0] > 0.01) ? static_cast<float>(m[0]) : 0.0f;
        }
    }

    Mat smallImg;
    cv::resize(I, smallImg, Res.size());

    // Non-zero mask cells mark the blocks to be inpainted.
    Mat inpaintmask;
    cv::threshold(Res, inpaintmask, 0.02, 1.0, cv::THRESH_BINARY);

    // inpaint() works on 8-bit data.
    smallImg.convertTo(smallImg, CV_8UC1, 255);
    inpaintmask.convertTo(inpaintmask, CV_8UC1);

    Mat inpainted;
    inpaint(smallImg, inpaintmask, inpainted, 5, INPAINT_TELEA);

    // Back to the input resolution and to the [0, 1] float range.
    cv::resize(inpainted, Res, Img.size());
    Res.convertTo(Res, CV_32FC1, 1.0 / 255.0);
}
//-----------------------------------------------------------------------------------------------------
//
//-----------------------------------------------------------------------------------------------------
int main( int argc, char** argv )
{
namedWindow("Img");
namedWindow("Edges");
//Mat Img=imread("D:\\ImagesForTest\\BookPage.JPG",0);
Mat Img=imread("Test2.JPG",0);
Mat res;
Img.convertTo(Img,CV_32FC1,1.0/255.0);
CalcBlockMeanVariance(Img,res);
res=1.0-res;
res=Img+res;
imshow("Img",Img);
cv::threshold(res,res,0.85,1,cv::THRESH_BINARY);
cv::resize(res,res,cv::Size(res.cols/2,res.rows/2));
imwrite("result.jpg",res*255);
imshow("Edges",res);
waitKey(0);
return 0;
}
And a Python port:
import cv2 as cv
import numpy as np
#-----------------------------------------------------------------------------------------------------
#
#-----------------------------------------------------------------------------------------------------
def CalcBlockMeanVariance(Img,blockSide=21): # blockSide - the parameter (set greater for larger font on image)
    """Return a full-size image with high-variation blocks filled in by inpainting.

    For every block the mean and standard deviation are computed on the image
    scaled by 1/255. Blocks whose standard deviation exceeds 0.001 store their
    mean in a small per-block map, the others store 0. The map, thresholded at
    0.02, is used as the inpainting mask on a down-scaled copy of the image.
    The inpainted result is resized back to the input size and scaled to [0, 1].

    Args:
        Img: input image array; it is divided by 255 here, so it is expected
            to hold values in the 0..255 range. Only the first two axes are
            used as rows and columns (presumably a grayscale image - confirm
            against the caller).
        blockSide: block size in pixels (int).

    Returns:
        float32 array with the same height and width as ``Img``.
    """
    I = np.float32(Img) / 255.0
    height, width = Img.shape[0], Img.shape[1]

    # One cell per block; cells never written by the loop stay 0.
    # np.float64 replaces the removed np.float alias (same dtype).
    Res = np.zeros(shape=(height // blockSide, width // blockSide), dtype=np.float64)

    for i in range(0, height - blockSide, blockSide):
        for j in range(0, width - blockSide, blockSide):
            # Patches are blockSide + 1 wide, overlapping the next block by one pixel.
            patch = I[i:i + blockSide + 1, j:j + blockSide + 1]
            m, s = cv.meanStdDev(patch)
            # meanStdDev returns 2-D arrays; index down to plain scalars.
            # Thresholding parameter (set smaller for lower contrast image)
            if s[0][0] > 0.001:
                Res[i // blockSide, j // blockSide] = m[0][0]
            else:
                Res[i // blockSide, j // blockSide] = 0

    smallImg = cv.resize(I, (Res.shape[1], Res.shape[0]))

    # Non-zero mask cells mark the blocks to be inpainted.
    _, inpaintmask = cv.threshold(Res, 0.02, 1.0, cv.THRESH_BINARY)

    # cv.inpaint works on 8-bit data.
    smallImg = np.uint8(smallImg * 255)
    inpaintmask = np.uint8(inpaintmask)
    inpainted = cv.inpaint(smallImg, inpaintmask, 5, cv.INPAINT_TELEA)

    # Back to the input resolution and to the [0, 1] float range.
    Res = cv.resize(inpainted, (width, height))
    Res = np.float32(Res) / 255
    return Res
#-----------------------------------------------------------------------------------------------------
#
#-----------------------------------------------------------------------------------------------------
cv.namedWindow("Img")
cv.namedWindow("Edges")

# Flag 0 loads the image as grayscale; cv.imread returns None on failure.
Img = cv.imread("F:\\ImagesForTest\\BookPage.JPG", 0)
if Img is None:
    raise FileNotFoundError("Could not read image file: F:\\ImagesForTest\\BookPage.JPG")

# CalcBlockMeanVariance receives the raw image and rescales it itself.
res = CalcBlockMeanVariance(Img)

# res = Img + (1 - res), with Img scaled to [0, 1]
res = 1.0 - res
Img = np.float32(Img) / 255
res = Img + res
cv.imshow("Img", Img)

# Binarize, then halve the size for display and saving.
_, res = cv.threshold(res, 0.85, 1, cv.THRESH_BINARY)
res = cv.resize(res, (res.shape[1] // 2, res.shape[0] // 2))
cv.imwrite("result.jpg", res * 255)
cv.imshow("Edges", res)
cv.waitKey(0)