Extracting text OpenCV

后端 未结 10 1834
臣服心动
臣服心动 2020-11-22 08:10

I am trying to find the bounding boxes of text in an image and am currently using this approach:

// calculate the local variances of the grayscale image
Mat          


        
相关标签:
10条回答
  • 2020-11-22 08:11

    This is a C# version of the answer from dhanushka using OpenCVSharp

            // Finds text-like regions in INPUT_FILE and saves the half-resolution image,
            // annotated with green rectangles, as "rgb.jpg" in the application's base directory.
            Mat large = new Mat(INPUT_FILE);
            if (large.Empty())
            {
                // Fail early with a clear message instead of an opaque error inside PyrDown.
                throw new InvalidOperationException("Could not decode input image: " + INPUT_FILE);
            }
            Mat rgb = new Mat(), small = new Mat(), grad = new Mat(), bw = new Mat(), connected = new Mat();

            // downsample and use it for processing
            Cv2.PyrDown(large, rgb);
            Cv2.CvtColor(rgb, small, ColorConversionCodes.BGR2GRAY);

            // morphological gradient
            var morphKernel = Cv2.GetStructuringElement(MorphShapes.Ellipse, new OpenCvSharp.Size(3, 3));
            Cv2.MorphologyEx(small, grad, MorphTypes.Gradient, morphKernel);

            // binarize (threshold value 0 is ignored; Otsu picks the threshold)
            Cv2.Threshold(grad, bw, 0, 255, ThresholdTypes.Binary | ThresholdTypes.Otsu);

            // connect horizontally oriented regions
            morphKernel = Cv2.GetStructuringElement(MorphShapes.Rect, new OpenCvSharp.Size(9, 1));
            Cv2.MorphologyEx(bw, connected, MorphTypes.Close, morphKernel);

            // find contours
            var mask = new Mat(Mat.Zeros(bw.Size(), MatType.CV_8UC1), Range.All);
            Cv2.FindContours(connected, out OpenCvSharp.Point[][] contours, out HierarchyIndex[] hierarchy, RetrievalModes.CComp, ContourApproximationModes.ApproxSimple, new OpenCvSharp.Point(0, 0));

            // filter contours: start at contour 0 and follow the "next sibling" links, exactly like
            // the C++ loop `for (idx = 0; idx >= 0; idx = hierarchy[idx][0])`. The bounds check also
            // covers the case where no contour was found at all.
            for (int idx = 0; idx >= 0 && idx < contours.Length; idx = hierarchy[idx].Next)
            {
                OpenCvSharp.Rect rect = Cv2.BoundingRect(contours[idx]);

                // maskROI is a view into mask, so clearing/reading it touches only this bounding box.
                using (var maskROI = new Mat(mask, rect))
                {
                    maskROI.SetTo(new Scalar(0, 0, 0));

                    // fill the contour
                    Cv2.DrawContours(mask, contours, idx, Scalar.White, -1);

                    // ratio of non-zero pixels in the filled region
                    double r = (double)Cv2.CountNonZero(maskROI) / (rect.Width * rect.Height);

                    // Assume at least 45% of the box is filled if it contains text, and require a
                    // minimum box size. These two conditions alone are not very robust; something
                    // like the number of significant peaks in a horizontal projection would make
                    // a good third condition.
                    if (r > .45 && rect.Height > 8 && rect.Width > 8)
                    {
                        Cv2.Rectangle(rgb, rect, new Scalar(0, 255, 0), 2);
                    }
                }
            }

            rgb.SaveImage(Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "rgb.jpg"));
    
    0 讨论(0)
  • 2020-11-22 08:17

    This is a VB.NET version of the answer from dhanushka, using EmguCV.

    A few functions and structures in EmguCV need to be handled differently from the C# version that uses OpenCVSharp.

    Imports Emgu.CV
    Imports Emgu.CV.Structure
    Imports Emgu.CV.CvEnum
    Imports Emgu.CV.Util
    
            '// Finds text-like regions in the input image and saves the half-resolution image,
            '// annotated with green rectangles, as "rgb.jpg" in the application's start-up folder.
            Dim input_file As String = "C:\your_input_image.png"
            Dim large As Mat = New Mat(input_file)
            Dim rgb As New Mat
            Dim small As New Mat
            Dim grad As New Mat
            Dim bw As New Mat
            Dim connected As New Mat
            Dim morphanchor As New Point(0, 0)

            '//downsample and use it for processing
            CvInvoke.PyrDown(large, rgb)
            CvInvoke.CvtColor(rgb, small, ColorConversion.Bgr2Gray)

            '//morphological gradient
            Dim morphKernel As Mat = CvInvoke.GetStructuringElement(ElementShape.Ellipse, New Size(3, 3), morphanchor)
            CvInvoke.MorphologyEx(small, grad, MorphOp.Gradient, morphKernel, New Point(0, 0), 1, BorderType.Isolated, New MCvScalar(0))

            '// binarize (threshold value 0 is ignored; Otsu picks the threshold)
            CvInvoke.Threshold(grad, bw, 0, 255, ThresholdType.Binary Or ThresholdType.Otsu)

            '// connect horizontally oriented regions
            morphKernel = CvInvoke.GetStructuringElement(ElementShape.Rectangle, New Size(9, 1), morphanchor)
            CvInvoke.MorphologyEx(bw, connected, MorphOp.Close, morphKernel, morphanchor, 1, BorderType.Isolated, New MCvScalar(0))

            '// find contours
            Dim mask As Mat = Mat.Zeros(bw.Size.Height, bw.Size.Width, DepthType.Cv8U, 1)  '' MatType.CV_8UC1
            Dim contours As New VectorOfVectorOfPoint
            Dim hierarchy As New Mat

            CvInvoke.FindContours(connected, contours, hierarchy, RetrType.Ccomp, ChainApproxMethod.ChainApproxSimple, Nothing)

            '// filter contours
            Dim idx As Integer
            Dim rect As Rectangle
            Dim maskROI As Mat
            Dim r As Double
            '// Loop over the contour list itself. Enumerating hierarchy.GetData walks the raw
            '// hierarchy values rather than one item per contour, so the index could run past
            '// the last contour. This visits every contour returned by FindContours; an empty
            '// result simply skips the loop.
            For idx = 0 To contours.Size - 1
                rect = CvInvoke.BoundingRectangle(contours(idx))
                '// maskROI is a view into mask restricted to this bounding box
                maskROI = New Mat(mask, rect)
                maskROI.SetTo(New MCvScalar(0, 0, 0))

                '// fill the contour
                CvInvoke.DrawContours(mask, contours, idx, New MCvScalar(255), -1)

                '// ratio of non-zero pixels in the filled region
                r = CvInvoke.CountNonZero(maskROI) / (rect.Width * rect.Height)

                '// Assume at least 45% of the box is filled if it contains text, and require a
                '// minimum box size. These two conditions alone are not very robust; something
                '// like the number of significant peaks in a horizontal projection would make a
                '// good third condition.
                If r > 0.45 AndAlso rect.Height > 8 AndAlso rect.Width > 8 Then
                    'draw green rectangle
                    CvInvoke.Rectangle(rgb, rect, New MCvScalar(0, 255, 0), 2)
                End If
            Next
            rgb.Save(IO.Path.Combine(Application.StartupPath, "rgb.jpg"))
    
    0 讨论(0)
  • 2020-11-22 08:18

    I used a gradient based method in the program below. Added the resulting images. Please note that I'm using a scaled down version of the image for processing.

    c++ version

    The MIT License (MIT)
    
    Copyright (c) 2014 Dhanushka Dangampola
    
    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to deal
    in the Software without restriction, including without limitation the rights
    to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
    copies of the Software, and to permit persons to whom the Software is
    furnished to do so, subject to the following conditions:
    
    The above copyright notice and this permission notice shall be included in
    all copies or substantial portions of the Software.
    
    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
    AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
    THE SOFTWARE.
    
    #include "stdafx.h"
    
    #include <opencv2/core/core.hpp>
    #include <opencv2/highgui/highgui.hpp>
    #include <opencv2/imgproc/imgproc.hpp>
    #include <iostream>
    
    using namespace cv;
    using namespace std;
    
    #define INPUT_FILE              "1.jpg"
    #define OUTPUT_FOLDER_PATH      string("")
    
    int _tmain(int argc, _TCHAR* argv[])
    {
        Mat large = imread(INPUT_FILE);
        Mat rgb;
        // downsample and use it for processing
        pyrDown(large, rgb);
        Mat small;
        cvtColor(rgb, small, CV_BGR2GRAY);
        // morphological gradient
        Mat grad;
        Mat morphKernel = getStructuringElement(MORPH_ELLIPSE, Size(3, 3));
        morphologyEx(small, grad, MORPH_GRADIENT, morphKernel);
        // binarize
        Mat bw;
        threshold(grad, bw, 0.0, 255.0, THRESH_BINARY | THRESH_OTSU);
        // connect horizontally oriented regions
        Mat connected;
        morphKernel = getStructuringElement(MORPH_RECT, Size(9, 1));
        morphologyEx(bw, connected, MORPH_CLOSE, morphKernel);
        // find contours
        Mat mask = Mat::zeros(bw.size(), CV_8UC1);
        vector<vector<Point>> contours;
        vector<Vec4i> hierarchy;
        findContours(connected, contours, hierarchy, CV_RETR_CCOMP, CV_CHAIN_APPROX_SIMPLE, Point(0, 0));
        // filter contours
        for(int idx = 0; idx >= 0; idx = hierarchy[idx][0])
        {
            Rect rect = boundingRect(contours[idx]);
            Mat maskROI(mask, rect);
            maskROI = Scalar(0, 0, 0);
            // fill the contour
            drawContours(mask, contours, idx, Scalar(255, 255, 255), CV_FILLED);
            // ratio of non-zero pixels in the filled region
            double r = (double)countNonZero(maskROI)/(rect.width*rect.height);
    
            if (r > .45 /* assume at least 45% of the area is filled if it contains text */
                && 
                (rect.height > 8 && rect.width > 8) /* constraints on region size */
                /* these two conditions alone are not very robust. better to use something 
                like the number of significant peaks in a horizontal projection as a third condition */
                )
            {
                rectangle(rgb, rect, Scalar(0, 255, 0), 2);
            }
        }
        imwrite(OUTPUT_FOLDER_PATH + string("rgb.jpg"), rgb);
    
        return 0;
    }
    

    python version

    The MIT License (MIT)
    
    Copyright (c) 2017 Dhanushka Dangampola
    
    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to deal
    in the Software without restriction, including without limitation the rights
    to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
    copies of the Software, and to permit persons to whom the Software is
    furnished to do so, subject to the following conditions:
    
    The above copyright notice and this permission notice shall be included in
    all copies or substantial portions of the Software.
    
    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
    AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
    THE SOFTWARE.
    
    import cv2
    import numpy as np
    
    # Detect text-like regions in '1.jpg' with a morphological-gradient approach and show the
    # half-resolution image with the detected regions outlined in green.
    large = cv2.imread('1.jpg')
    if large is None:
        # cv2.imread returns None instead of raising when the file cannot be read
        raise IOError("could not read input image '1.jpg'")
    rgb = cv2.pyrDown(large)
    small = cv2.cvtColor(rgb, cv2.COLOR_BGR2GRAY)

    # morphological gradient
    kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (3, 3))
    grad = cv2.morphologyEx(small, cv2.MORPH_GRADIENT, kernel)

    # binarize (threshold value 0.0 is ignored; Otsu picks the threshold)
    _, bw = cv2.threshold(grad, 0.0, 255.0, cv2.THRESH_BINARY | cv2.THRESH_OTSU)

    # connect horizontally oriented regions
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (9, 1))
    connected = cv2.morphologyEx(bw, cv2.MORPH_CLOSE, kernel)
    # using RETR_EXTERNAL instead of RETR_CCOMP
    # OpenCV 3.x returns (image, contours, hierarchy); other versions return
    # (contours, hierarchy). Taking the last two items works with both.
    contours, hierarchy = cv2.findContours(connected.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)[-2:]

    mask = np.zeros(bw.shape, dtype=np.uint8)

    for idx in range(len(contours)):
        x, y, w, h = cv2.boundingRect(contours[idx])
        # reset this bounding box, then fill only the current contour
        mask[y:y+h, x:x+w] = 0
        cv2.drawContours(mask, contours, idx, (255, 255, 255), -1)
        # ratio of non-zero pixels in the filled region
        r = float(cv2.countNonZero(mask[y:y+h, x:x+w])) / (w * h)

        # assume at least 45% of the box is filled if it contains text, plus a minimum size
        if r > 0.45 and w > 8 and h > 8:
            cv2.rectangle(rgb, (x, y), (x+w-1, y+h-1), (0, 255, 0), 2)

    cv2.imshow('rects', rgb)
    # imshow only renders once the GUI event loop runs; block until a key is pressed
    cv2.waitKey(0)
    cv2.destroyAllWindows()
    

    enter image description here enter image description here enter image description here

    0 讨论(0)
  • 2020-11-22 08:20

    You can detect text by finding close edge elements (inspired by an LPD approach):

    #include "opencv2/opencv.hpp"
    
    // Returns bounding boxes of horizontally elongated, edge-dense regions in a BGR image.
    //
    // Pipeline: grayscale -> horizontal Sobel derivative -> Otsu binarization ->
    // morphological close with a wide 17x3 kernel (merges neighbouring edges into blobs) ->
    // external contours. A contour is kept only if it has more than 100 points and its
    // bounding box is wider than it is tall.
    //
    // img: input image; it is converted with CV_BGR2GRAY, so a BGR image is expected.
    // Returns the accepted bounding rectangles in contour order (possibly empty).
    std::vector<cv::Rect> detectLetters(cv::Mat img)
    {
        std::vector<cv::Rect> boundRect;
        cv::Mat img_gray, img_sobel, img_threshold, element;
        cvtColor(img, img_gray, CV_BGR2GRAY);
        cv::Sobel(img_gray, img_sobel, CV_8U, 1, 0, 3, 1, 0, cv::BORDER_DEFAULT);
        cv::threshold(img_sobel, img_threshold, 0, 255, CV_THRESH_OTSU+CV_THRESH_BINARY);
        element = getStructuringElement(cv::MORPH_RECT, cv::Size(17, 3) );
        cv::morphologyEx(img_threshold, img_threshold, CV_MOP_CLOSE, element); //Does the trick
        std::vector< std::vector< cv::Point> > contours;
        // Named constants replace the former magic numbers 0 and 1 (same values).
        cv::findContours(img_threshold, contours, cv::RETR_EXTERNAL, cv::CHAIN_APPROX_NONE);
        for (size_t i = 0; i < contours.size(); i++)
        {
            // ignore small contours
            if (contours[i].size() <= 100)
                continue;
            // simplify the outline (epsilon 3, closed curve) before taking the bounding box
            std::vector<cv::Point> poly;
            cv::approxPolyDP( cv::Mat(contours[i]), poly, 3, true );
            cv::Rect appRect( cv::boundingRect( cv::Mat(poly) ));
            if (appRect.width>appRect.height)
                boundRect.push_back(appRect);
        }
        return boundRect;
    }
    

    Usage:

    int main(int argc,char** argv)
    {
        //Read
        cv::Mat img1=cv::imread("side_1.jpg");
        cv::Mat img2=cv::imread("side_2.jpg");
        //Detect
        std::vector<cv::Rect> letterBBoxes1=detectLetters(img1);
        std::vector<cv::Rect> letterBBoxes2=detectLetters(img2);
        //Display
        for(int i=0; i< letterBBoxes1.size(); i++)
            cv::rectangle(img1,letterBBoxes1[i],cv::Scalar(0,255,0),3,8,0);
        cv::imwrite( "imgOut1.jpg", img1);  
        for(int i=0; i< letterBBoxes2.size(); i++)
            cv::rectangle(img2,letterBBoxes2[i],cv::Scalar(0,255,0),3,8,0);
        cv::imwrite( "imgOut2.jpg", img2);  
        return 0;
    }
    

    Results:

    a. element = getStructuringElement(cv::MORPH_RECT, cv::Size(17, 3) ); imgOut1 imgOut2

    b. element = getStructuringElement(cv::MORPH_RECT, cv::Size(30, 30) ); imgOut1 imgOut2

    Results are similar for the other image mentioned.

    0 讨论(0)
  • 2020-11-22 08:26

    Python Implementation for @dhanushka's solution:

    def process_rgb(rgb):
        """Outline text-like regions of a BGR image in green.

        The image is converted to grayscale, a morphological gradient is taken,
        binarized with Otsu's method, and closed with a 9x1 kernel so that
        horizontally neighbouring edges merge. The contours reached from contour 0
        via the "next sibling" links are then filtered by fill ratio, minimum size
        and aspect.

        Args:
            rgb: image array accepted by cv2.cvtColor(..., cv2.COLOR_BGR2GRAY).
                It is modified in place (rectangles are drawn on it).

        Returns:
            (hasText, rgb): hasText is True if at least one region passed the
            filter; rgb is the same array that was passed in.
        """
        hasText = False
        gray = cv2.cvtColor(rgb, cv2.COLOR_BGR2GRAY)
        morphKernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (3, 3))
        grad = cv2.morphologyEx(gray, cv2.MORPH_GRADIENT, morphKernel)
        # binarize (threshold value 0.0 is ignored; Otsu picks the threshold)
        _, bw = cv2.threshold(grad, 0.0, 255.0, cv2.THRESH_BINARY | cv2.THRESH_OTSU)
        # connect horizontally oriented regions
        morphKernel = cv2.getStructuringElement(cv2.MORPH_RECT, (9, 1))
        connected = cv2.morphologyEx(bw, cv2.MORPH_CLOSE, morphKernel)
        # find contours; OpenCV 3.x returns (image, contours, hierarchy) while other
        # versions return (contours, hierarchy), so take the last two items
        contours, hierarchy = cv2.findContours(connected, cv2.RETR_CCOMP, cv2.CHAIN_APPROX_SIMPLE)[-2:]
        if hierarchy is None or len(contours) == 0:
            # nothing detected: contours[0] below would raise IndexError
            return hasText, rgb
        # filter contours, following the "next sibling" index stored in hierarchy[0][idx][0]
        idx = 0
        while idx >= 0:
            x, y, w, h = cv2.boundingRect(contours[idx])
            # fill ratio: contour area relative to its bounding box
            r = cv2.contourArea(contours[idx]) / (w * h)
            if r > 0.45 and h > 5 and w > 5 and w > h:
                cv2.rectangle(rgb, (x, y), (x + w, y + h), (0, 255, 0), 2)
                hasText = True
            idx = hierarchy[0][idx][0]
        return hasText, rgb
    
    0 讨论(0)
  • 2020-11-22 08:29

    You can use SWTloc, a Python implementation of the Stroke Width Transform.

    Full Disclosure : I am the author of this library

    To do that :-

    First and Second Image

    Notice that the text_mode here is 'lb_df', which stands for Light Background Dark Foreground, i.e., the text in this image is darker than the background.

    # Localize text in 'img1.jpg' with SWTloc (dark text on a light background).
    # cv2 and numpy are used below (cv2.imwrite, np.pi) and therefore must be imported.
    import cv2
    import numpy as np
    from swtloc import SWTLocalizer
    from swtloc.utils import imgshowN, imgshow

    swtl = SWTLocalizer()
    # Stroke Width Transform
    swtl.swttransform(imgpaths='img1.jpg', text_mode = 'lb_df',
                      save_results=True, save_rootpath = 'swtres/',
                      minrsw = 3, maxrsw = 20, max_angledev = np.pi/3)
    imgshow(swtl.swtlabelled_pruned13C)

    # Grouping
    respacket=swtl.get_grouped(lookup_radii_multiplier=0.9, ht_ratio=3.0)
    grouped_annot_bubble = respacket[2]
    maskviz = respacket[4]
    maskcomb  = respacket[5]

    # Saving the results
    _=cv2.imwrite('img1_processed.jpg', swtl.swtlabelled_pruned13C)
    imgshowN([maskcomb, grouped_annot_bubble], savepath='grouped_img1.jpg')
    


    Third Image

    Notice that the text_mode here is 'db_lf', which stands for Dark Background Light Foreground, i.e., the text in this image is lighter than the background.

    # Localize text with SWTloc for an image with light text on a dark background.
    # cv2 and numpy are used below (cv2.imwrite, np.pi) and therefore must be imported.
    import cv2
    import numpy as np
    from swtloc import SWTLocalizer
    from swtloc.utils import imgshowN, imgshow

    swtl = SWTLocalizer()
    # Stroke Width Transform
    # NOTE(review): `imgpaths` is not defined in this snippet; presumably a list of input
    # image paths defined by the caller - confirm before running.
    swtl.swttransform(imgpaths=imgpaths[1], text_mode = 'db_lf',
                  save_results=True, save_rootpath = 'swtres/',
                  minrsw = 3, maxrsw = 20, max_angledev = np.pi/3)
    imgshow(swtl.swtlabelled_pruned13C)

    # Grouping
    respacket=swtl.get_grouped(lookup_radii_multiplier=0.9, ht_ratio=3.0)
    grouped_annot_bubble = respacket[2]
    maskviz = respacket[4]
    maskcomb  = respacket[5]

    # Saving the results
    # NOTE(review): these are the same output names used for the first image, so running
    # both snippets in one directory overwrites the earlier results - confirm intended names.
    _=cv2.imwrite('img1_processed.jpg', swtl.swtlabelled_pruned13C)
    imgshowN([maskcomb, grouped_annot_bubble], savepath='grouped_img1.jpg')
    

    You will also notice that the grouping is not very accurate. Because images vary, try tuning the grouping parameters of the swtl.get_grouped() function to get the desired results.

    0 讨论(0)
提交回复
热议问题