Remove background text and noise from an image using image processing with OpenCV

前端 未结 3 1812
北海茫月
北海茫月 2021-02-08 18:08

I have these images

I want to remove the text in the background of these images. Only the captcha characters should remain (i.e., K6PwKA, YabVzu).

3条回答
  •  再見小時候
    2021-02-08 18:35

    Here are two potential approaches and a method to correct distorted text:

    Method #1: Morphological operations + contour filtering

    1. Obtain binary image. Load image, grayscale, then Otsu's threshold.

    2. Remove text contours. Create a rectangular kernel with cv2.getStructuringElement and then perform morphological operations to remove noise.

    3. Filter and remove small noise. Find contours and filter using contour area to remove small particles. We effectively remove the noise by filling in the contour with cv2.drawContours

    4. Perform OCR. We invert the image, then apply a slight Gaussian blur. We then OCR using Pytesseract with the --psm 6 configuration option to treat the image as a single block of text. See the Tesseract "improve quality" guide for other ways to improve detection, and the Pytesseract configuration options for additional settings.


    Input image -> Binary -> Morph opening

    Contour area filtering -> Invert -> Apply blur to get result

    Result from OCR

    YabVzu
    

    Code

    import cv2
    import pytesseract
    import numpy as np
    
    # Path to the Tesseract executable (a Windows install location here);
    # change it to match your system.
    pytesseract.pytesseract.tesseract_cmd = r"C:\Program Files\Tesseract-OCR\tesseract.exe"
    
    IMAGE_PATH = '2.png'
    # Contours whose area is below this value are treated as noise and erased.
    MIN_CONTOUR_AREA = 50
    
    # Load image, grayscale, Otsu's threshold.
    # cv2.imread returns None (it does not raise) when the file cannot be read,
    # so stop here with a clear message instead of failing inside cvtColor.
    image = cv2.imread(IMAGE_PATH)
    if image is None:
        raise FileNotFoundError("Could not read image: " + IMAGE_PATH)
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    # Inverted binary image; with THRESH_OTSU the threshold is chosen automatically.
    thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]
    
    # Morph open to remove noise
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (2, 2))
    opening = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kernel, iterations=1)
    
    # Find contours and remove small noise
    cnts = cv2.findContours(opening, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    # findContours returns 2 values in some OpenCV versions and 3 in others;
    # select the contour list in either case.
    cnts = cnts[0] if len(cnts) == 2 else cnts[1]
    for c in cnts:
        if cv2.contourArea(c) < MIN_CONTOUR_AREA:
            # Colour 0 with thickness -1 fills the contour with black, erasing it.
            cv2.drawContours(opening, [c], -1, 0, -1)
    
    # Invert and apply slight Gaussian blur
    result = 255 - opening
    result = cv2.GaussianBlur(result, (3, 3), 0)
    
    # Perform OCR; --psm 6 treats the image as a single block of text
    data = pytesseract.image_to_string(result, lang='eng', config='--psm 6')
    print(data)
    
    # Show the intermediate stages and wait for a key press
    cv2.imshow('thresh', thresh)
    cv2.imshow('opening', opening)
    cv2.imshow('result', result)
    cv2.waitKey()
    

    Method #2: Color segmentation

    With the observation that the desired text to extract has a distinguishable contrast from the noise in the image, we can use color thresholding to isolate the text. The idea is to convert to HSV format, then color threshold to obtain a mask using a lower/upper color range. From there we use the same process to OCR with Pytesseract.


    Input image -> Mask -> Result

    Code

    import cv2
    import pytesseract
    import numpy as np
    
    # Path to the Tesseract executable (a Windows install location here);
    # change it to match your system.
    pytesseract.pytesseract.tesseract_cmd = r"C:\Program Files\Tesseract-OCR\tesseract.exe"
    
    IMAGE_PATH = '2.png'
    
    # Load image, convert to HSV, color threshold to get mask.
    # cv2.imread returns None (it does not raise) when the file cannot be read,
    # so stop here with a clear message instead of failing inside cvtColor.
    image = cv2.imread(IMAGE_PATH)
    if image is None:
        raise FileNotFoundError("Could not read image: " + IMAGE_PATH)
    hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
    # Lower/upper HSV bounds of the text to keep
    lower = np.array([0, 0, 0])
    upper = np.array([100, 175, 110])
    # Pixels inside [lower, upper] become 255 in the mask, all others 0
    mask = cv2.inRange(hsv, lower, upper)
    
    # Invert image and OCR; --psm 6 treats the image as a single block of text
    invert = 255 - mask
    data = pytesseract.image_to_string(invert, lang='eng', config='--psm 6')
    print(data)
    
    # Show the mask and its inverse, then wait for a key press
    cv2.imshow('mask', mask)
    cv2.imshow('invert', invert)
    cv2.waitKey()
    

    Correcting distorted text

    OCR works best when the image is horizontal. To ensure that the text is in an ideal format for OCR, we can perform a perspective transform. After removing all the noise to isolate the text, we can perform a morph close to combine individual text contours into a single contour. From here we can find the rotated bounding box using cv2.minAreaRect and then perform a four point perspective transform using imutils.perspective.four_point_transform. Continuing from the cleaned mask, here are the results:

    Mask -> Morph close -> Detected rotated bounding box -> Result

    Output with the other image

    Updated code to include perspective transform

    import cv2
    import pytesseract
    import numpy as np
    from imutils.perspective import four_point_transform
    
    # Path to the Tesseract executable (a Windows install location here);
    # change it to match your system.
    pytesseract.pytesseract.tesseract_cmd = r"C:\Program Files\Tesseract-OCR\tesseract.exe"
    
    IMAGE_PATH = '1.png'
    
    # Load image, convert to HSV, color threshold to get mask.
    # cv2.imread returns None (it does not raise) when the file cannot be read,
    # so stop here with a clear message instead of failing inside cvtColor.
    image = cv2.imread(IMAGE_PATH)
    if image is None:
        raise FileNotFoundError("Could not read image: " + IMAGE_PATH)
    hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
    lower = np.array([0, 0, 0])
    upper = np.array([100, 175, 110])
    mask = cv2.inRange(hsv, lower, upper)
    
    # Morph close to connect individual text into a single contour
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 5))
    close = cv2.morphologyEx(mask, cv2.MORPH_CLOSE, kernel, iterations=3)
    
    # Find rotated bounding box then perspective transform
    cnts = cv2.findContours(close, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    # findContours returns 2 values in some OpenCV versions and 3 in others;
    # select the contour list in either case.
    cnts = cnts[0] if len(cnts) == 2 else cnts[1]
    if len(cnts) == 0:
        # Nothing fell inside the HSV range, so there is no text region to rectify.
        raise ValueError("No contours found in mask; check the HSV threshold range")
    # Use the largest contour: if stray blobs survive the close, the first
    # contour in the list is not guaranteed to be the text region.
    text_contour = max(cnts, key=cv2.contourArea)
    rect = cv2.minAreaRect(text_contour)
    box = cv2.boxPoints(rect)
    # np.int0 was an alias of np.intp and was removed in NumPy 2.0.
    box = np.intp(box)
    # Draw the detected rotated box on the original image for visual inspection
    cv2.drawContours(image, [box], 0, (36, 255, 12), 2)
    # Warp the inverted mask so the text region becomes an upright rectangle
    warped = four_point_transform(255 - mask, box.reshape(4, 2))
    
    # OCR; --psm 6 treats the image as a single block of text
    data = pytesseract.image_to_string(warped, lang='eng', config='--psm 6')
    print(data)
    
    # Show the intermediate stages and wait for a key press
    cv2.imshow('mask', mask)
    cv2.imshow('close', close)
    cv2.imshow('warped', warped)
    cv2.imshow('image', image)
    cv2.waitKey()
    

    Note: The color threshold range was determined using this HSV threshold script

    import cv2
    import numpy as np
    
    def nothing(x):
        """No-op callback passed to cv2.createTrackbar; ignores its argument."""
        return None
    
    # Load image.
    # cv2.imread returns None (it does not raise) when the file cannot be read,
    # so stop here with a clear message instead of failing inside cvtColor.
    IMAGE_PATH = '2.png'
    image = cv2.imread(IMAGE_PATH)
    if image is None:
        raise FileNotFoundError("Could not read image: " + IMAGE_PATH)
    
    # Create a window
    cv2.namedWindow('image')
    
    # Create trackbars for color change
    # Hue is from 0-179 for Opencv
    cv2.createTrackbar('HMin', 'image', 0, 179, nothing)
    cv2.createTrackbar('SMin', 'image', 0, 255, nothing)
    cv2.createTrackbar('VMin', 'image', 0, 255, nothing)
    cv2.createTrackbar('HMax', 'image', 0, 179, nothing)
    cv2.createTrackbar('SMax', 'image', 0, 255, nothing)
    cv2.createTrackbar('VMax', 'image', 0, 255, nothing)
    
    # Set default value for Max HSV trackbars
    cv2.setTrackbarPos('HMax', 'image', 179)
    cv2.setTrackbarPos('SMax', 'image', 255)
    cv2.setTrackbarPos('VMax', 'image', 255)
    
    # Trackbar names in the order (hMin, sMin, vMin, hMax, sMax, vMax)
    TRACKBARS = ('HMin', 'SMin', 'VMin', 'HMax', 'SMax', 'VMax')
    
    # Previously printed values; all zeros so the first iteration prints the
    # initial trackbar state.
    previous = (0, 0, 0, 0, 0, 0)
    
    # The image never changes inside the loop, so convert it to HSV once.
    hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
    
    while True:
        # Get current positions of all trackbars
        current = tuple(cv2.getTrackbarPos(name, 'image') for name in TRACKBARS)
    
        # Set minimum and maximum HSV values to display
        lower = np.array(current[:3])
        upper = np.array(current[3:])
    
        # Color threshold and keep only the pixels inside the selected range
        mask = cv2.inRange(hsv, lower, upper)
        result = cv2.bitwise_and(image, image, mask=mask)
    
        # Print if there is a change in HSV value
        if current != previous:
            print("(hMin = %d , sMin = %d, vMin = %d), (hMax = %d , sMax = %d, vMax = %d)" % current)
            previous = current
    
        # Display result image; press 'q' to quit
        cv2.imshow('image', result)
        if cv2.waitKey(10) & 0xFF == ord('q'):
            break
    
    cv2.destroyAllWindows()
    

提交回复
热议问题