问题
I've been building an automatic meter reader for the Raspberry Pi. I've successfully localized the meter display using YOLO object detection. After that, I crop the display for the next stage of the pipeline: segmenting the characters.
But I'm stuck here: I can't segment the characters reliably. Here is the code and some samples from my current attempt:
import glob
import os
import tkinter as tk
# from pathlib import Path
from tkinter import filedialog
# 3rd party
import cv2
import imutils
import matplotlib.pyplot as plt
import numpy as np
import pytesseract
# import skimage.filters as skfilters
from pytesseract import Output
# from skimage.morphology import skeletonize
# Path of the image to process; open_file() only shows a file dialog when this is None.
source = "sample1.jpg"
# Meter display kind: selects the threshold parameters in thres() and whether morph() runs.
# NOTE(review): this name shadows the builtin `type` for the whole module.
type = 'analog' # analog or digital
# Directory that receives the intermediate debug images and the OCR text file.
output_dir = '.'
# NOTE(review): not referenced anywhere in the visible code; the pipeline numbers its
# steps with the global `order` instead.
step = 0
def open_file():
    """Load the input image into the globals ``img`` and ``img_color``.

    When the global ``source`` is None, a Tk file dialog asks the user for the
    image path and the chosen path is stored back into ``source``. The global
    step counter ``order`` is incremented.

    Raises:
        FileNotFoundError: the file dialog was dismissed without a selection.
        OSError: ``cv2.imread`` could not read/decode the file at ``source``.
    """
    global img_color, img, source, order
    print("opening file...")
    order = order + 1
    if source is None:
        # Hide the empty Tk root window so only the file dialog is shown.
        tk.Tk().withdraw()
        chosen = filedialog.askopenfilename()
        if not chosen:
            # A cancelled dialog returns an empty value; fail here rather than
            # handing an empty path to cv2.imread.
            raise FileNotFoundError("no input image selected")
        source = chosen
        print(source)
    loaded = cv2.imread(source)
    if loaded is None:
        # cv2.imread signals failure by returning None instead of raising, which
        # would otherwise only surface as an obscure error in a later step.
        raise OSError("could not read image: {0}".format(source))
    img = loaded
    # Both names refer to the same array at this point.
    img_color = img
def resize():
    """Resize the globals ``img`` and ``img_color`` to a height of 200.

    Each global is replaced by the result of ``imutils.resize(..., height=200)``
    and the step counter ``order`` is incremented. No debug image is written
    for this step (the imwrite calls were disabled in the original).
    """
    global img, img_color, output_dir, order, source
    print("performing resize...")
    order += 1
    target_height = 200
    resized_work = imutils.resize(img, height=target_height)
    resized_color = imutils.resize(img_color, height=target_height)
    img = resized_work
    img_color = resized_color
def gray():
    """Replace ``img`` with the third channel (V) of its BGR->HSV conversion.

    Increments ``order`` and saves the single-channel result as
    ``<source basename>-<order>-gray.png`` inside ``output_dir``.
    """
    global img, output_dir, order, source
    print("performing gray...")
    order += 1
    hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
    # Index 2 of the HSV split is the value plane; plain COLOR_BGR2GRAY was the
    # alternative left commented out in the original.
    img = cv2.split(hsv)[2]
    out_name = "{0}-{1}-gray.png".format(os.path.basename(source), str(order))
    cv2.imwrite(os.path.join(output_dir, out_name), img)
def eq():
    """Apply ``cv2.equalizeHist`` to the global ``img`` and save the result.

    Increments ``order``; the output file is
    ``<source basename>-<order>-eq.png`` inside ``output_dir``.
    """
    global img, img_color, output_dir, order, source
    print("performing eq")
    order += 1
    equalized = cv2.equalizeHist(img)
    img = equalized
    out_name = "{0}-{1}-eq.png".format(os.path.basename(source), str(order))
    cv2.imwrite(os.path.join(output_dir, out_name), img)
def blur():
    """Median-blur the global ``img`` with aperture size 9 and save the result.

    Increments ``order``; the output file is
    ``<source basename>-<order>-blur.png`` inside ``output_dir``.
    """
    global img, img_color, output_dir, order, source
    print("performing blur")
    order += 1
    aperture = 9
    # A 7x7 Gaussian blur (sigma 3) was the alternative left commented out.
    img = cv2.medianBlur(img, aperture)
    out_name = "{0}-{1}-blur.png".format(os.path.basename(source), str(order))
    cv2.imwrite(os.path.join(output_dir, out_name), img)
def denoise():
    """Denoise the global ``img`` in two passes, saving the image after each.

    Pass 1 is ``cv2.fastNlMeansDenoising`` (saved as ``...-denoise1.png``),
    pass 2 is ``cv2.bilateralFilter`` on that result (saved as
    ``...-denoise2.png``). ``order`` is incremented once for both passes.
    """
    global img, img_color, output_dir, order, source
    print("performing denoise...")
    order += 1
    base = os.path.basename(source)

    # Positional arguments after dst=None are h, templateWindowSize, searchWindowSize.
    # NOTE(review): template=21 / search=7 looks swapped relative to the commonly
    # used 7 / 21 - confirm this is intentional.
    strength, template_window, search_window = 10, 21, 7
    img = cv2.fastNlMeansDenoising(img, None, strength, template_window, search_window)
    first_pass_name = "{0}-{1}-denoise1.png".format(base, str(order))
    cv2.imwrite(os.path.join(output_dir, first_pass_name), img)

    img = cv2.bilateralFilter(img, 9, 75, 75)
    second_pass_name = "{0}-{1}-denoise2.png".format(base, str(order))
    cv2.imwrite(os.path.join(output_dir, second_pass_name), img)
def edge():
    """Run Canny edge detection on the global ``img`` with median-based thresholds.

    The thresholds are ``(1 - sigma) * median`` and ``(1 + sigma) * median``
    with ``sigma = 0.33``, clamped to the range 0..255 and truncated to int.
    Increments ``order`` and saves the edge map as
    ``<source basename>-<order>-edge.png`` inside ``output_dir``.
    """
    global img, img_color, output_dir, order, source
    print("performing edge...")
    order += 1
    sigma = 0.33
    # Median of the single-channel pixel intensities drives both thresholds.
    median = np.median(img)
    low_threshold = int(max(0, (1.0 - sigma) * median))
    high_threshold = int(min(255, (1.0 + sigma) * median))
    img = cv2.Canny(img, low_threshold, high_threshold)
    out_name = "{0}-{1}-edge.png".format(os.path.basename(source), str(order))
    cv2.imwrite(os.path.join(output_dir, out_name), img)
def thres():
    """Binarise the global ``img`` with a Gaussian adaptive threshold.

    The block size and constant depend on the global ``type``:
    ``'digital'`` uses (11, 2) and ``'analog'`` uses (115, 3). For any other
    value ``img`` is left unchanged. Increments ``order`` and always saves the
    current ``img`` as ``<source basename>-<order>-thres.png``.

    Variants left commented out in the original (Otsu, fixed 127 threshold,
    THRESH_TOZERO, mean adaptive threshold, skimage local/mean thresholds,
    skeletonize) are not active.
    """
    global img, output_dir, order, source, type
    print("performing thres...")
    order += 1
    # (blockSize, C) per display kind.
    adaptive_params = {
        'digital': (11, 2),
        'analog': (115, 3),
    }
    chosen = adaptive_params.get(type)
    if chosen is not None:
        block_size, constant = chosen
        img = cv2.adaptiveThreshold(
            img, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, block_size, constant
        )
    out_name = "{0}-{1}-thres.png".format(os.path.basename(source), str(order))
    cv2.imwrite(os.path.join(output_dir, out_name), img)
def morph():
    """Apply a morphological opening with a 5x5 rectangular kernel to ``img``.

    Increments ``order`` and saves the result as
    ``<source basename>-<order>-morph.png`` inside ``output_dir``. Erosion,
    top-hat and black-hat were alternatives left commented out in the original.
    """
    global img, img_color, output_dir, order, source
    print("performing morph...")
    order += 1
    structuring_element = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 5))
    opened = cv2.morphologyEx(img, cv2.MORPH_OPEN, structuring_element)
    img = opened
    out_name = "{0}-{1}-morph.png".format(os.path.basename(source), str(order))
    cv2.imwrite(os.path.join(output_dir, out_name), img)
def find_blob():
    """Label the connected components of the global ``img`` (8-connectivity).

    Increments ``order`` and saves the (unmodified) ``img`` as
    ``<source basename>-<order>-blob.png`` inside ``output_dir``.

    Returns:
        tuple: ``(num_labels, labelmap)`` as returned by
        ``cv2.connectedComponents``. Earlier versions returned nothing, so
        callers that ignore the return value are unaffected.
    """
    global img_color, img, output_dir, order, source
    print("performing find_blob...")
    order = order + 1
    connectivity = 8
    # The Python signature is connectedComponents(image[, labels[, connectivity[, ltype]]]).
    # Passing connectivity and ltype positionally puts them into the `labels` and
    # `connectivity` slots, so the labelling silently ran with cv2.CV_32S (== 4) as
    # the connectivity. Keyword arguments bind them to the intended parameters.
    num_labels, labelmap = cv2.connectedComponents(
        img, connectivity=connectivity, ltype=cv2.CV_32S
    )
    # img = np.hstack((img, labelmap.astype(np.float32)/(num_labels - 1)))
    cv2.imwrite(os.path.join(output_dir, "{0}-{1}-blob.png".format(os.path.basename(source), str(order))), img)
    return num_labels, labelmap
def find_contours():
    """Find character-sized contours in ``img`` and return their cropped images.

    Steps, all on the current globals:
      1. Find every contour of ``img`` and draw them (red) plus their bounding
         boxes (blue) on a copy of ``img_color``.
      2. Keep boxes that are at least as tall as wide and whose box area is
         strictly between 1000 and 10000; sort them as (x, y, w, h) tuples.
      3. For each kept box: crop it from ``img``, pad it by 20 % of its height
         and width with constant 0, invert it, save it as
         ``<name>-<order>-char-<n>.png`` and draw a numbered green box.
      4. Save the annotated copy as ``<source basename>-<order>-ctrs.png``.

    Returns:
        list: the padded, inverted character crops in sorted box order.
    """
    global img_color, img, output_dir, order, source
    print("performing find_contours...")
    order += 1

    found = cv2.findContours(img, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
    contours = imutils.grab_contours(found)

    overlay = img_color.copy()
    cv2.drawContours(overlay, contours, -1, (0, 0, 255), 1)  # all contours

    candidates = []
    for contour in contours:
        x, y, w, h = cv2.boundingRect(contour)
        cv2.rectangle(overlay, (x, y), (x + w, y + h), (255, 0, 0), 1)
        box_area = w * h
        if h >= w and 1000 < box_area < 10000:
            candidates.append((x, y, w, h))
    # Tuple ordering sorts primarily by x, i.e. left to right.
    candidates.sort()

    stem = os.path.basename(os.path.splitext(source)[0])

    crops = []
    for number, box in enumerate(candidates, start=1):
        bx, by, bw, bh = box
        glyph = img[by:by + bh, bx:bx + bw]  # crop char
        # Pad each side by 20 % of the crop's rows / columns.
        pad_rows = int(0.2 * glyph.shape[0])
        pad_cols = int(0.2 * glyph.shape[1])
        glyph = cv2.copyMakeBorder(
            glyph, pad_rows, pad_rows, pad_cols, pad_cols, cv2.BORDER_CONSTANT, None, 0
        )
        # Invert so the 0-valued border ends up at 255.
        glyph = cv2.bitwise_not(glyph)
        crops.append(glyph)
        cv2.imwrite(os.path.join(output_dir, "{1}-{0}-char-{2}.png".format(order, stem, number)), glyph)
        cv2.rectangle(overlay, box, (0, 255, 0), 3)  # char contours
        cv2.putText(overlay, "#{}".format(number), (bx + 2, by), cv2.FONT_HERSHEY_SIMPLEX, 1.0, (0, 255, 0), 2)

    cv2.imwrite(os.path.join(output_dir, "{0}-{1}-ctrs.png".format(os.path.basename(source), str(order))), overlay)
    return crops
def detect_chars(image):
    """Show the boxes Tesseract reports for ``image``.

    Runs ``pytesseract.image_to_data`` on ``image``, draws every reported box
    onto ``image`` in place (green, thickness 2), then displays it in a window
    named ``'img'`` and blocks until a key is pressed.

    Args:
        image: the image to analyse. Earlier versions ignored this argument
            and always used the module-level ``img``; the argument is now the
            image that is analysed, annotated and shown.
    """
    data = pytesseract.image_to_data(image, output_type=Output.DICT)
    boxes = zip(data['left'], data['top'], data['width'], data['height'])
    for (x, y, w, h) in boxes:
        cv2.rectangle(image, (x, y), (x + w, y + h), (0, 255, 0), 2)
    cv2.imshow('img', image)
    cv2.waitKey(0)
def detect_mser(img):
    """Display the convex hulls of the MSER regions detected in ``img``.

    The hulls are drawn as closed green polylines on a copy of ``img``, which
    is shown in a window named ``'img'`` until a key is pressed; all OpenCV
    windows are then closed. ``img`` itself is not modified.
    """
    canvas = img.copy()
    detector = cv2.MSER_create()
    regions, _ = detector.detectRegions(img)
    hulls = []
    for points in regions:
        # convexHull expects the points as an (N, 1, 2) array.
        hulls.append(cv2.convexHull(points.reshape(-1, 1, 2)))
    cv2.polylines(canvas, hulls, 1, (0, 255, 0))
    cv2.imshow('img', canvas)
    cv2.waitKey(0)
    cv2.destroyAllWindows()
def vertical_projection():
    """Plot the per-row intensity sums of the inverted global ``img``.

    The first figure shows ``img`` itself; a second figure plots, for each
    row, the sum of ``255 - img`` along that row.

    The global ``img`` is only read. Earlier versions declared it ``global``
    and assigned the inverted image back to it, which silently inverted the
    image for every later pipeline step.
    """
    plt.imshow(img, cmap=plt.cm.binary)
    plt.figure()
    # Work on a local inverted copy so the shared pipeline image is untouched.
    inverted = 255 - img
    # axis=1 sums along each row, giving one value per image row.
    # NOTE(review): splitting characters side by side would normally use column
    # sums (axis=0) - confirm which profile is intended.
    row_sums = np.sum(inverted, axis=1).tolist()
    plt.plot(row_sums)
    plt.show()
    cv2.waitKey(0)
def ocr(image):
    """Return the text pytesseract recognises in ``image``.

    The image is passed to ``pytesseract.image_to_string`` with the config
    string ``"--psm 10 digits"``. Nothing is written to disk here; the
    data-frame based variant and the per-call text file were left commented
    out in the original.
    """
    tesseract_config = "--psm 10 digits"
    return pytesseract.image_to_string(image, config=tesseract_config)
def main_file():
    """Placeholder entry point; currently does nothing and returns None."""
    return None
def main_dir():
    """Placeholder entry point; currently does nothing and returns None."""
    return None
if __name__ == '__main__':
    # Script pipeline: load -> resize -> gray -> denoise -> threshold
    # -> (analog only) morphological opening -> contour-based character crops
    # -> per-character OCR -> write the list of results next to the input name.
    # `order` is the global step counter every stage increments.
    order = 0
    open_file()
    # Disabled stages in the original: crop(), blur(), eq(), edge(),
    # detect_mser(img), find_blob().
    for stage in (resize, gray, denoise, thres):
        stage()
    if type == 'analog':
        morph()
    chars = find_contours()
    text = [ocr(char) for char in chars]
    result_path = os.path.join(output_dir, os.path.basename(source) + '.txt')
    with open(result_path, 'w+') as f:
        f.write(str(text))
    print(text)
requirements.txt file:
cycler==0.10.0
imutils==0.5.3
kiwisolver==1.1.0
matplotlib==3.1.2
munch==2.5.0
numpy==1.17.4
object-detection==0.1
opencv-python==4.1.2.30
pew==1.2.0
Pillow==6.2.1
pipenv==2018.11.15.dev0
pyparsing==2.4.5
PyQtWebEngine==5.13.2
pytesseract==0.3.0
python-dateutil==2.8.1
six==1.13.0
virtualenv-clone==0.5.3
image samples:
meter display 1
meter display 2
meter display 3
meter display 4
Please help me fix/improve the code so that I can segment the characters correctly.
来源:https://stackoverflow.com/questions/59315560/segmenting-meter-characters-for-automatic-meter-reader-using-opencv-python