Detect text regions in an image using OpenCV
import cv2


def captch_ex(file_name):
    """Detect text regions in an image and outline them with rectangles.

    Thresholds the grayscale image to isolate light text, dilates the result
    so individual characters merge into word/line-sized blobs, then draws a
    bounding rectangle around every sufficiently large contour and displays
    the annotated image.

    :param file_name: path to the input image file
    """
    img = cv2.imread(file_name)
    img_final = cv2.imread(file_name)  # untouched copy, used by the optional crop-for-OCR code below
    img2gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    ret, mask = cv2.threshold(img2gray, 180, 255, cv2.THRESH_BINARY)
    image_final = cv2.bitwise_and(img2gray, img2gray, mask=mask)
    # For black text use cv2.THRESH_BINARY_INV instead.
    ret, new_img = cv2.threshold(image_final, 180, 255, cv2.THRESH_BINARY)

    # Dilate to merge characters into blobs and remove noisy fragments.
    # A wider kernel dilates more horizontally; a taller one, more vertically.
    kernel = cv2.getStructuringElement(cv2.MORPH_CROSS, (3, 3))
    dilated = cv2.dilate(new_img, kernel, iterations=9)  # more iterations -> more dilation

    # findContours returns (image, contours, hierarchy) in OpenCV 3.x but only
    # (contours, hierarchy) in OpenCV 2.x/4.x; unpack version-agnostically.
    found = cv2.findContours(dilated, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
    contours = found[0] if len(found) == 2 else found[1]

    index = 0  # counter for the optional cropped-region filenames below
    for contour in contours:
        # Bounding rectangle of the candidate text region.
        [x, y, w, h] = cv2.boundingRect(contour)

        # Skip small false positives that are unlikely to be text.
        if w < 35 and h < 35:
            continue

        # Draw a rectangle around the detected region on the original image.
        cv2.rectangle(img, (x, y), (x + w, y + h), (255, 0, 255), 2)

        '''
        # You can crop each region and send it to OCR; falsely detected
        # regions will simply return no text.
        cropped = img_final[y:y + h, x:x + w]
        s = file_name + '/crop_' + str(index) + '.jpg'
        cv2.imwrite(s, cropped)
        index = index + 1
        '''

    # Show the original image with the detected regions outlined.
    cv2.imshow('captcha_result', img)
    cv2.waitKey()


file_name = 'your_image.jpg'
captch_ex(file_name)
Since no one has posted a complete solution, here's an approach. Using the observation that the desired text is in white and that words are structured in a horizontal alignment, we can use color segmentation to extract and OCR the letters.
Perform color segmentation. We load the image, convert to HSV format, define lower/upper ranges and perform color segmentation using
cv2.inRange()
to obtain a binary mask. Dilate to connect text characters. We create a horizontal-shaped kernel using
cv2.getStructuringElement()
then dilate using cv2.dilate()
to combine individual letters into a single contour. Remove non-text contours. We find contours with
cv2.findContours()
and filter using aspect ratio to remove non-text characters. Since the text is in a horizontal orientation, if a contour's aspect ratio is below a predefined threshold then we remove that non-text contour by filling it in with cv2.drawContours()
Perform OCR. We bitwise-and the dilated image with the initial mask to isolate only text characters and invert the image so that the text is in black with the background in white. Finally, we throw the image into Pytesseract OCR
Here's a visualization of each step:
Input image
Mask generated from color segmentation
# Load image, convert to HSV format, define lower/upper ranges, and perform# color segmentation to create a binary maskimage = cv2.imread('1.jpg')hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)lower = np.array([0, 0, 218])upper = np.array([157, 54, 255])mask = cv2.inRange(hsv, lower, upper)
Dilated image to connect text-contours and removed non-text contours using aspect ratio filtering
# Create horizontal kernel and dilate to connect text characterskernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5,3))dilate = cv2.dilate(mask, kernel, iterations=5)# Find contours and filter using aspect ratio# Remove non-text contours by filling in the contourcnts = cv2.findContours(dilate, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)cnts = cnts[0] if len(cnts) == 2 else cnts[1]for c in cnts: x,y,w,h = cv2.boundingRect(c) ar = w / float(h) if ar < 5: cv2.drawContours(dilate, [c], -1, (0,0,0), -1)
Bitwise-and both masks and invert to get result ready for OCR
# Bitwise dilated image with mask, invert, then OCRresult = 255 - cv2.bitwise_and(dilate, mask)data = pytesseract.image_to_string(result, lang='eng',config='--psm 6')print(data)
Result from Pytesseract OCR using --psm 6
configuration setting to assume a uniform block of text. Look here for more configuration options
All women become like their mothers. That is their tragedy. No man does. That's his. OSCAR WILDE
Full code
import cv2
import numpy as np
import pytesseract

pytesseract.pytesseract.tesseract_cmd = r"C:\Program Files\Tesseract-OCR\tesseract.exe"

# Step 1: color segmentation. Convert to HSV and keep only the white-ish
# pixels (low saturation, high value) as a binary mask.
bgr_image = cv2.imread('1.jpg')
hsv_image = cv2.cvtColor(bgr_image, cv2.COLOR_BGR2HSV)
hsv_lower = np.array([0, 0, 218])
hsv_upper = np.array([157, 54, 255])
text_mask = cv2.inRange(hsv_image, hsv_lower, hsv_upper)

# Step 2: dilate with a wide rectangular kernel so the letters of each text
# line merge into a single contour.
horizontal_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5,3))
dilated = cv2.dilate(text_mask, horizontal_kernel, iterations=5)

# Step 3: drop non-text contours. Text lines are much wider than tall, so
# any contour whose aspect ratio falls below the threshold is filled black.
# (OpenCV 3.x returns a 3-tuple from findContours; 2.x/4.x return a 2-tuple.)
found = cv2.findContours(dilated, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
contours = found[0] if len(found) == 2 else found[1]
for contour in contours:
    x, y, w, h = cv2.boundingRect(contour)
    aspect_ratio = w / float(h)
    if aspect_ratio < 5:
        cv2.drawContours(dilated, [contour], -1, (0,0,0), -1)

# Step 4: AND the surviving blobs with the original mask to recover letter
# shapes, invert so text is black on white, then run Tesseract OCR.
ocr_ready = 255 - cv2.bitwise_and(dilated, text_mask)
ocr_text = pytesseract.image_to_string(ocr_ready, lang='eng',config='--psm 6')
print(ocr_text)

cv2.imshow('mask', text_mask)
cv2.imshow('dilate', dilated)
cv2.imshow('result', ocr_ready)
cv2.waitKey()
The HSV lower/upper color range was determined using this HSV color thresholder script
import cv2
import numpy as np


def nothing(x):
    # Trackbar callback; slider positions are polled in the loop instead.
    pass


# Load the image to threshold interactively
image = cv2.imread('1.jpg')

# One window hosts both the trackbars and the preview
cv2.namedWindow('image')

# Create min/max trackbars for each HSV channel.
# OpenCV represents hue as 0-179; saturation and value as 0-255.
for bar_name, bar_max in (('HMin', 179), ('SMin', 255), ('VMin', 255),
                          ('HMax', 179), ('SMax', 255), ('VMax', 255)):
    cv2.createTrackbar(bar_name, 'image', 0, bar_max, nothing)
    # Upper-bound sliders start fully open so the whole image shows initially
    if bar_name.endswith('Max'):
        cv2.setTrackbarPos(bar_name, 'image', bar_max)

# Previously printed values, used to report only actual changes
phMin = psMin = pvMin = phMax = psMax = pvMax = 0

while True:
    # Poll the current slider positions
    hMin = cv2.getTrackbarPos('HMin', 'image')
    sMin = cv2.getTrackbarPos('SMin', 'image')
    vMin = cv2.getTrackbarPos('VMin', 'image')
    hMax = cv2.getTrackbarPos('HMax', 'image')
    sMax = cv2.getTrackbarPos('SMax', 'image')
    vMax = cv2.getTrackbarPos('VMax', 'image')

    # Threshold the image with the selected HSV range
    lower = np.array([hMin, sMin, vMin])
    upper = np.array([hMax, sMax, vMax])
    hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
    mask = cv2.inRange(hsv, lower, upper)
    result = cv2.bitwise_and(image, image, mask=mask)

    # Report the range whenever any slider moved since the last print
    if (phMin, psMin, pvMin, phMax, psMax, pvMax) != (hMin, sMin, vMin, hMax, sMax, vMax):
        print("(hMin = %d , sMin = %d, vMin = %d), (hMax = %d , sMax = %d, vMax = %d)" % (hMin , sMin , vMin, hMax, sMax , vMax))
        phMin, psMin, pvMin = hMin, sMin, vMin
        phMax, psMax, pvMax = hMax, sMax, vMax

    # Show the thresholded preview; press 'q' to quit
    cv2.imshow('image', result)
    if cv2.waitKey(10) & 0xFF == ord('q'):
        break

cv2.destroyAllWindows()
If you don't mind getting your hands dirty you could try and grow those text regions into one bigger rectangular region, which you feed to tesseract all at once.
I'd also suggest trying to threshold the image several times and feeding each of those to tesseract separately to see if that helps at all. You can compare the output to dictionary words to automatically determine if a particular OCR result is good or not.