Detect text region in image using OpenCV (Python)



import cv2


def captch_ex(file_name):
    img = cv2.imread(file_name)
    img_final = cv2.imread(file_name)
    img2gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    ret, mask = cv2.threshold(img2gray, 180, 255, cv2.THRESH_BINARY)
    image_final = cv2.bitwise_and(img2gray, img2gray, mask=mask)
    ret, new_img = cv2.threshold(image_final, 180, 255, cv2.THRESH_BINARY)  # for black text use cv2.THRESH_BINARY_INV

    # Remove the noisy portions by dilating the thresholded image.
    # The kernel shape controls the direction of dilation: a wider kernel
    # dilates more horizontally, a taller one more vertically.
    kernel = cv2.getStructuringElement(cv2.MORPH_CROSS, (3, 3))
    dilated = cv2.dilate(new_img, kernel, iterations=9)  # more iterations, more dilation

    # OpenCV 3.x: findContours returns 3 values
    _, contours, hierarchy = cv2.findContours(dilated, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
    # OpenCV 2.x/4.x: it returns 2 values; comment the line above and uncomment the line below
    # contours, hierarchy = cv2.findContours(dilated, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)

    for contour in contours:
        # Get the bounding rectangle of the contour
        [x, y, w, h] = cv2.boundingRect(contour)

        # Skip small false positives that aren't text
        if w < 35 and h < 35:
            continue

        # Draw a rectangle around the contour on the original image
        cv2.rectangle(img, (x, y), (x + w, y + h), (255, 0, 255), 2)

        '''
        # You can also crop each region and send it to OCR; a false detection
        # will simply return no text. (Initialize index = 0 before the loop.)
        cropped = img_final[y:y + h, x:x + w]
        s = file_name + '/crop_' + str(index) + '.jpg'
        cv2.imwrite(s, cropped)
        index = index + 1
        '''

    # Show the original image with the detected regions drawn on it
    cv2.imshow('captcha_result', img)
    cv2.waitKey()


file_name = 'your_image.jpg'
captch_ex(file_name)



Since no one has posted a complete solution, here's an approach. Using the observation that the desired text is white and that the words are arranged horizontally, we can use color segmentation to extract and OCR the letters.

  1. Perform color segmentation. We load the image, convert it to HSV format, define lower/upper ranges, and perform color segmentation using cv2.inRange() to obtain a binary mask.

  2. Dilate to connect text characters. We create a horizontal kernel using cv2.getStructuringElement(), then dilate using cv2.dilate() to combine individual letters into a single contour.

  3. Remove non-text contours. We find contours with cv2.findContours() and filter using the aspect ratio to remove non-text contours. Since the text is in a horizontal orientation, if a contour's aspect ratio falls below a predefined threshold, we remove it by filling in the contour with cv2.drawContours().

  4. Perform OCR. We bitwise-and the dilated image with the initial mask to isolate only the text characters and invert the image so that the text is black on a white background. Finally, we throw the image into Pytesseract OCR.


Here's a visualization of each step:

Input image

Mask generated from color segmentation

# Load image, convert to HSV format, define lower/upper ranges, and perform
# color segmentation to create a binary mask
image = cv2.imread('1.jpg')
hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
lower = np.array([0, 0, 218])
upper = np.array([157, 54, 255])
mask = cv2.inRange(hsv, lower, upper)

Dilated image to connect text-contours and removed non-text contours using aspect ratio filtering

# Create horizontal kernel and dilate to connect text characters
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5,3))
dilate = cv2.dilate(mask, kernel, iterations=5)

# Find contours and filter using aspect ratio
# Remove non-text contours by filling in the contour
cnts = cv2.findContours(dilate, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
cnts = cnts[0] if len(cnts) == 2 else cnts[1]
for c in cnts:
    x,y,w,h = cv2.boundingRect(c)
    ar = w / float(h)
    if ar < 5:
        cv2.drawContours(dilate, [c], -1, (0,0,0), -1)

Bitwise-and both masks and invert to get result ready for OCR

# Bitwise dilated image with mask, invert, then OCR
result = 255 - cv2.bitwise_and(dilate, mask)
data = pytesseract.image_to_string(result, lang='eng', config='--psm 6')
print(data)

Result from Pytesseract OCR using the --psm 6 configuration setting, which assumes a uniform block of text. See the Tesseract documentation for more configuration options.

All women become
like their mothers.
That is their tragedy.
No man does.
That's his.

OSCAR WILDE

Full code

import cv2
import numpy as np
import pytesseract

pytesseract.pytesseract.tesseract_cmd = r"C:\Program Files\Tesseract-OCR\tesseract.exe"

# Load image, convert to HSV format, define lower/upper ranges, and perform
# color segmentation to create a binary mask
image = cv2.imread('1.jpg')
hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
lower = np.array([0, 0, 218])
upper = np.array([157, 54, 255])
mask = cv2.inRange(hsv, lower, upper)

# Create horizontal kernel and dilate to connect text characters
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5,3))
dilate = cv2.dilate(mask, kernel, iterations=5)

# Find contours and filter using aspect ratio
# Remove non-text contours by filling in the contour
cnts = cv2.findContours(dilate, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
cnts = cnts[0] if len(cnts) == 2 else cnts[1]
for c in cnts:
    x,y,w,h = cv2.boundingRect(c)
    ar = w / float(h)
    if ar < 5:
        cv2.drawContours(dilate, [c], -1, (0,0,0), -1)

# Bitwise dilated image with mask, invert, then OCR
result = 255 - cv2.bitwise_and(dilate, mask)
data = pytesseract.image_to_string(result, lang='eng', config='--psm 6')
print(data)

cv2.imshow('mask', mask)
cv2.imshow('dilate', dilate)
cv2.imshow('result', result)
cv2.waitKey()

The HSV lower/upper color range was determined using this HSV color thresholder script:

import cv2
import numpy as np

def nothing(x):
    pass

# Load image
image = cv2.imread('1.jpg')

# Create a window
cv2.namedWindow('image')

# Create trackbars for color change
# Hue is from 0-179 for OpenCV
cv2.createTrackbar('HMin', 'image', 0, 179, nothing)
cv2.createTrackbar('SMin', 'image', 0, 255, nothing)
cv2.createTrackbar('VMin', 'image', 0, 255, nothing)
cv2.createTrackbar('HMax', 'image', 0, 179, nothing)
cv2.createTrackbar('SMax', 'image', 0, 255, nothing)
cv2.createTrackbar('VMax', 'image', 0, 255, nothing)

# Set default value for Max HSV trackbars
cv2.setTrackbarPos('HMax', 'image', 179)
cv2.setTrackbarPos('SMax', 'image', 255)
cv2.setTrackbarPos('VMax', 'image', 255)

# Initialize HSV min/max values
hMin = sMin = vMin = hMax = sMax = vMax = 0
phMin = psMin = pvMin = phMax = psMax = pvMax = 0

while(1):
    # Get current positions of all trackbars
    hMin = cv2.getTrackbarPos('HMin', 'image')
    sMin = cv2.getTrackbarPos('SMin', 'image')
    vMin = cv2.getTrackbarPos('VMin', 'image')
    hMax = cv2.getTrackbarPos('HMax', 'image')
    sMax = cv2.getTrackbarPos('SMax', 'image')
    vMax = cv2.getTrackbarPos('VMax', 'image')

    # Set minimum and maximum HSV values to display
    lower = np.array([hMin, sMin, vMin])
    upper = np.array([hMax, sMax, vMax])

    # Convert to HSV format and color threshold
    hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
    mask = cv2.inRange(hsv, lower, upper)
    result = cv2.bitwise_and(image, image, mask=mask)

    # Print if there is a change in HSV value
    if((phMin != hMin) | (psMin != sMin) | (pvMin != vMin) | (phMax != hMax) | (psMax != sMax) | (pvMax != vMax)):
        print("(hMin = %d , sMin = %d, vMin = %d), (hMax = %d , sMax = %d, vMax = %d)" % (hMin, sMin, vMin, hMax, sMax, vMax))
        phMin = hMin
        psMin = sMin
        pvMin = vMin
        phMax = hMax
        psMax = sMax
        pvMax = vMax

    # Display result image
    cv2.imshow('image', result)
    if cv2.waitKey(10) & 0xFF == ord('q'):
        break

cv2.destroyAllWindows()


If you don't mind getting your hands dirty, you could try to grow those text regions into one bigger rectangular region and feed that to tesseract all at once.
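A minimal sketch of that idea, assuming you have already collected (x, y, w, h) bounding boxes from a contour pass like the one in the first answer (the ocr_merged_region helper name is my own):

import cv2
import pytesseract

def ocr_merged_region(img, boxes):
    """Grow all detected text boxes into one rectangle and OCR it once.

    `boxes` is a list of (x, y, w, h) tuples, e.g. from cv2.boundingRect.
    """
    if not boxes:
        return ''
    # Union of the boxes: the smallest rectangle covering every detection
    x1 = min(x for x, y, w, h in boxes)
    y1 = min(y for x, y, w, h in boxes)
    x2 = max(x + w for x, y, w, h in boxes)
    y2 = max(y + h for x, y, w, h in boxes)
    region = img[y1:y2, x1:x2]
    # One tesseract call on the merged region instead of one call per box
    return pytesseract.image_to_string(region, config='--psm 6')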

I'd also suggest thresholding the image several times and feeding each result to tesseract separately, to see if that helps at all. You can compare the output against dictionary words to automatically determine whether a particular OCR result is good.
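A rough sketch of that second idea; the threshold values and the tiny english_words set below are placeholders, and you would load a real word list in practice:

import cv2
import pytesseract

# Hypothetical stand-in; load a real dictionary file for actual use
english_words = {'all', 'women', 'become', 'like', 'their', 'mothers', 'that',
                 'is', 'tragedy', 'no', 'man', 'does', 'his'}

def dictionary_score(text):
    # Fraction of OCR'd tokens that appear in the word list
    tokens = [t.strip(".,'").lower() for t in text.split()]
    return sum(t in english_words for t in tokens) / len(tokens) if tokens else 0.0

gray = cv2.imread('your_image.jpg', cv2.IMREAD_GRAYSCALE)

# Threshold at several levels, OCR each binarized image, keep the best-scoring output
candidates = []
for t in range(100, 220, 20):
    _, binary = cv2.threshold(gray, t, 255, cv2.THRESH_BINARY)
    candidates.append(pytesseract.image_to_string(binary, config='--psm 6'))

print(max(candidates, key=dictionary_score))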