欢迎您访问程序员文章站本站旨在为大家提供分享程序员计算机编程知识!
您现在的位置是: 首页

(附完整python源码)基于tensorflow、opencv的入门案例_发票识别一:关键区域定位

程序员文章站 2022-07-14 12:42:12
...

分为三篇博客:

发票识别一:从一张发票照片精确定位出“发票号码”、“发票代码”的数字区域

发票识别二:将“发票号码”、“发票代码”的数字串分割成单个数字(源码见:后面博客)

发票识别三:制作数据集,训练cnn网络识别数字(源码见:后面博客)


注:该代码适用于 “国税通用机打发票”。尽量拍摄下正常摆放的完整发票。


发票识别一:区域精确定位

1 具体步骤

1.1 读入发票

1.2 获取矩形框:将发票可能的区域定位出来

边缘检测-二值化-形态学-轮廓检测-获取多个矩形框

HSV颜色空间-分别提取“红、黑、蓝”的掩膜-形态学-轮廓检测-获取多个矩形框

结果:得到上百个候选框

1.3 矩形框进行初步筛选。

根据矩形框的长高信息、位置信息进行筛选

结果:剩下十多个矩形框

1.4 矩形框融合

若某个矩形框位于其他矩形框内部,或与其他矩形框的重叠度极高(重叠面积占自身面积的95%以上),将其删除

若两个矩形框在同一水平线上,有一定的交叉,且框内都包含数字,将两个框融合

结果:这一步通常还有3-6个矩形框

1.5 定位

剩下的矩形框从上到下排序。根据矩形框的相对位置、尺寸,定位出“发票号码”、“发票代码”的两个区域。

1.6 找了几张发票,定位区域如下。下一步“字符分割”,见发票识别二。

(附完整python源码)基于tensorflow、opencv的入门案例_发票识别一:关键区域定位          (附完整python源码)基于tensorflow、opencv的入门案例_发票识别一:关键区域定位


2.源码如下

2.1 main.py

# encoding: utf-8
import cv2
import numpy as np
import roi_merge as roi_
import util_funs as util
from get_rects import *
def main(img, k=0):
    """Locate the invoice "code" and "number" fields in *img* and save the crops.

    The candidate rectangles from ``get_rects`` are de-duplicated and merged
    by ``roi_.Roi_solve``, sorted, and reduced to the two target rectangles by
    ``util.get_targetRoi``. Each target is cropped to its own JPEG file, then
    outlined in green on *img*, and the annotated image is shown in a window
    until a key is pressed.

    Args:
        img: BGR image as returned by ``cv2.imread``. It is drawn on in place.
        k: Suffix inserted into the output file names, so several invoices
            can be processed without overwriting each other. Defaults to 0.

    Raises:
        ValueError: if no pair of rectangles qualifies as code/number fields.
    """
    region = get_rects(img)
    roi_solve = roi_.Roi_solve(region)
    roi_solve.rm_inside()
    roi_solve.rm_overlop()
    region = roi_solve.merge_roi()
    region = util.sort_region(region)
    region = util.get_targetRoi(region)
    # get_targetRoi returns None when fewer than two candidates remain or no
    # adjacent pair matches; fail with a clear message instead of a TypeError.
    if not region:
        raise ValueError("invoice code/number regions could not be located")

    # The first rectangle is saved as the invoice code, the second as the number.
    prefixes = ('代码', '号码')
    for prefix, rect2 in zip(prefixes, region[:2]):
        w1, w2 = rect2[0], rect2[0] + rect2[2]
        h1, h2 = rect2[1], rect2[1] + rect2[3]
        # Save the crop BEFORE drawing, so the green outline never ends up
        # inside the image handed to the later digit-segmentation step.
        cv2.imwrite(prefix + str(k) + '.jpg', img[h1:h2, w1:w2])
        box = [[w1, h2], [w1, h1], [w2, h1], [w2, h2]]
        # drawContours expects 32-bit integer points; fix the dtype explicitly
        # rather than relying on the platform's default integer width.
        cv2.drawContours(img, np.array([box], dtype=np.int32), 0, (0, 255, 0), 1)
    cv2.imshow('img', img)
    cv2.waitKey(0)

if __name__ == '__main__':
    # "img_path" is a placeholder: replace it with the path of the invoice photo.
    img = cv2.imread("img_path")
    # cv2.imread does not raise on a missing or unreadable file; it returns
    # None, which would otherwise fail deep inside the pipeline.
    if img is None:
        raise SystemExit("cannot read image: img_path")
    main(img)

2.2 get_rects.py

# encoding: utf-8
import cv2
import numpy as np
import roi_merge as roi_
import util_funs as util
def get_rects(img_):
    """Collect candidate rectangles for the invoice code/number fields.

    Four passes are run over copies of the input image and their results are
    concatenated in order: one edge-based pass (``sobel_``) followed by three
    colour-mask passes (``color_`` with flags 0, 1 and 2). Every pass is
    joined into blocks by ``morphological_`` and filtered by ``find_region``.

    Args:
        img_: BGR input image; it is not modified (each pass works on a copy).

    Returns:
        List of bounding rectangles that survived ``find_region``'s filters.
        The same area may appear more than once, since the passes overlap.
    """
    candidates = []

    # Edge pass: gradient edges, joined into solid blocks.
    edge_blocks = morphological_(sobel_(img_.copy()))
    candidates.extend(find_region(edge_blocks))

    # Colour passes: the digits may be printed in red, black or blue.
    # In color_, flag 0 selects the blue range, 1 the red ranges and
    # 2 the dark (black) range.
    for colour_flag in (0, 1, 2):
        colour_blocks = morphological_(color_(img_.copy(), colour_flag))
        candidates.extend(find_region(colour_blocks))

    return candidates

def sobel_(img):
    """Return a binary edge map of *img* based on the x-direction gradient.

    Pipeline: grayscale -> histogram equalisation -> 3x3 Gaussian blur ->
    histogram equalisation -> 5x5 median blur -> Sobel (dx=1, dy=0, 8-bit
    output) -> fixed threshold at 170.

    Args:
        img: BGR image.

    Returns:
        Single-channel image holding 255 where the thresholded gradient
        exceeds 170 and 0 elsewhere.
    """
    gray = cv2.equalizeHist(cv2.cvtColor(img, cv2.COLOR_BGR2GRAY))
    # Smooth, then re-equalise the contrast before removing speckle noise.
    smoothed = cv2.equalizeHist(
        cv2.GaussianBlur(gray, (3, 3), 0, 0, cv2.BORDER_DEFAULT))
    denoised = cv2.medianBlur(smoothed, 5)
    # Gradient along x only (first-order in x, zero-order in y).
    grad_x = cv2.Sobel(denoised, cv2.CV_8U, 1, 0, ksize=3)
    # threshold() returns (retval, image); only the image is needed.
    return cv2.threshold(grad_x, 170, 255, cv2.THRESH_BINARY)[1]

def color_(img, flag):
    """Return a binary mask of the pixels of *img* that fall in one colour range.

    Args:
        img: BGR image.
        flag: Colour selector. 0 selects the blue range, 1 the red ranges
            (red wraps around the hue axis, so two ranges are combined) and
            2 the dark/black range.

    Returns:
        Single-channel mask produced by ``cv2.inRange`` (255 inside the
        selected range, 0 outside).

    Raises:
        ValueError: if *flag* is not 0, 1 or 2.
    """
    # (lower, upper) HSV bounds for each colour range.
    black = (np.array([0, 0, 0]), np.array([180, 255, 180]))
    red_low = (np.array([0, 43, 46]), np.array([10, 255, 255]))
    red_high = (np.array([156, 43, 46]), np.array([180, 255, 255]))
    blue = (np.array([100, 43, 46]), np.array([124, 255, 255]))
    ranges_by_flag = {0: (blue,), 1: (red_low, red_high), 2: (black,)}

    if flag not in ranges_by_flag:
        # Previously an unknown flag fell through and raised UnboundLocalError.
        raise ValueError("flag must be 0, 1 or 2, got %r" % (flag,))

    hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
    mask_ = None
    for lower, upper in ranges_by_flag[flag]:
        part = cv2.inRange(hsv, lower, upper)
        # bitwise_or cannot wrap around the way uint8 addition can.
        mask_ = part if mask_ is None else cv2.bitwise_or(mask_, part)
    return mask_

def morphological_(img):
    """Join nearby foreground pixels of a binary image into solid blocks.

    A dilation followed by an erosion with the same 9x7 rectangle is applied
    first, then three dilations with a 9x1 rectangle, so that the digits of
    one field end up connected as a single block.

    Args:
        img: Single-channel binary image.

    Returns:
        The processed image.
    """
    block_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (9, 7))
    line_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (9, 1))
    # Dilate then erode with the same kernel.
    grown = cv2.dilate(img, block_kernel, iterations=1)
    shrunk = cv2.erode(grown, block_kernel, iterations=1)
    # Final widening with the 9x1 kernel, repeated three times.
    return cv2.dilate(shrunk, line_kernel, iterations=3)

def find_region(img):
    """Return the bounding rectangles in *img* that could be a code/number field.

    Contours are extracted from the binary image, converted to bounding
    rectangles and filtered by size, aspect ratio and position.

    Args:
        img: Single-channel binary image (its ``shape`` must have exactly two
            entries: height and width).

    Returns:
        List of ``(x, y, width, height)`` tuples from ``cv2.boundingRect``
        that pass every filter, in contour order.
    """
    h_img, w_img = img.shape
    # findContours returns (image, contours, hierarchy) on OpenCV 3.x but
    # (contours, hierarchy) on 2.x and 4.x; the contours are always the
    # second-to-last element.
    contours = cv2.findContours(img, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)[-2]
    rect_list = [cv2.boundingRect(contour) for contour in contours]

    # Diagnostic output, written so it prints the same text on Python 2 and 3.
    print("rect***************")
    print("%d %d" % (w_img, h_img))
    print("rect***************")

    region = []
    for rect in rect_list:
        width, height = rect[2], rect[3]
        # Size limits relative to the image; explicit floor division keeps the
        # thresholds identical regardless of the interpreter's "/" semantics.
        if width < w_img // 10 or width > w_img // 3:
            continue
        if height < h_img // 50 or height > h_img // 15:
            continue
        # Keep only wide, flat boxes: width/height between 5 and 20.
        ratio = float(width) / float(height)
        if ratio > 20 or ratio < 5:
            continue
        # The code and number fields sit in the top-right quarter of the invoice.
        if rect[0] < w_img // 2 or rect[1] > h_img // 2:
            continue
        region.append(rect)
    return region

2.3 roi_merge.py

#encoding: utf-8
import cv2
import numpy as np

class Roi_solve:
    """Cursor-driven clean-up of a list of candidate rectangles.

    Every rectangle is an ``(x, y, width, height)`` sequence. The list passed
    to the constructor is modified in place. The three public passes are
    ``rm_inside``, ``rm_overlop`` and ``merge_roi``; each one rewinds the
    cursor, walks the list and returns it.
    """

    def __init__(self, rect):
        self.rect = rect  # all rectangles; mutated in place by the passes
        self.cursor = -1  # index of the rectangle currently being examined
        self.rect_num = len(rect)  # live count of rectangles in self.rect

    def next(self):
        """Advance the cursor by one and return the rectangle it points at."""
        self.cursor = self.cursor + 1
        return self.rect[self.cursor]

    def hasNext(self):
        """Return True while there are rectangles after the cursor."""
        return self.rect_num > self.cursor + 1

    def remove(self, flag=-1):
        """Delete a rectangle and keep the cursor and counter consistent.

        With the default ``flag=-1`` the rectangle under the cursor is
        deleted and the cursor steps back, so the following ``next()`` lands
        on the element that slid into its place. Otherwise ``flag`` is the
        index to delete and the cursor is left alone; this is only safe for
        indices after the cursor.
        """
        if flag == -1:
            del self.rect[self.cursor]
            self.cursor = self.cursor - 1
        else:
            del self.rect[flag]
        self.rect_num = self.rect_num - 1

    def add(self, add_rect):
        """Append *add_rect* to the list so a running pass will still visit it."""
        self.rect.append(add_rect)
        self.rect_num = self.rect_num + 1

    def get_u_d_l_r(self, rect_):
        """Return the (upper, lower, left, right) edges of *rect_*."""
        upper_, down_ = rect_[1], rect_[1] + rect_[3]
        left_, right_ = rect_[0], rect_[0] + rect_[2]
        return upper_, down_, left_, right_

    def is_intersect(self, y01, y02, x01, x02, y11, y12, x11, x12):
        """Return True if two rectangles overlap or touch.

        The first rectangle spans ``y01..y02`` / ``x01..x02``, the second
        ``y11..y12`` / ``x11..x12``. The test is the usual "distance between
        centres <= half the summed extents" on both axes, with both sides
        doubled so that no division is needed. The result is therefore exact
        for integer input and does not depend on how "/" rounds.
        """
        twice_dx = abs((x01 + x02) - (x11 + x12))
        twice_dy = abs((y01 + y02) - (y11 + y12))
        width_sum = abs(x01 - x02) + abs(x11 - x12)
        height_sum = abs(y01 - y02) + abs(y11 - y12)
        return twice_dx <= width_sum and twice_dy <= height_sum

    def intersect_area(self, y01, y02, x01, x02, y11, y12, x11, x12):
        """Return the overlap area of two rectangles known to intersect."""
        col = min(x02, x12) - max(x01, x11)
        row = min(y02, y12) - max(y01, y11)
        return col * row

    def intersect_height(self, y01, y02, y11, y12):
        """Return the vertical overlap as a fraction of the shorter rectangle.

        The overlap length is divided by each rectangle's height and the
        larger of the two fractions is returned.
        """
        row = float(min(y02, y12) - max(y01, y11))
        return max(row / float(y02 - y01), row / float(y12 - y11))

    def remove_inside(self, rect_curr):
        """Delete the cursor rectangle if it lies strictly inside another one."""
        u_curr, d_curr, l_curr, r_curr = self.get_u_d_l_r(rect_curr)
        for rect_ in self.rect:
            u_, d_, l_, r_ = self.get_u_d_l_r(rect_)
            # Strict inequalities: a rectangle is never "inside" itself.
            if u_curr > u_ and d_curr < d_ and l_curr > l_ and r_curr < r_:
                self.remove()
                break

    def remove_overlop(self, rect_curr):
        """Delete the cursor rectangle if another covers over 95% of its area."""
        u_curr, d_curr, l_curr, r_curr = self.get_u_d_l_r(rect_curr)
        area_curr = rect_curr[2] * rect_curr[3]
        for rect_ in self.rect:
            u_, d_, l_, r_ = self.get_u_d_l_r(rect_)
            if not self.is_intersect(u_curr, d_curr, l_curr, r_curr,
                                     u_, d_, l_, r_):
                continue
            # Skip the rectangle itself (and exact duplicates). Comparing as
            # tuples also works when a rectangle is a list or a numpy array.
            if tuple(rect_) == tuple(rect_curr):
                continue
            area_ = self.intersect_area(u_curr, d_curr, l_curr, r_curr,
                                        u_, d_, l_, r_)
            if float(area_) / float(area_curr) > 0.95:
                self.remove()
                break

    def merge(self, rect_curr):
        """Fuse the cursor rectangle with a later one on the same text line.

        Only rectangles after the cursor are considered. Two rectangles are
        fused when they intersect and their vertical overlap fraction exceeds
        0.6. The result keeps the vertical position and height of the wider
        rectangle and spans both horizontally. Both originals are removed and
        the fused rectangle is appended, so it is visited again later in the
        same pass.
        """
        u_curr, d_curr, l_curr, r_curr = self.get_u_d_l_r(rect_curr)
        for i in range(self.cursor + 1, len(self.rect)):
            # Diagnostic output, identical text on Python 2 and 3.
            print("%d %d %d" % (i, self.cursor + 1, len(self.rect)))
            rect_ = self.rect[i]
            u_, d_, l_, r_ = self.get_u_d_l_r(rect_)
            if not self.is_intersect(u_curr, d_curr, l_curr, r_curr,
                                     u_, d_, l_, r_):
                continue
            if not self.intersect_height(u_curr, d_curr, u_, d_) > 0.6:
                continue
            wider = rect_curr if rect_curr[2] > rect_[2] else rect_
            new_l = min(l_curr, l_)
            new_r = max(r_curr, r_)
            # Build a plain tuple so the list stays homogeneous; mixing numpy
            # arrays into it would make later "==" comparisons ambiguous.
            fused = list(wider)
            fused[0] = new_l
            fused[2] = new_r - new_l
            new_rect = tuple(fused)
            # Delete the later index first so the cursor index stays valid.
            self.remove(i)
            self.remove()
            self.add(new_rect)
            break

    def rm_inside(self):
        """Run ``remove_inside`` over every rectangle; return the list."""
        self.cursor = -1
        while self.hasNext():
            rect_curr = self.next()
            self.remove_inside(rect_curr)
        return self.rect

    def rm_overlop(self):
        """Run ``remove_overlop`` over every rectangle; return the list."""
        self.cursor = -1
        while self.hasNext():
            rect_curr = self.next()
            self.remove_overlop(rect_curr)
        return self.rect

    def merge_roi(self):
        """Run ``merge`` over every rectangle; return the list."""
        self.cursor = -1
        while self.hasNext():
            rect_curr = self.next()
            self.merge(rect_curr)
        return self.rect

2.4 util_funs.py

#encoding:utf-8
import cv2
import numpy as np

def get_u_d_l_r(rect_):
    """Return the (upper, lower, left, right) edges of an (x, y, w, h) rect."""
    x, y = rect_[0], rect_[1]
    w, h = rect_[2], rect_[3]
    # Edges are reported vertical pair first, then horizontal pair.
    return y, y + h, x, x + w

#region排序。flag=1时:从上到下;flag=0时:从左到右
def sort_region(region, flag=1):
    """Return the rectangles of *region* sorted by one coordinate.

    Args:
        region: Iterable of ``(x, y, width, height)`` rectangles.
        flag: Index of the coordinate to sort by. 1 (the default) sorts by
            ``y``, i.e. top to bottom; 0 sorts by ``x``, i.e. left to right.

    Returns:
        A new list; *region* itself is left untouched. Rectangles with equal
        keys keep their original relative order.
    """
    # A keyed sort keeps every rectangle exactly once. Looking rectangles up
    # by key value (list.index) returned the first match each time, which
    # duplicated one rectangle and dropped the others whenever keys were equal.
    return sorted(region, key=lambda rect: rect[flag])

#判断上下两个相邻框框是否为发票代码、发票号码
def judge_(rect_0, rect_1):
    """Return True if two rectangles look like the code/number field pair.

    Args:
        rect_0: The upper ``(x, y, width, height)`` rectangle.
        rect_1: The lower ``(x, y, width, height)`` rectangle.

    Returns:
        True when *rect_1* starts strictly below *rect_0*, the distance
        between their top edges is less than three times their mean height,
        and the right edge of *rect_0* lies further right than that of
        *rect_1*. False otherwise.
    """
    # Vertical gap between the two top edges.
    distance_ = rect_1[1] - rect_0[1]
    box_height = float(rect_0[3] + rect_1[3]) / 2
    right_0 = rect_0[0] + rect_0[2]
    right_1 = rect_1[0] + rect_1[2]
    # The upper box must reach further to the right than the lower one.
    return 0 < distance_ < box_height * 3 and right_0 > right_1

#获取 “发票代码”、“发票号码”的区域
def get_targetRoi(region):
    """Pick the invoice-code / invoice-number pair out of *region*.

    The rectangles are sorted with ``sort_region`` and every pair of
    neighbours is tested with ``judge_``; the first pair that passes wins.

    Args:
        region: List of ``(x, y, width, height)`` rectangles.

    Returns:
        ``[upper_rect, lower_rect]`` for the first accepted pair, or None when
        *region* holds fewer than two rectangles or no pair is accepted.
    """
    if len(region) <= 1:
        return None
    ordered = sort_region(region)
    # Walk over neighbouring pairs in sorted order.
    for upper, lower in zip(ordered, ordered[1:]):
        if judge_(upper, lower):
            return [upper, lower]
    return None