#coding:utf-8
import cv2 as cv
import numpy as np
import math
import matplotlib.pyplot as plt
from PIL import Image
import ABO_mix
import chinese_ocr
def chinese_o(th2, shrinkTwoTimesTranslation_copy, x, y, w, h, cha_num, get_all, is_ABO):
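# Segments the handwritten characters inside one table cell and runs them
# through chinese_ocr / ABO_mix.  Rough meaning of the arguments (inferred
# from the calls below, not documented upstream):
#   th2   - binarised image of the whole form; ~th2[y:y+h, x:x+w] is the cell
#   shrinkTwoTimesTranslation_copy - grayscale copy used for the colour crops
#   x, y, w, h - bounding box of the cell inside th2
#   cha_num    - expected number of characters, used as a merge/cut hint
#   get_all    - output mode passed on to chinese_ocr.inference
#   is_ABO     - 1 when the cell is the blood-type field
# Returns (return_change, recognised_text).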
return_change = 0
chinese_num_lim = 0
cut = ~th2[y:y+h, x:x+w]#crop the target rectangle (table cell) out of the form
copy_cut = ~th2[y:y+h, x:x+w]
cv.imwrite('cut_ori.jpg', cut)
##### Horizontal projection segmentation #####
b = [0 for z in range(0, h)]
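# Horizontal projection: the nested loops below fill b[j] with the number of
# black pixels in row j of the cell and whiten copy_cut for the later profile image.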
for j in range(0,h):
for i in range(0,w):
if copy_cut[j,i]==0:
b[j]+=1
copy_cut[j,i]=255
print b
b_total = 0
for j in range(2,h-2):
b_total = b_total + b[j]
blank_counting = 0
for j in range(3,h-3):
for i in range(3,w-3):
if cut[j,i]==0:
blank_counting+=1
print "blank_counting/w:", blank_counting/w
if float(blank_counting)/w <= 0.5:#too little ink: treat the cell as empty
return return_change, '无'#'无' marks an empty cell
else:
print sum(b)/(8*h) #4
word_start = 0
word_b_set = []
if sum(b)/(8*h) <= 3:#minimum threshold for the horizontal cut
standard = 3
else:
standard = sum(b)/(8*h)#4
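# Scan the row profile: a band opens when b[j] reaches the threshold and is
# closed when the projection remaining below the current row drops under it
# (or at the last scanned row); bands at least 10 px tall are kept in
# word_b_set as candidate text lines.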
for j in range(2,h-2):
if b[j] >= standard and word_start == 0:
word_start_b = j
word_start = 1
#if (b[j] < standard or j == h-3) and word_start == 1:
if (b_total < standard or j == h-3) and word_start == 1:
word_end_b = j
word_start = 0
if abs(word_end_b - word_start_b) >= 10:#a band at least 10 rows tall is treated as one line of handwriting
word_b = (word_start_b, word_end_b)
word_b_set.append(word_b)
b_total = b_total - b[j]
print word_b_set
for j in range(0,h):
for i in range(0,b[j]):
copy_cut[j,i]=0
cv.imwrite('copy_cut.jpg', copy_cut)
##### Vertical projection segmentation #####
word_set = []
word_cluster_cut_set = []
word_cluster_color_cut_set = []
word_count = 0
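# For every detected row band: build its vertical projection, cut it into
# character segments at sufficiently wide low-ink gaps, then merge or split
# the segments based on their lengths and the expected character count cha_num.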
for row_cut in range(0,len(word_b_set)):
cut = ~th2[y + word_b_set[row_cut][0]:y + word_b_set[row_cut][1], x:x+w]
h = abs(word_b_set[row_cut][1] - word_b_set[row_cut][0])
a = [0 for z in range(0, w)]
#print(a) #a = [0,0,0,...,0] array of length w holding the black-pixel count of each column
#count the black pixels in every column
for j in range(0,w): #iterate over the columns
for i in range(0,h): #iterate over the rows
if cut[i,j]==0: #black pixel
a[j]+=1 #increment this column's counter
cut[i,j]=255 #whiten the pixel once it has been counted
print a
print sum(a)/(2*w)#2
if sum(a)/(2*w) <= 3:#minimum threshold for the vertical cut
standard = 3#3
else:
standard = sum(a)/(2*w)#2
word_cutting = 0
word_cut_counting = 0
word_cut_counting_over = 0
word_total_length = 0
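# Column-scan state: word_cutting marks an open low-ink gap, word_cut_counting
# its width in columns, and word_cut_counting_over flags a gap that reached
# cha_num columns; a segment normally ends at the middle of the gap.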
word_start_a = 3#start at column 3 to avoid the cell border
for j in range(3,w-2):#vertical segmentation
if a[j] >= standard and word_start == 0:
word_start_a = j
word_start = 1
#set a cut point when three consecutive columns fall below the threshold
if word_cutting == 1 and a[j] < standard and a[j-1] < standard and a[j-2] < standard:
word_cut_end = j
word_cut_counting+=1
if a[j] < standard and a[j-1] < standard and a[j-2] < standard and word_start == 1 and word_cutting == 0 and word_cut_counting_over == 0:
word_cut_begin = j-2
word_cut_end = j
word_cutting = 1
word_cut_counting = 1
'''
if word_cutting == 1 and a[j] < standard and a[j-1] < standard:
word_cut_end = j
word_cut_counting+=1
if a[j] < standard and a[j-1] < standard and word_start == 1 and word_cutting == 0 and word_cut_counting_over == 0:
word_cut_begin = j-1
word_cut_end = j
word_cutting = 1
word_cut_counting = 1
'''
if (a[j] >= standard and (word_cutting == 1 or word_cut_counting_over == 1)) or (word_cut_counting >= cha_num and word_cutting == 1) or (j == w-3 and word_cut_counting_over == 0):
if word_cut_counting >= cha_num:
word_cut_counting_over = 1
else:
word_cut_counting_over = 0
word_cutting = 0
word_cut_counting = 0
if j == w-3 and word_cut_counting_over == 0:
word_end_a = j
else:
word_end_a = (word_cut_begin + word_cut_end)//2
if abs(word_end_a - word_start_a) >= 12:#a segment at least 12 columns wide is treated as one handwritten character
word = (word_start_a, word_end_a, word_b_set[row_cut][0], word_b_set[row_cut][1])
word_cut = ~th2[y + word_b_set[row_cut][0]:y + word_b_set[row_cut][1], x + word_start_a:x + word_end_a]
word_cut_color = shrinkTwoTimesTranslation_copy[y + word_b_set[row_cut][0]:y + word_b_set[row_cut][1], x + word_start_a:x + word_end_a]
word_cut_mean = cv.mean(word_cut)
print "word_cut_mean[0]:", word_cut_mean[0]
if word_cut_mean[0] < 245:#skip blank segments
word_set.append(word)
cv.imwrite('word_cut' + str(word_count) + '.jpg', word_cut)#save the segmented binary image
cv.imwrite('cut_color' + str(word_count) + '.jpg', word_cut_color)#save the segmented grayscale image
word_count+=1
word_start_a = word_end_a
print word_set
##### Clustering or cutting #####
word_length_set = []
word_cut_position = []
#compute the total, average and minimum lengths of the segments
word_shortest_length = word_set[0][1] - word_set[0][0]
for j in range(0,len(word_set)):
word_length = word_set[j][1] - word_set[j][0]
word_total_length = word_total_length + word_length
word_length_set.append(word_length)
if word_length < word_shortest_length and float(word_b_set[row_cut][1] - word_b_set[row_cut][0])/word_length <= 2:
word_shortest_length = word_length
word_average_length = word_total_length/len(word_set)
#print word_set,len(word_set),cha_num
word_cluster_count = 0
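# Merge heuristic: when there are at least cha_num segments, segments shorter
# than 0.75 * the average length are merged towards whichever side has the
# shorter neighbour; the merged boundaries are collected in word_cut_position
# and the row is re-cut below.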
if len(word_set) >= cha_num:#more segments than expected characters: prefer merging, otherwise prefer cutting
word_Cluster2Right = 0
for j in range(0,len(word_set)):
#when a segment is shorter than 0.75 of the average length, merge it with its left/right neighbour; first mark the new cut positions
if word_length_set[j] < word_average_length*0.75:#1.25,0.75
if j != len(word_set)-1 and word_length_set[j-1] >= word_length_set[j+1]:#merge to the right
if j-1 < 0:#edge case at the left boundary
cut_position = word_set[j][0]
word_cut_position.append(cut_position)
word_Cluster2Right = 1
print 1
if j == len(word_set)-1 or word_length_set[j-1] < word_length_set[j+1]:#merge to the left
if word_Cluster2Right == 1:#the previous segment was merged to the right
cut_position = word_set[j][1]
word_cut_position.append(cut_position)
print 2
else:
if j-1 < 0:
cut_position = word_set[j][0]
word_cut_position.append(cut_position)
else:
if len(word_cut_position) == 0:
cut_position = word_set[j][1]
word_cut_position.append(cut_position)
else:
del word_cut_position[len(word_cut_position)-1]
cut_position = word_set[j][1]
word_cut_position.append(cut_position)
print 3
word_Cluster2Right = 0
else:
if j-1 < 0:#no merge, segment sits at the left boundary
cut_position = word_set[j][0]
word_cut_position.append(cut_position)
cut_position = word_set[j][1]
word_cut_position.append(cut_position)
print 4
else:#no merge, segment is not at the boundary
cut_position = word_set[j][1]
word_cut_position.append(cut_position)
print 5
word_Cluster2Right = 0
#print word_cut_position
print word_cut_position
word_cluster_set = []
#word_cluster_count = 0
#re-cut the row at the merged cut positions
for j in range(0,len(word_cut_position)-1):
word_cluster = (word_cut_position[j], word_cut_position[j+1], word_b_set[row_cut][0], word_b_set[row_cut][1])
word_cluster_set.append(word_cluster)
word_cluster_cut = ~th2[y + word_b_set[row_cut][0]:y + word_b_set[row_cut][1], x + word_cut_position[j]:x + word_cut_position[j+1]]
word_cluster_color_cut = shrinkTwoTimesTranslation_copy[y + word_b_set[row_cut][0]:y + word_b_set[row_cut][1], x + word_cut_position[j]:x + word_cut_position[j+1]]
cv.imwrite('cluster_word' + str(word_cluster_count) + '.jpg', word_cluster_cut)
word_cluster_cut_set.append(word_cluster_cut)
cv.imwrite('color_cluster' + str(word_cluster_count) + '.jpg', word_cluster_color_cut)
word_cluster_color_cut_set.append(word_cluster_color_cut)
word_cluster_count+=1
print word_cluster_set
else:
##### Cutting #####
word_over_TotalLength = 0
is_someword_over = 0
if len(word_set) == 1:#only a single segment in this row
h = word_set[0][3] - word_set[0][2]
w = word_set[0][1] - word_set[0][0]
b = [0 for z in range(0, h)]
b_total = 0
single_cut = ~th2[y + word_set[0][2]:y + word_set[0][3], x + word_set[0][0]:x + word_set[0][1]]
#use the overall horizontal and vertical projections to trim away the blank margins
for j in range(0,h):
for i in range(0,w):
if single_cut[j,i] == 0:
b[j]+= 1
b_total+= 1
start_already = 0
b_before = 0
cut_finish = 0
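# b_cut_start / b_cut_end bracket the rows that actually contain ink, ignoring
# up to 4 stray black pixels at either end; a_cut_start / a_cut_end below do
# the same for the columns.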
for j in range(0,h):
b_before+= b[j]#horizontal projection accumulated before row j
b_after = b_total - b_before#horizontal projection remaining after row j
if b_before > 4 and start_already == 0 and cut_finish != 1:
b_cut_start = j
start_already = 1
if start_already == 1 and b_after < 4 and cut_finish != 1:
b_cut_end = j
cut_finish = 1
a = [0 for z in range(0, w)]
a_total = 0
for j in range(0,w):
for i in range(0,h):
if single_cut[i,j] == 0:
a[j]+= 1
a_total+= 1
start_already = 0
a_before = 0
cut_finish = 0
for j in range(0,w):
a_before+= a[j]#vertical projection accumulated before column j
a_after = a_total - a_before#vertical projection remaining after column j
if a_before > 4 and start_already == 0 and cut_finish != 1:
a_cut_start = j
start_already = 1
if start_already == 1 and a_after < 4 and cut_finish != 1:
a_cut_end = j
cut_finish = 1
#if the inked region is more than twice as wide as it is tall, split it into two segments
print "len(word_set) == 1:",float(a_cut_end - a_cut_start)/(b_cut_end - b_cut_start)
if float(a_cut_end - a_cut_start)/(b_cut_end - b_cut_start) > 2:#1.8
word_part_length = int((a_cut_end - a_cut_start)/2)
for i in range(0,2):
word_cluster_cut = ~th2[y + b_cut_start + word_b_set[row_cut][0]:y + b_cut_end + word_b_set[row_cut][0], x + word_set[0][0] + a_cut_start + i*word_part_length:x + word_set[0][0] + a_cut_start + (i+1)*word_part_length]
word_cluster_color_cut = shrinkTwoTimesTranslation_copy[y + b_cut_start + word_b_set[row_cut][0]:y + b_cut_end + word_b_set[row_cut][0], x + word_set[0][0] + a_cut_start + i*word_part_length:x + word_set[0][0] + a_cut_start + (i+1)*word_part_length]
cv.imwrite('cluster_word' + str(word_cluster_count) + '.jpg', word_cluster_cut)
word_cluster_cut_set.append(word_cluster_cut)
cv.imwrite('color_cluster' + str(word_cluster_count) + '.jpg', word_cluster_color_cut)
word_cluster_color_cut_set.append(word_cluster_color_cut)
word_cluster_count+=1
else:#the region is not more than twice as wide as it is tall: keep the single segment
word_cluster_set = word_set
for j in range(0,len(word_cluster_set)):
word_cluster_cut = ~th2[y + word_b_set[row_cut][0]:y + word_b_set[row_cut][1], x + word_cluster_set[j][0]:x + word_cluster_set[j][1]]
word_cluster_color_cut = shrinkTwoTimesTranslation_copy[y + word_b_set[row_cut][0]:y + word_b_set[row_cut][1], x + word_cluster_set[j][0]:x + word_cluster_set[j][1]]
cv.imwrite('cluster_word' + str(word_cluster_count) + '.jpg', word_cluster_cut)
word_cluster_cut_set.append(word_cluster_cut)
cv.imwrite('color_cluster' + str(word_cluster_count) + '.jpg', word_cluster_color_cut)
word_cluster_color_cut_set.append(word_cluster_color_cut)
word_cluster_count+=1
else:#more than one segment in this row
for j in range(0,len(word_set)):#check whether any segment is more than twice the shortest length
if word_length_set[j] > word_shortest_length*2.0 and len(word_set)>1:#1.75
word_over_TotalLength+= word_length_set[j]
is_someword_over = 1
#print word_over_TotalLength,word_average_length
if is_someword_over == 1:#some segment exceeds twice the shortest length: enter the splitting pass
#word_cluster_count = 0
word_cluster_set = word_set
for j in range(0,len(word_cluster_set)):
if word_length_set[j] > word_shortest_length*3.0:#split segments longer than three times the shortest length into equal parts
word_part_length = int(word_length_set[j]/math.ceil(float(word_length_set[j])/word_shortest_length))#word_average_length
for i in range(0,int(math.ceil(float(word_length_set[j])/word_shortest_length))):#word_average_length
word_cluster_cut = ~th2[y + word_b_set[row_cut][0]:y + word_b_set[row_cut][1], x + word_cluster_set[j][0] + i*word_part_length:x + word_cluster_set[j][0] + (i+1)*word_part_length]
word_cluster_color_cut = shrinkTwoTimesTranslation_copy[y + word_b_set[row_cut][0]:y + word_b_set[row_cut][1], x + word_cluster_set[j][0] + i*word_part_length:x + word_cluster_set[j][0] + (i+1)*word_part_length]
cv.imwrite('cluster_word' + str(word_cluster_count) + '.jpg', word_cluster_cut)
word_cluster_cut_set.append(word_cluster_cut)
cv.imwrite('color_cluster' + str(word_cluster_count) + '.jpg', word_cluster_color_cut)
word_cluster_color_cut_set.append(word_cluster_color_cut)
word_cluster_count+=1
else:#segments no longer than three times the shortest length are kept as they are
word_cluster_cut = ~th2[y + word_b_set[row_cut][0]:y + word_b_set[row_cut][1], x + word_cluster_set[j][0]:x + word_cluster_set[j][1]]
word_cluster_color_cut = shrinkTwoTimesTranslation_copy[y + word_b_set[row_cut][0]:y + word_b_set[row_cut][1], x + word_cluster_set[j][0]:x + word_cluster_set[j][1]]
cv.imwrite('cluster_word' + str(word_cluster_count) + '.jpg', word_cluster_cut)
word_cluster_cut_set.append(word_cluster_cut)
cv.imwrite('color_cluster' + str(word_cluster_count) + '.jpg', word_cluster_color_cut)
word_cluster_color_cut_set.append(word_cluster_color_cut)
word_cluster_count+=1
else:#no segment exceeds twice the shortest length: keep them all as they are
word_cluster_set = word_set
for j in range(0,len(word_cluster_set)):
word_cluster_cut = ~th2[y + word_b_set[row_cut][0]:y + word_b_set[row_cut][1], x + word_cluster_set[j][0]:x + word_cluster_set[j][1]]
word_cluster_color_cut = shrinkTwoTimesTranslation_copy[y + word_b_set[row_cut][0]:y + word_b_set[row_cut][1], x + word_cluster_set[j][0]:x + word_cluster_set[j][1]]
cv.imwrite('cluster_word' + str(word_cluster_count) + '.jpg', word_cluster_cut)
word_cluster_cut_set.append(word_cluster_cut)
cv.imwrite('color_cluster' + str(word_cluster_count) + '.jpg', word_cluster_color_cut)
word_cluster_color_cut_set.append(word_cluster_color_cut)
word_cluster_count+=1
for j in range(0,w): #iterate over the columns
for i in range((h-a[j]),h): #fill from the top of this column's projection bar down to the bottom
cut[i,j]=0 #paint the pixel black
cv.imwrite('cut' + str(row_cut) + '.jpg', cut)
chinese_result = ''
last_cha = ''
last_cha_set = []
if is_ABO == 1:#blood-type branch; this file handles the native-place (jiguan) field, so this branch is not used here
### Last word detection ###
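# The last segment is recognised first: if any of its top-3 dictionary indices
# is 689 (which this code maps to '型'), the trailing '型' is stripped and only
# the remaining segments go through letter detection.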
img = word_cluster_cut_set[len(word_cluster_cut_set)-1]
img_color = word_cluster_color_cut_set[len(word_cluster_cut_set)-1]
smaller = 0.9#0.9--normal,0.5--ABO
#Place to middle
h, w = img.shape
b = [0 for z in range(0, h)]
h_total = 0
h_num = 0
for j in range(0,h):
for i in range(0,w):
if img[j,i] == 0:
b[j]+= 1
h_num+= 1
h_total+= j
print "h_num:",h_num
if h_num != 0:
h_average = h_total//h_num
a = [0 for z in range(0, w)]
w_total = 0
w_num = 0
for j in range(0,w):
for i in range(0,h):
if img[i,j] == 0:
a[j]+= 1
w_num+= 1
w_total+= j
w_average = w_total//w_num
affineShrinkTranslation = np.array([[1, 0, int(w//2 - w_average)], [0, 1, int(h//2 - h_average)]], np.float32)
shrinkTwoTimesTranslation = cv.warpAffine(~img, affineShrinkTranslation, (w, h))
shrinkTwoTimesTranslation = cv.resize(shrinkTwoTimesTranslation,(int(w*smaller),int(h*smaller)),interpolation=cv.INTER_AREA)
cv.imwrite('bin_shrinkTwoTimesTranslation.jpg', shrinkTwoTimesTranslation)
shrinkTwoTimesTranslation_color = cv.warpAffine(~img_color, affineShrinkTranslation, (w, h))
shrinkTwoTimesTranslation_color = ~cv.resize(shrinkTwoTimesTranslation_color,(int(w*smaller),int(h*smaller)),interpolation=cv.INTER_AREA)
cv.imwrite('bin_shrinkTwoTimesTranslation_color.jpg', shrinkTwoTimesTranslation_color)
max_one = max(h,w)
bin = np.zeros((max_one,max_one), np.uint8)
bin.fill(0)
bin_color = np.zeros((max_one,max_one,3), np.uint8)
bin_color.fill(255)
rows_count = 0
for j in range(int(max_one//2-h*smaller/2), int(max_one//2+h*smaller/2)):
cols_count = 0
for i in range(int(max_one//2-w*smaller/2), int(max_one//2+w*smaller/2)):
if cols_count <= int(w*smaller)-1 and rows_count <= int(h*smaller)-1:
#print rows_count,cols_count,j,i
bin[j][i] = shrinkTwoTimesTranslation[rows_count][cols_count]
#print shrinkTwoTimesTranslation_color[rows_count][cols_count]
bin_color[j][i] = shrinkTwoTimesTranslation_color[rows_count][cols_count]
cols_count+= 1
rows_count+= 1
cv.imwrite('bin_ori.jpg', ~bin)
cv.imwrite('bin_ori_color.jpg', bin_color)
img = Image.fromarray(cv.cvtColor(bin_color,cv.COLOR_BGR2GRAY))
last_cha_set = chinese_ocr.inference(img, 1)
if len(last_cha_set) != 0:
#print len(last_cha_set)
if (last_cha_set[0] == 689 or last_cha_set[1] == 689 or last_cha_set[2] == 689):
last_cha = '型'
finding_len = len(word_cluster_cut_set) - 1
else:
last_cha = ''
finding_len = len(word_cluster_cut_set)
else:
last_cha = ''
finding_len = len(word_cluster_cut_set)
### Letter detection ###
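# Each remaining segment is centred on its ink centroid, scaled by `smaller`,
# padded into a square canvas and passed to ABO_mix.ABO_detection.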
for num in range(0,finding_len):
img = word_cluster_cut_set[num]
img_color = word_cluster_color_cut_set[num]
smaller = 0.5#0.9--normal,0.5--ABO
#Place to middle
h, w = img.shape
b = [0 for z in range(0, h)]
h_total = 0
h_num = 0
for j in range(0,h):
for i in range(0,w):
if img[j,i] == 0:
b[j]+= 1
h_num+= 1
h_total+= j
if h_num == 0:
continue
h_average = h_total//h_num
a = [0 for z in range(0, w)]
w_total = 0
w_num = 0
for j in range(0,w):
for i in range(0,h):
if img[i,j] == 0:
a[j]+= 1
w_num+= 1
w_total+= j
w_average = w_total//w_num
affineShrinkTranslation = np.array([[1, 0, int(w//2 - w_average)], [0, 1, int(h//2 - h_average)]], np.float32)
shrinkTwoTimesTranslation = cv.warpAffine(~img, affineShrinkTranslation, (w, h))
shrinkTwoTimesTranslation = cv.resize(shrinkTwoTimesTranslation,(int(w*smaller),int(h*smaller)),interpolation=cv.INTER_AREA)
cv.imwrite('bin_shrinkTwoTimesTranslation.jpg', shrinkTwoTimesTranslation)
shrinkTwoTimesTranslation_color = cv.warpAffine(~img_color, affineShrinkTranslation, (w, h))
shrinkTwoTimesTranslation_color = ~cv.resize(shrinkTwoTimesTranslation_color,(int(w*smaller),int(h*smaller)),interpolation=cv.INTER_AREA)
cv.imwrite('bin_shrinkTwoTimesTranslation_color.jpg', shrinkTwoTimesTranslation_color)
max_one = max(h,w)
bin = np.zeros((max_one,max_one), np.uint8)
bin.fill(0)
bin_color = np.zeros((max_one,max_one,3), np.uint8)
bin_color.fill(255)
rows_count = 0
for j in range(int(max_one//2-h*smaller/2), int(max_one//2+h*smaller/2)):
cols_count = 0
for i in range(int(max_one//2-w*smaller/2), int(max_one//2+w*smaller/2)):
if cols_count <= int(w*smaller)-1 and rows_count <= int(h*smaller)-1:
#print rows_count,cols_count,j,i
bin[j][i] = shrinkTwoTimesTranslation[rows_count][cols_count]
#print shrinkTwoTimesTranslation_color[rows_count][cols_count]
bin_color[j][i] = shrinkTwoTimesTranslation_color[rows_count][cols_count]
cols_count+= 1
rows_count+= 1
cv.imwrite('bin_ori.jpg', ~bin)
cv.imwrite('bin_ori_color.jpg', bin_color)
img = Image.fromarray(~bin)
#img = Image.fromarray(~bin)
ABO_result = ABO_mix.ABO_detection(img)
print ABO_result
chinese_result = chinese_result + str(ABO_result)
else:
#not the blood-type field
for num in range(0,len(word_cluster_cut_set)):
img = word_cluster_cut_set[num]
img_color = word_cluster_color_cut_set[num]
smaller = 0.9#per-character scale factor: 0.9--normal,0.5--ABO,0.8--jiguan
if smaller > 1:#enlarge the character region
h, w = img.shape
b = [0 for z in range(0, h)]
b_total = 0
for j in range(0,h):
for i in range(0,w):
if img[j,i] == 0:
b[j]+= 1
b_total+= 1
start_already = 0
b_before = 0
cut_finish = 0
for j in range(0,h):
b_before+= b[j]
b_after = b_total - b_before
if b_before > 4 and start_already == 0 and cut_finish != 1:
b_cut_start = j
start_already = 1
if start_already == 1 and b_after < 4 and cut_finish != 1:
b_cut_end = j
cut_finish = 1
a = [0 for z in range(0, w)]
a_total = 0
for j in range(0,w):
for i in range(0,h):
if img[i,j] == 0:
a[j]+= 1
a_total+= 1
start_already = 0
a_before = 0
cut_finish = 0
for j in range(0,w):
a_before+= a[j]
a_after = a_total - a_before
if a_before > 4 and start_already == 0 and cut_finish != 1:
a_cut_start = j
start_already = 1
if start_already == 1 and a_after < 4 and cut_finish != 1:
a_cut_end = j
cut_finish = 1
print a_cut_start,a_cut_end,b_cut_start,b_cut_end
#extract only the inked part of the rectangle, leaving no blank margin around it
roi_bigger = img[b_cut_start:b_cut_end,a_cut_start:a_cut_end]
cv.imwrite('roi_bigger.jpg', roi_bigger)
roi_bigger_color = img_color[b_cut_start:b_cut_end,a_cut_start:a_cut_end]
cv.imwrite('roi_bigger_color.jpg', roi_bigger_color)
bin = ~cv.resize(roi_bigger,(int((a_cut_end-a_cut_start)*smaller),int((b_cut_end-b_cut_start)*smaller)),interpolation=cv.INTER_AREA)
cv.imwrite('bin_shrinkTwoTimesTranslation.jpg', bin)
bin_color = cv.resize(roi_bigger_color,(int((a_cut_end-a_cut_start)*smaller),int((b_cut_end-b_cut_start)*smaller)),interpolation=cv.INTER_AREA)
cv.imwrite('bin_shrinkTwoTimesTranslation_color.jpg', bin_color)
else:#shrink the character region
#translate the character region to the centre of the image (place to middle)
h, w = img.shape
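# The loops below compute the ink centroid (h_average, w_average); the affine
# translation then shifts that centroid to the image centre before the crop is
# shrunk by `smaller`.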
b = [0 for z in range(0, h)]
h_total = 0
h_num = 0
for j in range(0,h):
for i in range(0,w):
if img[j,i] == 0:
b[j]+= 1
h_num+= 1
h_total+= j
if h_num == 0:
continue
h_average = h_total//h_num
a = [0 for z in range(0, w)]
w_total = 0
w_num = 0
for j in range(0,w):
for i in range(0,h):
if img[i,j] == 0:
a[j]+= 1
w_num+= 1
w_total+= j
w_average = w_total//w_num
#centre the character in the binary image
affineShrinkTranslation = np.array([[1, 0, int(w//2 - w_average)], [0, 1, int(h//2 - h_average)]], np.float32)
shrinkTwoTimesTranslation = cv.warpAffine(~img, affineShrinkTranslation, (w, h))
shrinkTwoTimesTranslation = cv.resize(shrinkTwoTimesTranslation,(int(w*smaller),int(h*smaller)),interpolation=cv.INTER_AREA)
cv.imwrite('bin_shrinkTwoTimesTranslation.jpg', shrinkTwoTimesTranslation)
#centre the character in the grayscale image
shrinkTwoTimesTranslation_color = cv.warpAffine(~img_color, affineShrinkTranslation, (w, h))
shrinkTwoTimesTranslation_color = ~cv.resize(shrinkTwoTimesTranslation_color,(int(w*smaller),int(h*smaller)),interpolation=cv.INTER_AREA)
cv.imwrite('bin_shrinkTwoTimesTranslation_color.jpg', shrinkTwoTimesTranslation_color)
max_one = max(h,w)
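# Paste the shrunken character into a square max(h, w) canvas: the binary
# canvas is filled with 0 (black, inverted when saved) and the colour canvas
# with 255 (white).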
bin = np.zeros((max_one,max_one), np.uint8)
bin.fill(0)
bin_color = np.zeros((max_one,max_one,3), np.uint8)
bin_color.fill(255)
#shrink the character and paste it into a larger blank canvas
rows_count = 0
for j in range(int(max_one//2-h*smaller/2), int(max_one//2+h*smaller/2)):
cols_count = 0
for i in range(int(max_one//2-w*smaller/2), int(max_one//2+w*smaller/2)):
if cols_count <= int(w*smaller)-1 and rows_count <= int(h*smaller)-1:
#print rows_count,cols_count,j,i
bin[j][i] = shrinkTwoTimesTranslation[rows_count][cols_count]
#print shrinkTwoTimesTranslation_color[rows_count][cols_count]
bin_color[j][i] = shrinkTwoTimesTranslation_color[rows_count][cols_count]
cols_count+= 1
rows_count+= 1
'''
horizontalsize = cols / scale
horizontalStructure = cv.getStructuringElement(cv.MORPH_RECT, (horizontalsize, 1))
erosion = cv.erode(th2,horizontalStructure,iterations = 1)
dilation = cv.dilate(erosion,horizontalStructure,iterations = 1)
'''
cv.imwrite('bin_ori.jpg', ~bin)
cv.imwrite('bin_ori_color.jpg', bin_color)
img = Image.fromarray(cv.cvtColor(bin_color,cv.COLOR_BGR2GRAY))#convert the OpenCV array to a PIL image
#img = Image.fromarray(~bin)
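# Recognition modes, as used by the calls below: chinese_ocr.inference(img, 1)
# appears to return the top-3 candidate dictionary indices, while
# inference(img, 0) returns the single best result as a character string.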
if get_all == 1:
chinese_set = chinese_ocr.inference(img, get_all)#handwritten Chinese recognition, keep all three candidate results
print "chinese_set", chinese_set
else:
if get_all == 2 and len(word_cluster_cut_set) == 1:
chinese_set = chinese_ocr.inference(img, 1)#single segment, ethnicity mode: receive the dictionary indices of the three candidates
chinese = chinese_ocr.inference(img, 0)#single segment: receive the best result as a Chinese character
return_change = str(chinese)
print "chinese_set", chinese_set
else:
chinese = chinese_ocr.inference(img, get_all)#handwritten Chinese recognition, keep only the best result as a Chinese character
if chinese_num_lim < 3:#cap the number of recognised characters
chinese_result = chinese_result + str(chinese)
chinese_num_lim+=1
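# Return convention: return_change stays 0 in the normal case, carries the best
# single-character result when get_all == 2 and only one segment was found, and
# is set to 2 when a trailing '型' was detected.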
if get_all == 1 or return_change != 0:
return return_change, chinese_set
if get_all != 1:
if last_cha == '型':
chinese_result = chinese_result + '型'
return_change = 2
#print "Chinese_ocr:",chinese_result
return return_change, chinese_result