This commit is contained in:
xxxxxx
2019-05-28 00:06:06 +08:00
commit aa1c91780a
6 changed files with 116 additions and 0 deletions

87
dataset/casia_hwdb.py Normal file
View File

@@ -0,0 +1,87 @@
"""
this is a wrapper handle CASIA_HWDB dataset
since original data is complicated
we using this class to get .png and label from raw
.gnt data
"""
import struct
import numpy as np
import cv2
class CASIAHWDBGNT(object):
"""
A .gnt file may contains many images and charactors
"""
def __init__(self, f_p):
self.f_p = f_p
def get_data_iter(self):
header_size = 10
with open(self.f_p, 'rb') as f:
while True:
header = np.fromfile(f, dtype='uint8', count=header_size)
if not header.size:
break
sample_size = header[0] + (header[1]<<8) + (header[2]<<16) + (header[3]<<24)
tagcode = header[5] + (header[4]<<8)
width = header[6] + (header[7]<<8)
height = header[8] + (header[9]<<8)
if header_size + width*height != sample_size:
break
image = np.fromfile(f, dtype='uint8', count=width*height).reshape((height, width))
yield image, tagcode
def resize_padding_or_crop(target_size, ori_img, padding_value=255):
if len(ori_img.shape) == 3:
res = np.zeros([ori_img.shape[0], target_size, target_size])
else:
res = np.ones([target_size, target_size])*padding_value
end_x = target_size
end_y = target_size
start_x = 0
start_y = 0
if ori_img.shape[0] < target_size:
end_x = int((target_size + ori_img.shape[0])/2)
if ori_img.shape[1] < target_size:
end_y = int((target_size + ori_img.shape[1])/2)
if ori_img.shape[0] < target_size:
start_x = int((target_size - ori_img.shape[0])/2)
if ori_img.shape[1] < target_size:
start_y = int((target_size - ori_img.shape[1])/2)
res[start_x:end_x, start_y:end_y] = ori_img
return np.array(res, dtype=np.uint8)
if __name__ == "__main__":
gnt = CASIAHWDBGNT('samples/1001-f.gnt')
full_img = np.zeros([800, 800], dtype=np.uint8)
charset = []
i = 0
for img, tagcode in gnt.get_data_iter():
cv2.imshow('rr', img)
try:
label = struct.pack('>H', tagcode).decode('gb2312')
cv2.waitKey(0)
print(label)
# img_padded = resize_padding_or_crop(80, img)
# col_idx = i%10
# row_idx = i//10
# full_img[row_idx*80:(row_idx+1)*80, col_idx*80:(col_idx+1)*80] = img_padded
# charset.append(label.replace('\x00', ''))
# if i >= 99:
# cv2.imshow('rrr', full_img)
# cv2.imwrite('sample.png', full_img)
# cv2.waitKey(0)
# print(charset)
# break
# i += 1
except Exception as e:
# print(e.with_traceback(0))
print('decode error')
continue

28
readme.md Normal file
View File

@@ -0,0 +1,28 @@
# TensorFlow 2.0 中文手写字识别
本项目实现了基于CNN的中文手写字识别并且采用标准的**tensorflow 2.0 api** 来构建!相比对简单的字母手写识别,本项目更能体现模型设计的精巧性和数据增强的熟练操作性,并且最终设计出来的模型可以直接应用于工业场合,比如 **票据识别**, **手写文本自动扫描**相比于百度api接口或者QQ接口等具有可优化性、免费性、本地性等优点。
## Data
在开始之前先介绍一下本项目所采用的数据信息。我们的数据全部来自于CASIA的开源中文手写字数据集该数据集分为两部分
- CASIA-HWDB新版本的HWDB我们仅仅使用1.0-1.2这是单字的数据集2.0-2.2是整张文本的数据集我们暂时不用单字里面包含了约7185个汉字以及171个英文字母、数字、标点符号等
- CASIA-OLHWDB老版本的HWDB格式一样包含了约7185个汉字以及171个英文字母、数字、标点符号等。
原始数据下载链接点击[这里](http://www.nlpr.ia.ac.cn/databases/handwriting/Offline_database.html).
由于原始数据过于复杂我们自己写了一个数据wrapper方便读取统一将其转换为类似于Dataframe (Pandas)的格式这样可以将一个字的特征和label方便的显示也可以十分方便的将手写字转换为图片采用CNN进行处理。这是我们展示的效果
<p align="center">
<img src="https://s2.ax1x.com/2019/05/27/VeFtZq.md.png" />
</p>
其对应的label为
```
['!', '"', '#', '$', '%', '&', '(', ')', '*', '+', ',', '-', '.', '/', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', ':', ';', '<', '=', '>', '?', '@', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', '[', '\\', ']', '^', '`', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', '{', '|', '}', '~', '、', '。', '々', '…', '', '', '“', '”']
```
## Model
关于我们采用的OCR模型的构建我们大致采用的是比较先进的MobileNetV3架构同时设计了一个修改的过的MobileNetV3Big的更深网络。主要考虑模型的轻量型和表达能力。最终训练结果表明我们的模型可以在中文手写字上达到约99.8%的准确率。

1
samples/.gitignore vendored Normal file
View File

@@ -0,0 +1 @@
1001-f.gnt

BIN
samples/001-f.gnt Normal file

Binary file not shown.

BIN
samples/sample.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 61 KiB

0
tests.py Normal file
View File