add

2019-05-28 00:06:06 +08:00
commit aa1c91780a
6 changed files with 116 additions and 0 deletions
--- a/dataset/casia_hwdb.py
+++ b/dataset/casia_hwdb.py
@@ -0,0 +1,87 @@
 """
 this is a wrapper handle CASIA_HWDB dataset
 since original data is complicated
 we using this class to get .png and label from raw
 .gnt data
 """
 import struct
 import numpy as np
 import cv2
 class CASIAHWDBGNT(object):
    """
    A .gnt file may contains many images and charactors
    """
    def __init__(self, f_p):
        self.f_p = f_p
    def get_data_iter(self):
        header_size = 10
        with open(self.f_p, 'rb') as f:
            while True:
                header = np.fromfile(f, dtype='uint8', count=header_size)
                if not header.size: 
                    break
                sample_size = header[0] + (header[1]<<8) + (header[2]<<16) + (header[3]<<24)
                tagcode = header[5] + (header[4]<<8)
                width = header[6] + (header[7]<<8)
                height = header[8] + (header[9]<<8)
                if header_size + width*height != sample_size:
                    break
                image = np.fromfile(f, dtype='uint8', count=width*height).reshape((height, width))
                yield image, tagcode
 def resize_padding_or_crop(target_size, ori_img, padding_value=255):
    if len(ori_img.shape) == 3:
        res = np.zeros([ori_img.shape[0], target_size, target_size])
    else:
        res = np.ones([target_size, target_size])*padding_value
        end_x = target_size
        end_y = target_size
        start_x = 0
        start_y = 0
        if ori_img.shape[0] < target_size:
            end_x = int((target_size + ori_img.shape[0])/2)
        if ori_img.shape[1] < target_size:
            end_y = int((target_size + ori_img.shape[1])/2)
        if ori_img.shape[0] < target_size:
            start_x = int((target_size - ori_img.shape[0])/2)
        if ori_img.shape[1] < target_size:
            start_y = int((target_size - ori_img.shape[1])/2)
        res[start_x:end_x, start_y:end_y] = ori_img
        return np.array(res, dtype=np.uint8)
 if __name__ == "__main__":
    gnt = CASIAHWDBGNT('samples/1001-f.gnt')
    full_img = np.zeros([800, 800], dtype=np.uint8)
    charset = []
    i = 0
    for img, tagcode in gnt.get_data_iter():
        cv2.imshow('rr', img)
        try:
            label = struct.pack('>H', tagcode).decode('gb2312')
            cv2.waitKey(0)
            print(label)
            # img_padded = resize_padding_or_crop(80, img)
            # col_idx = i%10
            # row_idx = i//10
            # full_img[row_idx*80:(row_idx+1)*80, col_idx*80:(col_idx+1)*80] = img_padded
            # charset.append(label.replace('\x00', ''))
            # if i >= 99:
            #     cv2.imshow('rrr', full_img)
            #     cv2.imwrite('sample.png', full_img)
            #     cv2.waitKey(0)
            #     print(charset)
            #     break
            # i += 1
        except Exception as e:
            # print(e.with_traceback(0))
            print('decode error')
            continue
--- a/readme.md
+++ b/readme.md
@@ -0,0 +1,28 @@
 # TensorFlow 2.0 中文手写字识别
 本项目实现了基于CNN的中文手写字识别，并且采用标准的**tensorflow 2.0 api** 来构建！相比对简单的字母手写识别，本项目更能体现模型设计的精巧性和数据增强的熟练操作性，并且最终设计出来的模型可以直接应用于工业场合，比如 **票据识别**, **手写文本自动扫描** 等，相比于百度api接口或者QQ接口等，具有可优化性、免费性、本地性等优点。
 ## Data
 在开始之前，先介绍一下本项目所采用的数据信息。我们的数据全部来自于CASIA的开源中文手写字数据集，该数据集分为两部分：
 - CASIA-HWDB：新版本的HWDB，我们仅仅使用1.0-1.2，这是单字的数据集，2.0-2.2是整张文本的数据集，我们暂时不用，单字里面包含了约7185个汉字以及171个英文字母、数字、标点符号等；
 - CASIA-OLHWDB：老版本的HWDB，格式一样，包含了约7185个汉字以及171个英文字母、数字、标点符号等。
 原始数据下载链接点击[这里](http://www.nlpr.ia.ac.cn/databases/handwriting/Offline_database.html).
 由于原始数据过于复杂，我们自己写了一个数据wrapper方便读取，统一将其转换为类似于Dataframe (Pandas)的格式，这样可以将一个字的特征和label方便的显示，也可以十分方便的将手写字转换为图片，采用CNN进行处理。这是我们展示的效果：
 <p align="center">
 <img src="https://s2.ax1x.com/2019/05/27/VeFtZq.md.png" />
 </p>
 其对应的label为：
 ```
 ['!', '"', '#', '$', '%', '&', '(', ')', '*', '+', ',', '-', '.', '/', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', ':', ';', '<', '=', '>', '?', '@', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', '[', '\\', ']', '^', '`', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', '{', '|', '}', '~', '、', '。', '々', '…', '‘', '’', '“', '”']
 ```
 ## Model
 关于我们采用的OCR模型的构建，我们大致采用的是比较先进的MobileNetV3架构，同时设计了一个修改的过的MobileNetV3Big的更深网络。主要考虑模型的轻量型和表达能力。最终训练结果表明，我们的模型可以在中文手写字上达到约99.8%的准确率。
--- a/samples/.gitignore
+++ b/samples/.gitignore
@@ -0,0 +1 @@
 1001-f.gnt
--- a/samples/001-f.gnt
+++ b/samples/001-f.gnt
--- a/samples/sample.png
+++ b/samples/sample.png
--- a/tests.py
+++ b/tests.py