From 9d75a26c5c4dba5566cf2bed7f2e5f2fbd53791a Mon Sep 17 00:00:00 2001
From: JiageWang <1076050774@qq.com>
Date: Wed, 21 Nov 2018 09:54:09 +0800
Subject: [PATCH] first commit

---
 hwdb.py        |  60 ++++++++++++++++++++++++++
 model.py       |  57 ++++++++++++++++++++++++
 process_gnt.py |  66 ++++++++++++++++++++++++++++
 train.py       | 115 +++++++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 298 insertions(+)
 create mode 100644 hwdb.py
 create mode 100644 model.py
 create mode 100644 process_gnt.py
 create mode 100644 train.py

diff --git a/hwdb.py b/hwdb.py
new file mode 100644
index 0000000..6ea71e9
--- /dev/null
+++ b/hwdb.py
@@ -0,0 +1,60 @@
+import os
+import torch
+import torch.utils.data
+import torchvision.transforms as transforms
+import torchvision.datasets as datasets
+from PIL import Image
+import numpy as np
+import matplotlib.pyplot as plt
+
+
+class HWDB(object):
+    def __init__(self, path='./data'):
+        # preprocessing: grayscale, invert so strokes are white on black,
+        # center-crop to 64x64, convert to tensor
+        transform = transforms.Compose([
+            transforms.Grayscale(),
+            transforms.Lambda(lambda x: Image.fromarray(255 - np.array(x))),
+            transforms.CenterCrop(64),
+            transforms.ToTensor(),
+        ])
+
+        # train/test folders produced by process_gnt.py (one sub-folder per class)
+        traindir = os.path.join(path, 'train')
+        testdir = os.path.join(path, 'test')
+
+        self.trainset = datasets.ImageFolder(traindir, transform)
+        self.testset = datasets.ImageFolder(testdir, transform)
+        self.train_size = len(self.trainset)
+        self.test_size = len(self.testset)
+
+    def get_sample(self, index=0):
+        sample = self.trainset[index]
+        sample_img, sample_label = sample
+        print(sample_img.size())
+        return sample_img, sample_label
+
+    def get_loader(self, batch_size=100):
+        train_loader = torch.utils.data.DataLoader(
+            self.trainset, batch_size=batch_size, shuffle=True)
+        test_loader = torch.utils.data.DataLoader(
+            self.testset, batch_size=batch_size, shuffle=True)
+        return train_loader, test_loader
+
+
+if __name__ == '__main__':
+    dataset = HWDB()
+    for i in [1, 10, 2000, 6000, 1000]:
+        img, label = dataset.get_sample(i)
+        img = img[0]  # drop the channel dimension for plotting
+        plt.imshow(img, cmap='gray')
+        plt.show()
+
+    train_loader, test_loader = dataset.get_loader()
+    for (img, label) in train_loader:
+        print(img)
+        print(label)
diff --git a/model.py b/model.py
new file mode 100644
index 0000000..7f42e25
--- /dev/null
+++ b/model.py
@@ -0,0 +1,57 @@
+import numpy as np
+import torch.nn as nn
+from torch.nn import init
+
+
+class ConvNet(nn.Module):
+    def __init__(self, num_classes):
+        super(ConvNet, self).__init__()
+        # four conv blocks, each halving the spatial size: 64x64 -> 4x4
+        self.features = nn.Sequential(
+            nn.Conv2d(1, 64, kernel_size=3, stride=1, padding=1),
+            nn.ReLU(inplace=True),
+            nn.MaxPool2d(kernel_size=2, stride=2),
+            nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1),
+            nn.ReLU(inplace=True),
+            nn.MaxPool2d(kernel_size=2, stride=2),
+            nn.Conv2d(128, 256, kernel_size=3, stride=1, padding=1),
+            nn.ReLU(inplace=True),
+            nn.MaxPool2d(kernel_size=2, stride=2),
+            nn.Conv2d(256, 512, kernel_size=3, stride=1, padding=1),
+            nn.ReLU(inplace=True),
+            nn.MaxPool2d(kernel_size=2, stride=2),
+        )
+        self.classifier = nn.Sequential(
+            nn.Dropout(),
+            nn.Linear(512 * 4 * 4, 1024),
+            nn.ReLU(inplace=True),
+            nn.Linear(1024, num_classes),
+        )
+        self.weight_init()
+
+    def forward(self, x):
+        x = self.features(x)
+        x = x.view(x.size(0), -1)
+        x = self.classifier(x)
+        return x
+
+    def weight_init(self):
+        for layer in self.features:
+            self._layer_init(layer)
+        for layer in self.classifier:
+            self._layer_init(layer)
+
+    def _layer_init(self, m):
+        # use isinstance to tell which kind of layer m is
+        if isinstance(m, nn.Conv2d):
+            # He initialization: std = sqrt(2 / fan_out)
+            n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
+            m.weight.data.normal_(0, np.sqrt(2. / n))
+        elif isinstance(m, nn.BatchNorm2d):
+            # weight and bias are learnable parameters updated by backprop
+            m.weight.data.fill_(1)
+            m.bias.data.zero_()
+        elif isinstance(m, nn.Linear):
+            init.xavier_normal_(m.weight)
diff --git a/process_gnt.py b/process_gnt.py
new file mode 100644
index 0000000..e210779
--- /dev/null
+++ b/process_gnt.py
@@ -0,0 +1,66 @@
+import os
+import pickle
+import struct
+import numpy as np
+from PIL import Image
+
+
+data_dir = './data'
+# train_data_dir = "../data/HWDB1.1trn_gnt"
+train_data_dir = os.path.join(data_dir, 'HWDB1.1trn_gnt')
+test_data_dir = os.path.join(data_dir, 'HWDB1.1tst_gnt')
+
+
+def read_from_gnt_dir(gnt_dir=test_data_dir):
+    def one_file(f):
+        # each sample: 10-byte header (sample size, tag code, width, height)
+        # followed by width*height grayscale pixels
+        header_size = 10
+        while True:
+            header = np.fromfile(f, dtype='uint8', count=header_size)
+            if not header.size:
+                break
+            sample_size = header[0] + (header[1] << 8) + (header[2] << 16) + (header[3] << 24)
+            tagcode = header[5] + (header[4] << 8)
+            width = header[6] + (header[7] << 8)
+            height = header[8] + (header[9] << 8)
+            if header_size + width * height != sample_size:
+                break
+            image = np.fromfile(f, dtype='uint8', count=width * height).reshape((height, width))
+            yield image, tagcode
+
+    for file_name in os.listdir(gnt_dir):
+        if file_name.endswith('.gnt'):
+            file_path = os.path.join(gnt_dir, file_name)
+            with open(file_path, 'rb') as f:
+                for image, tagcode in one_file(f):
+                    yield image, tagcode
+
+
+# build the character -> class-index mapping and save it for train.py
+char_set = set()
+for _, tagcode in read_from_gnt_dir(gnt_dir=test_data_dir):
+    tagcode_unicode = struct.pack('>H', tagcode).decode('gb2312')
+    char_set.add(tagcode_unicode)
+char_list = list(char_set)
+char_dict = dict(zip(sorted(char_list), range(len(char_list))))
+print(len(char_dict))
+print("char_dict=", char_dict)
+
+with open('char_dict', 'wb') as f:
+    pickle.dump(char_dict, f)
+
+# export every training sample as a PNG under ./data/train/<class index>/
+train_counter = 0
+test_counter = 0
+for image, tagcode in read_from_gnt_dir(gnt_dir=train_data_dir):
+    tagcode_unicode = struct.pack('>H', tagcode).decode('gb2312')
+    im = Image.fromarray(image)
+    dir_name = './data/train/' + '%0.5d' % char_dict[tagcode_unicode]
+    if not os.path.exists(dir_name):
+        os.makedirs(dir_name)
+    im.convert('RGB').save(dir_name + '/' + str(train_counter) + '.png')
+    print("train_counter=", train_counter)
+    train_counter += 1
+
+# for image, tagcode in read_from_gnt_dir(gnt_dir=test_data_dir):
+#     tagcode_unicode = struct.pack('>H', tagcode).decode('gb2312')
+#     im = Image.fromarray(image)
+#     dir_name = './data/test/' + '%0.5d' % char_dict[tagcode_unicode]
+#     if not os.path.exists(dir_name):
+#         os.makedirs(dir_name)
+#     im.convert('RGB').save(dir_name + '/' + str(test_counter) + '.png')
+#     print("test_counter=", test_counter)
+#     test_counter += 1
diff --git a/train.py b/train.py
new file mode 100644
index 0000000..12cffc6
--- /dev/null
+++ b/train.py
@@ -0,0 +1,115 @@
+import pickle
+
+import torch
+import torch.nn as nn
+import torch.optim as optim
+
+from hwdb import HWDB
+from model import ConvNet
+
+
+def train(net,
+          criterion,
+          optimizer,
+          train_loader,
+          test_loader,
+          epochs=10,
+          save_path='./pretrained_models/'):
+    def adjust_learning_rate(optimizer, decay_rate=.9):
+        for param_group in optimizer.param_groups:
+            param_group['lr'] = param_group['lr'] * decay_rate
+
+    print("Start training...")
+    net.train()
+    # scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=30, gamma=0.5)
+    for epoch in range(epochs):
+        sum_loss = 0.0
+        total = 0
+        correct = 0
+        if epoch == 3:
+            # halve the learning rate once, at epoch 3
+            adjust_learning_rate(optimizer, 0.5)
+        # iterate over the training data
+        for i, (inputs, labels) in enumerate(train_loader):
+            # zero the gradients
+            optimizer.zero_grad()
+
+            # forward + backward
+            if torch.cuda.is_available():
+                inputs = inputs.to('cuda')
+                labels = labels.to('cuda')
+            else:
+                print('cuda not available')
+            outputs = net(inputs)
+            loss = criterion(outputs, labels)
+            _, predicted = torch.max(outputs.data, 1)
+            total += labels.size(0)
+            correct += (predicted == labels).sum()
+
+            loss.backward()
+            optimizer.step()
+
+            # print the average loss and accuracy every 100 batches
+            sum_loss += loss.item()
+            if i % 100 == 99:
+                loss = sum_loss / 100
+                print('epoch: %d, batch: %d loss: %.03f'
+                      % (epoch + 11, i + 1, loss), end=',')
+                acc = 100 * correct / total
+                print('acc:%d%%' % acc)
+                total = 0
+                correct = 0
+                sum_loss = 0.0
+
+        # epoch numbering continues from the loaded checkpoint (10 epochs already trained)
+        print("epoch %d finished, saving the model..." % (epoch + 11))
+        torch.save(net.state_dict(), save_path + 'handwriting_iter_%03d.pth' % (epoch + 11))
+        # evaluate on the test set every third epoch
+        if epoch % 3 == 0:
+            with torch.no_grad():
+                correct = 0
+                total = 0
+                for images, labels in test_loader:
+                    images, labels = images.to('cuda'), labels.to('cuda')
+                    outputs = net(images)
+                    # take the class with the highest score
+                    _, predicted = torch.max(outputs.data, 1)
+                    total += labels.size(0)
+                    correct += (predicted == labels).sum()
+                print('correct number: ', correct)
+                print('total number:', total)
+                acc = 100 * correct / total
+                print('test accuracy after epoch %d: %d%%' % (epoch + 11, acc))
+
+
+if __name__ == "__main__":
+    # hyperparameters
+    batch_size = 100
+
+    # load the class mapping produced by process_gnt.py
+    with open('char_dict', 'rb') as f:
+        class_dict = pickle.load(f)
+    num_classes = len(class_dict)
+
+    # load the dataset
+    dataset = HWDB()
+    print("training samples:", dataset.train_size)
+    print("test samples:", dataset.test_size)
+    train_loader, test_loader = dataset.get_loader(batch_size)
+
+    net = ConvNet(num_classes)
+    print('network structure:\n', net)
+    if torch.cuda.is_available():
+        net = net.cuda(0)
+    else:
+        print('cuda not available')
+    # resume from the checkpoint saved after epoch 10
+    net.load_state_dict(torch.load('./pretrained_models/handwriting_iter_010.pth'))
+    criterion = nn.CrossEntropyLoss()
+    # optimizer = optim.SGD(net.parameters(), lr=0.01, momentum=0.9, weight_decay=0.0005)
+    optimizer = optim.RMSprop(net.parameters(), lr=0.000005, momentum=0.9, weight_decay=0.0005)
+    train(net, criterion, optimizer, train_loader, test_loader)
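
Editor's note, not part of the patch: a minimal inference sketch showing how the saved checkpoint and the char_dict mapping produced by process_gnt.py might be used together. The checkpoint path is the one loaded in train.py; 'sample.png' is a hypothetical single-character image; preprocessing mirrors hwdb.py.

# inference_sketch.py (illustrative only, assumptions noted above)
import pickle
import numpy as np
import torch
import torchvision.transforms as transforms
from PIL import Image
from model import ConvNet

with open('char_dict', 'rb') as f:
    char_dict = pickle.load(f)                           # character -> class index
index_to_char = {v: k for k, v in char_dict.items()}     # class index -> character

transform = transforms.Compose([
    transforms.Grayscale(),
    transforms.Lambda(lambda x: Image.fromarray(255 - np.array(x))),
    transforms.CenterCrop(64),
    transforms.ToTensor(),
])

net = ConvNet(num_classes=len(char_dict))
net.load_state_dict(torch.load('./pretrained_models/handwriting_iter_010.pth',
                               map_location='cpu'))
net.eval()

img = transform(Image.open('sample.png')).unsqueeze(0)   # add a batch dimension
with torch.no_grad():
    pred = net(img).argmax(dim=1).item()
print(index_to_char[pred])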