first commit
This commit is contained in:
60
hwdb.py
Normal file
60
hwdb.py
Normal file
@@ -0,0 +1,60 @@
|
||||
import os
|
||||
import torch
|
||||
import torch.utils.data
|
||||
import torchvision.transforms as transforms
|
||||
import torchvision.datasets as datasets
|
||||
from PIL import Image
|
||||
import numpy as np
|
||||
import matplotlib.pyplot as plt
|
||||
|
||||
class HWDB(object):
|
||||
def __init__(self, path='./data'):
|
||||
# 预处理过程
|
||||
transform = transforms.Compose([
|
||||
transforms.Grayscale(),
|
||||
transforms.Lambda(lambda x: Image.fromarray(255 - np.array(x))),
|
||||
transforms.CenterCrop(64),
|
||||
transforms.ToTensor(),
|
||||
])
|
||||
|
||||
#
|
||||
traindir = os.path.join(path, 'train')
|
||||
testdir = os.path.join(path, 'test')
|
||||
|
||||
self.trainset = datasets.ImageFolder(traindir, transform)
|
||||
self.testset = datasets.ImageFolder(testdir, transform)
|
||||
self.train_size = len(self.trainset)
|
||||
self.test_size = len(self.testset)
|
||||
|
||||
def get_sample(self, index=0):
|
||||
sample = self.trainset[index]
|
||||
sample_img, sample_label = sample
|
||||
print(sample_img.size())
|
||||
return sample_img, sample_label
|
||||
|
||||
def get_loader(self, batch_size=100):
|
||||
train_loader = torch.utils.data.DataLoader(
|
||||
self.trainset, batch_size=batch_size, shuffle=True)
|
||||
test_loader = torch.utils.data.DataLoader(
|
||||
self.testset, batch_size=batch_size, shuffle=True)
|
||||
return train_loader, test_loader
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
dataset = HWDB()
|
||||
for i in [1, 10, 2000, 6000, 1000]:
|
||||
img, label = dataset.get_sample(i)
|
||||
img = img[0]
|
||||
plt.imshow(img, cmap='gray')
|
||||
plt.show()
|
||||
|
||||
train_loader, test_loader = dataset.get_loader()
|
||||
for (img, label) in train_loader:
|
||||
print(img)
|
||||
print(label)
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
57
model.py
Normal file
57
model.py
Normal file
@@ -0,0 +1,57 @@
|
||||
import torch.nn as nn
|
||||
from torch.nn import init
|
||||
from torchvision.models.vgg import vgg16_bn
|
||||
import numpy as np
|
||||
|
||||
|
||||
|
||||
class ConvNet(nn.Module):
|
||||
def __init__(self, num_classes):
|
||||
super(ConvNet, self).__init__()
|
||||
self.features = nn.Sequential(
|
||||
nn.Conv2d(1, 64, kernel_size=3, stride=1, padding=1),
|
||||
nn.ReLU(inplace=True),
|
||||
nn.MaxPool2d(kernel_size=2, stride=2),
|
||||
nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1),
|
||||
nn.ReLU(inplace=True),
|
||||
nn.MaxPool2d(kernel_size=2, stride=2),
|
||||
nn.Conv2d(128, 256, kernel_size=3, stride=1, padding=1),
|
||||
nn.ReLU(inplace=True),
|
||||
nn.MaxPool2d(kernel_size=2, stride=2),
|
||||
nn.Conv2d(256, 512, kernel_size=3, stride=1, padding=1),
|
||||
nn.ReLU(inplace=True),
|
||||
nn.MaxPool2d(kernel_size=2, stride=2),
|
||||
)
|
||||
self.classifier = nn.Sequential(
|
||||
nn.Dropout(),
|
||||
nn.Linear(512*4*4, 1024),
|
||||
nn.ReLU(inplace=True),
|
||||
nn.Linear(1024, num_classes),
|
||||
)
|
||||
self.weight_init()
|
||||
|
||||
def forward(self, x):
|
||||
x = self.features(x)
|
||||
x = x.view(x.size(0), -1)
|
||||
x = self.classifier(x)
|
||||
return x
|
||||
|
||||
def weight_init(self):
|
||||
for layer in self.features:
|
||||
self._layer_init(layer)
|
||||
for layer in self.classifier:
|
||||
self._layer_init(layer)
|
||||
|
||||
|
||||
def _layer_init(self, m):
|
||||
# 使用isinstance来判断m属于什么类型
|
||||
if isinstance(m, nn.Conv2d):
|
||||
n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
|
||||
m.weight.data.normal_(0, np.sqrt(2. / n))
|
||||
elif isinstance(m, nn.BatchNorm2d):
|
||||
# m中的weight,bias其实都是Variable,为了能学习参数以及后向传播
|
||||
m.weight.data.fill_(1)
|
||||
m.bias.data.zero_()
|
||||
elif isinstance(m, nn.Linear):
|
||||
init.xavier_normal(m.weight)
|
||||
|
||||
66
process_gnt.py
Normal file
66
process_gnt.py
Normal file
@@ -0,0 +1,66 @@
|
||||
import os
|
||||
import numpy as np
|
||||
import struct
|
||||
from PIL import Image
|
||||
|
||||
|
||||
data_dir = './data'
|
||||
# train_data_dir = "../data/HWDB1.1trn_gnt"
|
||||
train_data_dir = os.path.join(data_dir, 'HWDB1.1trn_gnt')
|
||||
test_data_dir = os.path.join(data_dir, 'HWDB1.1tst_gnt')
|
||||
|
||||
|
||||
def read_from_gnt_dir(gnt_dir=test_data_dir):
|
||||
def one_file(f):
|
||||
header_size = 10
|
||||
while True:
|
||||
header = np.fromfile(f, dtype='uint8', count=header_size)
|
||||
if not header.size: break
|
||||
sample_size = header[0] + (header[1]<<8) + (header[2]<<16) + (header[3]<<24)
|
||||
tagcode = header[5] + (header[4]<<8)
|
||||
width = header[6] + (header[7]<<8)
|
||||
height = header[8] + (header[9]<<8)
|
||||
if header_size + width*height != sample_size:
|
||||
break
|
||||
image = np.fromfile(f, dtype='uint8', count=width*height).reshape((height, width))
|
||||
yield image, tagcode
|
||||
for file_name in os.listdir(gnt_dir):
|
||||
if file_name.endswith('.gnt'):
|
||||
file_path = os.path.join(gnt_dir, file_name)
|
||||
with open(file_path, 'rb') as f:
|
||||
for image, tagcode in one_file(f):
|
||||
yield image, tagcode
|
||||
|
||||
char_set = set()
|
||||
for _, tagcode in read_from_gnt_dir(gnt_dir=test_data_dir):
|
||||
tagcode_unicode = struct.pack('>H', tagcode).decode('gb2312')
|
||||
char_set.add(tagcode_unicode)
|
||||
char_list = list(char_set)
|
||||
char_dict = dict(zip(sorted(char_list), range(len(char_list))))
|
||||
print(len(char_dict))
|
||||
print("char_dict=",char_dict)
|
||||
|
||||
import pickle
|
||||
f = open('char_dict', 'wb')
|
||||
pickle.dump(char_dict, f)
|
||||
f.close()
|
||||
train_counter = 0
|
||||
test_counter = 0
|
||||
for image, tagcode in read_from_gnt_dir(gnt_dir=train_data_dir):
|
||||
tagcode_unicode = struct.pack('>H', tagcode).decode('gb2312')
|
||||
im = Image.fromarray(image)
|
||||
dir_name = './data/train/' + '%0.5d'%char_dict[tagcode_unicode]
|
||||
if not os.path.exists(dir_name):
|
||||
os.mkdir(dir_name)
|
||||
im.convert('RGB').save(dir_name+'/' + str(train_counter) + '.png')
|
||||
print("train_counter=",train_counter)
|
||||
train_counter += 1
|
||||
# for image, tagcode in read_from_gnt_dir(gnt_dir=test_data_dir):
|
||||
# tagcode_unicode = struct.pack('>H', tagcode).decode('gb2312')
|
||||
# im = Image.fromarray(image)
|
||||
# dir_name = './data/test/' + '%0.5d'%char_dict[tagcode_unicode]
|
||||
# if not os.path.exists(dir_name):
|
||||
# os.mkdir(dir_name)
|
||||
# im.convert('RGB').save(dir_name+'/' + str(test_counter) + '.png')
|
||||
# print("test_counter=",test_counter)
|
||||
# test_counter += 1
|
||||
115
train.py
Normal file
115
train.py
Normal file
@@ -0,0 +1,115 @@
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
import torch.optim as optim
|
||||
from torch.autograd import Variable
|
||||
import pickle
|
||||
import numpy as np
|
||||
from hwdb import HWDB
|
||||
from convnet import ConvNet
|
||||
|
||||
|
||||
def train(net,
|
||||
criterion,
|
||||
optimizer,
|
||||
train_loader,
|
||||
test_loarder,
|
||||
epoch=10,
|
||||
save_path='./pretrained_models/'):
|
||||
def adjust_learning_rate(optimizer, decay_rate=.9):
|
||||
for param_group in optimizer.param_groups:
|
||||
param_group['lr'] = param_group['lr'] * decay_rate
|
||||
print("开始训练...")
|
||||
net.train()
|
||||
#scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=30, gamma=0.5)
|
||||
for epoch in range(epoch):
|
||||
sum_loss = 0.0
|
||||
total = 0
|
||||
correct = 0
|
||||
if epoch/3 == 1:
|
||||
adjust_learning_rate(optimizer, 0.5)
|
||||
# 数据读取
|
||||
for i, (inputs, labels) in enumerate(train_loader):
|
||||
# 梯度清零
|
||||
optimizer.zero_grad()
|
||||
|
||||
# forward + backward
|
||||
if torch.cuda.is_available():
|
||||
# inputs, labels = Variable(inputs.cuda(0)), Variable(labels.cuda(0))
|
||||
inputs = inputs.to('cuda')
|
||||
labels = labels.to('cuda')
|
||||
#print(inputs.device)
|
||||
else:
|
||||
print('cuda not available')
|
||||
outputs = net(inputs)
|
||||
loss = criterion(outputs, labels)
|
||||
_, predicted = torch.max(outputs.data, 1)
|
||||
total += labels.size(0)
|
||||
correct += (predicted == labels).sum()
|
||||
|
||||
loss.backward()
|
||||
optimizer.step()
|
||||
|
||||
#print(loss.item())
|
||||
# 每训练100个batch打印一次平均loss与acc
|
||||
sum_loss += loss.item()
|
||||
# if i % 100 == 99:
|
||||
if i % 100 == 99:
|
||||
loss = sum_loss/100
|
||||
print('epoch: %d, batch: %d loss: %.03f'
|
||||
% (epoch + 11, i + 1, loss), end=',')
|
||||
# 每跑完一次epoch测试一下准确率
|
||||
acc = 100 * correct / total
|
||||
print('acc:%d%%' % (acc))
|
||||
total = 0
|
||||
correct = 0
|
||||
sum_loss = 0.0
|
||||
|
||||
print("epoch%d 训练结束, 正在保存模型..."%(epoch+11))
|
||||
torch.save(net.state_dict(), save_path+'handwriting_iter_%03d.pth' % (epoch + 11))
|
||||
if epoch%3 == 0:
|
||||
with torch.no_grad():
|
||||
correct = 0
|
||||
total = 0
|
||||
for images, labels in test_loader:
|
||||
images, labels = images.to('cuda'), labels.to('cuda')
|
||||
outputs = net(images)
|
||||
# 取得分最高的那个类
|
||||
_, predicted = torch.max(outputs.data, 1)
|
||||
total += labels.size(0)
|
||||
correct += (predicted == labels).sum()
|
||||
print('correct number: ',correct)
|
||||
print('totol number:', total)
|
||||
acc = 100 * correct / total
|
||||
print('第%d个epoch的识别准确率为:%d%%' % (epoch+11, acc))
|
||||
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
# 超参数
|
||||
batch_size = 100
|
||||
|
||||
# 读取分类类别
|
||||
f = open('char_dict', 'rb')
|
||||
class_dict = pickle.load(f)
|
||||
num_classes = len(class_dict)
|
||||
|
||||
# 读取数据
|
||||
dataset = HWDB()
|
||||
print("训练集数据:", dataset.train_size)
|
||||
print("测试集数据:", dataset.test_size)
|
||||
train_loader, test_loader = dataset.get_loader(batch_size)
|
||||
|
||||
|
||||
net = ConvNet(num_classes)
|
||||
print('网络结构:\n', net)
|
||||
if torch.cuda.is_available():
|
||||
net = net.cuda(0)
|
||||
else:
|
||||
print('cuda not available')
|
||||
net.load_state_dict(torch.load('./pretrained_models/handwriting_iter_010.pth'))
|
||||
criterion = nn.CrossEntropyLoss()
|
||||
#optimizer = optim.SGD(net.parameters(), lr=0.01, momentum=0.9, weight_decay=0.0005)
|
||||
optimizer = optim.RMSprop(net.parameters(), lr=0.000005, momentum=0.9, weight_decay=0.0005)
|
||||
train(net, criterion, optimizer, train_loader, test_loader)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user