Initial commit
This commit is contained in:
104
dataset/dataset_paris.py
Normal file
104
dataset/dataset_paris.py
Normal file
@@ -0,0 +1,104 @@
|
||||
"""
|
||||
@file: dataset_paris.py
|
||||
@time: 2018/7/31 15:03
|
||||
@desc:Create the input data pipeline using `tf.data`
|
||||
|
||||
"""
|
||||
|
||||
import numpy as np
|
||||
import tensorflow as tf
|
||||
|
||||
image_width = None
|
||||
image_height = None
|
||||
images_dir = None
|
||||
channels = 1
|
||||
|
||||
|
||||
def _read_image(filename, is_augment):
    """Read one PNG from `images_dir`, optionally flip it, and resize it.

    Args:
        filename: scalar string tensor; path relative to the module-level
            `images_dir`. Joined with `tf.string_join` (no separator is
            inserted, so `images_dir` must end with '/').
        is_augment: scalar int32 tensor; when equal to 1 the image is
            flipped left-to-right.

    Returns:
        The decoded, optionally flipped, resized image tensor.
    """
    image_string = tf.read_file(tf.string_join([images_dir, filename]))
    image_decoded = tf.image.decode_png(image_string, channels=channels)

    # Graph-mode conditional: only flip when is_augment == 1.
    true_constant = tf.constant(1, dtype=tf.int32, name="true_constant")
    image_decoded = tf.cond(tf.equal(true_constant, is_augment),
                            lambda: tf.image.flip_left_right(image_decoded),
                            lambda: image_decoded)
    # NOTE(review): tf.image.resize_images expects size as [new_height,
    # new_width]; passing [image_width, image_height] swaps the two unless
    # that is intentional — confirm against params.json (width=220, height=155).
    image_resized = tf.image.resize_images(image_decoded, [image_width, image_height])
    return image_resized
|
||||
|
||||
|
||||
def _parse_function(item):
    """Map one dataset record to an (image, label) pair.

    Args:
        item: string tensor of 4 elements:
            [file0, file1, label, is_augment_flag].

    Returns:
        Tuple of (image, label) where image is the two signature images
        concatenated along the channel axis.
    """
    is_aug = tf.string_to_number(item[3], out_type=tf.int32)
    image0 = _read_image(item[0], is_aug)
    image1 = _read_image(item[1], is_aug)

    # Stack the two signatures along the channel dimension (axis 2).
    image = tf.concat([image0, image1], 2)

    return image, tf.string_to_number(item[2])
|
||||
|
||||
|
||||
def _input_fn(params, is_training, is_augment=False, pos_repeating=1, only_label=None):
    """Build the `tf.data` input pipeline from a pair-list file.

    Each line of the list file reads "file0 file1 label". Labels:
    1 = genuine/genuine pair, 0 = genuine/forged pair, 2 = random-forgery
    pair (always skipped by this pipeline).

    Args:
        params: hyperparameters (list-file paths, sizes, batch size, ...).
        is_training: selects the train or validation list file and enables
            the positive-expansion / negative-subsampling logic.
        is_augment: when True (training only), every emitted pair is added a
            second time with the flip-augmentation flag set to 1.
        pos_repeating: factor by which positive pairs are repeated; the same
            number of negatives per signer is kept so classes stay balanced.
        only_label: when set, keep only pairs carrying exactly this label.

    Returns:
        A mapped, shuffled, batched, prefetched `tf.data.Dataset` yielding
        (image, label) batches of size batch_size * num_gpus.
    """
    listfile_path = params.signature_train_list if is_training else params.signature_val_list
    data = []
    shuffle_neg = []
    size_per_signer = params.positive_size + params.negative_size
    # `with` guarantees the list file is closed even if parsing raises.
    with open(listfile_path) as file:
        for i, line in enumerate(file):
            items = line.split(' ')
            file0 = items[0]
            file1 = items[1]
            label = int(items[2])
            # Random-forgery pairs (label 2) are never used here.
            if (only_label is not None and label != only_label) or label == 2:
                continue

            repeating = 1
            if is_training and pos_repeating > 0 and i % size_per_signer == 0:
                # The number of positive/negative pairs per signer is
                # unbalanced (e.g. 276/996), so positives are expanded and a
                # random subset of positive_size * pos_repeating negatives is
                # re-drawn once at the start of each signer's span.
                shuffle_neg = np.arange(params.positive_size, size_per_signer)
                np.random.shuffle(shuffle_neg)
                shuffle_neg = shuffle_neg[:params.positive_size * pos_repeating]
            if is_training and pos_repeating > 0:
                # (The original `label == 2` branch here was unreachable —
                # label 2 is filtered out by the `continue` above — and its
                # `repeating` value was immediately overwritten; removed.)
                if label == 0:
                    # Keep only the sampled subset of negative pairs.
                    repeating = 1 if i % size_per_signer in shuffle_neg else 0
                elif label == 1:
                    # Expand positive pairs.
                    repeating = pos_repeating

            for _ in range(repeating):
                # Record layout: (file0, file1, label, is_augment_flag).
                data.append((file0, file1, label, 0))
                if is_augment and is_training:
                    data.append((file0, file1, label, 1))
                # data.append((file1, file0, label))
    np.random.shuffle(data)
    print("examples of data: -> %d" % len(data))

    dataset = tf.data.Dataset.from_tensor_slices(np.array(data))
    dataset = dataset.map(_parse_function, num_parallel_calls=params.num_parallel_calls)
    dataset = dataset.shuffle(10000)
    dataset = dataset.repeat(params.num_epochs)
    # Drop the final partial batch so every batch splits evenly across GPUs.
    dataset = dataset.apply(tf.contrib.data.batch_and_drop_remainder(
        params.batch_size * params.num_gpus))
    dataset = dataset.prefetch(10)
    return dataset
|
||||
|
||||
|
||||
def input_fn(params, is_training, repeating=1, is_augment=False, only_label=None):
    """Public entry point: configure the module-level image settings from
    `params`, then build the dataset via `_input_fn`.

    NOTE(review): image size, directory, and channel count are passed to
    `_read_image` through module globals rather than arguments, so calling
    this twice with different params mutates shared state.
    """
    global image_width, image_height, images_dir, channels
    image_width = params.image_width
    image_height = params.image_height
    images_dir = params.images_dir
    channels = params.channels
    return _input_fn(params, is_training, pos_repeating=repeating, is_augment=is_augment, only_label=only_label)
|
||||
|
||||
111
dataset/generate_list_bhsig260.py
Normal file
111
dataset/generate_list_bhsig260.py
Normal file
@@ -0,0 +1,111 @@
|
||||
"""
|
||||
@file: dataset_bhsig260.py
|
||||
@time: 2018/6/20 15:03
|
||||
@desc: Create the pairs list of the BHSig260 database
|
||||
|
||||
"""
|
||||
|
||||
import copy
|
||||
import os
|
||||
import sys
|
||||
|
||||
import imageio
|
||||
import numpy as np
|
||||
|
||||
num_genuine = 24
|
||||
num_forged = 30
|
||||
|
||||
|
||||
# 生成数组l的全部组合(长度k)
|
||||
def combine(l, k):
    """Return all k-length combinations of l, as lists, in index order."""
    results = []

    def build(prefix, start):
        # prefix holds the combination built so far; start is the first
        # index of l still eligible for the next element.
        if len(prefix) == k:
            results.append(list(prefix))
            return
        for idx in range(start, len(l)):
            prefix.append(l[idx])
            build(prefix, idx + 1)
            prefix.pop()

    build([], 0)
    return results
|
||||
|
||||
|
||||
# 生成两个数组间的全部组合
|
||||
def combine_2list(list1, list2):
    """Return the Cartesian product of list1 and list2 as [a, b] pairs."""
    return [[a, b] for a in list1 for b in list2]
|
||||
|
||||
|
||||
def generate_list(data_dir, train_size, filename_pre, listfile_name):
    """Write train/val pair-list files for one BHSig260 language directory.

    Each signer directory contributes every genuine/genuine combination
    (label 1) and every genuine/forged combination (label 0). A random
    subset of `train_size` signers goes to the train list; all remaining
    signers go to the validation list.

    Args:
        data_dir: directory whose sub-directories are numeric signer ids.
        train_size: number of signers assigned to the training split.
        filename_pre: filename prefix inside each signer dir, e.g. 'B-S'.
        listfile_name: output path prefix; '_train.txt' and '_val.txt' are
            appended.
    """
    root_dir = os.path.basename(data_dir)
    signers_list = os.listdir(data_dir)

    # Randomly pick which signers form the training split.
    train_indexs = np.arange(0, len(signers_list), 1)
    np.random.shuffle(train_indexs)
    train_indexs = train_indexs[:train_size]

    # Context managers guarantee both list files are closed on any exit path.
    with open(listfile_name + '_train.txt', 'w') as list_file_train, \
            open(listfile_name + '_val.txt', 'w') as list_file_test:
        for i, signer in enumerate(signers_list):
            list_file = list_file_train if i in train_indexs else list_file_test

            # Genuine vs genuine pairs -> label 1.
            genuine_genuine_suf = combine(list(range(1, num_genuine + 1)), 2)
            for item in genuine_genuine_suf:
                genuine0 = "%s/%s/%s-%d-G-%02d%s" % (root_dir, signer, filename_pre, int(signer), item[0], '.jpg')
                genuine1 = "%s/%s/%s-%d-G-%02d%s" % (root_dir, signer, filename_pre, int(signer), item[1], '.jpg')
                line = genuine0 + ' ' + genuine1 + ' 1\n'
                list_file.write(line)

            # Genuine vs skilled-forgery pairs -> label 0.
            genuine_forged_suf = combine_2list(list(range(1, num_genuine + 1)), list(range(1, num_forged + 1)))
            for item in genuine_forged_suf:
                genuine = "%s/%s/%s-%d-G-%02d%s" % (root_dir, signer, filename_pre, int(signer), item[0], '.jpg')
                forged = "%s/%s/%s-%d-F-%02d%s" % (root_dir, signer, filename_pre, int(signer), item[1], '.jpg')
                line = genuine + ' ' + forged + ' 0\n'
                list_file.write(line)
|
||||
|
||||
|
||||
def rename(dir_path):
    """Strip leading zeros from the '-S-NN' index of every .jpg filename
    under dir_path (e.g. 'B-S-001.jpg' -> 'B-S-1.jpg')."""
    for parent, _dirs, filenames in os.walk(dir_path):
        jpg_names = [name for name in filenames if name.endswith('.jpg')]
        for old_name in jpg_names:
            # Two passes handle one or two leading zeros after '-S-'.
            fixed = old_name.replace('-S-00', '-S-').replace('-S-0', '-S-')
            os.rename(os.path.join(parent, old_name), os.path.join(parent, fixed))
|
||||
|
||||
|
||||
def tif_to_jpg(tif_dir, jpg_dir):
    """Mirror every .tif under tif_dir into jpg_dir as a .jpg image,
    preserving the directory layout."""
    for src_root, _dirs, names in os.walk(tif_dir):
        dst_root = src_root.replace(tif_dir, jpg_dir)
        if not os.path.exists(dst_root):
            os.mkdir(dst_root)
        for tif_name in (n for n in names if n.endswith('.tif')):
            pixels = imageio.imread(os.path.join(src_root, tif_name))
            out_name = tif_name.replace('.tif', '.jpg')
            imageio.imwrite(os.path.join(dst_root, out_name), pixels)
|
||||
|
||||
|
||||
def main(argv=None):
    """Entry point: normalize BHSig260 filenames in place.

    The generate_list calls are kept commented out; per the code they were
    run separately to produce the pair lists under experiments/data_list.
    """
    if argv is None:
        argv = sys.argv

    # NOTE(review): hard-coded absolute path — only valid on the original
    # author's machine.
    rename('/home/deeplearning/work/Deeplearning/dataset/writingID/offline/BHSig260_jpgs/')
    # generate_list('/home/deeplearning/work/Deeplearning/dataset/writingID/offline/BHSig260_jpgs/Hindi', 100, 'H-S',
    #               '../experiments/data_list/bhsig260_Hindi')
    # generate_list('/home/deeplearning/work/Deeplearning/dataset/writingID/offline/BHSig260_jpgs/Bengali', 50,
    #               'B-S',
    #               '../experiments/data_list/bhsig260_Bengali')
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
sys.exit(main())
|
||||
96
dataset/generate_list_cedar.py
Normal file
96
dataset/generate_list_cedar.py
Normal file
@@ -0,0 +1,96 @@
|
||||
"""
|
||||
@file: generate_list_cedar.py
|
||||
@time: 2018/6/20 15:03
|
||||
@desc: Create the pairs list of the CEDAR database
|
||||
|
||||
"""
|
||||
|
||||
import copy
|
||||
import os
|
||||
import sys
|
||||
|
||||
import imageio
|
||||
import numpy as np
|
||||
|
||||
num_genuine = 24
|
||||
num_forged = 24
|
||||
|
||||
|
||||
# 生成数组l的全部组合(长度k)
|
||||
def combine(l, k):
    """Return all k-length combinations of l, as lists, in index order."""
    results = []

    def build(prefix, start):
        # prefix holds the combination built so far; start is the first
        # index of l still eligible for the next element.
        if len(prefix) == k:
            results.append(list(prefix))
            return
        for idx in range(start, len(l)):
            prefix.append(l[idx])
            build(prefix, idx + 1)
            prefix.pop()

    build([], 0)
    return results
|
||||
|
||||
|
||||
# 生成两个数组间的全部组合
|
||||
def combine_2list(list1, list2):
    """Return the Cartesian product of list1 and list2 as [a, b] pairs."""
    return [[a, b] for a in list1 for b in list2]
|
||||
|
||||
|
||||
def generate_list(train_size, listfile_name):
    """Write train/val pair-list files for the CEDAR database (55 signers).

    Each signer contributes every genuine/genuine combination (label 1) and
    every genuine/forged combination (label 0). A random subset of
    `train_size` signers goes to the train list; the remaining signers go
    to the validation list.

    Args:
        train_size: number of signers assigned to the training split.
        listfile_name: output path prefix; '_train.txt' and '_val.txt' are
            appended.
    """
    signers_list = list(range(1, 56))

    # Randomly pick which signers form the training split.
    train_indexs = np.arange(0, len(signers_list), 1)
    np.random.shuffle(train_indexs)
    train_indexs = train_indexs[:train_size]

    # Context managers guarantee both list files are closed on any exit path.
    with open(listfile_name + '_train.txt', 'w') as list_file_train, \
            open(listfile_name + '_val.txt', 'w') as list_file_test:
        for i, signer in enumerate(signers_list):
            list_file = list_file_train if i in train_indexs else list_file_test

            # Genuine vs genuine pairs -> label 1.
            genuine_genuine_suf = combine(list(range(1, num_genuine + 1)), 2)
            for item in genuine_genuine_suf:
                genuine0 = "%s%d_%d%s" % ('full_org/original_', int(signer), item[0], '.png')
                genuine1 = "%s%d_%d%s" % ('full_org/original_', int(signer), item[1], '.png')
                line = genuine0 + ' ' + genuine1 + ' 1\n'
                list_file.write(line)

            # Genuine vs skilled-forgery pairs -> label 0.
            genuine_forged_suf = combine_2list(list(range(1, num_genuine + 1)), list(range(1, num_forged + 1)))
            for item in genuine_forged_suf:
                genuine = "%s%d_%d%s" % ('full_org/original_', int(signer), item[0], '.png')
                forged = "%s%d_%d%s" % ('full_forg/forgeries_', int(signer), item[1], '.png')
                line = genuine + ' ' + forged + ' 0\n'
                list_file.write(line)
|
||||
|
||||
|
||||
def tif_to_jpg(tif_dir, jpg_dir):
    """Mirror every .tif under tif_dir into jpg_dir as a .jpg image,
    preserving the directory layout."""
    for src_root, _dirs, names in os.walk(tif_dir):
        dst_root = src_root.replace(tif_dir, jpg_dir)
        if not os.path.exists(dst_root):
            os.mkdir(dst_root)
        for tif_name in (n for n in names if n.endswith('.tif')):
            pixels = imageio.imread(os.path.join(src_root, tif_name))
            out_name = tif_name.replace('.tif', '.jpg')
            imageio.imwrite(os.path.join(dst_root, out_name), pixels)
|
||||
|
||||
|
||||
def main(argv=None):
    """Entry point: generate the CEDAR pair lists with 50 training signers."""
    if argv is None:
        argv = sys.argv

    # NOTE(review): relative output path — must be run from the dataset/ dir
    # with ../experiments/data_list already existing.
    generate_list(50, '../experiments/data_list/cedar')
    # generate_list(100, '../experiments/data_list/bhsig260_Hindi')
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
sys.exit(main())
|
||||
97
dataset/generate_list_firmas.py
Normal file
97
dataset/generate_list_firmas.py
Normal file
@@ -0,0 +1,97 @@
|
||||
"""
|
||||
@file: model.py
|
||||
@time: 2018/4/17 15:03
|
||||
@desc: Generate the list of data pairs
|
||||
|
||||
"""
|
||||
|
||||
import copy
|
||||
import os
|
||||
import sys
|
||||
|
||||
import numpy as np
|
||||
|
||||
image_dir = '/home/deeplearning/work/Deeplearning/dataset/writingID/offline/firmas/'
|
||||
list_filename_train = '../experiments/data_list/firmas_pairs_c_train.txt'
|
||||
list_filename_test = '../experiments/data_list/firmas_pairs_c_val.txt'
|
||||
num_genuine = 24
|
||||
num_forged = 30
|
||||
|
||||
|
||||
# 生成数组l的全部组合(长度k)
|
||||
def combine(l, k):
    """Return all k-length combinations of l, as lists, in index order."""
    results = []

    def build(prefix, start):
        # prefix holds the combination built so far; start is the first
        # index of l still eligible for the next element.
        if len(prefix) == k:
            results.append(list(prefix))
            return
        for idx in range(start, len(l)):
            prefix.append(l[idx])
            build(prefix, idx + 1)
            prefix.pop()

    build([], 0)
    return results
|
||||
|
||||
|
||||
# 生成两个数组间的全部组合
|
||||
def combine_2list(list1, list2):
    """Return the Cartesian product of list1 and list2 as [a, b] pairs."""
    return [[a, b] for a in list1 for b in list2]
|
||||
|
||||
|
||||
def main(argv=None):
    """Generate the firmas train/val pair-list files.

    Pair labels: 1 = genuine/genuine, 0 = genuine/skilled-forgery ('cf-'
    files), 2 = random forgery (genuine signatures of two different
    writers).
    """
    if argv is None:
        argv = sys.argv

    signers_list = os.listdir(image_dir)
    list_file_train = open(list_filename_train, 'w')
    list_file_test = open(list_filename_test, 'w')

    for signer in signers_list:
        # Fixed split: writers with id <= 3500 train, the rest validation.
        list_file = list_file_train if int(signer) <= 3500 else list_file_test
        # Genuine vs genuine pairs -> label 1.
        genuine_genuine_suf = combine(list(range(1, num_genuine + 1)), 2)
        for item in genuine_genuine_suf:
            genuine0 = signer + '/c-' + signer + "-%02d" % (item[0]) + '.jpg'
            genuine1 = signer + '/c-' + signer + "-%02d" % (item[1]) + '.jpg'
            line = genuine0 + ' ' + genuine1 + ' 1\n'
            list_file.write(line)

        # Genuine vs skilled-forgery ('cf-') pairs -> label 0.
        genuine_forged_suf = combine_2list(list(range(1, num_genuine + 1)), list(range(1, num_forged + 1)))
        for item in genuine_forged_suf:
            genuine = signer + '/c-' + signer + "-%02d" % (item[0]) + '.jpg'
            forged = signer + '/cf-' + signer + "-%02d" % (item[1]) + '.jpg'
            line = genuine + ' ' + forged + ' 0\n'
            list_file.write(line)

    # Random-forgery case: pair each writer with writers of the other half
    # (translated from the original Chinese comment).
    random_forged_nums = 2880000
    # random_forged_val_nums = 2880000 * 0.15

    writers = np.arange(1, 4001, 1)
    writers = np.split(writers, 2)
    writers_part1 = writers[0]
    writers_part2 = writers[1]
    # NOTE(review): this materializes all 2000 x 2000 = 4M cross-half pairs
    # in memory before sampling.
    genuine_forged_suf = combine_2list(writers_part1, writers_part2)
    np.random.shuffle(genuine_forged_suf)
    i = 0
    for item in genuine_forged_suf:
        # NOTE(review): check-then-increment lets random_forged_nums + 1
        # pairs through — presumably off by one; confirm intent.
        if i > random_forged_nums:
            break
        i += 1
        # Every 6th pair goes to the validation list (~17% split).
        list_file = list_file_train if i % 6 != 0 else list_file_test
        # Only sample index 09 of each writer is used for random pairs.
        genuine = '%03d' % item[0] + '/c-' + '%03d' % item[0] + "-09" + '.jpg'
        forged = '%03d' % item[1] + '/c-' + '%03d' % item[1] + "-09" + '.jpg'
        line = genuine + ' ' + forged + ' 2\n'
        list_file.write(line)

    list_file_train.close()
    list_file_test.close()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
sys.exit(main())
|
||||
27
dataset/params.json
Normal file
27
dataset/params.json
Normal file
@@ -0,0 +1,27 @@
|
||||
{
|
||||
"model": "Inception_2logits",
|
||||
"signature_train_list": "./experiments/data_list/firmas_pairs_c_train.txt",
|
||||
"signature_val_list": "./experiments/data_list/firmas_pairs_c_val.txt",
|
||||
"images_dir": "/home/deeplearning/work/Deeplearning/dataset/writingID/offline/firmas_binarized/",
|
||||
"is_augment": false,
|
||||
"learning_rate": 1e-5,
|
||||
"batch_size": 32,
|
||||
"num_epochs": 1,
|
||||
"use_batch_norm": true,
|
||||
"bn_momentum": 0.9,
|
||||
"margin": 5,
|
||||
"embedding_size": 64,
|
||||
"keep_prob": 0.4,
|
||||
"squared": false,
|
||||
"image_width": 220,
|
||||
"image_height": 155,
|
||||
"positive_size": 276,
|
||||
"negative_size": 720,
|
||||
"channels": 1,
|
||||
"num_parallel_calls": 4,
|
||||
"save_summary_steps": 100,
|
||||
"save_checkpoints_steps": 1000,
|
||||
"num_gpus": 3,
|
||||
"keep_checkpoint_max": 25,
|
||||
"eval_steps": 10
|
||||
}
|
||||
45
dataset/preprosess_images.py
Normal file
45
dataset/preprosess_images.py
Normal file
@@ -0,0 +1,45 @@
|
||||
# encoding: utf-8
|
||||
|
||||
"""
|
||||
@author: lichuang
|
||||
@license: (C) Copyright 2010, CFCA
|
||||
@file: preprosess_images.py
|
||||
@time: 2018/5/8 18:
|
||||
@desc: regularize images, binaries, turn into black background
|
||||
|
||||
"""
|
||||
import os
|
||||
import sys
|
||||
|
||||
import imageio
|
||||
import numpy as np
|
||||
|
||||
dir_to_process = '/home/deeplearning/work/Deeplearning/dataset/writingID/offline/firmas/'
|
||||
dir_processed = '/home/deeplearning/work/Deeplearning/dataset/writingID/offline/firmas_binarized/'
|
||||
|
||||
|
||||
def _normalize_images(images_dir, processed_dir, reverse):
    """Binarize every .jpg under images_dir into processed_dir.

    Pixels below 230 become 0 and pixels >= 230 become 255; with
    reverse=True the result is inverted so the background becomes black.
    The one-level signer-folder structure is mirrored into processed_dir.
    """
    for root, dirs, files in os.walk(images_dir):
        for name in files:
            new_path = os.path.join(processed_dir, os.path.split(root)[-1])
            if not os.path.exists(new_path):
                os.mkdir(new_path)
            if name.lower().endswith('.jpg'):
                image = imageio.imread(os.path.join(root, name))
                # Hard threshold at 230 — assumes light-background scans.
                image[np.where(image < 230)] = 0
                image[np.where(image >= 230)] = 255
                if reverse:
                    # Invert so the signature is white on black.
                    image = 255 - image
                imageio.imwrite(os.path.join(new_path, name), image)
    print('all images processed!')
|
||||
|
||||
|
||||
def main(argv=None):
    """Entry point: binarize the firmas images without inversion."""
    if argv is None:
        argv = sys.argv
    _normalize_images(dir_to_process, dir_processed, False)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
sys.exit(main())
|
||||
Reference in New Issue
Block a user