Files
DeepHSV/dataset/generate_list_firmas.py
2019-05-06 17:43:25 +08:00

98 lines
2.9 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""
@file: generate_list_firmas.py
@time: 2018/4/17 15:03
@desc: Generate the list of data pairs
"""
import copy
import itertools
import os
import sys

import numpy as np
# Root directory of the dataset; main() lists its entries and treats each one
# as a signer id.
image_dir = '/home/deeplearning/work/Deeplearning/dataset/writingID/offline/firmas/'
# Output pair-list files written by main(): one for training pairs and one
# for the held-out ("val") pairs.
list_filename_train = '../experiments/data_list/firmas_pairs_c_train.txt'
list_filename_test = '../experiments/data_list/firmas_pairs_c_val.txt'
# Upper bounds (inclusive) of the sample indices used to build the genuine
# ("c-") and forged ("cf-") image file names for each signer.
num_genuine = 24
num_forged = 30
def combine(l, k):
    """Return every ``k``-element combination of the items in ``l``.

    Combinations are produced by position (not by value), in lexicographic
    order of the chosen indices, so ``combine([1, 2, 3], 2)`` yields
    ``[[1, 2], [1, 3], [2, 3]]``.

    Args:
        l: Iterable of items to choose from.
        k: Number of items in each combination.

    Returns:
        A list of lists, each of length ``k``. ``k == 0`` gives ``[[]]``;
        ``k`` larger than the number of items gives ``[]``.

    Raises:
        ValueError: If ``k`` is negative.
    """
    # itertools yields tuples; callers expect mutable lists.
    return [list(chosen) for chosen in itertools.combinations(l, k)]
def combine_2list(list1, list2):
    """Return every ``[a, b]`` pair with ``a`` from ``list1`` and ``b`` from ``list2``.

    Pairs are ordered with ``list1`` as the outer (slow) index and ``list2``
    as the inner (fast) index.

    Args:
        list1: Iterable supplying the first element of each pair.
        list2: Iterable supplying the second element of each pair. It may be
            a one-shot iterator; it is consumed once up front.

    Returns:
        A list of two-element lists; empty if either input is empty.
    """
    # product() snapshots both inputs first, so list2 is never re-iterated.
    return [list(pair) for pair in itertools.product(list1, list2)]
def main(argv=None):
    """Write the train and val signature-pair list files.

    Each output line has the form ``<image_a> <image_b> <label>``:

    * label ``1`` -- two genuine samples ("c-") of the same signer;
    * label ``0`` -- a genuine sample and a forged sample ("cf-") of the
      same signer;
    * label ``2`` -- genuine sample ``09`` of two different writers
      (random forgery).

    Same-signer pairs go to the training file when ``int(signer) <= 3500``
    and to the val file otherwise. Random-forgery pairs are drawn from
    writers 1..2000 crossed with writers 2001..4000, shuffled, and limited
    to ``random_forged_nums`` lines; every sixth one goes to the val file.

    Args:
        argv: Accepted for the usual ``main(argv)`` convention; not used.

    Raises:
        ValueError: If an entry of ``image_dir`` is not an integer string.
        OSError: If ``image_dir`` cannot be listed or an output file cannot
            be opened.
    """
    # The sample-index pairs are the same for every signer, so build them once.
    genuine_ids = list(range(1, num_genuine + 1))
    forged_ids = list(range(1, num_forged + 1))
    genuine_genuine_suf = combine(genuine_ids, 2)
    genuine_forged_suf = combine_2list(genuine_ids, forged_ids)

    # List the dataset before touching the outputs, so a missing image_dir
    # does not truncate existing list files.
    signers_list = os.listdir(image_dir)

    # Context managers guarantee both files are closed even if writing fails.
    with open(list_filename_train, 'w') as list_file_train, \
            open(list_filename_test, 'w') as list_file_test:
        for signer in signers_list:
            list_file = list_file_train if int(signer) <= 3500 else list_file_test

            # Genuine vs. genuine pairs of the same signer (label 1).
            for item in genuine_genuine_suf:
                genuine0 = '%s/c-%s-%02d.jpg' % (signer, signer, item[0])
                genuine1 = '%s/c-%s-%02d.jpg' % (signer, signer, item[1])
                list_file.write(genuine0 + ' ' + genuine1 + ' 1\n')

            # Genuine vs. forged pairs of the same signer (label 0).
            for item in genuine_forged_suf:
                genuine = '%s/c-%s-%02d.jpg' % (signer, signer, item[0])
                forged = '%s/cf-%s-%02d.jpg' % (signer, signer, item[1])
                list_file.write(genuine + ' ' + forged + ' 0\n')

        # Random forgery case: pair each writer with other writers (label 2).
        random_forged_nums = 2880000
        writers = np.arange(1, 4001, 1)
        writers_part1, writers_part2 = np.split(writers, 2)
        writer_pairs = combine_2list(writers_part1, writers_part2)
        np.random.shuffle(writer_pairs)

        # Counting from 1 and stopping once the count exceeds the limit
        # writes exactly random_forged_nums lines.
        for i, item in enumerate(writer_pairs, start=1):
            if i > random_forged_nums:
                break
            list_file = list_file_train if i % 6 != 0 else list_file_test
            genuine = '%03d/c-%03d-09.jpg' % (item[0], item[0])
            forged = '%03d/c-%03d-09.jpg' % (item[1], item[1])
            list_file.write(genuine + ' ' + forged + ' 2\n')
if __name__ == "__main__":
    # Run the generator only when executed as a script and hand main()'s
    # return value to the interpreter as the process exit status.
    exit_status = main()
    sys.exit(exit_status)