data ready
This commit is contained in:
BIN
._.DS_Store
Executable file
BIN
._.DS_Store
Executable file
Binary file not shown.
1
.gitignore
vendored
Executable file
1
.gitignore
vendored
Executable file
@@ -0,0 +1 @@
|
||||
.vscode/
|
||||
4
.idea/misc.xml
generated
Executable file
4
.idea/misc.xml
generated
Executable file
@@ -0,0 +1,4 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project version="4">
|
||||
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.6" project-jdk-type="Python SDK" />
|
||||
</project>
|
||||
8
.idea/modules.xml
generated
Executable file
8
.idea/modules.xml
generated
Executable file
@@ -0,0 +1,8 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project version="4">
|
||||
<component name="ProjectModuleManager">
|
||||
<modules>
|
||||
<module fileurl="file://$PROJECT_DIR$/.idea/ocrcn_tf2.iml" filepath="$PROJECT_DIR$/.idea/ocrcn_tf2.iml" />
|
||||
</modules>
|
||||
</component>
|
||||
</project>
|
||||
11
.idea/ocrcn_tf2.iml
generated
Executable file
11
.idea/ocrcn_tf2.iml
generated
Executable file
@@ -0,0 +1,11 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<module type="PYTHON_MODULE" version="4">
|
||||
<component name="NewModuleRootManager">
|
||||
<content url="file://$MODULE_DIR$" />
|
||||
<orderEntry type="inheritedJdk" />
|
||||
<orderEntry type="sourceFolder" forTests="false" />
|
||||
</component>
|
||||
<component name="TestRunnerService">
|
||||
<option name="PROJECT_TEST_RUNNER" value="Unittests" />
|
||||
</component>
|
||||
</module>
|
||||
6
.idea/vcs.xml
generated
Executable file
6
.idea/vcs.xml
generated
Executable file
@@ -0,0 +1,6 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project version="4">
|
||||
<component name="VcsDirectoryMappings">
|
||||
<mapping directory="$PROJECT_DIR$" vcs="Git" />
|
||||
</component>
|
||||
</project>
|
||||
251
.idea/workspace.xml
generated
Executable file
251
.idea/workspace.xml
generated
Executable file
@@ -0,0 +1,251 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project version="4">
|
||||
<component name="ChangeListManager">
|
||||
<list default="true" id="c86d3061-c2c8-42bb-882f-53f6373c7f88" name="Default" comment="">
|
||||
<change beforePath="$PROJECT_DIR$/dataset/.gitignore" afterPath="$PROJECT_DIR$/dataset/.gitignore" />
|
||||
<change beforePath="$PROJECT_DIR$/dataset/casia_hwdb.py" afterPath="$PROJECT_DIR$/dataset/casia_hwdb.py" />
|
||||
<change beforePath="$PROJECT_DIR$/dataset/casia_hwdb_1.0_1.1.tfrecord" afterPath="$PROJECT_DIR$/dataset/casia_hwdb_1.0_1.1.tfrecord" />
|
||||
<change beforePath="$PROJECT_DIR$/dataset/charactors.txt" afterPath="$PROJECT_DIR$/dataset/charactors.txt" />
|
||||
<change beforePath="$PROJECT_DIR$/dataset/convert_to_tfrecord.py" afterPath="$PROJECT_DIR$/dataset/convert_to_tfrecord.py" />
|
||||
<change beforePath="$PROJECT_DIR$/dataset/get_hwdb_1.0_1.1.sh" afterPath="$PROJECT_DIR$/dataset/get_hwdb_1.0_1.1.sh" />
|
||||
<change beforePath="$PROJECT_DIR$/readme.md" afterPath="$PROJECT_DIR$/readme.md" />
|
||||
<change beforePath="$PROJECT_DIR$/sample.png" afterPath="$PROJECT_DIR$/sample.png" />
|
||||
<change beforePath="$PROJECT_DIR$/samples/.gitignore" afterPath="$PROJECT_DIR$/samples/.gitignore" />
|
||||
<change beforePath="$PROJECT_DIR$/samples/001-f.gnt" afterPath="$PROJECT_DIR$/samples/001-f.gnt" />
|
||||
<change beforePath="$PROJECT_DIR$/samples/sample.png" afterPath="$PROJECT_DIR$/samples/sample.png" />
|
||||
<change beforePath="$PROJECT_DIR$/tests.py" afterPath="$PROJECT_DIR$/tests.py" />
|
||||
</list>
|
||||
<option name="EXCLUDED_CONVERTED_TO_IGNORED" value="true" />
|
||||
<option name="TRACKING_ENABLED" value="true" />
|
||||
<option name="SHOW_DIALOG" value="false" />
|
||||
<option name="HIGHLIGHT_CONFLICTS" value="true" />
|
||||
<option name="HIGHLIGHT_NON_ACTIVE_CHANGELIST" value="false" />
|
||||
<option name="LAST_RESOLUTION" value="IGNORE" />
|
||||
</component>
|
||||
<component name="FileEditorManager">
|
||||
<leaf>
|
||||
<file leaf-file-name="dataset_hwdb.py" pinned="false" current-in-tab="false">
|
||||
<entry file="file://$PROJECT_DIR$/dataset/dataset_hwdb.py">
|
||||
<provider selected="true" editor-type-id="text-editor">
|
||||
<state relative-caret-position="0">
|
||||
<caret line="0" column="0" lean-forward="false" selection-start-line="0" selection-start-column="0" selection-end-line="0" selection-end-column="0" />
|
||||
<folding />
|
||||
</state>
|
||||
</provider>
|
||||
</entry>
|
||||
</file>
|
||||
<file leaf-file-name="casia_hwdb.py" pinned="false" current-in-tab="true">
|
||||
<entry file="file://$PROJECT_DIR$/dataset/casia_hwdb.py">
|
||||
<provider selected="true" editor-type-id="text-editor">
|
||||
<state relative-caret-position="715">
|
||||
<caret line="70" column="26" lean-forward="true" selection-start-line="70" selection-start-column="26" selection-end-line="70" selection-end-column="26" />
|
||||
<folding />
|
||||
</state>
|
||||
</provider>
|
||||
</entry>
|
||||
</file>
|
||||
<file leaf-file-name="tests.py" pinned="false" current-in-tab="false">
|
||||
<entry file="file://$PROJECT_DIR$/tests.py">
|
||||
<provider selected="true" editor-type-id="text-editor">
|
||||
<state relative-caret-position="0">
|
||||
<caret line="0" column="0" lean-forward="false" selection-start-line="0" selection-start-column="0" selection-end-line="0" selection-end-column="0" />
|
||||
<folding />
|
||||
</state>
|
||||
</provider>
|
||||
</entry>
|
||||
</file>
|
||||
<file leaf-file-name="convert_to_tfrecord.py" pinned="false" current-in-tab="false">
|
||||
<entry file="file://$PROJECT_DIR$/dataset/convert_to_tfrecord.py">
|
||||
<provider selected="true" editor-type-id="text-editor">
|
||||
<state relative-caret-position="-301">
|
||||
<caret line="0" column="0" lean-forward="false" selection-start-line="0" selection-start-column="0" selection-end-line="0" selection-end-column="0" />
|
||||
<folding />
|
||||
</state>
|
||||
</provider>
|
||||
</entry>
|
||||
</file>
|
||||
</leaf>
|
||||
</component>
|
||||
<component name="Git.Settings">
|
||||
<option name="RECENT_GIT_ROOT_PATH" value="$PROJECT_DIR$" />
|
||||
</component>
|
||||
<component name="IdeDocumentHistory">
|
||||
<option name="CHANGED_PATHS">
|
||||
<list>
|
||||
<option value="$PROJECT_DIR$/dataset/casia_hwdb.py" />
|
||||
</list>
|
||||
</option>
|
||||
</component>
|
||||
<component name="JsBuildToolGruntFileManager" detection-done="true" sorting="DEFINITION_ORDER" />
|
||||
<component name="JsBuildToolPackageJson" detection-done="true" sorting="DEFINITION_ORDER" />
|
||||
<component name="JsGulpfileManager">
|
||||
<detection-done>true</detection-done>
|
||||
<sorting>DEFINITION_ORDER</sorting>
|
||||
</component>
|
||||
<component name="ProjectFrameBounds">
|
||||
<option name="x" value="933" />
|
||||
<option name="y" value="28" />
|
||||
<option name="width" value="1538" />
|
||||
<option name="height" value="1412" />
|
||||
</component>
|
||||
<component name="ProjectView">
|
||||
<navigator currentView="ProjectPane" proportions="" version="1">
|
||||
<flattenPackages />
|
||||
<showMembers />
|
||||
<showModules />
|
||||
<showLibraryContents />
|
||||
<hideEmptyPackages />
|
||||
<abbreviatePackageNames />
|
||||
<autoscrollToSource />
|
||||
<autoscrollFromSource />
|
||||
<sortByType />
|
||||
<manualOrder />
|
||||
<foldersAlwaysOnTop value="true" />
|
||||
</navigator>
|
||||
<panes>
|
||||
<pane id="Scratches" />
|
||||
<pane id="ProjectPane">
|
||||
<subPane>
|
||||
<expand>
|
||||
<path>
|
||||
<item name="ocrcn_tf2" type="b2602c69:ProjectViewProjectNode" />
|
||||
<item name="ocrcn_tf2" type="462c0819:PsiDirectoryNode" />
|
||||
</path>
|
||||
<path>
|
||||
<item name="ocrcn_tf2" type="b2602c69:ProjectViewProjectNode" />
|
||||
<item name="ocrcn_tf2" type="462c0819:PsiDirectoryNode" />
|
||||
<item name="dataset" type="462c0819:PsiDirectoryNode" />
|
||||
</path>
|
||||
</expand>
|
||||
<select />
|
||||
</subPane>
|
||||
</pane>
|
||||
<pane id="Scope" />
|
||||
</panes>
|
||||
</component>
|
||||
<component name="PropertiesComponent">
|
||||
<property name="nodejs_interpreter_path.stuck_in_default_project" value="undefined stuck path" />
|
||||
<property name="WebServerToolWindowFactoryState" value="false" />
|
||||
<property name="settings.editor.selected.configurable" value="preferences.editor" />
|
||||
</component>
|
||||
<component name="RunDashboard">
|
||||
<option name="ruleStates">
|
||||
<list>
|
||||
<RuleState>
|
||||
<option name="name" value="ConfigurationTypeDashboardGroupingRule" />
|
||||
</RuleState>
|
||||
<RuleState>
|
||||
<option name="name" value="StatusDashboardGroupingRule" />
|
||||
</RuleState>
|
||||
</list>
|
||||
</option>
|
||||
</component>
|
||||
<component name="ShelveChangesManager" show_recycled="false">
|
||||
<option name="remove_strategy" value="false" />
|
||||
</component>
|
||||
<component name="SvnConfiguration">
|
||||
<configuration />
|
||||
</component>
|
||||
<component name="TaskManager">
|
||||
<task active="true" id="Default" summary="Default task">
|
||||
<changelist id="c86d3061-c2c8-42bb-882f-53f6373c7f88" name="Default" comment="" />
|
||||
<created>1559385971852</created>
|
||||
<option name="number" value="Default" />
|
||||
<option name="presentableId" value="Default" />
|
||||
<updated>1559385971852</updated>
|
||||
</task>
|
||||
<servers />
|
||||
</component>
|
||||
<component name="ToolWindowManager">
|
||||
<frame x="933" y="28" width="1538" height="1412" extended-state="0" />
|
||||
<editor active="true" />
|
||||
<layout>
|
||||
<window_info id="TODO" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="-1" side_tool="false" content_ui="tabs" />
|
||||
<window_info id="Event Log" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="-1" side_tool="true" content_ui="tabs" />
|
||||
<window_info id="Version Control" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="-1" side_tool="false" content_ui="tabs" />
|
||||
<window_info id="Python Console" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="-1" side_tool="false" content_ui="tabs" />
|
||||
<window_info id="Run" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="-1" side_tool="false" content_ui="tabs" />
|
||||
<window_info id="Terminal" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="-1" side_tool="false" content_ui="tabs" />
|
||||
<window_info id="Project" active="false" anchor="left" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="true" show_stripe_button="true" weight="0.22386059" sideWeight="0.5" order="-1" side_tool="false" content_ui="combo" />
|
||||
<window_info id="Docker" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="false" weight="0.33" sideWeight="0.5" order="-1" side_tool="false" content_ui="tabs" />
|
||||
<window_info id="Database" active="false" anchor="right" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="-1" side_tool="false" content_ui="tabs" />
|
||||
<window_info id="SciView" active="false" anchor="right" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="-1" side_tool="false" content_ui="tabs" />
|
||||
<window_info id="Structure" active="false" anchor="left" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="-1" side_tool="true" content_ui="tabs" />
|
||||
<window_info id="Favorites" active="false" anchor="left" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="-1" side_tool="true" content_ui="tabs" />
|
||||
<window_info id="Debug" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="-1" side_tool="false" content_ui="tabs" />
|
||||
</layout>
|
||||
</component>
|
||||
<component name="TypeScriptGeneratedFilesManager">
|
||||
<option name="version" value="1" />
|
||||
</component>
|
||||
<component name="VcsContentAnnotationSettings">
|
||||
<option name="myLimit" value="2678400000" />
|
||||
</component>
|
||||
<component name="XDebuggerManager">
|
||||
<breakpoint-manager />
|
||||
<watches-manager />
|
||||
</component>
|
||||
<component name="editorHistoryManager">
|
||||
<entry file="file:///usr/local/lib/python3.6/dist-packages/tensorflow/python/framework/ops.py">
|
||||
<provider selected="true" editor-type-id="text-editor">
|
||||
<state relative-caret-position="-7666">
|
||||
<caret line="309" column="6" lean-forward="false" selection-start-line="309" selection-start-column="6" selection-end-line="309" selection-end-column="6" />
|
||||
<folding />
|
||||
</state>
|
||||
</provider>
|
||||
</entry>
|
||||
<entry file="file://$PROJECT_DIR$/dataset/charactors.txt">
|
||||
<provider selected="true" editor-type-id="text-editor">
|
||||
<state relative-caret-position="0">
|
||||
<caret line="0" column="0" lean-forward="false" selection-start-line="0" selection-start-column="0" selection-end-line="0" selection-end-column="0" />
|
||||
<folding />
|
||||
</state>
|
||||
</provider>
|
||||
</entry>
|
||||
<entry file="file://$PROJECT_DIR$/dataset/convert_to_tfrecord.py">
|
||||
<provider selected="true" editor-type-id="text-editor">
|
||||
<state relative-caret-position="-301">
|
||||
<caret line="0" column="0" lean-forward="false" selection-start-line="0" selection-start-column="0" selection-end-line="0" selection-end-column="0" />
|
||||
<folding />
|
||||
</state>
|
||||
</provider>
|
||||
</entry>
|
||||
<entry file="file://$PROJECT_DIR$/dataset/dataset_hwdb.py">
|
||||
<provider selected="true" editor-type-id="text-editor">
|
||||
<state relative-caret-position="0">
|
||||
<caret line="0" column="0" lean-forward="false" selection-start-line="0" selection-start-column="0" selection-end-line="0" selection-end-column="0" />
|
||||
<folding />
|
||||
</state>
|
||||
</provider>
|
||||
</entry>
|
||||
<entry file="file://$PROJECT_DIR$/train.py">
|
||||
<provider selected="true" editor-type-id="text-editor">
|
||||
<state relative-caret-position="0">
|
||||
<caret line="0" column="0" lean-forward="false" selection-start-line="0" selection-start-column="0" selection-end-line="0" selection-end-column="0" />
|
||||
<folding />
|
||||
</state>
|
||||
</provider>
|
||||
</entry>
|
||||
<entry file="file://$PROJECT_DIR$/sample.png">
|
||||
<provider selected="true" editor-type-id="images">
|
||||
<state />
|
||||
</provider>
|
||||
</entry>
|
||||
<entry file="file://$PROJECT_DIR$/tests.py">
|
||||
<provider selected="true" editor-type-id="text-editor">
|
||||
<state relative-caret-position="0">
|
||||
<caret line="0" column="0" lean-forward="false" selection-start-line="0" selection-start-column="0" selection-end-line="0" selection-end-column="0" />
|
||||
<folding />
|
||||
</state>
|
||||
</provider>
|
||||
</entry>
|
||||
<entry file="file://$PROJECT_DIR$/dataset/casia_hwdb.py">
|
||||
<provider selected="true" editor-type-id="text-editor">
|
||||
<state relative-caret-position="715">
|
||||
<caret line="70" column="26" lean-forward="true" selection-start-line="70" selection-start-column="26" selection-end-line="70" selection-end-column="26" />
|
||||
<folding />
|
||||
</state>
|
||||
</provider>
|
||||
</entry>
|
||||
</component>
|
||||
</project>
|
||||
BIN
dataset/.DS_Store
vendored
Executable file
BIN
dataset/.DS_Store
vendored
Executable file
Binary file not shown.
BIN
dataset/._.DS_Store
Executable file
BIN
dataset/._.DS_Store
Executable file
Binary file not shown.
3
dataset/.gitignore
vendored
Normal file → Executable file
3
dataset/.gitignore
vendored
Normal file → Executable file
@@ -1,2 +1,3 @@
|
||||
hwdb_raw/
|
||||
*.tfrecord
|
||||
*.tfrecord
|
||||
casia_hwdb.pyhwdb_11.tfrecord
|
||||
|
||||
102
dataset/casia_hwdb.py
Normal file → Executable file
102
dataset/casia_hwdb.py
Normal file → Executable file
@@ -6,9 +6,13 @@ we using this class to get .png and label from raw
|
||||
.gnt data
|
||||
|
||||
"""
|
||||
from alfred.dl.tf.common import mute_tf
|
||||
|
||||
mute_tf()
|
||||
import struct
|
||||
import numpy as np
|
||||
import cv2
|
||||
import tensorflow as tf
|
||||
|
||||
|
||||
class CASIAHWDBGNT(object):
|
||||
@@ -24,61 +28,57 @@ class CASIAHWDBGNT(object):
|
||||
with open(self.f_p, 'rb') as f:
|
||||
while True:
|
||||
header = np.fromfile(f, dtype='uint8', count=header_size)
|
||||
if not header.size:
|
||||
if not header.size:
|
||||
break
|
||||
sample_size = header[0] + (header[1]<<8) + (header[2]<<16) + (header[3]<<24)
|
||||
tagcode = header[5] + (header[4]<<8)
|
||||
width = header[6] + (header[7]<<8)
|
||||
height = header[8] + (header[9]<<8)
|
||||
if header_size + width*height != sample_size:
|
||||
sample_size = header[0] + (header[1] << 8) + (
|
||||
header[2] << 16) + (header[3] << 24)
|
||||
tagcode = header[5] + (header[4] << 8)
|
||||
width = header[6] + (header[7] << 8)
|
||||
height = header[8] + (header[9] << 8)
|
||||
if header_size + width * height != sample_size:
|
||||
break
|
||||
image = np.fromfile(f, dtype='uint8', count=width*height).reshape((height, width))
|
||||
image = np.fromfile(f, dtype='uint8',
|
||||
count=width * height).reshape(
|
||||
(height, width))
|
||||
yield image, tagcode
|
||||
|
||||
|
||||
def resize_padding_or_crop(target_size, ori_img, padding_value=255):
    """Fit an image onto a square ``target_size`` x ``target_size`` canvas.

    Each spatial axis is handled independently: an axis shorter than
    ``target_size`` is centred and padded, an axis longer than
    ``target_size`` is centre-cropped. The original implementation only
    padded, so any oversized input failed with a broadcast error.

    Args:
        target_size: Side length of the square output.
        ori_img: ``np.ndarray`` of rank 2 or rank 3. For rank 3 the leading
            axis is preserved and the last two axes are treated as the
            spatial ones.
            NOTE(review): the original 3-D branch allocated
            ``(shape[0], target, target)`` but indexed axes 0/1, which could
            not work; the trailing-axes interpretation is assumed here --
            confirm against callers.
        padding_value: Fill value for the padded border of 2-D inputs.
            3-D inputs keep the original zero fill.

    Returns:
        ``np.uint8`` array of shape ``(target_size, target_size)`` for 2-D
        input, or ``(ori_img.shape[0], target_size, target_size)`` for 3-D.
    """
    if len(ori_img.shape) == 3:
        res = np.zeros([ori_img.shape[0], target_size, target_size])
    else:
        res = np.ones([target_size, target_size]) * padding_value

    src_h, src_w = ori_img.shape[-2:]
    dst_rows, src_rows = _center_span(target_size, src_h)
    dst_cols, src_cols = _center_span(target_size, src_w)

    # Ellipsis keeps the optional leading axis of 3-D inputs untouched.
    res[..., dst_rows, dst_cols] = ori_img[..., src_rows, src_cols]
    return np.array(res, dtype=np.uint8)


def _center_span(target_size, size):
    """Return ``(dst, src)`` slices that centre a ``size``-long axis in ``target_size``."""
    if size <= target_size:
        # Pad: same offsets as the original int((t - s) / 2) .. int((t + s) / 2).
        start = (target_size - size) // 2
        return slice(start, start + size), slice(0, size)
    # Crop: keep the central ``target_size`` samples of the source axis.
    offset = (size - target_size) // 2
    return slice(0, target_size), slice(offset, offset + target_size)
|
||||
def parse_example(record):
    """Decode one serialized ``tf.train.Example`` into an ``(image, label)`` pair.

    Args:
        record: Scalar string tensor holding a serialized example with an
            int64 ``'label'`` feature and a bytes ``'image'`` feature.

    Returns:
        Tuple ``(img, label)``: ``img`` is the ``'image'`` bytes reinterpreted
        as ``tf.uint8`` (no reshape is applied here, so the caller must
        restore the spatial layout), and ``label`` is the class index cast
        to ``tf.int32``.
    """
    feature_spec = {
        'label': tf.io.FixedLenFeature([], tf.int64),
        'image': tf.io.FixedLenFeature([], tf.string),
    }
    parsed = tf.io.parse_single_example(record, features=feature_spec)

    pixels = tf.io.decode_raw(parsed['image'], out_type=tf.uint8)
    class_index = tf.cast(parsed['label'], tf.int32)
    return pixels, class_index
|
||||
|
||||
|
||||
def load_ds(input_files=None):
    """Build a ``tf.data`` dataset of decoded ``(image, label)`` pairs.

    Args:
        input_files: TFRecord path, or list of paths, to read. When omitted
            the previous hard-coded default
            ``['casia_hwdb_1.0_1.1.tfrecord']`` is used, resolved relative
            to the current working directory.

    Returns:
        A ``tf.data.Dataset`` whose elements are the output of
        ``parse_example`` for each record.
    """
    if input_files is None:
        input_files = ['casia_hwdb_1.0_1.1.tfrecord']
    ds = tf.data.TFRecordDataset(input_files)
    return ds.map(parse_example)
|
||||
|
||||
|
||||
def load_charactors():
    """Load the character table from ``charactors.txt`` in the working directory.

    Returns:
        List of strings, one per line of the file, with surrounding
        whitespace (including the trailing newline) stripped. The list
        index is used by callers as the class label.

    Raises:
        OSError: If ``charactors.txt`` cannot be opened.
    """
    # `with` closes the handle deterministically; the previous
    # open(...).readlines() left the file open until garbage collection.
    with open('charactors.txt', 'r') as f:
        return [line.strip() for line in f]
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
gnt = CASIAHWDBGNT('samples/1001-f.gnt')
|
||||
|
||||
full_img = np.zeros([900, 900], dtype=np.uint8)
|
||||
charset = []
|
||||
i = 0
|
||||
for img, tagcode in gnt.get_data_iter():
|
||||
# cv2.imshow('rr', img)
|
||||
try:
|
||||
label = struct.pack('>H', tagcode).decode('gb2312')
|
||||
img_padded = resize_padding_or_crop(90, img)
|
||||
col_idx = i%10
|
||||
row_idx = i//10
|
||||
full_img[row_idx*90:(row_idx+1)*90, col_idx*90:(col_idx+1)*90] = img_padded
|
||||
charset.append(label.replace('\x00', ''))
|
||||
if i >= 99:
|
||||
cv2.imshow('rrr', full_img)
|
||||
cv2.imwrite('sample.png', full_img)
|
||||
cv2.waitKey(0)
|
||||
print(charset)
|
||||
break
|
||||
i += 1
|
||||
except Exception as e:
|
||||
# print(e.with_traceback(0))
|
||||
print('decode error')
|
||||
continue
|
||||
ds = load_ds()
|
||||
charactors = load_charactors()
|
||||
for img, label in ds.take(9):
|
||||
# start training on model...
|
||||
img = img.numpy()
|
||||
img = np.resize(img, (64, 64))
|
||||
print(img.shape)
|
||||
label = label.numpy()
|
||||
label = charactors[label]
|
||||
print(label)
|
||||
cv2.imshow('rr', img)
|
||||
cv2.waitKey(0)
|
||||
# break
|
||||
|
||||
0
dataset/charactors.txt
Normal file → Executable file
0
dataset/charactors.txt
Normal file → Executable file
40
dataset/convert_to_tfrecord.py
Normal file → Executable file
40
dataset/convert_to_tfrecord.py
Normal file → Executable file
@@ -7,6 +7,7 @@ import cv2
|
||||
from alfred.utils.log import logger as logging
|
||||
import tensorflow as tf
|
||||
import glob
|
||||
import os
|
||||
|
||||
|
||||
class CASIAHWDBGNT(object):
|
||||
@@ -39,20 +40,27 @@ def run():
|
||||
logging.info('got all {} gnt files.'.format(len(all_hwdb_gnt_files)))
|
||||
logging.info('gathering charset...')
|
||||
charset = []
|
||||
for gnt in all_hwdb_gnt_files:
|
||||
hwdb = CASIAHWDBGNT(gnt)
|
||||
for img, tagcode in hwdb.get_data_iter():
|
||||
try:
|
||||
label = struct.pack('>H', tagcode).decode('gb2312')
|
||||
label = label.replace('\x00', '')
|
||||
charset.append(label)
|
||||
except Exception as e:
|
||||
continue
|
||||
charset = sorted(set(charset))
|
||||
if os.path.exists('charactors.txt'):
|
||||
logging.info('found exist charactors.txt...')
|
||||
with open('charactors.txt', 'r') as f:
|
||||
charset = f.readlines()
|
||||
charset = [i.strip() for i in charset]
|
||||
else:
|
||||
for gnt in all_hwdb_gnt_files:
|
||||
hwdb = CASIAHWDBGNT(gnt)
|
||||
for img, tagcode in hwdb.get_data_iter():
|
||||
try:
|
||||
label = struct.pack('>H', tagcode).decode('gb2312')
|
||||
label = label.replace('\x00', '')
|
||||
charset.append(label)
|
||||
except Exception as e:
|
||||
continue
|
||||
charset = sorted(set(charset))
|
||||
with open('charactors.txt', 'w') as f:
|
||||
f.writelines('\n'.join(charset))
|
||||
logging.info('all got {} charactors.'.format(len(charset)))
|
||||
with open('charactors.txt', 'w') as f:
|
||||
f.writelines('\n'.join(charset))
|
||||
|
||||
logging.info('{}'.format(charset[:10]))
|
||||
|
||||
tfrecord_f = 'casia_hwdb_1.0_1.1.tfrecord'
|
||||
i = 0
|
||||
with tf.io.TFRecordWriter(tfrecord_f) as tfrecord_writer:
|
||||
@@ -60,7 +68,7 @@ def run():
|
||||
hwdb = CASIAHWDBGNT(gnt)
|
||||
for img, tagcode in hwdb.get_data_iter():
|
||||
try:
|
||||
img = cv.resize(img, (64, 64))
|
||||
img = cv2.resize(img, (64, 64))
|
||||
label = struct.pack('>H', tagcode).decode('gb2312')
|
||||
label = label.replace('\x00', '')
|
||||
index = charset.index(label)
|
||||
@@ -68,11 +76,11 @@ def run():
|
||||
example = tf.train.Example(features=tf.train.Features(
|
||||
feature={
|
||||
"label": tf.train.Feature(int64_list=tf.train.Int64List(value=[index])),
|
||||
'image': tf.train.Feature(bytes_list=tf.train.BytesList(value=[img]))
|
||||
'image': tf.train.Feature(bytes_list=tf.train.BytesList(value=[img.tobytes()]))
|
||||
}))
|
||||
tfrecord_writer.write(example.SerializeToString())
|
||||
if i%500:
|
||||
logging.info('solved {} examples.'.format(i))
|
||||
logging.info('solved {} examples. {}: {}'.format(i, label, index))
|
||||
i += 1
|
||||
except Exception as e:
|
||||
logging.error(e)
|
||||
|
||||
0
dataset/casia_hwdb_1.0_1.1.tfrecord → dataset/dataset_hwdb.py
Normal file → Executable file
0
dataset/casia_hwdb_1.0_1.1.tfrecord → dataset/dataset_hwdb.py
Normal file → Executable file
0
dataset/get_hwdb_1.0_1.1.sh
Normal file → Executable file
0
dataset/get_hwdb_1.0_1.1.sh
Normal file → Executable file
6
readme.md
Normal file → Executable file
6
readme.md
Normal file → Executable file
@@ -27,7 +27,13 @@
|
||||
|
||||
**更新**:
|
||||
实际上,由于单个汉字图片其实很小,差不多也就最大80x80的大小,这个大小不适合转成图片保存到本地,因此我们将hwdb原始的二进制保存为tfrecord。同时也方便后面训练,可以直接从tfrecord读取图片进行训练。
|
||||

|
||||
|
||||
存储完成时,我们大概处理了89万个汉字样本,字符集一共包含3755个汉字。由于我们暂时仅仅使用了1.0,所以还有大概3000个汉字没有加入进来,但处理方式是一样的。使用本仓库来生成你的tfrecord步骤如下:
|
||||
|
||||
- `cd dataset && python3 convert_to_tfrecord.py`, 请注意我们使用的是tf2.0;
|
||||
- 你需要修改对应的路径,然后等待生成完成,大概有89万个example;如果1.0和1.1都用,样本数量估计会翻倍(double)。
|
||||
|
||||
|
||||
## Model
|
||||
|
||||
|
||||
0
sample.png
Normal file → Executable file
0
sample.png
Normal file → Executable file
|
Before Width: | Height: | Size: 71 KiB After Width: | Height: | Size: 71 KiB |
0
samples/.gitignore
vendored
Normal file → Executable file
0
samples/.gitignore
vendored
Normal file → Executable file
0
samples/001-f.gnt
Normal file → Executable file
0
samples/001-f.gnt
Normal file → Executable file
0
samples/sample.png
Normal file → Executable file
0
samples/sample.png
Normal file → Executable file
|
Before Width: | Height: | Size: 61 KiB After Width: | Height: | Size: 61 KiB |
Reference in New Issue
Block a user