-
Notifications
You must be signed in to change notification settings - Fork 28
/
Copy pathtest_model.py
229 lines (194 loc) · 6.04 KB
/
test_model.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
#!/usr/bin/env python
# -*- coding: UTF-8 -*-
'''Risk2S'''
'''
字符型图片验证码识别完整过程及Python实现
https://www.cnblogs.com/beer/p/5672678.html
'''
import os
from PIL import Image
import numpy as np
import sys
def dir_check(path):
isExists = os.path.exists(path)
if isExists != True:
os.makedirs(path)
print('文件夹' + path + ' 创建成功')
else:
pass
def get_bin_table(threshold=140):
"""
获取灰度转二值的映射table
:param threshold:
:return:
"""
table = []
for i in range(256):
if i < threshold:
table.append(0)
else:
table.append(1)
return table
def get_crop_imgs(img):
"""
按照图片的特点,进行切割,这个要根据具体的验证码来进行工作. # 见原理图
:param img:
:return:
"""
child_img_list = []
for i in range(4):
x = 2 + i * (8 + 5) # 见原理图
y = 0
child_img = img.crop((x, y, x + 8, y + 18))
child_img_list.append(child_img)
return child_img_list
def get_feature(img):
"""
获取指定图片的特征值,
1. 按照每排的像素点,高度为18,则有18个维度,然后为8列,总共144个维度
:param img_path:
:return:一个维度为18(高度)的列表
"""
width, height = img.size
pixel_cnt_list = []
height = 10
for y in range(height):
pix_cnt_x = 0
for x in range(width):
# == 0 如果按照示例程序无法提取特征值,如果没有出现,就看看当前像素点的值进行自我处理。
if img.getpixel((x, y)) < 240: # 黑色点
pix_cnt_x += 1
pixel_cnt_list.append(pix_cnt_x)
for x in range(width):
pix_cnt_y = 0
for y in range(height):
if img.getpixel((x, y)) < 240: # 黑色点
pix_cnt_y += 1
pixel_cnt_list.append(pix_cnt_y)
return pixel_cnt_list
# 得到特征值文件
# def get_fea_file(img_path,n,flag_print):
# 遍历图片文件
# for i in os.walk(img_path + '/' + str(n)):
# im_list = i[2]
# print(im_list)
# for im_file in im_list:
# im_name = './' + img_path + '/' + str(n) + '/' + im_file
# # print(im_name)
# im = Image.open(im_name)
# # 特征选择
# im_feature = get_feature(im)
# # print(im_feature)
# z_index = 1
# if os.path.exists(str(n) + '_feature.txt'):
# ctreate_file = False
# else:
# ctreate_file = True
# # f = open(str(n) + '_feature.txt', 'a')
# # f = open(str(n) + '_feature.txt', 'a')
# #单个特征值文件
# f = open('feature.txt', 'a')
# if ctreate_file:
# if flag_print:
# for z in im_feature:
# if z_index == 1:
# print(n, end=' ')
# f.write(str(n) + ' ')
# print(str(z_index) + ':' + str(z), end=' ')
# f.write(str(z_index) + ':' + str(z) + ' ')
# z_index += 1
# print('')
# f.write('\n')
# else:
# for z in im_feature:
# if z_index == 1:
# f.write(str(n) + ' ')
# f.write(str(z_index) + ':' + str(z) + ' ')
# z_index += 1
# f.write('\n')
# f.close()
def train_svm_model():
"""
训练并生成model文件
:return:
"""
y, x = svm_read_problem(svm_root + '/train_pix_feature_xy.txt')
model = svm_train(y, x)
svm_save_model(model_path, model)
def train_model_main2(model_path, file):
# LibSVM
# 按照 libSVM 指定的格式生成一组带特征值和标记值的向量文件
svm_path = r"C:\Python36\risk_down\libsvm"
sys.path.append(svm_path + r"\python")
import svmutil
# y, x = svmutil.svm_read_problem('./' + str(n) + '_feature.txt')
y, x = svmutil.svm_read_problem(file)
# ---------------------------------------------------
model = svmutil.svm_train(y[:50], x[:50], '-c 4')
# print(model_path,file.split('/')[2][0])
model_path = './' + model_path + '/' + file.split('/')[2][0] + "_feature.model"
svmutil.svm_save_model(model_path, model)
# p_label,p_acc,p_val = svmutil.svm_predict(y[0:], x[0:],model)
# print(p_label,p_acc,p_val)
# 得到测试的特征值文件
def get_fea_file(img_path, flag_print, model_path, total_name):
# 遍历图片文件
for i in os.walk(model_path):
img_path = i[0]
im_list = i[2]
# model文件
im_list = list(filter(lambda x: x[-3:] == 'txt', im_list))
g = open(total_name, 'a')
for file in im_list:
model_name = './' + model_path + '/' + file
f = open(model_name, 'r')
# 单个特征值文件
g.write(f.read())
return total_name
def svm_model_test(svm_root, model_path):
"""
使用测试集测试模型
:return:
"""
svm_path = r"C:\Python36\risk_down\libsvm"
sys.path.append(svm_path + r"\python")
import svmutil
# 训练总模型
y, x = svmutil.svm_read_problem('./' + 'total_feature.txt')
# model = svmutil.svm_train(y[:], x[:], '-c 4')
# svmutil.svm_save_model('total_feature.model', model)
# yt, xt = svmutil.svm_read_problem(svm_root + 'last_test_pix_xy_new.txt')
# model = svmutil.svm_load_model(model_path)
# p_label, p_acc, p_val = svmutil.svm_predict(yt, xt, model) # p_label即为识别的结果
# cnt = 0
# for item in p_label:
# print('%d' % item, end=',')
# cnt += 1
# if cnt % 8 == 0:
# print('')
import sys
def delete(filepath):
f = open(filepath, 'r')
fnew = open(filepath + '_new.txt', 'w') # 将结果存入新的文本中
for line in f.readlines(): # 对每一行先删除空格,\n等无用的字符,再检查此行是否长度为0
data = line.strip()
if len(data) != 0:
fnew.write(data)
fnew.write('\n')
f.close()
fnew.close()
def main():
img_path = "model_img"
model_path = "model_file"
total_name = 'total_feature.txt'
dir_check(img_path)
# 删除文本中的空行
delete(total_name)
# 是否打印特征值
# flag_print = True
flag_print = False
# 到达第二阶段 模型测试
# total_name = get_fea_file(img_path, flag_print, model_path, total_name)
# svm_model_test('./', "num_feature.model")
if __name__ == '__main__':
main()