The project is built on Python + CNN + TensorFlow, using the CPU build of TensorFlow for training. As long as your machine has 8 GB of RAM or more, you can follow this article, swap in your own training samples, tweak a few model parameters, and train a model that meets your needs.
Model quality is directly tied to the quality and quantity of the training samples. Based on my experience training several captcha recognition models, for captchas made of upper/lowercase letters plus digits, a bit over ten thousand samples is usually enough to reach roughly 80% accuracy. If you first apply some simple image processing tailored to the specific captcha before training and recognition, even fewer samples are needed: the fifth and sixth captcha types in the image above reached 90% accuracy with only 2,000 samples after some preprocessing.
Human labeling is the most common approach. Several companies offer captcha-solving services, and their APIs can be used to label samples in bulk. The drawbacks are clear, though: the platforms charge a fee, and some of the returned labels are wrong, which hurts the final model. Incorrect labels can be filtered out with some validation logic, such as checking whether a labeled answer is actually correct; the details are left to the reader.
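One minimal sketch of such validation (illustrative only, not tied to any specific labeling platform's API) is to label each captcha through two independent sources and keep only the samples where both answers agree:

def filter_labels(samples):
    '''samples: list of (image_bytes, label_a, label_b) tuples, where the two
    labels come from two independent labeling sources. Only samples whose
    labels agree (case-insensitively) are kept.'''
    clean = []
    for img, a, b in samples:
        a, b = a.strip().lower(), b.strip().lower()
        if a and a == b:  # consistent answers -> trust the label
            clean.append((img, a))
    return clean  # inconsistent samples are simply discarded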
Analyze the captcha's characteristics and programmatically generate similar, or even identical, captchas. This is technically more demanding, but yields an unlimited supply of training samples.
For other captcha types, the code below can be adapted to generate the corresponding format.
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time : 2019/10/12 10:01
# @Author : shm
# @Site :
# @File : create_yzm.py
# @Software: PyCharm
import os
import random
from PIL import Image, ImageDraw, ImageFont


def getRandomColor():
    '''Generate a random RGB color.'''
    r = random.randint(0, 255)
    g = random.randint(0, 255)
    b = random.randint(0, 255)
    return (r, g, b)


def getRandomChar():
    '''Pick a random character; easily confused glyphs (0 and l) are left out.'''
    charlist = "123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz"
    random_char = random.choice(charlist)
    return random_char


def genImg(width, height, font_size, chr_num):
    '''Generate one width*height captcha image.
    :param width: image width
    :param height: image height
    :param font_size: font size
    :param chr_num: number of characters
    :return: PIL Image
    '''
    #bg_color = getRandomColor()
    bg_color = (255, 255, 255)  # white background
    # create an image filled with the background color
    img = Image.new(mode="RGB", size=(width, height), color=bg_color)
    # get a drawing handle used to render the characters
    draw = ImageDraw.Draw(img)
    # choose the font
    font = ImageFont.truetype(font="Action Jackson", size=font_size)
    #font = ImageFont.truetype(font="华文彩云", size=font_size)
    for i in range(chr_num):
        # draw chr_num random characters (random colors are also possible)
        random_txt = getRandomChar()
        #txt_color = getRandomColor()
        txt_color = (0, 0, 255)  # blue text
        # while txt_color == bg_color:
        #     txt_color = getRandomColor()
        draw.text((36 + 16 * i, 5), text=random_txt, fill=txt_color, font=font)
    # draw interference lines
    drawLine(draw, width, height)
    # add noise points
    drawPoint(draw, width, height)
    return img


def drawLine(draw, width, height):
    '''Draw random short horizontal interference lines.'''
    for i in range(10):
        x1 = random.randint(0, width)
        #x2 = random.randint(0, width - x1)
        x2 = x1 + random.randint(0, 25)
        y1 = random.randint(0, height)
        y2 = y1
        #y2 = random.randint(0, height)
        #draw.line((x1, y1, x2, y2), fill=getRandomColor())
        draw.line((x1, y1, x2, y2), fill=(0, 0, 255))


def drawPoint(draw, width, height):
    '''Add noise points.'''
    for i in range(5):
        x = random.randint(0, 40)
        y = random.randint(0, height)
        #draw.point((x, y), fill=getRandomColor())
        draw.point((x, y), fill=(0, 0, 255))


def drawOther(draw):
    '''Add custom noise (left as a placeholder).'''
    pass


def genyzm():
    '''Generate captcha images and write them to disk.'''
    # image width
    width = 106
    # image height
    height = 30
    # font size
    font_size = 20
    # number of characters
    chr_num = 4
    # output directory for the generated captchas
    path = "./yzm_pic/"
    os.makedirs(path, exist_ok=True)  # make sure the output directory exists
    for i in range(10):
        img = genImg(width, height, font_size, chr_num)
        dir = path + str(i) + ".png"
        with open(dir, "wb") as fp:
            img.save(fp, format="png")


if __name__ == "__main__":
    try:
        genyzm()
    except Exception as e:
        print(e)
The generation code for the second captcha type (Chinese characters + digits + letters) is not posted here. That captcha is still used by a live website, and to avoid interfering with the site's normal operation the code stays closed. The overall approach matches the code above; the only differences are that the background is not a single color and Chinese characters are mixed into the character set.
The following two captcha types are used as examples.
The captcha in Figure 1 is 106x30 with all characters concentrated on the right. Inspecting it with the built-in Windows Paint tool shows the characters fall within the 36-100 pixel band, so the image is first cropped to that region, yielding a 64x30 image, as shown below:
import os
from PIL import Image


def screen_shot(src, dstpath):
    '''Image preprocessing: crop the main region of the captcha.
    :param src: source image path
    :param dstpath: destination directory
    :return:
    '''
    try:
        img = Image.open(src)
        s = os.path.split(src)
        fn = s[1].split(".")
        basename = fn[0]
        ext = fn[-1]
        # keep only the 36-100 band where the characters sit (result: 64x30)
        box = (36, 0, 100, 30)
        dstdir = dstpath + basename + "." + ext
        img.crop(box).save(dstdir)
    except Exception as e:
        print("screenshot:", e)
The captcha in Figure 2 is 100x38 with the characters evenly distributed, so no extra preprocessing is needed.
The cropped Figure-1 images are then split evenly, each captcha into four sub-images; the result looks like this:
import os
import time
from PIL import Image


def split_image(src, rownum, colnum, dstpath):
    '''Split one captcha image into rownum*colnum sub-images.
    :param src: source image path
    :param rownum: number of rows
    :param colnum: number of columns
    :param dstpath: destination directory
    :return:
    '''
    try:
        img = Image.open(src)
        w, h = img.size
        if rownum <= h and colnum <= w:
            s = os.path.split(src)
            fn = s[1].split(".")
            basename = fn[0]
            ext = fn[-1]
            rowheight = h // rownum
            colwidth = w // colnum
            num = 0
            for r in range(rownum):
                for c in range(colnum):
                    # the source file is named after its label, so the c-th
                    # character of the name labels the c-th slice
                    name = str(basename[c:c + 1])
                    t = str(int(time.time() * 100000))
                    box = (c * colwidth, r * rowheight, (c + 1) * colwidth, (r + 1) * rowheight)
                    img.crop(box).save(dstpath + name + "/" + name + "#" + t + "." + ext)
                    num = num + 1
            print("Split finished, %s sub-images generated" % num)
        else:
            print("Invalid row/column split parameters")
    except Exception as e:
        print("e:", e)
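With samples prepared, the next step is the model definition. The network below is the TF-slim AlexNet implementation that ships with TensorFlow; this first version keeps a single classification head (fc8_0) and is used to recognize one character per image: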
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Contains a models definition for AlexNet. This work was first described in: ImageNet Classification with Deep Convolutional Neural Networks Alex Krizhevsky, Ilya Sutskever and Geoffrey E. Hinton and later refined in: One weird trick for parallelizing convolutional neural networks Alex Krizhevsky, 2014 Here we provide the implementation proposed in "One weird trick" and not "ImageNet Classification", as per the paper, the LRN layers have been removed. Usage: with slim.arg_scope(alexnet.alexnet_v2_arg_scope()): outputs, end_points = alexnet.alexnet_v2(inputs) @@alexnet_v2 """
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import tensorflow as tf
slim = tf.contrib.slim
trunc_normal = lambda stddev: tf.truncated_normal_initializer(0.0, stddev)
def alexnet_v2_arg_scope(weight_decay=0.0005):
with slim.arg_scope([slim.conv2d, slim.fully_connected],
activation_fn=tf.nn.relu,
biases_initializer=tf.constant_initializer(0.1),
weights_regularizer=slim.l2_regularizer(weight_decay)):
with slim.arg_scope([slim.conv2d], padding='SAME'):
with slim.arg_scope([slim.max_pool2d], padding='VALID') as arg_sc:
return arg_sc
def alexnet_v2(inputs,
num_classes=1000,
is_training=True,
dropout_keep_prob=0.5,
spatial_squeeze=True,
scope='alexnet_v2'):
"""AlexNet version 2. Described in: http://arxiv.org/pdf/1404.5997v2.pdf Parameters from: github.com/akrizhevsky/cuda-convnet2/blob/master/layers/ layers-imagenet-1gpu.cfg Note: All the fully_connected layers have been transformed to conv2d layers. To use in classification mode, resize input to 224x224. To use in fully convolutional mode, set spatial_squeeze to false. The LRN layers have been removed and change the initializers from random_normal_initializer to xavier_initializer. Args: inputs: a tensor of size [batch_size, height, width, channels]. num_classes: number of predicted classes. is_training: whether or not the models is being trained. dropout_keep_prob: the probability that activations are kept in the dropout layers during training. spatial_squeeze: whether or not should squeeze the spatial dimensions of the outputs. Useful to remove unnecessary dimensions for classification. scope: Optional scope for the variables. Returns: the last op containing the log predictions and end_points dict. """
with tf.variable_scope(scope, 'alexnet_v2', [inputs]) as sc:
end_points_collection = sc.name + '_end_points'
# Collect outputs for conv2d, fully_connected and max_pool2d.
with slim.arg_scope([slim.conv2d, slim.fully_connected, slim.max_pool2d],
outputs_collections=[end_points_collection]):
net = slim.conv2d(inputs, 64, [11, 11], 4, padding='VALID',
scope='conv1')
net = slim.max_pool2d(net, [3, 3], 2, scope='pool1')
net = slim.conv2d(net, 192, [5, 5], scope='conv2')
net = slim.max_pool2d(net, [3, 3], 2, scope='pool2')
net = slim.conv2d(net, 384, [3, 3], scope='conv3')
net = slim.conv2d(net, 384, [3, 3], scope='conv4')
net = slim.conv2d(net, 256, [3, 3], scope='conv5')
net = slim.max_pool2d(net, [3, 3], 2, scope='pool5')
# Use conv2d instead of fully_connected layers.
with slim.arg_scope([slim.conv2d],
weights_initializer=trunc_normal(0.005),
biases_initializer=tf.constant_initializer(0.1)):
net = slim.conv2d(net, 4096, [5, 5], padding='VALID',
scope='fc6')
net = slim.dropout(net, dropout_keep_prob, is_training=is_training,
scope='dropout6')
net = slim.conv2d(net, 4096, [1, 1], scope='fc7')
net = slim.dropout(net, dropout_keep_prob, is_training=is_training,
scope='dropout7')
net0 = slim.conv2d(net, num_classes, [1, 1],
activation_fn=None,
normalizer_fn=None,
biases_initializer=tf.zeros_initializer(),
scope='fc8_0')
# Convert end_points_collection into a end_point dict.
end_points = slim.utils.convert_collection_to_dict(end_points_collection)
if spatial_squeeze:
net0 = tf.squeeze(net0, [1, 2], name='fc8_0/squeezed')
end_points[sc.name + '/fc8_0'] = net0
return net0, end_points
alexnet_v2.default_image_size = 224
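To recognize a four-character captcha as a whole image instead of splitting it, the same network is given four parallel classification heads (fc8_0 through fc8_3), one per character position, while all layers before fc8 stay shared: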
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import tensorflow as tf

slim = tf.contrib.slim
trunc_normal = lambda stddev: tf.truncated_normal_initializer(0.0, stddev)


def alexnet_v2_arg_scope(weight_decay=0.0005):
  with slim.arg_scope([slim.conv2d, slim.fully_connected],
                      activation_fn=tf.nn.relu,
                      biases_initializer=tf.constant_initializer(0.1),
                      weights_regularizer=slim.l2_regularizer(weight_decay)):
    with slim.arg_scope([slim.conv2d], padding='SAME'):
      with slim.arg_scope([slim.max_pool2d], padding='VALID') as arg_sc:
        return arg_sc


def alexnet_v2(inputs,
               num_classes=1000,
               is_training=True,
               dropout_keep_prob=0.5,
               spatial_squeeze=True,
               scope='alexnet_v2'):
  """AlexNet version 2.

  Described in: http://arxiv.org/pdf/1404.5997v2.pdf
  Parameters from:
  github.com/akrizhevsky/cuda-convnet2/blob/master/layers/
  layers-imagenet-1gpu.cfg

  Note: All the fully_connected layers have been transformed to conv2d layers.
        To use in classification mode, resize input to 224x224. To use in fully
        convolutional mode, set spatial_squeeze to false.
        The LRN layers have been removed and change the initializers from
        random_normal_initializer to xavier_initializer.

  Args:
    inputs: a tensor of size [batch_size, height, width, channels].
    num_classes: number of predicted classes.
    is_training: whether or not the model is being trained.
    dropout_keep_prob: the probability that activations are kept in the dropout
      layers during training.
    spatial_squeeze: whether or not should squeeze the spatial dimensions of the
      outputs. Useful to remove unnecessary dimensions for classification.
    scope: Optional scope for the variables.

  Returns:
    the last op containing the log predictions and end_points dict.
  """
  with tf.variable_scope(scope, 'alexnet_v2', [inputs]) as sc:
    end_points_collection = sc.name + '_end_points'
    # Collect outputs for conv2d, fully_connected and max_pool2d.
    with slim.arg_scope([slim.conv2d, slim.fully_connected, slim.max_pool2d],
                        outputs_collections=[end_points_collection]):
      net = slim.conv2d(inputs, 64, [11, 11], 4, padding='VALID',
                        scope='conv1')
      net = slim.max_pool2d(net, [3, 3], 2, scope='pool1')
      net = slim.conv2d(net, 192, [5, 5], scope='conv2')
      net = slim.max_pool2d(net, [3, 3], 2, scope='pool2')
      net = slim.conv2d(net, 384, [3, 3], scope='conv3')
      net = slim.conv2d(net, 384, [3, 3], scope='conv4')
      net = slim.conv2d(net, 256, [3, 3], scope='conv5')
      net = slim.max_pool2d(net, [3, 3], 2, scope='pool5')
      # Use conv2d instead of fully_connected layers.
      with slim.arg_scope([slim.conv2d],
                          weights_initializer=trunc_normal(0.005),
                          biases_initializer=tf.constant_initializer(0.1)):
        net = slim.conv2d(net, 4096, [5, 5], padding='VALID',
                          scope='fc6')
        net = slim.dropout(net, dropout_keep_prob, is_training=is_training,
                           scope='dropout6')
        net = slim.conv2d(net, 4096, [1, 1], scope='fc7')
        net = slim.dropout(net, dropout_keep_prob, is_training=is_training,
                           scope='dropout7')
        net0 = slim.conv2d(net, num_classes, [1, 1],
                           activation_fn=None,
                           normalizer_fn=None,
                           biases_initializer=tf.zeros_initializer(),
                           scope='fc8_0')
        net1 = slim.conv2d(net, num_classes, [1, 1],
                           activation_fn=None,
                           normalizer_fn=None,
                           biases_initializer=tf.zeros_initializer(),
                           scope='fc8_1')
        net2 = slim.conv2d(net, num_classes, [1, 1],
                           activation_fn=None,
                           normalizer_fn=None,
                           biases_initializer=tf.zeros_initializer(),
                           scope='fc8_2')
        net3 = slim.conv2d(net, num_classes, [1, 1],
                           activation_fn=None,
                           normalizer_fn=None,
                           biases_initializer=tf.zeros_initializer(),
                           scope='fc8_3')
      # Convert end_points_collection into a end_point dict.
      end_points = slim.utils.convert_collection_to_dict(end_points_collection)
      if spatial_squeeze:
        net0 = tf.squeeze(net0, [1, 2], name='fc8_0/squeezed')
        end_points[sc.name + '/fc8_0'] = net0
        net1 = tf.squeeze(net1, [1, 2], name='fc8_1/squeezed')
        end_points[sc.name + '/fc8_1'] = net1
        net2 = tf.squeeze(net2, [1, 2], name='fc8_2/squeezed')
        end_points[sc.name + '/fc8_2'] = net2
        net3 = tf.squeeze(net3, [1, 2], name='fc8_3/squeezed')
        end_points[sc.name + '/fc8_3'] = net3
      return net0, net1, net2, net3, end_points
alexnet_v2.default_image_size = 224
TFRecord training data generation code:
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import tensorflow as tf
import os
import random
import math
import sys
from PIL import Image
import numpy as np

# number of images held out for the test set
_NUM_TEST = 500
_RANDOM_SEED = 0
# number of characters per (split) image
MAX_CAPTCHA = 1
# directory of single-character images produced by the splitter;
# each file is named after the character it contains
DATASET_DIR = "./split_img/yzm"
# output directory for the TFRecord files
TFRECORD_DIR = './TFrecord/'


def _dataset_exists(dataset_dir):
    for split_name in ['train', 'test']:
        output_filename = os.path.join(dataset_dir, split_name + '.tfrecords')
        if not tf.gfile.Exists(output_filename):
            return False
    return True


def _get_filenames_and_classes(dataset_dir):
    photo_filenames = []
    for filename in os.listdir(dataset_dir):
        path = os.path.join(dataset_dir, filename)
        photo_filenames.append(path)
    return photo_filenames


def int64_feature(values):
    if not isinstance(values, (tuple, list)):
        values = [values]
    return tf.train.Feature(int64_list=tf.train.Int64List(value=values))


def bytes_feature(values):
    return tf.train.Feature(bytes_list=tf.train.BytesList(value=[values]))


def image_to_tfexample(image_data, label0):
    # Abstract base class for protocol messages.
    return tf.train.Example(features=tf.train.Features(feature={
        'image': bytes_feature(image_data),
        'label0': int64_feature(label0)
    }))


def char2pos(c):
    # case-sensitive mapping: 0-9 -> 0-9, A-Z -> 10-35, a-z -> 36-61, '_' -> 62
    if c == '_':
        k = 62
        return k
    k = ord(c) - 48
    if k > 9:
        k = ord(c) - 55
        if k > 35:
            k = ord(c) - 61
            if k > 61:
                raise ValueError('No Map')
    return k


def char2pos1(c):
    # case-insensitive mapping: 0-9 -> 0-9, A-Z and a-z -> 10-35, '_' -> 36
    if c == '_':
        k = 36
        return k
    k = ord(c) - 48
    if k > 9:
        k = ord(c) - 55
        if k > 35:
            k = ord(c) - (61 + 26)
            if k > 36:
                raise ValueError('No Map')
    return k


def _convert_dataset(split_name, filenames, dataset_dir):
    assert split_name in ['train', 'test']
    with tf.Session() as sess:
        output_filename = os.path.join(TFRECORD_DIR, split_name + '.tfrecords')
        with tf.python_io.TFRecordWriter(output_filename) as tfrecord_writer:
            for i, filename in enumerate(filenames):
                try:
                    sys.stdout.write('\r>> Converting image %d/%d' % (i + 1, len(filenames)))
                    sys.stdout.flush()
                    image_data = Image.open(filename)
                    image_data = image_data.resize((224, 224))
                    image_data = np.array(image_data.convert('L'))
                    image_data = image_data.tobytes()
                    # the first character of the file name is the label
                    # (note: '\\' assumes Windows paths; use os.sep elsewhere)
                    labels = filename.split('\\')[-1][0:1]
                    print(labels)
                    num_labels = []
                    num_labels.append(int(char2pos1(labels)))
                    example = image_to_tfexample(image_data, num_labels[0])
                    tfrecord_writer.write(example.SerializeToString())
                    # for the four-character (whole image) version:
                    # for j in range(4):
                    #     num_labels.append(int(char2pos1(labels[j])))
                    # example = image_to_tfexample(image_data, num_labels[0], num_labels[1], num_labels[2], num_labels[3])
                    # tfrecord_writer.write(example.SerializeToString())
                except IOError as e:
                    print('Could not read:', filename)
                    print('Error:', e)
                    print('Skip it\n')
    sys.stdout.write('\n')
    sys.stdout.flush()


if _dataset_exists(TFRECORD_DIR):
    print('tfrecord file exists')
else:
    photo_filenames = _get_filenames_and_classes(DATASET_DIR)
    random.seed(_RANDOM_SEED)
    random.shuffle(photo_filenames)
    training_filenames = photo_filenames[_NUM_TEST:]
    testing_filenames = photo_filenames[:_NUM_TEST]
    _convert_dataset('train', training_filenames, DATASET_DIR)
    _convert_dataset('test', testing_filenames, DATASET_DIR)
    print('Finished')
Model training code:
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time : 2019/4/30 10:59
# @Author : shm
# @Site :
# @File : MyTensorflowTrain.py
# @Software: PyCharm
import os
import tensorflow as tf
from PIL import Image
from nets import nets_factory
import numpy as np

# number of distinct characters (digits + case-insensitive letters)
CHAR_SET_LEN = 36
# image height
IMAGE_HEIGHT = 30
# image width
IMAGE_WIDTH = 16
# batch size
BATCH_SIZE = 100
# path of the tfrecord training file
TFRECORD_FILE = "./TFrecord/train.tfrecords"

# placeholder
x = tf.placeholder(tf.float32, [None, 224, 224])
y0 = tf.placeholder(tf.float32, [None])

# learning rate
lr = tf.Variable(0.003, dtype=tf.float32)


def read_and_decode(filename):
    '''Read samples back out of the tfrecord file.'''
    # build a file name queue
    filename_queue = tf.train.string_input_producer([filename])
    reader = tf.TFRecordReader()
    # returns the file name and the serialized example
    _, serialized_example = reader.read(filename_queue)
    features = tf.parse_single_example(serialized_example,
                                       features={
                                           'image': tf.FixedLenFeature([], tf.string),
                                           'label0': tf.FixedLenFeature([], tf.int64)
                                       })
    # decode the image bytes
    image = tf.decode_raw(features['image'], tf.uint8)
    # tf.train.shuffle_batch requires a fully defined shape
    image = tf.reshape(image, [224, 224])
    # preprocessing: scale pixel values to [-1, 1]
    image = tf.cast(image, tf.float32) / 255.0
    image = tf.subtract(image, 0.5)
    image = tf.multiply(image, 2.0)
    # label
    label0 = tf.cast(features['label0'], tf.int32)
    return image, label0


# image data and labels
image, label0 = read_and_decode(TFRECORD_FILE)

# shuffle_batch randomly shuffles the samples
image_batch, label_batch0 = tf.train.shuffle_batch(
    [image, label0], batch_size=BATCH_SIZE,
    capacity=50000, min_after_dequeue=10000, num_threads=1)

# network definition
train_network_fn = nets_factory.get_network_fn(
    'alexnet_v2',
    num_classes=CHAR_SET_LEN,
    weight_decay=0.0005,
    is_training=True)

with tf.Session() as sess:
    # inputs: a tensor of size [batch_size, height, width, channels]
    X = tf.reshape(x, [BATCH_SIZE, 224, 224, 1])
    # feed the data through the network
    logits0, end_points = train_network_fn(X)
    # convert the labels to one-hot form
    one_hot_labels0 = tf.one_hot(indices=tf.cast(y0, tf.int32), depth=CHAR_SET_LEN)
    # loss
    loss0 = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=logits0, labels=one_hot_labels0))
    # total loss
    total_loss = (loss0)
    # optimize total_loss
    optimizer = tf.train.AdamOptimizer(learning_rate=lr).minimize(total_loss)
    # accuracy
    correct_prediction0 = tf.equal(tf.argmax(one_hot_labels0, 1), tf.argmax(logits0, 1))
    accuracy0 = tf.reduce_mean(tf.cast(correct_prediction0, tf.float32))
    # saver for writing checkpoints
    saver = tf.train.Saver()
    # initialize variables
    sess.run(tf.global_variables_initializer())
    # coordinator that manages the reader threads
    coord = tf.train.Coordinator()
    # start the QueueRunners; the file name queue is now populated
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    for i in range(60001):
        # fetch one batch of data and labels
        b_image, b_label0 = sess.run([image_batch, label_batch0])
        # run one optimization step
        sess.run(optimizer, feed_dict={x: b_image, y0: b_label0})
        # report loss and accuracy every 20 iterations
        if i % 20 == 0:
            # decay the learning rate every 2000 iterations
            if i % 2000 == 0:
                sess.run(tf.assign(lr, lr / 3))
            acc0, loss_ = sess.run([accuracy0, total_loss], feed_dict={x: b_image, y0: b_label0})
            learning_rate = sess.run(lr)
            print("Iter:%d Loss:%.3f Accuracy:%.2f Learning_rate:%.4f" % (i, loss_, acc0, learning_rate))
            # save the model once it is accurate enough
            if acc0 > 0.99:
                saver.save(sess, "./models/crack_captcha_model", global_step=i)
                break
            if i == 60000:
                saver.save(sess, "./models/crack_captcha_model", global_step=i)
                break
    # ask the other threads to stop
    coord.request_stop()
    # join returns only after all other threads have shut down
    coord.join(threads)
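Model test code: it restores a trained checkpoint and measures prediction accuracy on the test TFRecord file: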
#coding=utf-8
import os
import tensorflow as tf
from PIL import Image
from nets import nets_factory
import numpy as np
import matplotlib.pyplot as plt

CHAR_SET_LEN = 36
IMAGE_HEIGHT = 30
IMAGE_WIDTH = 16
BATCH_SIZE = 1
TFRECORD_FILE = "./TFrecord/test.tfrecords"

# placeholder
x = tf.placeholder(tf.float32, [None, 224, 224])


def read_and_decode(filename):
    filename_queue = tf.train.string_input_producer([filename])
    reader = tf.TFRecordReader()
    _, serialized_example = reader.read(filename_queue)
    features = tf.parse_single_example(serialized_example,
                                       features={
                                           'image': tf.FixedLenFeature([], tf.string),
                                           'label0': tf.FixedLenFeature([], tf.int64),
                                       })
    image = tf.decode_raw(features['image'], tf.uint8)
    # keep an unnormalized copy for visualization
    image_raw = tf.reshape(image, [224, 224])
    # normalized copy that is fed to the network
    image = tf.reshape(image, [224, 224])
    image = tf.cast(image, tf.float32) / 255.0
    image = tf.subtract(image, 0.5)
    image = tf.multiply(image, 2.0)
    label0 = tf.cast(features['label0'], tf.int32)
    return image, image_raw, label0


image, image_raw, label0 = read_and_decode(TFRECORD_FILE)

image_batch, image_raw_batch, label_batch0 = tf.train.shuffle_batch(
    [image, image_raw, label0], batch_size=BATCH_SIZE,
    capacity=50000, min_after_dequeue=10000, num_threads=1)

train_network_fn = nets_factory.get_network_fn(
    'alexnet_v2', num_classes=CHAR_SET_LEN, weight_decay=0.0005, is_training=False)

with tf.Session() as sess:
    # inputs: a tensor of size [batch_size, height, width, channels]
    X = tf.reshape(x, [BATCH_SIZE, 224, 224, 1])
    logits0, end_points = train_network_fn(X)
    # predicted class = argmax over the logits
    predict0 = tf.reshape(logits0, [-1, CHAR_SET_LEN])
    predict0 = tf.argmax(predict0, 1)
    sess.run(tf.global_variables_initializer())
    sess.run(tf.local_variables_initializer())
    # restore the trained checkpoint
    saver = tf.train.Saver()
    saver.restore(sess, './models/crack_captcha_model-1080')
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    count = 0
    for i in range(500):
        # fetch one test sample
        try:
            b_image, b_image_raw, b_label0 = sess.run([image_batch, image_raw_batch, label_batch0])
        except Exception as e:
            print(e)
        img = Image.fromarray(b_image_raw[0], 'L')
        print('label:', b_label0)
        # run the prediction
        label0 = sess.run(predict0, feed_dict={x: b_image})
        print('predict:', label0)
        if b_label0[0] == label0[0]:
            count = count + 1
        print(count)
    coord.request_stop()
    coord.join(threads)
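With a trained model on disk, recognition can be exposed as an HTTP service. The Flask application below loads both model versions at startup and provides a /Recognition endpoint that accepts a base64-encoded image (imgdata) plus a model selector (module):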
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time : 2019/10/14 10:25
# @Author : shm
# @Site :
# @File : YZM_Service.py
# @Software: PyCharm
from flask import Flask, request, render_template
import tensorflow as tf
from PIL import Image
from nets import nets_factory
import numpy as np
import base64
from io import BytesIO


def num2char(num):
    '''Map a class index back to its character code (inverse of char2pos1).'''
    if num < 10:
        return (num + ord('0'))
    elif num < 36:
        return (num - 10 + ord('a'))
    elif num == 36:
        return (ord('_'))
    else:
        raise ValueError('Error')


def splitimage(img, rownum, colnum):
    '''Split an image into rownum*colnum sub-images.'''
    w, h = img.size
    if rownum <= h and colnum <= w:
        rowheight = h // rownum
        colwidth = w // colnum
        r = 0
        imlist = []
        for c in range(colnum):
            box = (c * colwidth, r * rowheight, (c + 1) * colwidth, (r + 1) * rowheight)
            imlist.append(img.crop(box))
        return imlist


def ImageReshap(img):
    '''Preprocess: resize to 224*224 and convert to grayscale.'''
    image_data = img.resize((224, 224))
    image_data = np.array(image_data.convert('L'))
    return image_data


class LoadModel_v1:
    def __init__(self, model_path, char_set_len=36):
        '''
        :param model_path: path of the model checkpoint
        :param char_set_len: number of distinct characters
        '''
        self.char_set_len = char_set_len
        g = tf.Graph()
        with g.as_default():
            self.sess = tf.Session(graph=g)
            self.graph = self.build_graph()
            BATCH_SIZE = 1
            self.x = tf.placeholder(tf.float32, [None, 224, 224])
            self.img = tf.placeholder(tf.float32, None)
            # preprocessing: scale pixel values to [-1, 1]
            image_data1 = tf.cast(self.img, tf.float32) / 255.0
            image_data2 = tf.subtract(image_data1, 0.5)
            image_data3 = tf.multiply(image_data2, 2.0)
            self.image_batch = tf.reshape(image_data3, [1, 224, 224])
            X = tf.reshape(self.x, [BATCH_SIZE, 224, 224, 1])
            self.logits0, self.end_points = self.graph(X)
            self.sess.run(tf.global_variables_initializer())
            saver = tf.train.Saver()
            saver.restore(self.sess, model_path)

    def build_graph(self):
        '''Build the network used for inference.'''
        train_network_fn = nets_factory.get_network_fn(
            'alexnet_v2', num_classes=self.char_set_len, weight_decay=0.0005, is_training=False)
        return train_network_fn

    def recognize(self, image):
        '''Recognize a single-character image.'''
        try:
            inputdata = self.sess.run(self.image_batch, feed_dict={self.img: image})
            predict0 = tf.reshape(self.logits0, [-1, self.char_set_len])
            predict0 = tf.argmax(predict0, 1)
            label = self.sess.run(predict0, feed_dict={self.x: inputdata})
            text = chr(num2char(int(label[0])))
            return text
        except Exception as e:
            print("recognize", e)
            return ""

    def screen_shot(self, img):
        '''Preprocessing: crop the main region of the captcha.'''
        try:
            box = (36, 0, 100, 30)
            return img.crop(box)
        except Exception as e:
            print("screenshot:", e)
            return None

    def img_to_text(self, imgdata):
        '''Convert raw image bytes to the recognized text.'''
        yzmstr = ""
        with BytesIO() as iofile:
            iofile.write(imgdata)
            with Image.open(iofile) as img:
                img = self.screen_shot(img)
                imglist = splitimage(img, 1, 4)
                text = []
                for im in imglist:
                    imgreshap = ImageReshap(im)
                    yzmstr = self.recognize(imgreshap)
                    text.append(yzmstr)
                yzmstr = "".join(text)
        return yzmstr


class LoadModel_v2(LoadModel_v1):
    def __init__(self, model_path):
        super(LoadModel_v2, self).__init__(model_path)

    def img_to_text(self, imgdata):
        # the second captcha type needs no cropping, so split directly
        yzmstr = ""
        with BytesIO() as iofile:
            iofile.write(imgdata)
            with Image.open(iofile) as img:
                imglist = splitimage(img, 1, 4)
                text = []
                for im in imglist:
                    imgreshap = ImageReshap(im)
                    yzmstr = self.recognize(imgreshap)
                    text.append(yzmstr)
                    print(yzmstr)
                yzmstr = "".join(text)
        return yzmstr


app = Flask(__name__)


@app.route('/')
def index():
    return render_template('index.html')


@app.route('/Recognition', methods=['POST'])
def recognition():
    try:
        imgdata = request.form.get('imgdata')
        module = request.form.get("module", "")
        if module == "v1":
            decodeData = base64.b64decode(imgdata)
            yzmstr = loadModel_model1.img_to_text(decodeData)
            return yzmstr
        elif module == "v2":
            decodeData = base64.b64decode(imgdata)
            yzmstr = loadModel_model2.img_to_text(decodeData)
            return yzmstr
        else:
            return "unknown channel"
    except Exception as e:
        return repr(e)


if __name__ == "__main__":
    # initialize model 1
    loadModel_model1 = LoadModel_v1("./models/crack_captcha_model-1080")
    # initialize model 2
    loadModel_model2 = LoadModel_v2("./models/crack_captcha.model-2140")
    app.run(host='0.0.0.0', port=2002, debug=True)
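Finally, a small client script batch-tests the service against a directory of labeled captcha images, where each file name starts with the true four-character answer: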
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time : 2019/5/6 18:46
# @Author : shm
# Site :
# @File : test.py
# @Software: PyCharm
import base64
import requests
import os

# recognition API endpoint
url = "http://127.0.0.1:2002/Recognition"
# directory of labeled test captchas
path = "./image/pic"
# model version to test
model = "v1"
#model = "v2"

imglist = os.listdir(path)
count = 0
nums = len(imglist)
for file in imglist:
    try:
        dir = os.path.join(path, file)
        with open(dir, "rb") as fp:
            database64 = base64.b64encode(fp.read())
        form = {
            'module': model,
            'imgdata': database64
        }
        r = requests.post(url, data=form)
        res = r.text
        # the true label is the first four characters of the file name
        yuan = file[0:4]
        if yuan.lower() == res:
            count = count + 1
            print("Success")
        else:
            print(file[0:4], "==", res)
    except Exception as e:
        print(e)
print("Model %s ----- total: %s ----- correctly recognized: %s" % (model, nums, count))
This article covered how to simulate and generate captcha training samples, and how to split captchas for recognition. Follow-up articles will build on this one to cover whole-image model training without splitting, a scheme for variable-length captcha recognition, and a general-purpose deep-learning captcha recognition model. The project code will be pushed to a Git repository later, and the address will be added here; that is all for today. If anything here is unclear or you have questions, feel free to reach me on QQ: 1071830794; let's learn and grow together.