Blur Classification Project
Face Blur Classification
This project performs 10-level blur classification on Asian and foreign face images. The overall pipeline is:
1. Build the data: blur the existing face images into 10 severity levels, typically with Gaussian blur, noise blur, and motion blur.
2. Once the blur levels are generated, augment the data with left-right flips and normalize it.
3. Convert all of the data into TFRecord files with the TensorFlow framework, using multiple worker processes; see the code for details.
4. Build the neural network (8 convolutional layers and 2 pooling layers) and compute the loss and the predictions.
5. Train on the training set, save the model, and evaluate on the test set.
I. Dataset Construction
More than 20,000 clear source images are split into three parts, and a different blur type is applied to each part: Gaussian blur, noise blur, and motion blur.
The original sample data is shown in the post's screenshot (omitted here):
1. Gaussian blur: 10 blur levels
cv2.GaussianBlur(img, (15, 15), sigma) is applied to each clear source image with 10 different sigma values; see gauss_class.py:
#!/usr/bin/env python
# _*_ coding:utf-8 _*_
import cv2
import numpy as np
import sys,os
# Create the output directories for the 10 blur levels
for i in range(0,10):
try:
path = os.makedirs('./class/gauss_class/gauss_blur_%d'%i)
# path = os.makedirs('./test/gauss_class/gauss_blur_%d'%i)
except:
continue
png_dir=sys.argv[1]
gt_imdb=[]
count = 0
# Collect the image paths in the input directory
for item in os.listdir(png_dir):
gt_imdb.append(os.path.join(png_dir,item))
# Process each image
for jpg_file in gt_imdb:
if os.path.exists(jpg_file):
img = cv2.imread(jpg_file,0)
else:
print jpg_file
continue
h,w = img.shape
img_z=np.zeros((h,w),dtype=np.uint8)
img_o=np.ones((h,w),dtype=np.uint8)
count += 1
mask_img = np.where(img>0, img_o, img_z)
step = 0.2
if count % 10000 ==0:
print(count)
jj =0
for j in range(10):
        # Gaussian blur with a 15x15 kernel; the sigma grows with the level: 0.2 * (j + 1)
        img_b = cv2.GaussianBlur(img, (15, 15), step*(j+1))
img_b = np.where(mask_img>0, img_b, img_z)
jj +=1
savename = "./class/gauss_class/gauss_blur_%d/gauss_blur_%d.jpg"%(j,count)
# savename = "./class/gauss_class/gauss_blur_%d/gauss_blur_%d.jpg"%(jj,count)
cv2.imwrite(savename, img_b)
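To eyeball the 10 levels, here is a quick standalone check (a hypothetical snippet, not part of the project scripts; sample_face.jpg is an assumed grayscale crop):
import cv2
import numpy as np
img = cv2.imread('sample_face.jpg', 0)
# sigma runs from 0.2 (level 0, near-sharp) to 2.0 (level 9, heavy blur)
levels = [cv2.GaussianBlur(img, (15, 15), 0.2 * (j + 1)) for j in range(10)]
cv2.imwrite('gauss_levels_preview.jpg', np.hstack(levels))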
2. Gaussian noise: 10 blur levels
Gaussian noise with a fixed mean and a per-level spread (var = 0.008 * (j + 1), i.e. from 0.008 up to 0.08) is added to each source image; see noise_class.py:
# encoding:utf-8
import cv2
import numpy as np
import base64
import json
import sys,os
import random
import tqdm
# Thresholds for an earlier random-variance variant (see the commented-out
# line in the loop below); the active code uses a fixed per-level variance
gauss_thd_0 = 0.1
gauss_thd_1 = 0.2
def sp_noise(image,prob):
    '''
    Add salt-and-pepper noise (defined for reference; only gauss_noise is used below).
    prob: fraction of pixels to corrupt
    '''
output = np.zeros(image.shape,np.uint8)
thres = 1 - prob
for i in range(image.shape[0]):
for j in range(image.shape[1]):
rdn = random.random()
if rdn < prob:
output[i][j] = 0
elif rdn > thres:
output[i][j] = 255
else:
output[i][j] = image[i][j]
return output
def gauss_noise(image, mean=0, var=0.3):
    '''
    Add Gaussian noise.
    mean : mean of the noise
    var  : spread of the noise (passed to np.random.normal as its scale argument)
    '''
    image = np.array(image/255.0, dtype=float)
    # Draw the noise at a random lower resolution and upscale it,
    # which produces coarser-grained noise patterns
    scale = random.uniform(0.3,1.0)
    h, w = image.shape
    h_s = int(scale*h)
    w_s = int(scale*w)
    noise = np.random.normal(mean, var, (h_s,w_s))
    noise = cv2.resize(noise,(w,h))
# out = image
# out = noise
out = image + noise
# if out.min() < 0:
# low_clip = -1.
# else:
# low_clip = 0.
low_clip = 0.
out = np.clip(out, low_clip, 1.0)
out = np.uint8(out*255)
#cv.imshow("gasuss", out)
return out
for i in range(0,10):
try:
path = os.makedirs('./class/noise_class/noise_blur_%d'%i)
# path = os.makedirs('./test/noise_class/noise_blur_%d'%i)
except:
continue
png_dir=sys.argv[1]
gt_imdb=[]
count = 0
for item in os.listdir(png_dir):
gt_imdb.append(os.path.join(png_dir,item))
for jpg_file in gt_imdb:
if os.path.exists(jpg_file):
img = cv2.imread(jpg_file,0)
else:
print jpg_file
continue
h,w = img.shape
img_z=np.zeros((h,w),dtype=np.uint8)
img_o=np.ones((h,w),dtype=np.uint8)
count += 1
mask_img = np.where(img>0, img_o, img_z)
step = 0.008
# step = 0.02
jj =0
for j in range(10):
# prob = random.uniform(gauss_thd_0,gauss_thd_1)
prob = step*(j+1)
img_g = gauss_noise(img, var = prob)
img_g = np.where(mask_img>0, img_g, img_z)
jj += 1
savename = "./class/noise_class/noise_blur_%d/noise_blur_%d.jpg"%(j, count)
#savename = "./class/noise_class/noise_blur_%d/noise_blur_%d.jpg"%(jj, count)
cv2.imwrite(savename, img_g)
3. Motion blur: 10 blur levels
A motion-blur convolution kernel and anchor point are generated for a random angle per image, producing different directions of motion blur; see motion_class.py:
# coding=utf-8
import math
import numpy as np
import cv2
import random
import sys, os
# Generate the motion-blur convolution kernel (PSF) and its anchor point
def genaratePsf(length,angle):
EPS=np.finfo(float).eps
alpha = (angle-math.floor(angle/ 180) *180) /180 * math.pi
cosalpha = math.cos(alpha)
sinalpha = math.sin(alpha)
if cosalpha < 0:
xsign = -1
elif angle == 90:
xsign = 0
else:
xsign = 1
    psfwdt = 1
    # Kernel size
sx = int(math.fabs(length*cosalpha + psfwdt*xsign - length*EPS))
sy = int(math.fabs(length*sinalpha + psfwdt - length*EPS))
psf1=np.zeros((sy,sx))
half = length/2
    # psf1 has larger weights toward the top-left, decreasing toward the
    # bottom-right, i.e. motion from the bottom-right toward the top-left
for i in range(0,sy):
for j in range(0,sx):
psf1[i][j] = i*math.fabs(cosalpha) - j*sinalpha
rad = math.sqrt(i*i + j*j)
# print rad
if rad >= half and math.fabs(psf1[i][j]) <= psfwdt:
temp = half - math.fabs((j + psf1[i][j] * sinalpha) / cosalpha)
psf1[i][j] = math.sqrt(psf1[i][j] * psf1[i][j] + temp*temp)
                psf1[i][j] = psfwdt + EPS - math.fabs(psf1[i][j])
if psf1[i][j] < 0:
psf1[i][j] = 0
    # Motion toward the top-left: anchor at (0, 0)
    anchor = (0, 0)
    # Motion toward the top-right: anchor in the top-right corner; the kernel
    # is also flipped left-right so that weights grow toward the anchor
    if angle < 90 and angle > 0:
        psf1 = np.fliplr(psf1)
        anchor = (psf1.shape[1]-1, 0)
    elif angle > -90 and angle < 0:  # likewise: motion toward the bottom-right
        psf1 = np.flipud(psf1)
        psf1 = np.fliplr(psf1)
        anchor = (psf1.shape[1]-1, psf1.shape[0]-1)
    elif angle < -90:  # likewise: motion toward the bottom-left
        psf1 = np.flipud(psf1)
        anchor = (0, psf1.shape[0]-1)
psf1=psf1/psf1.sum()
return psf1,anchor
for i in range(0,10):
try:
path = os.makedirs('./class/motion_class/motion_blur_%d'%i)
# path = os.makedirs('./test/motion_class/motion_blur_%d'%i)
except:
continue
png_dir=sys.argv[1]
gt_imdb=[]
count = 0
count_e = 0
for item in os.listdir(png_dir):
gt_imdb.append(os.path.join(png_dir,item))
for jpg_file in gt_imdb:
if os.path.exists(jpg_file):
img = cv2.imread(jpg_file,0)
h,w = img.shape
else:
print jpg_file
continue
# if count > 100:
# break
img_z=np.zeros((h,w),dtype=np.uint8)
img_o=np.ones((h,w),dtype=np.uint8)
mask_img = np.where(img>0, img_o, img_z)
# step = 1.3
step = 1
count += 1
if count % 10000 == 0:
print(count)
jj = 0
    for j in range(0,10):
        length = step * j + 3
        # step += 0.04
        # length = random.randint(11,25)
        angle = random.randint(280,350)
        kernel,anchor=genaratePsf(length,angle)
try:
img_m=cv2.filter2D(img,-1,kernel,anchor=anchor)
except:
count_e += 1
print count_e
continue
jj += 1
img_m = np.where(mask_img>0, img_m, img_z)
#savename = "./class/motion_class/motion_blur_%d/motion_blur_%d.jpg"%(j, count)
savename = "./class/motion_class/motion_blur_%d/motion_blur_%d.jpg"%(jj, count)
cv2.imwrite(savename, img_m)
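As a quick sanity check of the kernel generator (a hypothetical standalone snippet; it assumes genaratePsf from above is in scope and that sample_face.jpg exists):
import cv2
import random
img = cv2.imread('sample_face.jpg', 0)
# Strongest level used above: length = 1 * 9 + 3 = 12, angle in [280, 350]
kernel, anchor = genaratePsf(12, random.randint(280, 350))
blurred = cv2.filter2D(img, -1, kernel, anchor=anchor)
cv2.imwrite('motion_level9_preview.jpg', blurred)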
II. Data Preprocessing
1. Left-right flips
Flipping every image left-right enlarges the dataset; see flip.py:
#!/usr/bin/env python
# _*_ coding:utf-8 _*_
import cv2
import numpy as np
import sys,os
import random
png_dir = sys.argv[1]
gt_imdb = []
file_dict = {}
for item in os.listdir(png_dir):
dir_path = os.path.join(png_dir,item)
dirs = os.listdir(dir_path)
# try:
# path = os.makedirs('./normal/normal_%s/%s'%(png_dir,item))
# except:
# continue
for d in dirs:
gt_imdb.append(os.path.join(dir_path,d))
# print(len(gt_imdb))
file_dict[item] = gt_imdb
gt_imdb = []
for k in file_dict.keys():
count = 0
for jpg_file in file_dict[k]:
if os.path.exists(jpg_file):
img = cv2.imread(jpg_file,0)
else:
print jpg_file
continue
count += 1
if count % 1000 == 0:
print count
        img_a = cv2.flip(img,1,dst=None)
        # Flipped copies go to a fixed output directory, one subfolder per class
        savename = "/home/xjyu/lgx/end_face/res_w4/%s/flip_%d.jpg"%(k,count)
        cv2.imwrite(savename, img_a)
To enrich the data further, the sharpest original images were also placed into class 0 and the most blurred originals into class 9. Later, as the test samples changed, additional data such as half-lit ("yin-yang") faces and frosted-glass faces was added, and the foreign-face data was processed separately.
2. Normalization
Each image is normalized against its mean: brightness is rescaled so that the global mean becomes 128, and the contrast is rescaled so that the mean absolute deviation from 128 becomes 30. This puts all samples on an equal footing for training; see normal.py:
#!/usr/bin/env python
# _*_ coding:utf-8 _*_
import cv2
import numpy as np
import sys,os
import random
png_dir = sys.argv[1]
gt_imdb = []
file_dict = {}
for item in os.listdir(png_dir):
dir_path = os.path.join(png_dir,item)
dirs = os.listdir(dir_path)
try:
path = os.makedirs('./normal/normal_%s/%s'%(png_dir,item))
except:
continue
for d in dirs:
gt_imdb.append(os.path.join(dir_path,d))
# print(len(gt_imdb))
file_dict[item] = gt_imdb
gt_imdb = []
for k in file_dict.keys():
count = 0
for jpg_file in file_dict[k]:
if os.path.exists(jpg_file):
img = cv2.imread(jpg_file,0)
else:
print jpg_file
continue
img_a = img.astype(np.float64)
x ,y = img.shape[0:2]
img_o = np.ones((x,y),dtype=np.float64) * 128
count += 1
if count % 1000 == 0:
print count
        mean = np.mean(img_a)
        # Brightness: rescale so that the global mean becomes 128
        scale = 128.0/float(mean)
        img_b = img_a * scale
        # Contrast: rescale the deviations from 128 so their mean becomes 30
        img_diff = abs(img_b - img_o)
        mean_diff = np.mean(img_diff)
        scale_2 = 30.0/mean_diff
        img_e = img_b - img_o
        img_f = img_e * scale_2
        img_g = img_f + img_o
savename = "./normal/normal_%s/%s/%s_%d.jpg"%(png_dir,k,k,count)
# print(savename)
img_g=np.clip(img_g,0,255)
cv2.imwrite(savename, img_g.astype(np.uint8))
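A worked example with hypothetical numbers: an image with mean 64 is first scaled by 128/64 = 2.0; if the mean absolute deviation from 128 is then 60, the contrast scale is 30/60 = 0.5, so a source pixel of 100 maps to 100 × 2.0 = 200 and then to (200 − 128) × 0.5 + 128 = 164.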
After normalization the dataset totals about 420,000 images.
3. Index files
The normalized images are listed by class into a txt file, which is then shuffled; see gen_txt_all.py and shuffle.py:
gen_txt_all.py
import sys,os
import time
import cv2
import json
import random
path = sys.argv[1]
savefile = sys.argv[2]
# Map the class folder index to a soft sharpness label in [0, 1]
# (note that 0.5 is skipped, widening the gap between classes 4 and 5)
num_dict = {"0":1.0,"1":0.9,"2":0.8,"3":0.7,"4":0.6,"5":0.4,"6":0.3,"7":0.2,"8":0.1,"9":0.0}
fw = open(savefile,'w')
files = os.listdir(path)
for item in files:
dir_path = os.path.join(path,item)
dirs = os.listdir(dir_path)
for items in dirs:
d_path = os.path.join(dir_path,items)
d_s = os.listdir(d_path)
gt_imdb = []
str_num = items.split('_')[-1]
num = num_dict[str_num]
for d in d_s:
gt_imdb.append(os.path.join(d_path,d))
# random.shuffle(gt_imdb)
# print(len(gt_imdb))
for jpgFile in gt_imdb:
fw.writelines(jpgFile)
fw.writelines(' %.1f\n'%num)
fw.close()
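Each line of the resulting file is an image path followed by its soft label, e.g. (hypothetical paths):
./normal/normal_class/gauss_blur_0/gauss_blur_12.jpg 1.0
./normal/normal_class/gauss_blur_7/gauss_blur_12.jpg 0.2
The label is looked up from the trailing _<digit> of each class folder name.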
shuffle.py:
#coding:utf-8
import os
import random
import sys
import time
txt_file = sys.argv[1]
fw_file = sys.argv[2]
dataset = []
fr=open(txt_file, 'r')
fw=open(fw_file, 'w')
for line in fr.readlines():
dataset.append(line)
random.shuffle(dataset)
for data in dataset:
fw.writelines(data)
fw.close()
III. Generating TFRecord Files from the Dataset
The index txt file is converted into a TFRecord file for storage and reading; see gen_tf_records_blur_v2.py.
A detailed TFRecord reference (in Chinese): https://blog.csdn.net/sinat_29957455/article/details/83316173
# encoding: utf-8
import numpy as np
import tensorflow as tf
import os
import cv2
from tqdm import tqdm
import re
import sys
import random
sys.path.append('/home/xjyu/lgx/')
print(sys.path)
from config_blur import cfg
from multiprocessing import Process,Manager
sp_thd_0 = 0.006
sp_thd_1 = 0.03
gauss_thd_0 = 0.02
gauss_thd_1 = 0.1
def sp_noise(image,prob):
    '''
    Add salt-and-pepper noise (carried over from the data scripts; unused here).
    prob: fraction of pixels to corrupt
    '''
output = np.zeros(image.shape,np.uint8)
thres = 1 - prob
for i in range(image.shape[0]):
for j in range(image.shape[1]):
rdn = random.random()
if rdn < prob:
output[i][j] = 0
elif rdn > thres:
output[i][j] = 255
else:
output[i][j] = image[i][j]
return output
def gauss_noise(image, mean=0, var=0.3):
    '''
    Add Gaussian noise (carried over from the data scripts; unused here).
    mean : mean of the noise
    var  : spread passed to np.random.normal as its scale argument
    '''
image = np.array(image/255.0, dtype=float)
noise = np.random.normal(mean, var, image.shape)
# out = image
# out = noise
out = image + noise
# if out.min() < 0:
# low_clip = -1.
# else:
# low_clip = 0.
low_clip = 0.
out = np.clip(out, low_clip, 1.0)
out = np.uint8(out*255)
#cv.imshow("gasuss", out)
return out
def load_file(file_path, file_dir):
    '''
    Load the image paths and labels from the index txt file
    '''
dataset = []
with open(file_path, 'r') as f:
lines = f.readlines()
for line in lines:
data_example = dict()
ss = line.strip().split(' ')
img_path = os.path.join(file_dir,ss[0])
# label1 = float(line.strip().split(' ')[1])
# label2 = float(line.strip().split(' ')[3])
data_example['filename'] = img_path
data_example['label'] = float(ss[1])
# if float(ss[1]) > 0.45:
# data_example['label'] = 1.0
# else:
# data_example['label'] = 0.0
dataset.append(data_example)
return dataset
def extract_image(image_path, is_resize=True):
    '''
    Read a grayscale image, randomly flip it, pad it onto a
    cfg.height x cfg.width canvas, and build a binary foreground mask
    downsampled to the heatmap size.
    '''
img = cv2.imread(image_path, 0)
# img_blur = cv2.GaussianBlur(img, (5, 5), 1)
# img_abs = np.abs(img-img_blur)
try:
h, w = img.shape
except:
print image_path
# print image_path
if is_resize:
assert(h <= cfg.height)
assert(w <= cfg.width)
# image = cv2.resize(img, (cfg.height, cfg.height))
# cv2.imshow("img", image)
# cv2.waitKey(0)
# image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
if random.choice([0,1]) == 0:
img = cv2.flip(img, 1)
if w == cfg.width :
start_x = 0
else:
start_x = random.randint(0, cfg.width-w)
if h == cfg.height:
start_y = 0
else:
start_y = random.randint(0, cfg.height-h)
image = np.zeros((cfg.height,cfg.width),dtype=np.uint8)
image[start_y:start_y+h,start_x:start_x+w] = img[0:h,0:w]
# image = np.zeros((cfg.height,cfg.width, 2),dtype=np.uint8)
# image[start_y:start_y+h,start_x:start_x+w, 0] = img[0:h,0:w]
# image[start_y:start_y+h,start_x:start_x+w, 1] = img_abs[0:h,0:w]
z_img=np.zeros((cfg.height,cfg.width),dtype=np.uint8)
o_img=np.ones((cfg.height,cfg.width),dtype=np.uint8)*255
bmask=np.where(image<2,z_img,o_img)
# bmask=np.where(image[:,:,0]<2,z_img,o_img)
image_heatmap=cv2.resize(bmask,(cfg.heatmap_size,cfg.heatmap_size))
# image = image[:, :, 0]
    else:
        image = img
        # Build the mask from the unpadded image (mirrors the is_resize branch)
        z_img = np.zeros(image.shape, dtype=np.uint8)
        o_img = np.ones(image.shape, dtype=np.uint8)*255
        image_heatmap = cv2.resize(np.where(image < 2, z_img, o_img),
                                   (cfg.heatmap_size, cfg.heatmap_size))
# cv2.imshow('img', image)
# cv2.waitKey(0)
image_data = np.array(image, dtype='uint8')
return image_data, image_heatmap
def extract_heatmap_image(filename):
    '''
    Load the precomputed mask for a "normalize"-path image (unused here).
    '''
heatmap_name = filename.replace("normalize", "mask")
image = cv2.imread(heatmap_name,0)
image = cv2.resize(image, (cfg.heatmap_size, cfg.heatmap_size))
# cv2.imshow('aaaa',image)
# cv2.waitKey(0)
image_data = np.array(image, dtype='uint8')
return image_data
def _add_to_tfrecord(dataset, index, example_list, total_len, tfrecord_writer, read_lock, write_lock):
while True:
read_lock.acquire()
if index.value > total_len - 1:
print 'process end !!'
print index.value, total_len
read_lock.release()
break
num = index.value
index.value += 1
read_lock.release()
data_next=dataset[num]
filename = data_next['filename']
label = data_next['label']
if num%1000 == 0:
print num, filename
# time.sleep(0.1)
img, img_heatmap = extract_image(filename, is_resize=True)
img = img.tostring()
img_heatmap = img_heatmap.tostring()
example = tf.train.Example(features=tf.train.Features(feature={
'feature': tf.train.Feature(bytes_list = tf.train.BytesList(value=[img])),
'label' : tf.train.Feature(float_list = tf.train.FloatList(value=[label])),
'heatmap': tf.train.Feature(bytes_list = tf.train.BytesList(value=[img_heatmap]))
}))
# image_data, height, width = _process_image_withoutcoder(filename)
# example = _convert_to_example_simple(data_next, image_data)
write_lock.acquire()
example_list.append(example.SerializeToString())
write_lock.release()
def run_encode(file_path, file_dir, tf_records_filename):
    '''
    Encode the whole dataset into a single TFRecord file using 24 worker processes
    '''
dataset= load_file(file_path, file_dir)
writer = tf.io.TFRecordWriter(tf_records_filename)
total_len = len(dataset)
manager=Manager()
read_lock = manager.Lock()
write_lock = manager.Lock()
index = manager.Value('i',0)
example_list = manager.list()
proc=[Process(target=_add_to_tfrecord,args=(dataset,index,example_list,total_len,writer,read_lock,write_lock)) for i in xrange(24)]
for p in proc:
p.start()
for p in proc:
p.join()
for item in example_list:
writer.write(item)
# for i in tqdm(range(imgs_path.shape[0])):
# img = extract_image(imgs_path[i], is_resize=True)
# # print imgs_path[i], labels[i]
# img = img.tostring()
# img_heatmap = extract_heatmap_image(imgs_path[i], labels1[i], labels2[i])
# img_heatmap = img_heatmap.tostring()
# example = tf.train.Example(features=tf.train.Features(feature={
# 'feature': tf.train.Feature(bytes_list = tf.train.BytesList(value=[img])),
# 'heatmap': tf.train.Feature(bytes_list = tf.train.BytesList(value=[img_heatmap]))
# }))
# writer.write(example.SerializeToString())
writer.close()
if __name__ == '__main__':
# file_path = '/home/xjyu/sourcecode/liveness/blur_quantize/prepare_data/train.txt'
# file_dir = '/home/xjyu/blur_data_729/images'
file_path = '/home/xjyu/lgx/end_face/txt/txt_class_shuffle/shuffle_class10_8.txt'
file_dir = '/home/xjyu/lgx/end_face'
tf_records_filename = '/home/xjyu/lgx/end_face/tf_recoder/shuffle_class10_8.records'
run_encode(file_path, file_dir, tf_records_filename)
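The data-loading script gen_data_batch_blur.py used during training below is not listed in this post; a minimal parsing sketch consistent with the feature keys written above (assuming TF 1.x) might look like:
import tensorflow as tf
from config_blur import cfg

def parse_example(serialized):
    # The keys must match those written by _add_to_tfrecord above
    feats = tf.parse_single_example(serialized, features={
        'feature': tf.FixedLenFeature([], tf.string),
        'label':   tf.FixedLenFeature([], tf.float32),
        'heatmap': tf.FixedLenFeature([], tf.string),
    })
    img = tf.reshape(tf.decode_raw(feats['feature'], tf.uint8),
                     [cfg.height, cfg.width, 1])
    heatmap = tf.reshape(tf.decode_raw(feats['heatmap'], tf.uint8),
                         [cfg.heatmap_size, cfg.heatmap_size])
    return img, heatmap, feats['label']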
IV. Building the Convolutional Neural Network
The network uses 8 convolutional layers and 2 pooling layers to learn from the images; the predictions are compared against the ground truth to compute the loss.
The shared configuration lives in config_blur.py:
from easydict import EasyDict as edict
import numpy as np
import tensorflow as tf
import os
cfg = edict()
cfg.classes = 2
cfg.names = ['False', 'True']
cfg.batch_size = 256
cfg.height = 96
cfg.width = 96
cfg.channel = 1
cfg.heatmap_size = 6
cfg.theta = 0.7
# cfg.data_path = '/home/xjyu/lgx/blur_quantize/data/tfrecord_files/train_48x48_20190617.records'
# cfg.ckpt_path = '/home/xjyu/lgx/blur_quantize/ckpt/20190617_48x48/' # v35 nice
# cfg.data_path = '/home/xjyu/lgx/blur_quantize/data/tfrecord_files/train_128_rgb_v1.records'
# cfg.data_path = '/home/xjyu/lgx/end_face/tf_recoder/shuffle_class_f.records'
cfg.data_path = '/home/xjyu/lgx/end_face/tf_recoder/shuffle_class10_8.records'
# cfg.ckpt_path = '/home/xjyu/lgx/end_face/ckpt/ckpt66_11/' # v35 nice ckpt_f/livenessDet_blur_3-27
cfg.ckpt_path = '/home/xjyu/lgx/end_face/ckpt/ckpt10_7/' # v35 nice
# training options
cfg.train = edict()
cfg.train.ignore_thresh = .5
cfg.train.ohem_ratio = 0.1
cfg.train.momentum = 0.9
cfg.train.bn_training = True
cfg.train.weight_decay = 0.000001 # 0.00004
cfg.train.learning_rate = [1e-3, 1e-4, 1e-5]
cfg.train.max_batches = 2000000 # 63000
cfg.train.lr_steps = [10000., 20000.]
cfg.train.lr_scales = [.1, .1]
cfg.train.num_gpus = 1
cfg.train.tower = 'tower'
cfg.train.learn_rate = 0.001
cfg.train.learn_rate_decay = 0.8 # 0.9
cfg.train.learn_rate_decay_epoch = 2
# cfg.train.num_samples = 353331
# cfg.train.num_samples = 353506
# cfg.train.num_samples = 479755
cfg.train.num_samples = 424165
# cfg.train.num_samples = 591102
cfg.epochs = 100
cfg.PRINT_LAYER_LOG = True
'''
quantization params
'''
cfg.P_BITW = 4
cfg.P_BITA = 4
cfg.P_BITG = 32
cfg.P_is_quantize = True
cfg.P_net_batchnorm = True
1. Building the network
Eight convolutional layers and two pooling layers learn from the images, and the model is quantized with the DoReFa utilities (dorefa.py).
The model is defined in network_cdcn.py:
#!/usr/bin/env python
# encoding: utf-8
import tensorflow as tf
import tensorflow.contrib.slim as slim
from tensorflow.python.framework import ops
import sys
sys.path.append('..')
import numpy as np
from config_blur import cfg
import re
from dorefa import get_dorefa
fw = None
fa = None
fg = None
PRINT_LAYER_LOG = cfg.PRINT_LAYER_LOG
is_quantize = cfg.P_is_quantize
from contextlib import contextmanager
@contextmanager
def custom_getter_scope(custom_getter):
    # Re-enter the current variable scope with a custom getter while
    # preserving the current name scope
    scope = tf.get_variable_scope()
    ns = tf.get_default_graph().get_name_scope()
    with tf.variable_scope(scope, custom_getter=custom_getter):
        with tf.name_scope(ns + '/' if ns else ''):
            yield
def remap_variables(fn):
    # Apply fn to every variable fetched inside the scope (used to quantize weights)
    def custom_getter(getter, *args, **kwargs):
        v = getter(*args, **kwargs)
        return fn(v)
    return custom_getter_scope(custom_getter)
def prelu(inputs):
alphas = tf.get_variable('alpha', inputs.get_shape()[-1],initializer=tf.constant_initializer(0.0),dtype=tf.float32)
pos = tf.nn.relu(inputs)
neg = alphas * (inputs - abs(inputs)) * 0.5
return pos + neg
def pnet_action_relu6(inputs):
    # (unused) relu6 activation with optional activation quantization
    res = tf.nn.relu6(inputs)
    if is_quantize:
        res = fa(res)
    return res
def p_net_prelu(inputs):  # (unused) PReLU with a learned per-channel affine
if cfg.P_net_batchnorm:
x = inputs
name = re.sub(r"tower_\d/", "", x.op.name)
N, H, W, oc = x.get_shape().as_list()
affine_k = tf.get_variable(name='potato/'+name+'/affine-k',
initializer=np.array(np.ones((1,1,1,oc)), dtype=np.float32))
affine_b = tf.get_variable(name='potato/'+name+'/affine-b',
initializer=np.array(np.zeros((1,1,1,oc)), dtype=np.float32))
inputs = (tf.abs(affine_k) + 1.0) * x + affine_b
alphas = tf.get_variable('alpha', inputs.get_shape()[-1],initializer=tf.constant_initializer(0.0),dtype=tf.float32)
    with tf.variable_scope("prelu") as sc:
pos = tf.nn.relu(inputs)
neg = inputs - abs(inputs)
neg = alphas * neg * 0.5
        if is_quantize:
            pos = fa(pos)
            neg = -fa(-neg)  # quantize the magnitude of the negative half
        res = pos + neg
    print("activation tensor name", res)
    return res
def action_relu6(inputs):
    # Learned per-channel affine used as the activation function; note that the
    # trailing relu6 is commented out below, so the affine output is returned directly
    if True:
x = inputs
name = re.sub(r"tower_\d/", "", x.op.name)
if "fc" not in name:
if len(x.shape) == 2:
N, oc = x.get_shape().as_list()
affine_k = tf.get_variable(name='potato/'+name+'/affine-k',
initializer=np.array(np.ones((1,oc)), dtype=np.float32))
affine_b = tf.get_variable(name='potato/'+name+'/affine-b',
initializer=np.array(np.zeros((1,oc)), dtype=np.float32))
else:
N, H, W, oc = x.get_shape().as_list()
affine_k = tf.get_variable(name='potato/'+name+'/affine-k',
initializer=np.array(np.ones((1,1,1,oc)), dtype=np.float32))
affine_b = tf.get_variable(name='potato/'+name+'/affine-b',
initializer=np.array(np.zeros((1,1,1,oc)), dtype=np.float32))
inputs = (tf.abs(affine_k) + 1.0) * x + affine_b
# res = tf.nn.relu6(inputs)
return inputs
# batch_norm_params_1 = {
# 'is_training': True, 'decay': 0.9995,
# 'epsilon': 1e-5, 'scale': True,
# 'updates_collections': ops.GraphKeys.UPDATE_OPS, 'center': True,
# #'variables_collections': [ tf.GraphKeys.TRAINABLE_VARIABLES ],
# 'trainable': cfg.train.bn_training,
# }
def network_arg_scope(
is_training=True, weight_decay=cfg.train.weight_decay, batch_norm_decay=0.9,
batch_norm_epsilon=1e-5, batch_norm_scale=False):
batch_norm_params = {
'is_training': is_training, 'decay': batch_norm_decay,
'epsilon': batch_norm_epsilon, 'scale': batch_norm_scale,
'updates_collections': ops.GraphKeys.UPDATE_OPS, 'center': False,
#'variables_collections': [ tf.GraphKeys.TRAINABLE_VARIABLES ],
'trainable': cfg.train.bn_training,
}
with slim.arg_scope(
[slim.conv2d, slim.separable_convolution2d],
weights_regularizer = slim.l2_regularizer(weight_decay),
weights_initializer = slim.variance_scaling_initializer(),
trainable = is_training,
# activation_fn = pnet_action_relu6,
activation_fn = action_relu6,
normalizer_fn = slim.batch_norm,
# normalizer_fn = None,
normalizer_params = batch_norm_params,
padding='same'):
with slim.arg_scope([slim.batch_norm], **batch_norm_params) as arg_sc:
return arg_sc
class Network(object):
def __init__(self):
pass
def inference(self, mode, inputs, scope='BioRecNet'):
is_training = mode
if is_quantize:
global fw,fa,fg
# BITW,BITA,BITG = [int(tmp) for tmp in bitwidth.split(",")]
BITW = cfg.P_BITW
BITA = cfg.P_BITA
BITG = cfg.P_BITG
            if fw is None:
fw, fa, fg = get_dorefa(BITW,BITA,BITG)
# global layer_num
# if layer_num > 1:
# layer_num = 1
def new_get_variable(v):
name = v.op.name
if not name.endswith('weights'):
return v
else:
if is_quantize:
return fw(v)
else:
return v
def conv2d(inputs, c_outputs, s, name, padding='same'):
# global layer_num
# layer_num = layer_num + 1
output = slim.conv2d(inputs, num_outputs=c_outputs, kernel_size=[3,3], stride=s, padding=padding, scope=name)
if is_quantize:
output = fa(output)
output = fg(output)
return output
def max_unpool_2x2(x):
out = tf.concat([x, tf.zeros_like(x)], 3)
out = tf.concat([out, tf.zeros_like(out)], 2)
# out = tf.concat([x, x], 3)
# out = tf.concat([out, out], 2)
input_shape = x.shape.as_list()
out_shape = [-1,input_shape[1]*2,input_shape[2]*2,input_shape[3]]
return tf.reshape(out, out_shape)
def maxpool2x2(input, name):
# global layer_num
# layer_num = layer_num + 1
output = slim.max_pool2d(input, kernel_size=[2, 2], stride=2, scope=name)
return output
def fully_connected(input, c_outputs, name):
output = slim.fully_connected(input, c_outputs, activation_fn=None,normalizer_fn=None, scope=name)
if PRINT_LAYER_LOG:
print(name, output.get_shape())
return output
with slim.arg_scope(network_arg_scope(is_training=is_training)):
with tf.compat.v1.variable_scope(scope, reuse=False), remap_variables(new_get_variable):
conv0 = conv2d(inputs, 32, 1, name='conv_0') # 96 -
print ('conv0:',conv0.shape)
conv1 = conv2d(conv0, 32, 2, name='conv_1') # 48 -
print ('conv1:',conv1.shape)
conv2 = conv2d(conv1, 32, 1, name='conv_2') # 48 -
print ('conv2:',conv2.shape)
conv3 = conv2d(conv2, 32, 2, name='conv_3') # 24 -
print ('conv3:',conv3.shape)
conv4 = conv2d(conv3, 32, 1, name='conv_4') # 24 -
print ('conv4:',conv4.shape)
conv5 = conv2d(conv4, 64, 2, name='conv_5') # 12 -
print ('conv5:',conv5.shape)
conv6 = conv2d(conv5, 64, 1, name='conv_6') # 12 -
print ('conv6:',conv6.shape)
conv7 = conv2d(conv6, 64, 2, name='conv_7') # 6 -
print ('conv7:',conv7.shape)
# ap0=tf.nn.avg_pool(conv3, ksize=4, strides=4, padding='VALID')
ap0=tf.nn.max_pool(conv3, ksize=4, strides=4, padding='VALID')
print ('ap0:',ap0.shape)
# ap1=tf.nn.avg_pool(conv5, ksize=2, strides=2, padding='VALID')
ap1=tf.nn.max_pool(conv5, ksize=2, strides=2, padding='VALID')
print ('ap1:',ap1.shape)
concat0=tf.concat([ap0,ap1,conv7],3,name='concat0')
conv8 = conv2d(concat0, 64, 1, name='conv_8') # 6 -
print ('conv8:',conv8.shape)
heatmap_pred = slim.conv2d(conv8, 1, kernel_size=[3,3], stride=1, scope='conv_9', activation_fn=tf.sigmoid, normalizer_fn=None) # 6 -
# print ('conv8:',conv8.shape)
if is_training:
l2_loss = tf.add_n(tf.losses.get_regularization_losses())
return heatmap_pred, l2_loss
else:
# return heatmap_pred, heatmap_pred
return heatmap_pred, (conv0*15.0-8.0)
# return heatmap_pred, inputs*127.5-128
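In summary: a 96x96 input is downsampled by stride-2 convolutions to 48, 24, 12, and finally 6x6; conv3 (24x24, max-pooled by 4) and conv5 (12x12, max-pooled by 2) are concatenated with conv7 at 6x6 resolution, and a final 1-channel sigmoid convolution produces the 6x6 heatmap matching cfg.heatmap_size.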
The quantization utilities are in dorefa.py:
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
@author: Yuxin Wu,dzhang
@contact: ppwwyyxxc@gmail.com,dong.zhang@ingenic.com
"""
import tensorflow as tf
from tensorflow.python.ops import math_ops
from tensorflow.contrib.framework import add_model_variable
from tensorflow.python.training import moving_averages
# from tensorpack.models import *
# from tensorpack.tfutils.tower import get_current_tower_context
slim = tf.contrib.slim
from tensorflow.contrib.framework.python.ops import variables
from tensorflow.python.ops import array_ops
from tensorflow.python.framework import dtypes
import re
import numpy as np
weight_tensor_list = []
is_first = True
def weight_prepara(x):
    # Clip the weights to mean ± 3*std before quantization
    mean = math_ops.reduce_mean(x)
var = math_ops.reduce_mean( math_ops.squared_difference(x, array_ops.stop_gradient(mean)) )
std = tf.sqrt(var)
x = tf.clip_by_value(x, mean - 3.0*std, mean + 3.0*std)
return x
#@graph_memoized
def get_dorefa(bitW, bitA, bitG):
"""
return the three quantization functions fw, fa, fg, for weights, activations and gradients respectively
It's unsafe to call this function multiple times with different parameters
"""
G = tf.get_default_graph()
def quantize_w(x, k):
k = k -1
n = float(2**k)
with G.gradient_override_map({"Round": "Identity"}):
x = tf.round(x * n)
x = tf.clip_by_value(x,-n,n-1) / n
return x
def quantize_a(x, k):
n = float(2**k - 1)
with G.gradient_override_map({"Round": "Identity"}):
return tf.round(x * n) / n
def quantize_g(x, k):
n = float(2**k - 1)
with G.gradient_override_map({"Round": "Identity"}):
return tf.round(x * n) / n
def fw(x):
# print("Quantized Weight: %s, BITW: %d" % (x.op.name, bitW))
if bitW == 32:
return x
if bitW == 1: # BWN
with G.gradient_override_map({"Sign": "Identity"}):
E = tf.stop_gradient(tf.reduce_mean(tf.abs(x)))
return tf.sign(x / E) * E
pre_max_value = 1.0
if len(x.shape) == 2:
# print("dzhang debug fc!!!")
pre_max_value = math_ops.reduce_max(tf.abs(x), list( range(len(x.shape) - 1) ), keepdims=True)
if x.shape[-1] == 1 or x.shape[-1] == 4:
print("#########################")
pre_max_value = math_ops.reduce_max(tf.abs(x))
if len(x.shape) == 4:
# print("dzhang debug conv!!!")
pre_max_value = math_ops.reduce_max(tf.abs(x), list( range(len(x.shape) - 1) ), keepdims=True)
x = weight_prepara(x)
if len(x.shape) == 2:
# print("dzhang debug fc!!!")
max_value = math_ops.reduce_max(tf.abs(x), list( range(len(x.shape) - 1) ), keepdims=True)
if x.shape[-1] == 1 or x.shape[-1] == 4:
print("#########################")
max_value = math_ops.reduce_max(tf.abs(x))
if len(x.shape) == 4:
# print("dzhang debug conv!!!")
max_value = math_ops.reduce_max(tf.abs(x), list( range(len(x.shape) - 1) ), keepdims=True)
x = x / max_value
res = quantize_w(x, bitW)
res = res * pre_max_value
return res
def fa(x):
# print("Quantized Action Relu6: %s, BITA: %d" % (x.op.name, bitA))
if bitA == 32:
return x
x = tf.clip_by_value(x, 0.0, 6.0)
x = x / 6.0
res = quantize_a(x, bitA)
return res
def fg(x):
return x
return fw, fa, fg
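A minimal standalone sketch of how these quantizers behave (hypothetical tensor names; TF 1.x graph mode assumed):
import tensorflow as tf
from dorefa import get_dorefa

fw, fa, fg = get_dorefa(4, 4, 32)   # 4-bit weights and activations, full-precision gradients
w = tf.get_variable('demo/weights', shape=[3, 3, 1, 32])
w_q = fw(w)                          # weights quantized, rescaled back to their original range
act = tf.nn.relu6(tf.random_normal([1, 8, 8, 32]))
act_q = fa(act)                      # activations clipped to [0, 6] and quantized into [0, 1]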
2. Computing the loss
The loss compares the predicted heatmap with the target, keeping only the hardest examples (online hard example mining); see losses_blur.py:
#!/usr/bin/env python
# encoding: utf-8
import tensorflow as tf
import sys
sys.path.append('..')
from config_blur import cfg
def ohkm(loss, top_k):
    # Note: this variant averages over all BUT the top_k largest entries
    ohkm_loss = 0.
for i in range(cfg.batch_size):
sub_loss = loss[i]
topk_val, topk_idx = tf.nn.top_k(sub_loss, k=top_k, sorted=False, name='ohkm{}'.format(i))
tmp_loss = tf.gather(sub_loss, topk_idx, name='ohkm_loss{}'.format(i)) # can be ignore ???
ohkm_loss += (tf.reduce_sum(sub_loss)-tf.reduce_sum(tmp_loss)) / (int(cfg.heatmap_size * cfg.heatmap_size)-top_k)
ohkm_loss /= cfg.batch_size
return ohkm_loss
def ohkm_all(loss, top_k):
topk_val, topk_idx = tf.nn.top_k(loss, k=top_k, sorted=False, name='ohkm_all')
tmp_loss = tf.gather(loss, topk_idx, name='ohkm_loss_all') # can be ignore ???
# ohkm_loss = tf.reduce_sum(tmp_loss)/cfg.batch_size
ohkm_loss = tf.reduce_sum(tmp_loss) / top_k
return ohkm_loss
def loss(preds, labels, heatmap_pre, heatmap):
labels = tf.cast(labels, tf.int64)
print('pre labels', labels.get_shape())
labels = tf.reshape(labels, (cfg.batch_size, -1))
print('labels', labels.get_shape())
labels = tf.one_hot(labels, cfg.classes)
cross_entropy = tf.nn.softmax_cross_entropy_with_logits_v2(logits=preds, labels=labels)
pred_loss = tf.reduce_mean(cross_entropy)
print('pred_loss', pred_loss.get_shape())
heatmap_predict = tf.reshape(heatmap_pre, [cfg.batch_size,-1])
heatmap_target = tf.reshape(heatmap, [cfg.batch_size,-1])
heatmap_loss = tf.square(heatmap_predict-heatmap_target)
    heatmap_loss = ohkm(heatmap_loss, int(cfg.train.ohem_ratio * cfg.heatmap_size * cfg.heatmap_size))
heatmap_loss = tf.sqrt(heatmap_loss)
# pred_loss = tf.reduce_mean(cross_entropy)
return pred_loss, heatmap_loss
# def loss_ohem(preds, labels, heatmap_pre, heatmap):
def loss_ohem(heatmap_pre, heatmap_m, label):
# labels = tf.cast(labels, tf.int64)
# labels = tf.reshape(labels, (cfg.batch_size,))
# print('pre labels', labels.get_shape())
# labels = tf.one_hot(labels, cfg.classes)
# print('labels', labels.get_shape())
# cross_entropy = tf.nn.softmax_cross_entropy_with_logits_v2(logits=preds, labels=labels)
# print('cross_entropy', cross_entropy.get_shape())
# keep_num = tf.cast(cfg.batch_size * cfg.train.ohem_ratio, tf.int32)
# cross_entropy = tf.reshape(cross_entropy, (cfg.batch_size,))
# print('cross_entropy', cross_entropy.get_shape())
# _, k_index = tf.nn.top_k(cross_entropy, keep_num)
# loss = tf.gather(cross_entropy, k_index)
# print('ohem loss', loss.get_shape())
print ("heatmap_pre.shape",heatmap_pre.shape)
heatmap_predict = tf.reshape(heatmap_pre, [cfg.batch_size,-1])
ones = tf.ones_like(heatmap_predict,dtype=tf.float32)
zeros = tf.zeros_like(heatmap_predict,dtype=tf.float32)
heatmap_target = ones*label
# heatmap_reverse = ones-heatmap_target
# heatmap_decay = tf.where(tf.less(heatmap_target,0.5),heatmap_reverse, heatmap_target)
print ("heatmap_m.shape",heatmap_m.shape)
heatmap_mask = tf.reshape(heatmap_m, [cfg.batch_size,-1])
# heatmap_mask = tf.where(tf.less(heatmap_target,0.5),zeros, ones)
print ("heatmap_target.shape",heatmap_target.shape)
heatmap_loss = tf.square(heatmap_predict-heatmap_target)
heatmap_loss = heatmap_loss*heatmap_mask
# heatmap_loss = heatmap_loss*heatmap_mask*heatmap_decay
print ("heatmap_loss.shape",heatmap_loss.shape)
# heatmap_loss = tf.reshape(heatmap_loss, [cfg.batch_size,-1])
heatmap_loss = tf.reshape(heatmap_loss, [-1])
heatmap_loss = ohkm_all(heatmap_loss, int(cfg.train.ohem_ratio * cfg.heatmap_size * cfg.heatmap_size*cfg.batch_size))
# heatmap_loss = tf.sqrt(heatmap_loss)
return heatmap_loss
# return tf.reduce_mean(loss)
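With the defaults in config_blur.py (ohem_ratio = 0.1, heatmap_size = 6, batch_size = 256), loss_ohem therefore keeps the int(0.1 × 6 × 6 × 256) = 921 largest per-pixel squared errors across the whole batch and averages only those.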
3. Wiring together the loss and predictions
Everything is wrapped in the ColorRecNet class; see run_net_blur.py:
#!/usr/bin/env python
# encoding: utf-8
import tensorflow as tf
import sys
sys.path.append('..')
from models.network_dw import Network
# from models.network_cdcn import Network  # network_cdcn.py is the variant listed in this post
from config_blur import cfg
from models.losses_blur import loss, loss_ohem
class ColorRecNet:
    def __init__(self, img, heatmap, label, is_training, batch_norm_decay=0.997):
self.img = img
self.heatmap = heatmap
self.label = label
self.is_training = is_training
        self.batch_norm_decay = batch_norm_decay
self.img_shape = tf.shape(self.img)
backbone = Network()
if is_training:
self.heatmap_pre, self.l2_loss = backbone.inference(self.is_training, self.img)
# self.head, self.l2_loss = backbone.resnet_v1_50(self.is_training, self.img)
else:
self.heatmap_pre, self.conv = backbone.inference(self.is_training, self.img)
# self.head, self.conv = backbone.resnet_v1_50(self.is_training, self.img)
def compute_loss(self):
with tf.name_scope('loss_0'):
heatmapLoss = loss_ohem(self.heatmap_pre, self.heatmap, self.label)
# clsLoss, heatmapLoss = loss(self.head, self.truth, self.heatmap_pre, self.heatmap)
self.heatmap_loss = heatmapLoss
self.all_loss = heatmapLoss + self.l2_loss
# self.all_loss = clsLoss + heatmapLoss*100 + self.l2_loss
return self.all_loss, self.heatmap_loss, self.l2_loss
def predict(self):
'''
only support single image prediction
'''
# pred_score = tf.reshape(self.head, (-1, cfg.classes))
# score = tf.nn.softmax(tf.reshape(self.head, (-1, cfg.classes)))
# class_index = tf.argmax(pred_score, 1)
return self.heatmap_pre, self.conv
V. Training, Saving, and Testing the Model
1. Training and saving the model
TensorFlow trains the model, repeatedly updating the weights from the loss so that the model learns more from the images, and then saves checkpoints; see mulpi_gpus_train_blur_v2.py:
#!/usr/bin/env python
# encoding: utf-8
import numpy as np
import tensorflow as tf
import sys
# sys.path.insert(0,'/home/xjyu/lgx/')
# sys.path.append(r'/home/xjyu/lgx/blur_quantize/')
from models.run_net_blur_v2 import ColorRecNet
from gen_data_batch_blur import gen_data_batch
from config_blur import cfg
import os
import re
import tensorflow.contrib.slim as slim
os.environ["CUDA_VISIBLE_DEVICES"] = "1"
#os.environ["CUDA_VISIBLE_DEVICES"] = "0"
def get_variables_to_restore(include_vars=[], exclude_global_pool=False):
variables_to_restore = []
for var in slim.get_model_variables():
if exclude_global_pool and 'global_pool' in var.op.name:
#print(var)
continue
variables_to_restore.append(var)
for var in slim.get_variables_to_restore(include=include_vars):
if exclude_global_pool and 'global_pool' in var.op.name:
#print(var)
continue
variables_to_restore.append(var)
return variables_to_restore
def average_gradients(tower_grads):
"""Calculate the average gradient for each shared variable across all towers.
Note that this function provides a synchronization point across all towers.
Args:
tower_grads: List of lists of (gradient, variable) tuples. The outer list
is over individual gradients. The inner list is over the gradient
calculation for each tower.
Returns:
List of pairs of (gradient, variable) where the gradient has been averaged
across all towers.
"""
average_grads = []
for grad_and_vars in zip(*tower_grads):
# Note that each grad_and_vars looks like the following:
# ((grad0_gpu0, var0_gpu0), ... , (grad0_gpuN, var0_gpuN))
grads = []
for g, _ in grad_and_vars:
# Add 0 dimension to the gradients to represent the tower.
expanded_g = tf.expand_dims(g, 0)
# Append on a 'tower' dimension which we will average over below.
grads.append(expanded_g)
# Average over the 'tower' dimension.
grad = tf.concat(axis=0, values=grads)
grad = tf.reduce_mean(grad, 0)
# Keep in mind that the Variables are redundant because they are shared
# across towers. So .. we will just return the first tower's pointer to
# the Variable.
v = grad_and_vars[0][1]
grad_and_var = (grad, v)
average_grads.append(grad_and_var)
return average_grads
def train(finetune):
is_training = True
# data pipeline
imgs, heatmap, label = gen_data_batch(cfg.data_path, cfg.batch_size*cfg.train.num_gpus)
print cfg.data_path
print ("imgs:",imgs.shape)
print ("heatmap:",heatmap.shape)
print ("label:",label.shape)
imgs_split = tf.split(imgs, cfg.train.num_gpus)
heatmap_split = tf.split(heatmap, cfg.train.num_gpus)
label_split = tf.split(label, cfg.train.num_gpus)
global_step = tf.get_variable('global_step', [], initializer=tf.constant_initializer(0.), trainable=False)
#lr = tf.train.piecewise_constant(global_step, cfg.train.lr_steps, cfg.train.learning_rate)
#optimizer = tf.train.AdamOptimizer(learning_rate=lr)
learn_rate_decay_step = int(cfg.train.num_samples / cfg.batch_size / cfg.train.num_gpus * cfg.train.learn_rate_decay_epoch)
learning_rate = tf.train.exponential_decay(cfg.train.learn_rate, global_step, learn_rate_decay_step, cfg.train.learn_rate_decay, staircase=True)
#optimizer = tf.train.GradientDescentOptimizer(learning_rate)
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
# Calculate the gradients for each model tower.
tower_grads = []
with tf.variable_scope(tf.get_variable_scope()):
for i in range(cfg.train.num_gpus):
with tf.device('/gpu:%d' % i):
with tf.name_scope('%s_%d' % (cfg.train.tower, i)) as scope:
model = ColorRecNet(imgs_split[i], heatmap_split[i], label_split[i], is_training)
loss, heatmap_loss, l2_loss = model.compute_loss()
tf.get_variable_scope().reuse_variables()
grads = optimizer.compute_gradients(loss)
tower_grads.append(grads)
if i == 0:
current_loss = loss
current_l2_loss = l2_loss
current_heatmap_loss = heatmap_loss
update_op = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
# print(tf.GraphKeys.UPDATE_OPS)
# print(update_op)
# print(grads)
# vars_det = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope="BioRecNet")
grads = average_gradients(tower_grads)
with tf.control_dependencies(update_op):
apply_gradient_op = optimizer.apply_gradients(grads, global_step=global_step)
train_op = tf.group(apply_gradient_op,*update_op)
# GPU config
config = tf.ConfigProto(allow_soft_placement=True, log_device_placement=False)
config.gpu_options.allow_growth = True
sess = tf.Session(config=config)
# Create a saver
# saver = tf.train.Saver(max_to_keep=1000)
saver = tf.compat.v1.train.Saver(max_to_keep=1000)
ckpt_dir = cfg.ckpt_path
# init
sess.run(tf.global_variables_initializer())
if finetune:
checkpoint = '/home/xjyu/lgx/end_face/ckpt1/livenessDet_blur_2-688'
# variables_to_restore = slim.get_variables_to_restore()
# init_assign_op, init_feed_dict = slim.assign_from_checkpoint(checkpoint, variables_to_restore, ignore_missing_vars=True)
# sess.run(init_assign_op, init_feed_dict)
variables_to_restore = get_variables_to_restore(exclude_global_pool=True)
init_assign_op, init_feed_dict = slim.assign_from_checkpoint(checkpoint, variables_to_restore, ignore_missing_vars=True)
sess.run(init_assign_op, init_feed_dict)
# running
cnt_epoch = 0
for i in range(1, cfg.train.max_batches):
_, loss_,heatmap_loss_,l2_loss_,lr_ = sess.run([train_op, current_loss, current_heatmap_loss, current_l2_loss, learning_rate])
if(i % 10 == 0):
print(i,': ', ' loss: ', loss_, ' heatmap_loss_:', heatmap_loss_,' l2_loss_:', l2_loss_, ' lr: ', lr_)
if int(i) % int(cfg.train.num_samples / cfg.train.num_gpus / 100) == 0:
cnt_epoch += 1
print "save:"+ckpt_dir+'livenessDet_blur_2-%d'%cnt_epoch
saver.save(sess, ckpt_dir+'livenessDet_blur_2', global_step=cnt_epoch, write_meta_graph=True)
if __name__ == '__main__':
train(finetune=False)
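Each checkpoint is saved as livenessDet_blur_2-<epoch> under cfg.ckpt_path; this is the name that test_v2.py below restores via its g_step argument.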
The training console output and the saved checkpoint files are shown in the original post's screenshots (omitted here).
2. Testing on the test set
Before testing, create the directories that hold the 10 predicted classes (see test.py), then run the Asian and foreign test samples through test_v2.py:
test.py
# encoding: utf-8
import os
for i in range(10):
os.makedirs('/home/xjyu/lgx/end_face/res/blur_%s'%i)
test_v2.py:
#!/usr/bin/env python
# encoding: utf-8
import numpy as np
import tensorflow as tf
import sys
sys.path.insert(0,'/home/xjyu/lgx/end_face/')
from models.run_net_blur_v2 import ColorRecNet
from config_blur import cfg
import cv2
import os
import re
from tqdm import tqdm
from PIL import Image
import random
import shutil
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
def test(img_path, g_step):
is_training = False
cfg.batch_size = 1
a = 0
b = 0
c = 0
d = 0
input_width = 96
input_height = 96
ckpt_dir = cfg.ckpt_path
imgs = os.listdir(img_path)
configer = tf.ConfigProto()
configer.gpu_options.per_process_gpu_memory_fraction = 0.1
# mask_img = cv2.imread("/home/xjyu/lgx/blur_quantize/tools/cv/img/20190918/b.png",0)
# mask_f = mask_img.astype(np.float32)/250.0
with tf.Session(config=configer) as sess:
# imgs_holder = tf.placeholder(tf.float32, shape=[1, 48, 48, 1])
imgs_holder = tf.placeholder(tf.float32, shape=[1, input_height, input_width,1])
model = ColorRecNet(imgs_holder, None, None, is_training)
heatmap_pre, last_layer = model.predict()
saver = tf.compat.v1.train.Saver()
sess.run(tf.global_variables_initializer())
# print ckpt_dir+'livenessDet_v4-'+str(g_step)
saver.restore(sess, ckpt_dir+'livenessDet_blur_2-'+str(g_step))
# saver.restore(sess, ckpt_dir+'livenessDet_128-16-bit4-0727-'+str(g_step))
sess.run(tf.local_variables_initializer())
for i in imgs:
img = cv2.imread(os.path.join(img_path, i))
try:
h, w, _ = img.shape
# h, w = img.shape
except:
print os.path.join(img_path, i)
continue
img_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
            # Same brightness/contrast normalization as normal.py
            img_o = np.ones((h,w), dtype=np.float32)*128
img_a = img_gray.astype(np.float32)
img_mean = np.mean(img_a)
scale = 128.0/float(img_mean)
img_b = img_a * scale
img_diff = abs(img_b-img_o)
mean_diff = np.mean(img_diff)
scale_2 = 30.0/mean_diff
img_e=img_b-img_o
img_f=img_e * scale_2
img_g=img_f + img_o
img_g=np.clip(img_g,0,255)
img_h=img_g.astype(np.uint8)
# img = img[:, :, 0]
# image = cv2.resize(img, (48, 48))
image=np.zeros((input_height,input_width),dtype=np.uint8)
# img_blur = cv2.GaussianBlur(img_gray, (5, 5), 1)
# img_abs = np.abs(img_gray-img_blur)
image[0:h,0:w] = img_h[0:h,0:w]
# image[0:h,0:w, 1] = img_abs[0:h,0:w]
z_img=np.zeros((input_height,input_width),dtype=np.uint8)
o_img=np.ones((input_height,input_width),dtype=np.uint8)*255
bmask=np.where(image<5,z_img,o_img)
# bmask=np.where(image[:,:,0]<5,z_img,o_img)
bmask = cv2.resize(bmask, (cfg.heatmap_size, cfg.heatmap_size))
# if True:
# img_data = image.astype(np.float32) * mask_f / 127.5
img_data = image.astype(np.float32) / 127.5
# for j in range(0, 48):
# for k in range(0, 48):
# print(img_data[j, k])
# f_write.close()
img_input = np.reshape(img_data, [1, input_height, input_width, 1])
# print type(img_input)
# print img_input.shape
heatmap_, last_layer_ = sess.run([heatmap_pre, last_layer], feed_dict={imgs_holder: img_input})
heatmap_s=heatmap_.reshape(-1)
bmask_s=bmask.reshape(-1)
# print heatmap_s.shape, bmask_s.shape
            mask_index = np.argwhere(bmask_s>120)  # indices of foreground cells in the flattened mask
# print mask_index.shape, img_path
heatmap_v=heatmap_s[mask_index]
# heatmap_mean = np.max(heatmap_v)
heatmap_mean = np.mean(heatmap_v)
heatmap_mean = 1-heatmap_mean
            # Map the mean score to one of the 10 blur classes
            # (equivalent to the original chain of <=0.1 ... <=0.9 thresholds)
            level = min(9, max(0, int(np.ceil(heatmap_mean * 10)) - 1))
            shutil.copy(os.path.join(img_path, i), 'res/blur_%d' % level)
            continue
print('{:^{}}|{:^{}}|{:^{}}|{:^{}}|{:^{}}'.format(a, 20, b, 20, c, 20, d, 20, img_path.strip().split('/')[-2] + '/' + img_path.strip().split('/')[-1], 20))
tf.reset_default_graph()
if __name__ == '__main__':
g_step = np.arange(25,24,-1)
for i in g_step:
print('*****************************', i, '*****************************')
        print('{:^{}}|{:^{}}|{:^{}}|{:^{}}'.format('no_live', 20, 'live', 20, 'unknown', 20, 'img_path', 20))
        # img_path = ['/home/xjyu/lgx/end_face/test/blur_test_jpg/'] # Asian test images
        img_path = ['/home/xjyu/blur_test_jpg/'] # foreign test images
for j in img_path:
test(j, i)
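Here g_step = np.arange(25, 24, -1) evaluates only the epoch-25 checkpoint; widening the range (e.g. np.arange(30, 20, -1)) would sweep several checkpoints in one run.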
The test results on Asian faces and on foreign faces are shown in the original post's screenshots (omitted here).
The training set contains about 420,000 images; the test set contains 2,000 images.
Original article: https://blog.csdn.net/yegeli/article/details/109630078