
Blur Classification Project


Face Blur Classification

This project classifies Asian and foreign faces into 10 blur levels. The overall pipeline:
1. Build the data: blur the existing face images into 10 levels, typically with Gaussian blur, Gaussian noise, and motion blur.
2. With the blur levels in place, augment the data with left-right flips and normalize it.
3. Write all of the data to TFRecord files with TensorFlow, using multiple worker processes; see the code for details.
4. Build the neural network: 8 convolutional layers and 2 pooling layers; compute the loss and the predictions.
5. Train the model on the training set, save it, and evaluate it on the test set.

I. Dataset Construction

The 20,000+ clear source images are split into three parts, and each part receives one of three blur types: Gaussian blur, Gaussian noise, or motion blur.
Sample source images:
[figure: original face samples]

1. Gaussian blur: generating the 10 blur levels

Each clear image is blurred with cv2.GaussianBlur(img, (15, 15), sigma) at 10 increasing values of sigma; see gauss_class.py:

#!/usr/bin/env python
# _*_ coding:utf-8 _*_
import cv2
import numpy as np
import sys,os



# create the output directories for the 10 blur levels
for i in range(0, 10):
    try:
        os.makedirs('./class/gauss_class/gauss_blur_%d' % i)
        # os.makedirs('./test/gauss_class/gauss_blur_%d' % i)
    except OSError:
        continue


png_dir=sys.argv[1]
gt_imdb=[]
count = 0


# collect the image paths under the input directory
for item in os.listdir(png_dir):
    gt_imdb.append(os.path.join(png_dir,item))

# process each image
for jpg_file in gt_imdb:
    if os.path.exists(jpg_file):
        img = cv2.imread(jpg_file, 0)   # read as grayscale
    else:
        print(jpg_file)
        continue
    h, w = img.shape
    img_z = np.zeros((h, w), dtype=np.uint8)
    img_o = np.ones((h, w), dtype=np.uint8)

    count += 1
    # mask of non-black pixels, so the black background stays black after blurring
    mask_img = np.where(img > 0, img_o, img_z)
    step = 0.2
    if count % 10000 == 0:
        print(count)
    for j in range(10):
        # 15x15 Gaussian kernel; the standard deviation grows with the blur level
        img_b = cv2.GaussianBlur(img, (15, 15), step * (j + 1))
        img_b = np.where(mask_img > 0, img_b, img_z)
        savename = "./class/gauss_class/gauss_blur_%d/gauss_blur_%d.jpg" % (j, count)
        cv2.imwrite(savename, img_b)
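
Each of the three generation scripts takes the directory of clear images as its only command-line argument, e.g. python gauss_class.py ./clear_faces (the ./clear_faces path is an illustrative placeholder); the blurred copies land in the ./class/... directories created at the top of each script.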


2. Gaussian noise: generating the 10 blur levels

Gaussian noise with a fixed mean and a per-level spread is added to the images at 10 strengths; see noise_class.py:

# encoding:utf-8

import cv2
import numpy as np
import base64
import json
import sys,os
import random
import tqdm

gauss_thd_0 = 0.1
gauss_thd_1 = 0.2

def sp_noise(image, prob):
    '''
    Add salt-and-pepper noise (defined here but not used below).
    prob: noise ratio
    '''
    output = np.zeros(image.shape,np.uint8)
    thres = 1 - prob 
    for i in range(image.shape[0]):
        for j in range(image.shape[1]):
            rdn = random.random()
            if rdn < prob:
                output[i][j] = 0
            elif rdn > thres:
                output[i][j] = 255
            else:
                output[i][j] = image[i][j]
    return output


def gauss_noise(image, mean=0, var=0.3):
    '''
    Add Gaussian noise.
    mean: mean of the noise
    var: spread of the noise (the scale argument of np.random.normal)
    '''
    image = np.array(image/255.0, dtype=float)
    # draw the noise at a random lower resolution and resize it back up,
    # which makes the noise spatially correlated
    scale = random.uniform(0.3, 1.0)
    h, w = image.shape
    h_s = int(scale*h)
    w_s = int(scale*w)
    noise = np.random.normal(mean, var, (h_s, w_s))
    noise = cv2.resize(noise, (w, h))
    out = image + noise
    out = np.clip(out, 0., 1.0)
    out = np.uint8(out*255)
    return out


# create the output directories for the 10 noise levels
for i in range(0, 10):
    try:
        os.makedirs('./class/noise_class/noise_blur_%d' % i)
        # os.makedirs('./test/noise_class/noise_blur_%d' % i)
    except OSError:
        continue


png_dir=sys.argv[1]
gt_imdb=[]
count = 0


for item in os.listdir(png_dir):
    gt_imdb.append(os.path.join(png_dir,item))

for jpg_file in gt_imdb:
    if os.path.exists(jpg_file):
        img = cv2.imread(jpg_file, 0)
    else:
        print(jpg_file)
        continue
    h, w = img.shape
    img_z = np.zeros((h, w), dtype=np.uint8)
    img_o = np.ones((h, w), dtype=np.uint8)

    count += 1
    mask_img = np.where(img > 0, img_o, img_z)
    step = 0.008

    for j in range(10):
        # noise strength grows with the blur level
        prob = step * (j + 1)
        img_g = gauss_noise(img, var=prob)
        img_g = np.where(mask_img > 0, img_g, img_z)
        savename = "./class/noise_class/noise_blur_%d/noise_blur_%d.jpg" % (j, count)
        cv2.imwrite(savename, img_g)


3. Motion blur: generating the 10 blur levels

A motion-blur convolution kernel and its anchor point are generated and applied to the images along random directions; see motion_class.py:

# coding=utf-8
import math
import numpy as np
import cv2
import random
import sys, os
# generate the motion-blur kernel (PSF) and its anchor point
def genaratePsf(length, angle):
    EPS = np.finfo(float).eps
    alpha = (angle - math.floor(angle / 180) * 180) / 180 * math.pi
    cosalpha = math.cos(alpha)
    sinalpha = math.sin(alpha)
    if cosalpha < 0:
        xsign = -1
    elif angle == 90:
        xsign = 0
    else:
        xsign = 1
    psfwdt = 1
    # kernel size
    sx = int(math.fabs(length*cosalpha + psfwdt*xsign - length*EPS))
    sy = int(math.fabs(length*sinalpha + psfwdt - length*EPS))
    psf1 = np.zeros((sy, sx))
    half = length/2
    # psf1 has its largest weights at the top-left, decreasing toward the
    # bottom-right, i.e. motion from the bottom-right to the top-left
    for i in range(0, sy):
        for j in range(0, sx):
            psf1[i][j] = i*math.fabs(cosalpha) - j*sinalpha
            rad = math.sqrt(i*i + j*j)
            if rad >= half and math.fabs(psf1[i][j]) <= psfwdt:
                temp = half - math.fabs((j + psf1[i][j] * sinalpha) / cosalpha)
                psf1[i][j] = math.sqrt(psf1[i][j] * psf1[i][j] + temp*temp)
            psf1[i][j] = psfwdt + EPS - math.fabs(psf1[i][j])
            if psf1[i][j] < 0:
                psf1[i][j] = 0
    # motion toward the top-left: anchor at (0, 0)
    anchor = (0, 0)
    # motion toward the top-right: anchor at the top-right corner;
    # the kernel is flipped left-right so weights grow toward the anchor
    if angle < 90 and angle > 0:
        psf1 = np.fliplr(psf1)
        anchor = (psf1.shape[1]-1, 0)
    elif angle > -90 and angle < 0:  # likewise: motion toward the bottom-right
        psf1 = np.flipud(psf1)
        psf1 = np.fliplr(psf1)
        anchor = (psf1.shape[1]-1, psf1.shape[0]-1)
    elif angle < -90:  # likewise: motion toward the bottom-left
        psf1 = np.flipud(psf1)
        anchor = (0, psf1.shape[0]-1)
    psf1=psf1/psf1.sum()
    return psf1,anchor


# create the output directories for the 10 motion-blur levels
for i in range(0, 10):
    try:
        os.makedirs('./class/motion_class/motion_blur_%d' % i)
        # os.makedirs('./test/motion_class/motion_blur_%d' % i)
    except OSError:
        continue


png_dir=sys.argv[1]
gt_imdb=[]

count = 0
count_e = 0


for item in os.listdir(png_dir):
    gt_imdb.append(os.path.join(png_dir,item))

for jpg_file in gt_imdb:
    if os.path.exists(jpg_file):
        img = cv2.imread(jpg_file, 0)
        h, w = img.shape
    else:
        print(jpg_file)
        continue
    img_z = np.zeros((h, w), dtype=np.uint8)
    img_o = np.ones((h, w), dtype=np.uint8)
    mask_img = np.where(img > 0, img_o, img_z)
    step = 1
    count += 1
    if count % 10000 == 0:
        print(count)
    for j in range(0, 10):
        # kernel length grows with the blur level; the direction is random
        lenth = step * j + 3
        angle = random.randint(280, 350)
        kernel, anchor = genaratePsf(lenth, angle)
        try:
            img_m = cv2.filter2D(img, -1, kernel, anchor=anchor)
        except:
            count_e += 1
            print(count_e)
            continue
        img_m = np.where(mask_img > 0, img_m, img_z)
        savename = "./class/motion_class/motion_blur_%d/motion_blur_%d.jpg" % (j, count)
        cv2.imwrite(savename, img_m)


II. Data Preprocessing

1. Left-right flips

Flipping each image left-right enlarges the dataset; see flip.py:

#!/usr/bin/env python
# _*_ coding:utf-8 _*_
import cv2 
import numpy as np
import sys,os
import random

png_dir = sys.argv[1]
gt_imdb = []
file_dict = {}

# group the image paths by class sub-directory
for item in os.listdir(png_dir):
    dir_path = os.path.join(png_dir, item)
    dirs = os.listdir(dir_path)
    for d in dirs:
        gt_imdb.append(os.path.join(dir_path, d))
    file_dict[item] = gt_imdb
    gt_imdb = []


for k in file_dict.keys():
    count = 0
    for jpg_file in file_dict[k]:
        if os.path.exists(jpg_file):
            img = cv2.imread(jpg_file, 0)
        else:
            print(jpg_file)
            continue

        count += 1
        if count % 1000 == 0:
            print(count)

        # horizontal flip; the copies are written to a fixed output directory
        img_a = cv2.flip(img, 1, dst=None)
        savename = "/home/xjyu/lgx/end_face/res_w4/%s/flip_%d.jpg" % (k, count)
        cv2.imwrite(savename, img_a)

To enrich the data further, the untouched clear originals were added to class 0 and the most heavily blurred originals to class 9. Later, as the test samples changed, half-lit ("yin-yang") faces and frosted-glass faces were added as well, and the foreign-face data was given dedicated processing.

2. Normalization

Each image is normalized by its mean: it is first scaled so its mean gray level becomes 128 (which lifts the overall brightness), and the deviations from 128 are then re-scaled so that their mean magnitude becomes 30, putting all samples on an equal footing for training. See normal.py:

#!/usr/bin/env python
# _*_ coding:utf-8 _*_
import cv2 
import numpy as np
import sys,os
import random

png_dir = sys.argv[1]
gt_imdb = []
file_dict = {}

# group the image paths by class sub-directory, creating matching output dirs
for item in os.listdir(png_dir):
    dir_path = os.path.join(png_dir, item)
    dirs = os.listdir(dir_path)
    try:
        os.makedirs('./normal/normal_%s/%s' % (png_dir, item))
    except OSError:
        continue

    for d in dirs:
        gt_imdb.append(os.path.join(dir_path, d))
    file_dict[item] = gt_imdb
    gt_imdb = []


for k in file_dict.keys():
    count = 0
    for jpg_file in file_dict[k]:
        if os.path.exists(jpg_file):
            img = cv2.imread(jpg_file, 0)
        else:
            print(jpg_file)
            continue

        img_a = img.astype(np.float64)
        x, y = img.shape[0:2]
        img_o = np.ones((x, y), dtype=np.float64) * 128

        count += 1
        if count % 1000 == 0:
            print(count)
        # step 1: scale so the mean gray level becomes 128
        mean = np.mean(img_a)
        scale = 128.0 / float(mean)
        img_b = img_a * scale
        # step 2: re-scale the deviations around 128 so their mean magnitude is 30
        img_diff = abs(img_b - img_o)
        mean_diff = np.mean(img_diff)
        scale_2 = 30.0 / mean_diff
        img_e = img_b - img_o
        img_f = img_e * scale_2
        img_g = img_f + img_o

        savename = "./normal/normal_%s/%s/%s_%d.jpg" % (png_dir, k, k, count)
        img_g = np.clip(img_g, 0, 255)
        cv2.imwrite(savename, img_g.astype(np.uint8))
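
A quick worked example of the two steps (illustrative numbers): a pixel of value 100 in an image whose mean is 64 is first scaled by 128/64 = 2 to 200; if the mean absolute deviation from 128 after that step is 60, then scale_2 = 30/60 = 0.5 and the pixel ends up at (200 - 128) * 0.5 + 128 = 164.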

After normalization the dataset totals 420,000 images.

3. Index files

The normalized images are listed per class into a txt index file, which is then shuffled; see gen_txt_all.py and shuffle.py:
gen_txt_all.py

import sys, os

path = sys.argv[1]
savefile = sys.argv[2]


# map the class-directory suffix to a soft label: class 0 (sharpest) -> 1.0 down
# to class 9 (most blurred) -> 0.0; note the gap between 0.6 and 0.4 in the middle
num_dict = {"0":1.0,"1":0.9,"2":0.8,"3":0.7,"4":0.6,"5":0.4,"6":0.3,"7":0.2,"8":0.1,"9":0.0}
fw = open(savefile, 'w')
files = os.listdir(path)
for item in files:
    dir_path = os.path.join(path, item)
    dirs = os.listdir(dir_path)
    for items in dirs:
        d_path = os.path.join(dir_path, items)
        d_s = os.listdir(d_path)
        gt_imdb = []
        # the class index is the directory-name suffix, e.g. gauss_blur_3 -> "3"
        str_num = items.split('_')[-1]
        num = num_dict[str_num]
        for d in d_s:
            gt_imdb.append(os.path.join(d_path, d))

        for jpgFile in gt_imdb:
            fw.writelines(jpgFile)
            fw.writelines(' %.1f\n' % num)
fw.close()
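
Each line of the resulting txt file is an image path followed by its soft label, for example (paths illustrative):

./normal/normal_class/gauss_blur_3/gauss_blur_3_1024.jpg 0.7
./normal/normal_class/motion_blur_0/motion_blur_0_17.jpg 1.0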


shuffle.py:

#coding:utf-8
import os
import random
import sys
import time

txt_file = sys.argv[1]
fw_file = sys.argv[2]
dataset = []
fr=open(txt_file, 'r')
fw=open(fw_file, 'w')
for line in fr.readlines():
    dataset.append(line)
random.shuffle(dataset)
for data in dataset:
    fw.writelines(data)
fr.close()
fw.close()


III. Generating TFRecord Files from the Dataset

The index txt file is converted to a TFRecord file for storing and reading the data; see gen_tf_records_blur_v2.py.
A detailed TFRecord walkthrough: https://blog.csdn.net/sinat_29957455/article/details/83316173

# encoding: utf-8

import numpy as np
import tensorflow as tf
import os
import cv2
from tqdm import tqdm
import re
import sys
import random
sys.path.append('/home/xjyu/lgx/')
print(sys.path)
from config_blur import cfg
from multiprocessing import Process,Manager

sp_thd_0 = 0.006
sp_thd_1 = 0.03
gauss_thd_0 = 0.02
gauss_thd_1 = 0.1

def sp_noise(image, prob):
    '''
    Add salt-and-pepper noise (kept from the data-generation scripts; unused here).
    prob: noise ratio
    '''
    output = np.zeros(image.shape,np.uint8)
    thres = 1 - prob 
    for i in range(image.shape[0]):
        for j in range(image.shape[1]):
            rdn = random.random()
            if rdn < prob:
                output[i][j] = 0
            elif rdn > thres:
                output[i][j] = 255
            else:
                output[i][j] = image[i][j]
    return output


def gauss_noise(image, mean=0, var=0.3):
    '''
    Add Gaussian noise (kept from the data-generation scripts; unused here).
    mean: mean of the noise
    var: spread of the noise (the scale argument of np.random.normal)
    '''
    image = np.array(image/255.0, dtype=float)
    noise = np.random.normal(mean, var, image.shape)
    out = image + noise
    out = np.clip(out, 0., 1.0)
    out = np.uint8(out*255)
    return out

def load_file(file_path, file_dir):
    '''
    load image paths and labels from the index txt file
    '''
    dataset = []
    with open(file_path, 'r') as f:
        lines = f.readlines()
        for line in lines:
            data_example = dict()
            ss = line.strip().split(' ')
            img_path = os.path.join(file_dir, ss[0])
            data_example['filename'] = img_path
            data_example['label'] = float(ss[1])
            dataset.append(data_example)

    return dataset

def extract_image(image_path, is_resize=True):
    '''
    read a grayscale image, randomly flip it, paste it at a random offset
    inside a cfg.height x cfg.width canvas, and build the foreground mask
    that is later used as the heatmap target
    '''
    img = cv2.imread(image_path, 0)
    try:
        h, w = img.shape
    except:
        print(image_path)

    if is_resize:
        assert(h <= cfg.height)
        assert(w <= cfg.width)
        # random horizontal flip
        if random.choice([0, 1]) == 0:
            img = cv2.flip(img, 1)

        # paste the image at a random position inside the canvas
        if w == cfg.width:
            start_x = 0
        else:
            start_x = random.randint(0, cfg.width-w)
        if h == cfg.height:
            start_y = 0
        else:
            start_y = random.randint(0, cfg.height-h)
        image = np.zeros((cfg.height, cfg.width), dtype=np.uint8)
        image[start_y:start_y+h, start_x:start_x+w] = img[0:h, 0:w]

        # binary foreground mask, downsampled to the heatmap resolution
        z_img = np.zeros((cfg.height, cfg.width), dtype=np.uint8)
        o_img = np.ones((cfg.height, cfg.width), dtype=np.uint8)*255
        bmask = np.where(image < 2, z_img, o_img)
        image_heatmap = cv2.resize(bmask, (cfg.heatmap_size, cfg.heatmap_size))
    else:
        image = img
        image_heatmap = bmask  # note: only the is_resize=True path is used here

    image_data = np.array(image, dtype='uint8')
    return image_data, image_heatmap

def extract_heatmap_image(filename):
    '''
    load a pre-computed mask image as the heatmap target (unused below)
    '''
    heatmap_name = filename.replace("normalize", "mask")
    image = cv2.imread(heatmap_name, 0)
    image = cv2.resize(image, (cfg.heatmap_size, cfg.heatmap_size))
    image_data = np.array(image, dtype='uint8')
    return image_data

def _add_to_tfrecord(dataset, index, example_list, total_len, tfrecord_writer, read_lock, write_lock):
    while True:
        # grab the next sample index under the read lock
        read_lock.acquire()
        if index.value > total_len - 1:
            print('process end !!')
            print(index.value, total_len)
            read_lock.release()
            break
        num = index.value
        index.value += 1
        read_lock.release()
        data_next = dataset[num]
        filename = data_next['filename']
        label = data_next['label']
        if num % 1000 == 0:
            print(num, filename)
        img, img_heatmap = extract_image(filename, is_resize=True)
        img = img.tostring()
        img_heatmap = img_heatmap.tostring()
        example = tf.train.Example(features=tf.train.Features(feature={
                      'feature': tf.train.Feature(bytes_list = tf.train.BytesList(value=[img])),
                      'label' : tf.train.Feature(float_list = tf.train.FloatList(value=[label])),
                      'heatmap': tf.train.Feature(bytes_list = tf.train.BytesList(value=[img_heatmap]))
                  }))
        write_lock.acquire()
        example_list.append(example.SerializeToString())
        write_lock.release()

def run_encode(file_path, file_dir, tf_records_filename):
    '''
    encode the whole dataset into one TFRecord file using 24 worker processes
    '''
    dataset = load_file(file_path, file_dir)
    writer = tf.io.TFRecordWriter(tf_records_filename)

    total_len = len(dataset)
    manager = Manager()
    read_lock = manager.Lock()
    write_lock = manager.Lock()
    index = manager.Value('i', 0)
    example_list = manager.list()
    proc = [Process(target=_add_to_tfrecord, args=(dataset, index, example_list, total_len, writer, read_lock, write_lock)) for i in range(24)]
    for p in proc:
        p.start()
    for p in proc:
        p.join()

    for item in example_list:
        writer.write(item)

    writer.close()


if __name__ == '__main__':
    file_path = '/home/xjyu/lgx/end_face/txt/txt_class_shuffle/shuffle_class10_8.txt'
    file_dir = '/home/xjyu/lgx/end_face'

    tf_records_filename = '/home/xjyu/lgx/end_face/tf_recoder/shuffle_class10_8.records'

    run_encode(file_path, file_dir, tf_records_filename)
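
The reading side (gen_data_batch_blur.py, used by the training script later) is not listed in this article; below is a minimal sketch of how records with the three keys written above could be parsed back, assuming TF 1.x as used throughout the project:

import tensorflow as tf

def parse_example(serialized):
    # keys and dtypes mirror the Example written in _add_to_tfrecord above
    parsed = tf.parse_single_example(serialized, features={
        'feature': tf.FixedLenFeature([], tf.string),
        'label':   tf.FixedLenFeature([], tf.float32),
        'heatmap': tf.FixedLenFeature([], tf.string),
    })
    img = tf.reshape(tf.decode_raw(parsed['feature'], tf.uint8), [96, 96, 1])  # cfg.height x cfg.width
    heatmap = tf.reshape(tf.decode_raw(parsed['heatmap'], tf.uint8), [6, 6])   # cfg.heatmap_size
    return img, heatmap, parsed['label']

dataset = (tf.data.TFRecordDataset('/home/xjyu/lgx/end_face/tf_recoder/shuffle_class10_8.records')
           .map(parse_example)
           .batch(256))  # cfg.batch_size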


IV. Building the Convolutional Neural Network

The network stacks 8 convolutional layers and 2 pooling layers to learn from the images; the predicted values are compared with the ground truth to compute the loss.
The project-wide configuration is config_blur.py:

from easydict import EasyDict as edict
import numpy as np
import tensorflow as tf
import os

cfg = edict()

cfg.classes = 2

cfg.names = ['False', 'True']
cfg.batch_size = 256

cfg.height = 96
cfg.width = 96
cfg.channel = 1
cfg.heatmap_size = 6
cfg.theta = 0.7

cfg.data_path = '/home/xjyu/lgx/end_face/tf_recoder/shuffle_class10_8.records'
cfg.ckpt_path = '/home/xjyu/lgx/end_face/ckpt/ckpt10_7/'

# training options
cfg.train = edict()

cfg.train.ignore_thresh = .5
cfg.train.ohem_ratio = 0.1
cfg.train.momentum = 0.9
cfg.train.bn_training = True
cfg.train.weight_decay = 0.000001 # 0.00004
cfg.train.learning_rate = [1e-3, 1e-4, 1e-5]
cfg.train.max_batches = 2000000 # 63000
cfg.train.lr_steps = [10000., 20000.]
cfg.train.lr_scales = [.1, .1]
cfg.train.num_gpus = 1
cfg.train.tower = 'tower'

cfg.train.learn_rate = 0.001
cfg.train.learn_rate_decay = 0.8 # 0.9
cfg.train.learn_rate_decay_epoch = 2
cfg.train.num_samples = 424165
cfg.epochs = 100
cfg.PRINT_LAYER_LOG = True

'''
quantization params (consumed by dorefa.py)
'''
cfg.P_BITW = 4
cfg.P_BITA = 4
cfg.P_BITG = 32
cfg.P_is_quantize = True
cfg.P_net_batchnorm = True
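
P_BITW = 4 and P_BITA = 4 select 4-bit quantization for the weights and activations, while P_BITG = 32 leaves gradients untouched: in the dorefa.py code shown below, a bit width of 32 is passed through unquantized, and fg is an identity in any case.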

1. Building the network

The network uses 8 convolutional layers and 2 pooling layers, and the dorefa helper quantizes the model.
The model is defined in network_cdcn.py:

#!/usr/bin/env python
# encoding: utf-8


import tensorflow as tf
import tensorflow.contrib.slim as slim
from tensorflow.python.framework import ops
import sys
sys.path.append('..')
import numpy as np
from config_blur import cfg
import re

from dorefa import get_dorefa
fw = None
fa = None
fg = None

PRINT_LAYER_LOG = cfg.PRINT_LAYER_LOG

is_quantize = cfg.P_is_quantize

from contextlib import contextmanager

@contextmanager
def custom_getter_scope(custom_getter):
    # re-open the current variable scope with a custom getter so that every
    # variable created inside can be transformed (adapted from tensorpack)
    scope = tf.get_variable_scope()
    ns = tf.get_default_graph().get_name_scope()
    with tf.variable_scope(scope, custom_getter=custom_getter):
        with tf.name_scope(ns + '/' if ns else ''):
            yield

def remap_variables(fn):
    # route every variable through fn; used below to quantize the conv weights
    def custom_getter(getter, *args, **kwargs):
        v = getter(*args, **kwargs)
        return fn(v)
    return custom_getter_scope(custom_getter)

def prelu(inputs):
    alphas = tf.get_variable('alpha', inputs.get_shape()[-1],initializer=tf.constant_initializer(0.0),dtype=tf.float32)
    pos = tf.nn.relu(inputs)
    neg = alphas * (inputs - abs(inputs)) * 0.5
    return pos + neg

def pnet_action_relu6(inputs):
    # unused relu6 variant of the activation
    res = tf.nn.relu6(inputs)
    if is_quantize:
        res = fa(res)
    return res

def p_net_prelu(inputs):
    # unused PReLU variant; note it expects a dorefa build whose fa() accepts
    # an is_neg flag, which the dorefa.py listed below does not provide
    if cfg.P_net_batchnorm:
        x = inputs
        name = re.sub(r"tower_\d/", "", x.op.name)
        N, H, W, oc = x.get_shape().as_list()
        affine_k = tf.get_variable(name='potato/'+name+'/affine-k',
                            initializer=np.array(np.ones((1,1,1,oc)), dtype=np.float32))
        affine_b = tf.get_variable(name='potato/'+name+'/affine-b',
                            initializer=np.array(np.zeros((1,1,1,oc)), dtype=np.float32))
        inputs = (tf.abs(affine_k) + 1.0) * x + affine_b

    alphas = tf.get_variable('alpha', inputs.get_shape()[-1], initializer=tf.constant_initializer(0.0), dtype=tf.float32)
    with tf.variable_scope("prelu"):
        pos = tf.nn.relu(inputs)
        neg = inputs - abs(inputs)
        neg = alphas * neg * 0.5
        if is_quantize:
            pos = fa(pos)
            neg = fa(neg, is_neg=True)
        res = pos + neg
    print("activation tensor name", res)
    return res

def action_relu6(inputs):
    # learned per-channel affine (the relu6 itself is commented out below);
    # dorefa's fa() later clips activations to [0, 6]
    x = inputs
    name = re.sub(r"tower_\d/", "", x.op.name)
    if "fc" not in name:
        if len(x.shape) == 2:
            N, oc = x.get_shape().as_list()
            affine_k = tf.get_variable(name='potato/'+name+'/affine-k',
                                initializer=np.array(np.ones((1,oc)), dtype=np.float32))
            affine_b = tf.get_variable(name='potato/'+name+'/affine-b',
                                initializer=np.array(np.zeros((1,oc)), dtype=np.float32))
        else:
            N, H, W, oc = x.get_shape().as_list()
            affine_k = tf.get_variable(name='potato/'+name+'/affine-k',
                                initializer=np.array(np.ones((1,1,1,oc)), dtype=np.float32))
            affine_b = tf.get_variable(name='potato/'+name+'/affine-b',
                                initializer=np.array(np.zeros((1,1,1,oc)), dtype=np.float32))
        inputs = (tf.abs(affine_k) + 1.0) * x + affine_b
    # res = tf.nn.relu6(inputs)
    return inputs


def network_arg_scope(
        is_training=True, weight_decay=cfg.train.weight_decay, batch_norm_decay=0.9,
        batch_norm_epsilon=1e-5, batch_norm_scale=False):
    batch_norm_params = {
        'is_training': is_training, 'decay': batch_norm_decay,
        'epsilon': batch_norm_epsilon, 'scale': batch_norm_scale,
        'updates_collections': ops.GraphKeys.UPDATE_OPS, 'center': False,
        #'variables_collections': [ tf.GraphKeys.TRAINABLE_VARIABLES ],
        'trainable': cfg.train.bn_training,
    }

    with slim.arg_scope(
            [slim.conv2d, slim.separable_convolution2d],
            weights_regularizer = slim.l2_regularizer(weight_decay),
            weights_initializer = slim.variance_scaling_initializer(),
            trainable = is_training,
            # activation_fn = pnet_action_relu6,
            activation_fn = action_relu6,
            normalizer_fn = slim.batch_norm,
            # normalizer_fn = None,
            normalizer_params = batch_norm_params,
            padding='same'):
        with slim.arg_scope([slim.batch_norm], **batch_norm_params) as arg_sc:
            return arg_sc


class Network(object):
    def __init__(self):
        pass

    def inference(self, mode, inputs, scope='BioRecNet'):
        is_training = mode

        if is_quantize:
            global fw,fa,fg
            # BITW,BITA,BITG = [int(tmp) for tmp in bitwidth.split(",")]
            BITW = cfg.P_BITW
            BITA = cfg.P_BITA
            BITG = cfg.P_BITG
            if fw == None:
                fw, fa, fg = get_dorefa(BITW,BITA,BITG)
                
            # global layer_num
            # if layer_num > 1:
            #     layer_num = 1
        
        def new_get_variable(v):
            name = v.op.name
            if not name.endswith('weights'):
                return v
            else:
                if is_quantize:
                    return fw(v)
                else:
                    return v

        def conv2d(inputs, c_outputs, s, name, padding='same'):
            # global layer_num
            # layer_num = layer_num + 1
            output = slim.conv2d(inputs, num_outputs=c_outputs, kernel_size=[3,3], stride=s, padding=padding, scope=name)

            if is_quantize:
                output = fa(output)
                output = fg(output)
            return output

        def max_unpool_2x2(x):
            out = tf.concat([x, tf.zeros_like(x)], 3)
            out = tf.concat([out, tf.zeros_like(out)], 2)
            # out = tf.concat([x, x], 3)
            # out = tf.concat([out, out], 2)
            input_shape = x.shape.as_list()
            out_shape = [-1,input_shape[1]*2,input_shape[2]*2,input_shape[3]]
            return tf.reshape(out, out_shape)

        def maxpool2x2(input, name):
            # global layer_num
            # layer_num = layer_num + 1
            output = slim.max_pool2d(input, kernel_size=[2, 2], stride=2, scope=name)
            return output

        def fully_connected(input, c_outputs, name):
            output = slim.fully_connected(input, c_outputs, activation_fn=None,normalizer_fn=None, scope=name)
            if PRINT_LAYER_LOG:
                print(name, output.get_shape())
            return output

        with slim.arg_scope(network_arg_scope(is_training=is_training)):
            with tf.compat.v1.variable_scope(scope, reuse=False), remap_variables(new_get_variable):
                conv0 = conv2d(inputs, 32, 1, name='conv_0') # 96 -
                print ('conv0:',conv0.shape)
                conv1 = conv2d(conv0, 32, 2, name='conv_1') # 48 -
                print ('conv1:',conv1.shape)
                conv2 = conv2d(conv1, 32, 1, name='conv_2') # 48 -
                print ('conv2:',conv2.shape)
                conv3 = conv2d(conv2, 32, 2, name='conv_3') # 24 -
                print ('conv3:',conv3.shape)
                conv4 = conv2d(conv3, 32, 1, name='conv_4') # 24 -
                print ('conv4:',conv4.shape)
                conv5 = conv2d(conv4, 64, 2, name='conv_5') # 12 -
                print ('conv5:',conv5.shape)
                conv6 = conv2d(conv5, 64, 1, name='conv_6') # 12 -
                print ('conv6:',conv6.shape)
                conv7 = conv2d(conv6, 64, 2, name='conv_7') # 6 -
                print ('conv7:',conv7.shape)
                # ap0=tf.nn.avg_pool(conv3, ksize=4, strides=4, padding='VALID')
                ap0=tf.nn.max_pool(conv3, ksize=4, strides=4, padding='VALID')
                print ('ap0:',ap0.shape)
                # ap1=tf.nn.avg_pool(conv5, ksize=2, strides=2, padding='VALID')
                ap1=tf.nn.max_pool(conv5, ksize=2, strides=2, padding='VALID')
                print ('ap1:',ap1.shape)
                concat0=tf.concat([ap0,ap1,conv7],3,name='concat0')
                conv8 = conv2d(concat0, 64, 1, name='conv_8') # 6 -
                print ('conv8:',conv8.shape)
                heatmap_pred = slim.conv2d(conv8, 1, kernel_size=[3,3], stride=1, scope='conv_9', activation_fn=tf.sigmoid, normalizer_fn=None)  # 6x6 sharpness-score map
                if is_training:
                    l2_loss = tf.add_n(tf.losses.get_regularization_losses())
                    return heatmap_pred, l2_loss
                else:
                    return heatmap_pred, (conv0*15.0-8.0)
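
The inline size comments track the feature maps: the 96x96 input is halved at conv_1, conv_3, conv_5, and conv_7 down to 6x6, and concat0 fuses three scales at that resolution, conv3 max-pooled 4x (32 channels), conv5 max-pooled 2x (64 channels), and conv7 (64 channels), into 160 channels, after which conv_8 and the sigmoid head conv_9 emit the 6x6 score map that the loss consumes.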

The quantization helper is dorefa.py:

#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
@author: Yuxin Wu,dzhang
@contact: ppwwyyxxc@gmail.com,dong.zhang@ingenic.com
"""
import tensorflow as tf
from tensorflow.python.ops import math_ops
from tensorflow.contrib.framework import add_model_variable
from tensorflow.python.training import moving_averages
slim = tf.contrib.slim
from tensorflow.contrib.framework.python.ops import variables
from tensorflow.python.ops import array_ops
from tensorflow.python.framework import dtypes
import re
import numpy as np

weight_tensor_list = []
is_first = True



def weight_prepara(x):
    # clip weights to mean +/- 3*std before quantization
    mean = math_ops.reduce_mean(x)
    var = math_ops.reduce_mean(math_ops.squared_difference(x, array_ops.stop_gradient(mean)))
    std = tf.sqrt(var)
    x = tf.clip_by_value(x, mean - 3.0*std, mean + 3.0*std)
    return x

#@graph_memoized
def get_dorefa(bitW, bitA, bitG):
    """
    return the three quantization functions fw, fa, fg, for weights, activations and gradients respectively
    It's unsafe to call this function multiple times with different parameters
    """
    G = tf.get_default_graph()


    def quantize_w(x, k):
        k = k - 1                     # one bit is reserved for the sign
        n = float(2**k)
        with G.gradient_override_map({"Round": "Identity"}):  # straight-through gradient
            x = tf.round(x * n)
            x = tf.clip_by_value(x, -n, n-1) / n
            return x
    
    def quantize_a(x, k):
        n = float(2**k - 1)
        with G.gradient_override_map({"Round": "Identity"}):
            return tf.round(x * n) / n

    def quantize_g(x, k):
        n = float(2**k - 1)
        with G.gradient_override_map({"Round": "Identity"}):
            return tf.round(x * n) / n

    def fw(x):
        # print("Quantized Weight: %s, BITW: %d" % (x.op.name, bitW))
        if bitW == 32:
            return x
        if bitW == 1:   # BWN
            with G.gradient_override_map({"Sign": "Identity"}):
                E = tf.stop_gradient(tf.reduce_mean(tf.abs(x)))
                return tf.sign(x / E) * E
            
        pre_max_value = 1.0
        if len(x.shape) == 2:
#             print("dzhang debug fc!!!")
            pre_max_value = math_ops.reduce_max(tf.abs(x), list( range(len(x.shape) - 1) ), keepdims=True)
            if x.shape[-1] == 1 or x.shape[-1] == 4:
                print("#########################")
                pre_max_value = math_ops.reduce_max(tf.abs(x))
        if len(x.shape) == 4:
#             print("dzhang debug conv!!!")
            pre_max_value = math_ops.reduce_max(tf.abs(x), list( range(len(x.shape) - 1) ), keepdims=True)



        x = weight_prepara(x)
        if len(x.shape) == 2:
#             print("dzhang debug fc!!!")
            max_value = math_ops.reduce_max(tf.abs(x), list( range(len(x.shape) - 1) ), keepdims=True)
            if x.shape[-1] == 1 or x.shape[-1] == 4:
                print("#########################")
                max_value = math_ops.reduce_max(tf.abs(x))
        if len(x.shape) == 4:
#             print("dzhang debug conv!!!")
            max_value = math_ops.reduce_max(tf.abs(x), list( range(len(x.shape) - 1) ), keepdims=True)
        x = x / max_value
        res = quantize_w(x, bitW)
        res = res * pre_max_value
        return res

    def fa(x):
        # print("Quantized Action Relu6: %s, BITA: %d" % (x.op.name, bitA))
        if bitA == 32:
            return x
 
        x = tf.clip_by_value(x, 0.0, 6.0)
        x = x / 6.0
        res = quantize_a(x, bitA)
        return res
    
    def fg(x):
        return x

    return fw, fa, fg
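
A quick numeric illustration of the uniform activation quantizer above (a sketch, not project code): with bitA = 4 there are 2**4 - 1 = 15 quantization steps, so after fa() clips an activation into [0, 6] and rescales it to [0, 1], the value snaps to the nearest multiple of 1/15:

import numpy as np

bitA = 4
n = float(2**bitA - 1)              # 15
x = np.array([0.0, 0.1, 0.5, 1.0])  # activations already mapped into [0, 1]
print(np.round(x * n) / n)          # [0.         0.13333333 0.53333333 1.        ]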

2. Computing the loss

The loss is computed from the ground truth and the predictions; see losses_blur.py:

#!/usr/bin/env python
# encoding: utf-8

import tensorflow as tf
import sys
sys.path.append('..')
from config_blur import cfg

def ohkm(loss, top_k):
    # per-image: average the losses *outside* the per-image top-k
    # (unused below; loss_ohem uses ohkm_all instead)
    ohkm_loss = 0.
    for i in range(cfg.batch_size):
        sub_loss = loss[i]
        topk_val, topk_idx = tf.nn.top_k(sub_loss, k=top_k, sorted=False, name='ohkm{}'.format(i))
        tmp_loss = tf.gather(sub_loss, topk_idx, name='ohkm_loss{}'.format(i))
        ohkm_loss += (tf.reduce_sum(sub_loss)-tf.reduce_sum(tmp_loss)) / (int(cfg.heatmap_size * cfg.heatmap_size)-top_k)
    ohkm_loss /= cfg.batch_size
    return ohkm_loss

def ohkm_all(loss, top_k):
    # keep only the top_k largest losses across the whole batch and average them
    topk_val, topk_idx = tf.nn.top_k(loss, k=top_k, sorted=False, name='ohkm_all')
    tmp_loss = tf.gather(loss, topk_idx, name='ohkm_loss_all')
    ohkm_loss = tf.reduce_sum(tmp_loss) / top_k
    return ohkm_loss

def loss(preds, labels, heatmap_pre, heatmap):
    # classification + heatmap loss variant (unused below)
    labels = tf.cast(labels, tf.int64)
    print('pre labels', labels.get_shape())
    labels = tf.reshape(labels, (cfg.batch_size, -1))
    print('labels', labels.get_shape())
    labels = tf.one_hot(labels, cfg.classes)
    cross_entropy = tf.nn.softmax_cross_entropy_with_logits_v2(logits=preds, labels=labels)
    pred_loss = tf.reduce_mean(cross_entropy)
    print('pred_loss', pred_loss.get_shape())

    heatmap_predict = tf.reshape(heatmap_pre, [cfg.batch_size, -1])
    heatmap_target = tf.reshape(heatmap, [cfg.batch_size, -1])
    heatmap_loss = tf.square(heatmap_predict - heatmap_target)
    heatmap_loss = ohkm(heatmap_loss, int(cfg.train.ohem_ratio * cfg.heatmap_size * cfg.heatmap_size))
    heatmap_loss = tf.sqrt(heatmap_loss)
    return pred_loss, heatmap_loss

def loss_ohem(heatmap_pre, heatmap_m, label):
    print("heatmap_pre.shape", heatmap_pre.shape)
    heatmap_predict = tf.reshape(heatmap_pre, [cfg.batch_size, -1])
    ones = tf.ones_like(heatmap_predict, dtype=tf.float32)
    # every heatmap cell of an image shares the same target: the image's blur label
    heatmap_target = ones * label
    print("heatmap_m.shape", heatmap_m.shape)
    heatmap_mask = tf.reshape(heatmap_m, [cfg.batch_size, -1])
    print("heatmap_target.shape", heatmap_target.shape)
    # squared error, restricted to foreground cells by the mask
    heatmap_loss = tf.square(heatmap_predict - heatmap_target)
    heatmap_loss = heatmap_loss * heatmap_mask
    print("heatmap_loss.shape", heatmap_loss.shape)
    heatmap_loss = tf.reshape(heatmap_loss, [-1])
    heatmap_loss = ohkm_all(heatmap_loss, int(cfg.train.ohem_ratio * cfg.heatmap_size * cfg.heatmap_size * cfg.batch_size))

    return heatmap_loss
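
With the default configuration (ohem_ratio = 0.1, heatmap_size = 6, batch_size = 256), ohkm_all keeps the int(0.1 * 6 * 6 * 256) = 921 largest masked squared errors out of the 9216 heatmap cells in a batch and averages only those, so each update concentrates on the hardest roughly 10% of cells.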

3. Wrapping the loss and predictions

Everything is wrapped in the ColorRecNet class; see run_net_blur.py:

#!/usr/bin/env python
# encoding: utf-8

import tensorflow as tf
import sys
sys.path.append('..')
from models.network_dw import Network
# from models.network_cdcn import Network
from config_blur import cfg
from models.losses_blur import loss, loss_ohem

class ColorRecNet:
    def __init__(self, img, heatmap, label, is_training, batch_norm_decay=0.997):
        self.img = img
        self.heatmap = heatmap
        self.label = label
        self.is_training = is_training
        self.batch_norm_decay = batch_norm_decay
        self.img_shape = tf.shape(self.img)
        backbone = Network()
        if is_training:
            self.heatmap_pre, self.l2_loss = backbone.inference(self.is_training, self.img)
        else:
            self.heatmap_pre, self.conv = backbone.inference(self.is_training, self.img)

    def compute_loss(self):
        with tf.name_scope('loss_0'):
            heatmapLoss = loss_ohem(self.heatmap_pre, self.heatmap, self.label)
            self.heatmap_loss = heatmapLoss
            self.all_loss = heatmapLoss + self.l2_loss
        return self.all_loss, self.heatmap_loss, self.l2_loss

    def predict(self):
        '''
        only supports single-image prediction
        '''
        return self.heatmap_pre, self.conv

V. Training, Saving, and Testing the Model

1. Training and saving

TensorFlow trains the model, using the loss to keep updating the weights so the model learns more from the images, and checkpoints are saved along the way; see mulpi_gpus_train_blur_v2.py:

#!/usr/bin/env python
# encoding: utf-8

import numpy as np
import tensorflow as tf
import sys
from models.run_net_blur_v2 import ColorRecNet
from gen_data_batch_blur import gen_data_batch
from config_blur import cfg
import os
import re
import tensorflow.contrib.slim as slim
os.environ["CUDA_VISIBLE_DEVICES"] = "1"

def get_variables_to_restore(include_vars=[], exclude_global_pool=False):
    variables_to_restore = []
    for var in slim.get_model_variables():
        if exclude_global_pool and 'global_pool' in var.op.name:
            #print(var)
            continue
        variables_to_restore.append(var)
    for var in slim.get_variables_to_restore(include=include_vars):
        if exclude_global_pool and 'global_pool' in var.op.name:
            #print(var)
            continue
        variables_to_restore.append(var)
    return variables_to_restore


def average_gradients(tower_grads):
    """Calculate the average gradient for each shared variable across all towers.

    Note that this function provides a synchronization point across all towers.

    Args:
          tower_grads: List of lists of (gradient, variable) tuples. The outer list
        is over individual gradients. The inner list is over the gradient
        calculation for each tower.
    Returns:
          List of pairs of (gradient, variable) where the gradient has been averaged
        across all towers.
    """
    average_grads = []
    for grad_and_vars in zip(*tower_grads):
        # Note that each grad_and_vars looks like the following:
        #   ((grad0_gpu0, var0_gpu0), ... , (grad0_gpuN, var0_gpuN))
        grads = []
        for g, _ in grad_and_vars:
            # Add 0 dimension to the gradients to represent the tower.
            expanded_g = tf.expand_dims(g, 0)

            # Append on a 'tower' dimension which we will average over below.
            grads.append(expanded_g)

        # Average over the 'tower' dimension.
        grad = tf.concat(axis=0, values=grads)
        grad = tf.reduce_mean(grad, 0)

        # Keep in mind that the Variables are redundant because they are shared
        # across towers. So .. we will just return the first tower's pointer to
        # the Variable.
        v = grad_and_vars[0][1]
        grad_and_var = (grad, v)
        average_grads.append(grad_and_var)
    return average_grads


def train(finetune):
    is_training = True

    # data pipeline
    imgs, heatmap, label = gen_data_batch(cfg.data_path, cfg.batch_size*cfg.train.num_gpus)
    print(cfg.data_path)
    print("imgs:", imgs.shape)
    print("heatmap:", heatmap.shape)
    print("label:", label.shape)
    imgs_split = tf.split(imgs, cfg.train.num_gpus)
    heatmap_split = tf.split(heatmap, cfg.train.num_gpus)
    label_split = tf.split(label, cfg.train.num_gpus)

    global_step = tf.get_variable('global_step', [], initializer=tf.constant_initializer(0.), trainable=False)
    # exponential learning-rate decay every learn_rate_decay_epoch epochs
    learn_rate_decay_step = int(cfg.train.num_samples / cfg.batch_size / cfg.train.num_gpus * cfg.train.learn_rate_decay_epoch)
    learning_rate = tf.train.exponential_decay(cfg.train.learn_rate, global_step, learn_rate_decay_step, cfg.train.learn_rate_decay, staircase=True)
    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)

    # Calculate the gradients for each model tower.
    tower_grads = []
    with tf.variable_scope(tf.get_variable_scope()):
        for i in range(cfg.train.num_gpus):
            with tf.device('/gpu:%d' % i):
                with tf.name_scope('%s_%d' % (cfg.train.tower, i)) as scope:
                    model = ColorRecNet(imgs_split[i], heatmap_split[i], label_split[i], is_training)
                    loss, heatmap_loss, l2_loss = model.compute_loss()
                    tf.get_variable_scope().reuse_variables()
                    grads = optimizer.compute_gradients(loss)
                    tower_grads.append(grads)
                    if i == 0:
                        current_loss = loss
                        current_l2_loss = l2_loss
                        current_heatmap_loss = heatmap_loss
                        update_op = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    grads = average_gradients(tower_grads)
    with tf.control_dependencies(update_op):
        apply_gradient_op = optimizer.apply_gradients(grads, global_step=global_step)
        train_op = tf.group(apply_gradient_op,*update_op)

    # GPU config
    config = tf.ConfigProto(allow_soft_placement=True, log_device_placement=False)
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)

    # Create a saver
    saver = tf.compat.v1.train.Saver(max_to_keep=1000)
    
    ckpt_dir = cfg.ckpt_path

    # init
    sess.run(tf.global_variables_initializer())
    if finetune:
        checkpoint = '/home/xjyu/lgx/end_face/ckpt1/livenessDet_blur_2-688'
        # restore all model variables from the checkpoint, ignoring missing ones
        variables_to_restore = get_variables_to_restore(exclude_global_pool=True)
        init_assign_op, init_feed_dict = slim.assign_from_checkpoint(checkpoint, variables_to_restore, ignore_missing_vars=True)
        sess.run(init_assign_op, init_feed_dict)


    # running
    cnt_epoch = 0

    for i in range(1, cfg.train.max_batches):
        _, loss_, heatmap_loss_, l2_loss_, lr_ = sess.run([train_op, current_loss, current_heatmap_loss, current_l2_loss, learning_rate])
        if i % 10 == 0:
            print(i, ': ', '  loss: ', loss_, '   heatmap_loss_:', heatmap_loss_, '   l2_loss_:', l2_loss_, '   lr: ', lr_)
        if int(i) % int(cfg.train.num_samples / cfg.train.num_gpus / 100) == 0:
            cnt_epoch += 1
            print("save:" + ckpt_dir + 'livenessDet_blur_2-%d' % cnt_epoch)
            saver.save(sess, ckpt_dir+'livenessDet_blur_2', global_step=cnt_epoch, write_meta_graph=True)

if __name__ == '__main__':
    train(finetune=False)
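
With the defaults above (num_samples = 424165, num_gpus = 1), a checkpoint is written every int(424165 / 1 / 100) = 4241 training steps, which at batch size 256 (about 1657 steps per pass over the data) means the saved "epoch" counter advances roughly every two and a half true epochs.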

The training process looks like this:
[figure: training log]
The saved checkpoints look like this:
[figure: checkpoint files]

2. Testing on the test set

Before testing, create the directories that will hold the 10 classification results (see test.py); then run the Asian and foreign test samples through test_v2.py:
test.py

# encoding: utf-8
import os

# create the 10 result directories, one per predicted blur level
for i in range(10):
    os.makedirs('/home/xjyu/lgx/end_face/res/blur_%s' % i)

test_v2.py:

#!/usr/bin/env python
# encoding: utf-8


import numpy as np
import tensorflow as tf
import sys
sys.path.insert(0,'/home/xjyu/lgx/end_face/')
from models.run_net_blur_v2 import ColorRecNet
from config_blur import cfg
import cv2
import os
import re
from tqdm import tqdm
from PIL import Image
import random
import shutil

os.environ["CUDA_VISIBLE_DEVICES"] = "0"
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'


def test(img_path, g_step):
    is_training = False
    cfg.batch_size = 1
    a = 0
    b = 0
    c = 0
    d = 0
    input_width = 96
    input_height = 96
    ckpt_dir = cfg.ckpt_path

    imgs = os.listdir(img_path)

    configer = tf.ConfigProto()
    configer.gpu_options.per_process_gpu_memory_fraction = 0.1
    # mask_img = cv2.imread("/home/xjyu/lgx/blur_quantize/tools/cv/img/20190918/b.png",0)
    # mask_f = mask_img.astype(np.float32)/250.0

    with tf.Session(config=configer) as sess:
        # imgs_holder = tf.placeholder(tf.float32, shape=[1, 48, 48, 1])

        imgs_holder = tf.placeholder(tf.float32, shape=[1, input_height, input_width,1])
        model = ColorRecNet(imgs_holder, None, None, is_training)
        heatmap_pre, last_layer = model.predict()

        saver = tf.compat.v1.train.Saver()
        sess.run(tf.global_variables_initializer())
        # print ckpt_dir+'livenessDet_v4-'+str(g_step)
        saver.restore(sess, ckpt_dir+'livenessDet_blur_2-'+str(g_step))
        # saver.restore(sess, ckpt_dir+'livenessDet_128-16-bit4-0727-'+str(g_step))
        sess.run(tf.local_variables_initializer())

        for i in imgs:
            img = cv2.imread(os.path.join(img_path, i))
            try:
                h, w, _ = img.shape
            except:
                print(os.path.join(img_path, i))
                continue

            img_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
            
            # same two-step normalization as normal.py: mean to 128,
            # then mean absolute deviation to 30
            img_o = np.ones((h, w), dtype=np.float32)*128
            img_a = img_gray.astype(np.float32)
            img_mean = np.mean(img_a)
            scale = 128.0/float(img_mean)
            img_b = img_a * scale
            img_diff = abs(img_b - img_o)
            mean_diff = np.mean(img_diff)
            scale_2 = 30.0/mean_diff
            img_e = img_b - img_o
            img_f = img_e * scale_2
            img_g = img_f + img_o
            img_g = np.clip(img_g, 0, 255)
            img_h = img_g.astype(np.uint8)

            # paste into the 96x96 canvas (top-left) and build the foreground mask
            image = np.zeros((input_height, input_width), dtype=np.uint8)
            image[0:h, 0:w] = img_h[0:h, 0:w]

            z_img = np.zeros((input_height, input_width), dtype=np.uint8)
            o_img = np.ones((input_height, input_width), dtype=np.uint8)*255
            bmask = np.where(image < 5, z_img, o_img)
            bmask = cv2.resize(bmask, (cfg.heatmap_size, cfg.heatmap_size))
            img_data = image.astype(np.float32) / 127.5
            img_input = np.reshape(img_data, [1, input_height, input_width, 1])
            heatmap_, last_layer_ = sess.run([heatmap_pre, last_layer], feed_dict={imgs_holder: img_input})

            # average the predicted score over foreground heatmap cells only
            heatmap_s = heatmap_.reshape(-1)
            bmask_s = bmask.reshape(-1)
            mask_index = np.argwhere(bmask_s > 120)
            heatmap_v = heatmap_s[mask_index]
            heatmap_mean = np.mean(heatmap_v)

            # the network predicts sharpness (1.0 = sharp); invert to get blurriness
            heatmap_mean = 1 - heatmap_mean
            if heatmap_mean <= 0.1 :
                shutil.copy(os.path.join(img_path, i),'res/blur_0')
                continue
            elif heatmap_mean <= 0.2 :
                shutil.copy(os.path.join(img_path, i),'res/blur_1')
                continue
            elif heatmap_mean <= 0.3 :
                shutil.copy(os.path.join(img_path, i),'res/blur_2')
                continue
            elif heatmap_mean <= 0.4 :
                shutil.copy(os.path.join(img_path, i),'res/blur_3')
                continue
            elif heatmap_mean <= 0.5 :
                shutil.copy(os.path.join(img_path, i),'res/blur_4')
                continue
            elif heatmap_mean <= 0.6 :
                shutil.copy(os.path.join(img_path, i),'res/blur_5')
                continue
            elif heatmap_mean <= 0.7 :
                shutil.copy(os.path.join(img_path, i),'res/blur_6')
                continue
            elif heatmap_mean <= 0.8 :
                shutil.copy(os.path.join(img_path, i),'res/blur_7')
                continue
            elif heatmap_mean <= 0.9 :
                shutil.copy(os.path.join(img_path, i),'res/blur_8')
                continue
            else:
                shutil.copy(os.path.join(img_path, i),'res/blur_9')
                continue
            
          

    print('{:^{}}|{:^{}}|{:^{}}|{:^{}}|{:^{}}'.format(a, 20, b, 20, c, 20, d, 20, img_path.strip().split('/')[-2] + '/' + img_path.strip().split('/')[-1], 20))
    tf.reset_default_graph()

if __name__ == '__main__':
    g_step = np.arange(25, 24, -1)
    for i in g_step:
        print('*****************************', i, '*****************************')
        print('{:^{}}|{:^{}}|{:^{}}|{:^{}}'.format('no_live', 20, 'live', 20, 'unkown', 20, 'img_path', 20))

        # img_path = ['/home/xjyu/lgx/end_face/test/blur_test_jpg/']  # Asian test images
        img_path = ['/home/xjyu/blur_test_jpg/']  # foreign test images

        for j in img_path:
            test(j, i)
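
The elif ladder inside test() sorts each image into one of ten 0.1-wide bins by its blurriness score; an equivalent compact form (a sketch, not from the original script) would be:

import math
bin_idx = min(max(int(math.ceil(heatmap_mean * 10)) - 1, 0), 9)
shutil.copy(os.path.join(img_path, i), 'res/blur_%d' % bin_idx)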


Asian test results:

[figure: Asian test results]

Foreign test results:

[figure: foreign test results]

The training set contains 420,000 images and the test set 2,000 images.
To obtain the dataset, follow the author's WeChat official account and leave a message there.

Original article: https://blog.csdn.net/yegeli/article/details/109630078
