
Face Detection and Keypoint Detection


This post reproduces the tutorial 【手把手教学】人脸识别和关键点检测(基于tensorflow和opencv). The original author's code is fairly basic and beginner-friendly, and the author also recommended introductory deep learning material (video courses, fundamentals, papers, and source-code walkthroughs), so I can keep learning from excellent bloggers.
Time to begin the long journey of learning to code. Let's go! Thanks to the blogger above for the introductory guidance.

1 Preparation

1.1 Dataset

The Facial Keypoints Detection competition on Kaggle.
If you cannot receive the verification code when registering there, the dataset is also available from the 聚数力 dataset site.

1.2 Package installation

pip install opencv-python
If the cv2 package fails to install in PyCharm, one workaround: Project Interpreter → Show All → select the last entry → + → add the site-packages directory under your Python installation path.
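
To confirm the install worked, a quick check helps; a minimal sketch (cv2.data.haarcascades is the directory where opencv-python ships its pre-trained cascade files, which section 2 relies on):

import cv2

# Sketch: verify the OpenCV install and locate the bundled Haar cascades.
print(cv2.__version__)
print(cv2.data.haarcascades)  # directory containing haarcascade_frontalface_alt2.xml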

2 Face detection

This section reproduces the code from 手把手教你做人脸识别和关键点检测(基于tensorflow和opencv).
Create face detection.py.

import cv2


def ad_threshold(img):
    th2 = cv2.adaptiveThreshold(img, 255,
                                cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                                cv2.THRESH_BINARY, 19, 4)  # adaptive thresholding (binarization)
    return th2


def CatchUsbVideo(window_name, camera_index):
    # Main capture loop
    cv2.namedWindow(window_name)  # create the preview window
    cap = cv2.VideoCapture(camera_index, cv2.CAP_DSHOW)  # open the camera (the built-in camera is usually index 0; CAP_DSHOW is the Windows DirectShow backend)

    # Load the face classifier (a pre-trained OpenCV Haar cascade named haarcascade_frontalface_alt2.xml).
    # Search your machine for the file and copy it into the current folder;
    # on my machine it lives under C:\Users\dell\AppData\Roaming\Python\Python37\site-packages\cv2\data
    classifier = cv2.CascadeClassifier('F:/try/face detect/haarcascade_frontalface_alt2.xml')
    # bounding-box color for detected faces
    color = (0, 255, 0)

    font = cv2.FONT_HERSHEY_SIMPLEX  # font for the on-screen label

    while cap.isOpened():
        catch, frame = cap.read()  # read one frame

        if not catch:
            raise Exception('Check if the camera is on.')

        # convert to grayscale
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        gray = ad_threshold(gray)  # adaptive thresholding

        # scaleFactor is the image pyramid scale step
        # minNeighbors=3 keeps a detection only if it is confirmed at least 3 times, which suppresses false positives
        # minSize is the smallest face size to detect
        faceRects = classifier.detectMultiScale(
            gray, scaleFactor=1.2, minNeighbors=3, minSize=(32, 32))
        if len(faceRects) > 0:
            # iterate over all faces found in this frame
            for face in faceRects:
                x, y, w, h = face  # face is a tuple with the detection's top-left corner plus width and height

                image = frame[y - 10:y + h + 10, x - 10:x + w + 10]  # crop the face region (not used further in this script)

                cv2.rectangle(frame, (x - 5, y - 5), (x + w + 5, y + h + 5), color, 2)  # draw the face bounding box
                cv2.putText(frame, 'face', (x + 30, y + 30), font, 1, (255, 0, 255), 4)

        cv2.imshow(window_name, frame)  # show the detection result
        c = cv2.waitKey(10)

        if c & 0xFF == ord('q'):
            # press q to quit
            break

        if cv2.getWindowProperty(window_name, cv2.WND_PROP_AUTOSIZE) < 1:
            # quit when the window is closed with the x button
            break

    # release the camera when the loop ends
    cap.release()
    cv2.destroyAllWindows()


if __name__ == "__main__":
    CatchUsbVideo("face_detect", camera_index=0)
    # camera_index is the camera number; a laptop's built-in camera is usually 0
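
Before wiring in the camera it can help to test the cascade on a single still image; a minimal sketch, assuming a local photo named test.jpg (a hypothetical filename):

import cv2

# Sketch: run the Haar cascade once on a still image instead of a video stream.
classifier = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_frontalface_alt2.xml')
img = cv2.imread('test.jpg')
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
faces = classifier.detectMultiScale(gray, scaleFactor=1.2, minNeighbors=3, minSize=(32, 32))
print('faces found:', len(faces))
for (x, y, w, h) in faces:
    cv2.rectangle(img, (x, y), (x + w, y + h), (0, 255, 0), 2)
cv2.imwrite('test_out.jpg', img)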


3 Reading the data

Create read_data.py

from random import shuffle
import pandas as pd
import numpy as np
import pickle
import cv2
import os
from random import randint


class Reader(object):

    def __init__(self):

        self.train_csv = 'F:/try/face detect/Facial Keypoints Detection/training.csv'

        self.test_csv = 'F:/try/face detect/Facial Keypoints Detection/test.csv'

        self.cursor = 0

        self.names_path = 'F:/try/face detect/Facial Keypoints Detection/names.txt'

        self.data_path = 'F:/try/face detect/Facial Keypoints Detection/data.pkl'

        self.train_image_path = 'F:/try/face detect/Facial Keypoints Detection/train_image'

        self.size = 96

        if not os.path.exists(self.train_image_path):

            os.makedirs(self.train_image_path)

            self.data = self.pre_process()

        else:

            with open(self.data_path, 'rb') as f:

                self.data = pickle.load(f)

        print('There are {} samples in total.'.format(len(self.data)))

        shuffle(self.data)

        with open(self.names_path, 'r') as f:

            self.names = f.read().splitlines()

        self.data_num = len(self.data)

        self.label_num = len(self.names)

    def pre_process(self):

        data = pd.read_csv(self.train_csv)
        data = data.dropna()

        cols = data.columns[:-1]

        data = data.to_dict()

        for key, value in data['Image'].items():

            # parse the space-separated pixel string into a float array
            data['Image'][key] = np.fromstring(value, sep=' ')

        data_names = list(data.keys())
        data_names.remove('Image')

        with open(self.names_path, 'w') as f:

            for value in data_names:
                f.writelines(value+'\n')

        labels = []

        for index in data['Image'].keys():

            label = {}

            image = data['Image'][index].reshape((96, 96))
            image_name = 'image_{}.jpg'.format(index)
            image_path = os.path.join(self.train_image_path, image_name)

            cv2.imwrite(image_path, image)

            label['image_path'] = image_path

            for point_name in data_names:
                label[point_name] = data[point_name][index]

            labels.append(label)

        with open(self.data_path, 'wb') as f:
            pickle.dump(labels, f)

        return labels

    def random_flip(self, image, points):

        # Unused augmentation helper (the call in generate() stays commented out).
        # Note that for the batched arrays generate() produces, axis=0 is the
        # batch axis, so a per-image vertical flip there would need axis=1.
        if randint(0, 1):

            image = np.flip(image, axis=0)
            points[1::2] = 1 - points[1::2]

        return image, points

    def generate(self, batch_size=1):

        images = []
        points = []

        for _ in range(batch_size):

            path = self.data[self.cursor]['image_path']
            image = cv2.imread(path)
            image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

            images.append(image)

            tmp = []
            for key in self.names:

                value = self.data[self.cursor][key]
                tmp.append(value)

            points.append(tmp)

            self.cursor += 1

            if self.cursor >= self.data_num:

                self.cursor = 0
                shuffle(self.data)

        images = np.array(images).reshape(
            (batch_size, self.size, self.size, 1))
        images = images - 127.5  # zero-center the pixel values

        points = np.array(points)
        points = points / self.size  # normalize keypoint coordinates to [0, 1]

        # images, points = self.random_flip(images, points)

        return images, points


if __name__ == "__main__":

    import matplotlib.pyplot as plt

    reader = Reader()

    for _ in range(10):

        image, point = reader.generate(1)

        image = np.squeeze(image)
        point = np.squeeze(point)

        image = (image + 127.5).astype(int)
        point = (point * 96).astype(int)

        result = image.copy()

        y_axis = point[1::2]
        x_axis = point[::2]

        color = (0, 0, 255)

        for y, x in zip(y_axis, x_axis):

            cv2.circle(result, (x, y), 1, color)

        plt.imshow(result)
        plt.show()
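
If you want to see exactly what Reader is parsing, the raw Kaggle CSV can be inspected directly; a minimal sketch (adjust the path to wherever you unpacked the data):

import pandas as pd

# Sketch: peek at the raw training data (path is machine-specific).
df = pd.read_csv('F:/try/face detect/Facial Keypoints Detection/training.csv')
print(df.shape)              # rows x 31 columns: 30 keypoint coordinates plus 'Image'
print(df.columns.tolist())   # the 30 names that end up in names.txt, plus 'Image'
print(df.isnull().sum())     # many rows have missing keypoints; Reader drops them with dropna()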

After it finishes, three new items appear alongside the original .csv files: names.txt, data.pkl, and the train_image folder.
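
To double-check the cached labels, data.pkl can be loaded back; a minimal sketch:

import pickle

# Sketch: inspect the label dictionaries written by pre_process().
with open('F:/try/face detect/Facial Keypoints Detection/data.pkl', 'rb') as f:
    data = pickle.load(f)
print(len(data))       # number of training samples
print(data[0].keys())  # 'image_path' plus the 30 keypoint names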

4 Training the model

Create network.py. This run is just to get familiar with the pipeline, so epoches is set to 20 here (the original author used 100). If anything complains about a missing package, conda install xxx it.
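
Note that network.py uses TF 1.x graph-mode APIs (tf.contrib.slim, tf.placeholder) that were removed in TensorFlow 2, so it is worth checking the environment first; a minimal sketch:

import tensorflow as tf

# Sketch: this tutorial's graph code requires TensorFlow 1.x.
print(tf.__version__)  # expect something like 1.15
assert tf.__version__.startswith('1.'), 'tf.contrib.slim is unavailable in TF 2.x'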

import tensorflow as tf
from read_data import Reader
import os
import numpy as np

slim = tf.contrib.slim


class Net(object):

    def __init__(self, is_training=True):

        self.is_training = is_training

        if self.is_training:

            self.reader = Reader()

        self.batch_size = 16

        self.lr = 2e-4

        self.wd = 5e-3

        self.epoches = 20

        self.batches = 256

        self.size = 96

        self.label_num = 30

        self.x = tf.placeholder(tf.float32, [None, self.size, self.size, 1])

        self.y = tf.placeholder(tf.float32, [None, self.label_num])

        self.y_hat = self.network(self.x)

        self.model_path = './model'

        self.ckpt_path = os.path.join(self.model_path, 'model.ckpt')

        self.saver = tf.train.Saver()

    def loss_layer(self, y, y_hat):

        # sum of squared errors over all 30 predicted coordinates
        loss = tf.reduce_sum(tf.square(y - y_hat))

        return loss

    def network(self, inputs):

        with tf.variable_scope('net'):

            with slim.arg_scope([slim.conv2d],
                                activation_fn=tf.nn.relu,
                                weights_regularizer=slim.l2_regularizer(self.wd)):

                # Block init
                net = slim.conv2d(inputs, 1024, [3, 3],
                                  2, scope='conv_init', padding='SAME')

                # Block 1
                net = slim.repeat(net, 2, slim.conv2d,
                                  64, [3, 3], scope='conv1', padding='SAME')
                net = slim.max_pool2d(
                    net, [2, 2], scope='pool1', padding='SAME')

                # use training= (not trainable=) so batch statistics are used
                # during training and moving averages at inference
                net = tf.layers.batch_normalization(
                    net, training=self.is_training, name='BN_block1')

                # Block 2
                net = slim.repeat(net, 2, slim.conv2d,
                                  128, [3, 3], scope='conv2')
                net = slim.max_pool2d(
                    net, [2, 2], scope='pool2', padding='SAME')

                net = tf.layers.batch_normalization(
                    net, training=self.is_training, name='BN_block2')

                # Block 3
                net = slim.repeat(net, 3, slim.conv2d,
                                  256, [3, 3], scope='conv3')
                net = slim.max_pool2d(
                    net, [2, 2], scope='pool3', padding='SAME')

                net = tf.layers.batch_normalization(
                    net, training=self.is_training, name='BN_block3')

                # Block 4
                net = slim.repeat(net, 3, slim.conv2d,
                                  512, [3, 3], scope='conv4')
                net = slim.max_pool2d(
                    net, [2, 2], scope='pool4', padding='SAME')

                net = tf.layers.batch_normalization(
                    net, training=self.is_training, name='BN_block4')

                # Block 5
                net = slim.repeat(net, 3, slim.conv2d,
                                  512, [3, 3], scope='conv5')

                net = tf.layers.batch_normalization(
                    net, training=self.is_training, name='BN_block5')

                # Block 6
                net = slim.conv2d(net, 1024, [3, 3],
                                  2, scope='conv6')

                net = tf.layers.batch_normalization(
                    net, training=self.is_training, name='BN_block6')

                net = tf.layers.flatten(net)

                logits = tf.layers.dense(net, self.label_num)

                if self.is_training:

                    # training=True so dropout is actually applied during training
                    # (tf.layers.dropout is a no-op with its default training=False)
                    logits = tf.layers.dropout(logits, training=True)

                # logits = tf.nn.tanh(logits)

                return logits

    def train_net(self):

        if not os.path.exists(self.model_path):
            os.makedirs(self.model_path)

        self.loss = self.loss_layer(self.y, self.y_hat)

        self.optimizer = tf.compat.v1.train.AdamOptimizer(self.lr)

        # run the batch-norm moving-average updates together with each train step
        update_ops = tf.compat.v1.get_collection(tf.compat.v1.GraphKeys.UPDATE_OPS)
        with tf.control_dependencies(update_ops):
            self.train_step = self.optimizer.minimize(self.loss)

        with tf.Session() as sess:

            sess.run(tf.compat.v1.global_variables_initializer())

            ckpt = tf.train.get_checkpoint_state(self.model_path)

            if ckpt and ckpt.model_checkpoint_path:
                # if a checkpoint exists, resume training from it
                self.saver.restore(sess, ckpt.model_checkpoint_path)
                print('Model Reload Successfully!')

            for epoch in range(self.epoches):

                loss_list = []

                for batch in range(self.batches):  # run self.batches mini-batches per epoch

                    images, labels = self.reader.generate(self.batch_size)

                    feed_dict = {
                        self.x: images,
                        self.y: labels
                    }

                    loss_value, _ = sess.run(
                        [self.loss, self.train_step], feed_dict)

                    loss_list.append(loss_value)

                loss = np.mean(np.array(loss_list))

                print('epoch:{} loss:{}'.format(epoch, loss))

                with open('./losses.txt', 'a') as f:
                    f.write(str(loss)+'\n')

            self.saver.save(sess, self.ckpt_path)

    def test_net(self, image, sess):

        # expects a 96x96 grayscale image; returns 30 integer coordinates
        # (x0, y0, x1, y1, ...) in 96-pixel space
        image = image.reshape((1, self.size, self.size, 1)) - 127.5

        points = sess.run(self.y_hat, feed_dict={self.x: image})

        points = (points * self.size).astype(int)

        return np.squeeze(points)


if __name__ == '__main__':

    import cv2
    import matplotlib.pyplot as plt

    net = Net()

    net.train_net()

    with open('./losses.txt', 'r') as f:

        losses = f.read().splitlines()

    losses = [float(v) for v in losses]

    plt.plot(losses)
    plt.title('loss')
    plt.show()
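
Once training finishes, the checkpoint can be sanity-checked offline before moving on to the camera script in section 5; a minimal sketch (it assumes ./model exists and that read_data.py has written train_image/image_0.jpg, a hypothetical sample name):

import cv2
import tensorflow as tf
from network import Net

# Sketch: restore the checkpoint and predict keypoints for one cached training image.
net = Net(is_training=False)
with tf.Session() as sess:
    ckpt = tf.train.get_checkpoint_state('./model')
    net.saver.restore(sess, ckpt.model_checkpoint_path)
    img = cv2.imread(
        'F:/try/face detect/Facial Keypoints Detection/train_image/image_0.jpg',
        cv2.IMREAD_GRAYSCALE)
    points = net.test_net(img, sess)
    print(points)  # 30 ints: x0, y0, x1, y1, ... in 96-pixel space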

While the code runs you can watch the per-epoch loss being printed.
[screenshot: training console output]
Training also generates the corresponding files: the model folder holds the saved checkpoints and losses.txt holds the per-epoch loss values.
[screenshot: generated files]
The resulting loss curve looks like this:
[figure: loss curve]

5 Running detection on video from the laptop camera

Create keypoint.py

import cv2
import tensorflow as tf
from network import Net
import numpy as np


class FaceDetector(object):

    def __init__(self):

        self.model_path = 'F:/try/face detect/model'

        self.net = Net(is_training=False)

        self.size = 96

    def ad_threshold(self, img):

        th2 = cv2.adaptiveThreshold(img, 255,
                                    cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                                    cv2.THRESH_BINARY, 19, 4)  # adaptive thresholding (binarization)
        return th2

    def CatchUsbVideo(self, window_name, camera_index):

        # Main capture loop

        cv2.namedWindow(window_name)  # create the preview window

        cap = cv2.VideoCapture(camera_index)  # open the camera (the built-in camera is usually index 0)

        # Load the face classifier (the same pre-trained Haar cascade as in section 2,
        # haarcascade_frontalface_alt2.xml, copied into the working folder)
        classifier = cv2.CascadeClassifier('F:/try/face detect/haarcascade_frontalface_alt2.xml')
        # bounding-box color for detected faces
        color = (0, 255, 0)
        font = cv2.FONT_HERSHEY_SIMPLEX  # font for the on-screen label
        with tf.Session() as sess:

            sess.run(tf.compat.v1.global_variables_initializer())

            ckpt = tf.train.get_checkpoint_state(self.model_path)

            if ckpt and ckpt.model_checkpoint_path:

                # restore the trained model for inference
                self.net.saver.restore(sess, ckpt.model_checkpoint_path)

                print('Model Reload Successfully!')

            while cap.isOpened():
                catch, frame = cap.read()  # read one frame
                if not catch:
                    raise Exception('Check if the camera is on.')

                # convert to grayscale
                gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
                gray = self.ad_threshold(gray)  # adaptive thresholding

                # scaleFactor is the image pyramid scale step
                # minNeighbors=2 keeps a detection only if it is confirmed at least twice
                # minSize is the smallest face size to detect
                faceRects = classifier.detectMultiScale(
                    gray, scaleFactor=1.2, minNeighbors=2, minSize=(32, 32))

                if len(faceRects) > 0:
                    # iterate over all faces found in this frame

                    for face in faceRects:
                        x, y, w, h = face  # face is a tuple with the detection's top-left corner plus width and height

                        # crop the face region, clamping so the slice indices cannot go negative at the frame edge
                        y0, x0 = max(y - 10, 0), max(x - 10, 0)
                        image = frame[y0:y + h + 10, x0:x + w + 10]

                        cv2.rectangle(frame, (x-5, y-5), (x+w+5, y+h+5),
                                      color, 2)  # draw the face bounding box

                        cv2.putText(frame, 'face', (x + 30, y + 30),
                                    font, 1, (255, 0, 255), 4)

                        image_x = cv2.resize(cv2.cvtColor(
                            image, cv2.COLOR_BGR2GRAY), (self.size, self.size))

                        points = self.net.test_net(image_x, sess)

                        # map the predicted keypoints from 96x96 crop space back to frame coordinates
                        points_x = points[::2] / self.size * w + x
                        points_y = points[1::2] / self.size * h + y

                        points_x = points_x.astype(int)
                        points_y = points_y.astype(int)

                        for x_, y_ in zip(points_x, points_y):

                            cv2.circle(frame, (x_, y_), 2, (0, 0, 255), -1)


                cv2.imshow(window_name, frame)  # show the detection result
                c = cv2.waitKey(10)

                if c & 0xFF == ord('q'):
                    # press q to quit
                    break

                if cv2.getWindowProperty(window_name, cv2.WND_PROP_AUTOSIZE) < 1:
                    # quit when the window is closed with the x button
                    break

        # release the camera
        cap.release()
        cv2.destroyAllWindows()


if __name__ == "__main__":

    detector = FaceDetector()

    detector.CatchUsbVideo("face_detect", camera_index=0)

    # camera_index is the camera number; a laptop's built-in camera is usually 0

Success!