
Notes: Mask Detection + Face Recognition


Goal 2 (unfinished)

Get the face-mask detection model wired up and working, and add identification of specific faces (i.e., be able to tell who is wearing the mask).

Method

Step 1: object detection. Detect whether the face in the image is wearing a mask, and output a classification label plus its location. Step 2: crop the detected region and run face recognition on it.
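
Roughly, the two steps could be chained like this (a minimal sketch; detect_mask and recognize_face are placeholder names, not code from this post):

# Sketch of the intended two-step pipeline.
# detect_mask(image) -> (label, box), where label is 1 = masked / 0 = unmasked
# and box is (xmin, ymin, xmax, ymax); recognize_face(crop) -> an identity.
def mask_then_identify(image, detect_mask, recognize_face):
    label, box = detect_mask(image)           # step 1: classify + localize the face
    xmin, ymin, xmax, ymax = box
    face_crop = image[ymin:ymax, xmin:xmax]   # step 2: cut out the detected region
    identity = recognize_face(face_crop)      # and identify whose face it is
    return label, box, identity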

I. Preparation

Face datasets are easy to find online.

1. Install Anaconda; there are plenty of tutorials online.

2. Install labelImg; again, tutorials are easy to find.

3. Environment setup

import os
import glob
from xml.etree import ElementTree as ET
import numpy as np
import skimage.color
import skimage.io
import skimage.transform
import torch
from torch.utils.data import Dataset, DataLoader

4. Data loading
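
The xmlData_Read method below parses the Pascal VOC-style XML that labelImg writes. For reference, an annotation containing just the tags the parser relies on (filename, path, size, object/name, object/bndbox) can be generated like this; all concrete values here are made up for illustration:

from xml.etree import ElementTree as ET

# Build a minimal VOC-style annotation with only the tags xmlData_Read reads.
ann = ET.Element('annotation')
ET.SubElement(ann, 'filename').text = 'example.jpg'
ET.SubElement(ann, 'path').text = 'C:/data/example.jpg'
size = ET.SubElement(ann, 'size')
for tag, val in (('width', '256'), ('height', '256'), ('depth', '3')):
    ET.SubElement(size, tag).text = val
obj = ET.SubElement(ann, 'object')
ET.SubElement(obj, 'name').text = 'face_with_mask'   # or 'face_without_mask'
bnd = ET.SubElement(obj, 'bndbox')
for tag, val in (('xmin', '48'), ('ymin', '52'), ('xmax', '210'), ('ymax', '230')):
    ET.SubElement(bnd, tag).text = val
ET.ElementTree(ann).write('example.xml')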

    def xmlData_Read(self):
        #  Two classes: face_with_mask and face_without_mask
        xml_list = glob.glob(self.path + '/*.xml')
        for i, xml in enumerate(xml_list):
            target = {'id': '', 'IsMasked': '', 'boxes': ''}
            img_info = {'id': '', 'path': '', 'fileName': '', 'width': '', 'height': '', 'depth': ''}
            tree = ET.parse(xml)
            root = tree.getroot()
            #  Record the image name and index
            target['id'] = str(i + 1)
            img_info['id'] = str(i + 1)
            img_info['fileName'] = root.find('filename').text

            img_info['path'] = root.find('path').text

            #  Image size
            for size in root.findall('size'):
                img_info['width'] = size.find('width').text
                img_info['height'] = size.find('height').text
                img_info['depth'] = size.find('depth').text

            #  Resize the image to 256x256 and write the new size back into the XML
            if img_info['width'] != '256' or img_info['height'] != '256':
                image = skimage.transform.resize(skimage.io.imread(root.find('path').text), (256, 256))
                # If grayscale, convert to RGB for consistency.
                if image.ndim != 3:
                    image = skimage.color.gray2rgb(image)
                skimage.io.imsave(root.find('path').text, skimage.img_as_ubyte(image))
                for size in root.findall('size'):
                    size.find('width').text = '256'
                    size.find('height').text = '256'
                    size.find('depth').text = '3'
                tree.write(xml)
            for ob in root.findall('object'):
                if 'face_with_mask' == ob.find('name').text:
                    target['IsMasked'] = '1'
                elif 'face_without_mask' == ob.find('name').text:
                    target['IsMasked'] = '0'
                for bndbox in ob.iter('bndbox'):
                    box = []
                    box.append(int(bndbox.find('xmin').text))
                    box.append(int(bndbox.find('ymin').text))
                    box.append(int(bndbox.find('xmax').text))
                    box.append(int(bndbox.find('ymax').text))
                    target['boxes'] = box
            self.img_Cont_Info.append(target)
            self.img_Info.append(img_info)
    # Resize every image in a folder to 256x256
    def resize(self, path):
        img_list = os.listdir(os.path.join(path))
        for i in img_list:
            image = skimage.transform.resize(skimage.io.imread(path + '/' + i), (256, 256))
            # resize() returns floats in [0, 1]; convert back to uint8 before saving
            skimage.io.imsave(path + '/' + i, skimage.img_as_ubyte(image))
        return
    # Read one image and its target
    def __getitem__(self, image_id):
        image_id = int(image_id)
        image = skimage.io.imread(self.img_Info[image_id - 1]['path'])
        if image.shape != (256, 256, 3):
            image = skimage.transform.resize(image, (256, 256))
        # If grayscale, convert to RGB for consistency.
        if image.ndim != 3:
            image = skimage.color.gray2rgb(image)
        # If it has an alpha channel, drop it for consistency.
        if image.shape[-1] == 4:
            image = image[..., :3]
        # Convert everything to torch.Tensor
        box = torch.as_tensor(self.img_Cont_Info[image_id - 1]['boxes'], dtype=torch.float32)
        label = torch.as_tensor(int(self.img_Cont_Info[image_id - 1]['IsMasked']), dtype=torch.int64)
        image_id = torch.as_tensor(int(self.img_Cont_Info[image_id - 1]['id']))
        target = []
        # target["image_id"] = image_id
        # target["label"] = label
        # target["boxes"] = box
        target.append(label)
        # Splice the box in after the label, so target = [IsMasked, xmin, ymin, xmax, ymax]
        target[1:1] = box
        if self.transforms is not None:
            image = self.transforms(image)
        target = torch.Tensor(target).long()
        return image, target
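
These methods belong to a Dataset subclass; main() below constructs it as DR.customDataset(path, transform), but the post never shows the class header or constructor. A minimal skeleton they could plug into might look like this (the attribute and parameter names are assumptions inferred from the code above):

class customDataset(Dataset):
    """Mask / no-mask dataset built from labelImg XML annotations (sketch)."""

    def __init__(self, path, transforms=None):
        self.path = path                 # folder containing the .xml annotation files
        self.transforms = transforms     # e.g. torchvision transforms.ToTensor()
        self.img_Info = []               # per-image file info, filled by xmlData_Read()
        self.img_Cont_Info = []          # per-image label + box, filled by xmlData_Read()

    def __len__(self):
        return len(self.img_Info)

    # xmlData_Read(), resize() and __getitem__() go here, as shown above.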

II. Building the Network (VGG11)


import torch.nn as nn

# N is the batch size; D_in is the input dimension;
# H is the hidden dimension; D_out is the output dimension.
N, D_in, H, D_out = 64, 65536, 100, 5
cfg = {'VGG11': [32, 'M', 64, 'M', 128, 128, 'M', 256, 256, 'M', 256, 256, 'M']}

class VGG_Net(nn.Module):
    def __init__(self, vgg_name):
        super(VGG_Net, self).__init__()
        self.features = self._make_layers(cfg[vgg_name])
        self.classifier = nn.Linear(16384, 5)

    def forward(self, x):
        out = self.features(x)
        out = out.view(out.size(0), -1)
        out = self.classifier(out)
        return out

    def num_flat_features(self, x):
        size = x.size()[1:]  # all dimensions except the batch dimension
        num_features = 1
        for s in size:
            num_features *= s
        return num_features

    def _make_layers(self, cfg):
        layers = []
        in_channels = 3
        for x in cfg:
            if x == 'M':
                layers += [nn.MaxPool2d(kernel_size=2, stride=2)]
            else:
                layers += [nn.Conv2d(in_channels, x, kernel_size=3, padding=1),
                           nn.BatchNorm2d(x),
                           nn.ReLU(inplace=True)]
                in_channels = x
        layers += [nn.AvgPool2d(kernel_size=1, stride=1)]
        return nn.Sequential(*layers)
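
The hard-coded nn.Linear(16384, 5) matches a 256x256 input: the five 'M' entries halve the spatial size five times (256 down to 8), and 8 x 8 x 256 channels = 16384 flattened features. A quick sanity check (not part of the original post):

import torch

net = VGG_Net('VGG11')
dummy = torch.randn(1, 3, 256, 256)    # one fake RGB image at 256x256
feat = net.features(dummy)
print(feat.shape)                      # expected: torch.Size([1, 256, 8, 8])
print(feat.view(1, -1).shape)          # expected: torch.Size([1, 16384])
print(net(dummy).shape)                # expected: torch.Size([1, 5])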

III. Model Training


import torch
import torch.nn as nn
import torch.optim as optim
import torch.backends.cudnn as cudnn
import torch.utils.data as data
import torchvision.transforms as transforms
from torch.autograd import Variable
# Plus the local modules referenced below (names assumed from how they are used):
# DR     -- the dataset module containing customDataset (section I.4)
# model  -- the module containing VGG_Net (section II)
# from utils import progress_bar


def train(epoch, net, trainloader, use_cuda, optimizer, criterion):
    print('\nEpoch: %d' % epoch)
    net.train()
    train_loss = 0
    correct = 0
    total = 0
    for batch_idx, (inputs, targets) in enumerate(trainloader):
        # Move the data to the GPU
        if use_cuda:
            inputs, targets = inputs.cuda(), targets.cuda()
        # Zero the optimizer gradients first
        optimizer.zero_grad()
        # Variable marks these as leaf variables of the computation graph;
        # in modern PyTorch this wrapper is a no-op and plain tensors behave the same.
        inputs, targets = Variable(inputs), Variable(targets)
        # Model output
        outputs = net(inputs)
        # Compute the loss; this is the end of the graph
        print(outputs)
        print(targets)
        loss = criterion(outputs, targets)
        # Backpropagate to compute gradients
        loss.backward()
        # Update the parameters
        optimizer.step()
        # Note: when accumulating the loss for logging, do not sum the loss tensor itself;
        # use loss.item(). The loss is part of the computation graph, so summing the tensor
        # would keep the whole graph alive and it would grow every iteration.
        train_loss += loss.item()
        # Statistics
        _, predicted = torch.max(outputs.data, 1)
        total += targets.size(0)
        correct += predicted.eq(targets.data).cpu().sum()

        progress_bar(batch_idx, len(trainloader), 'Loss: %.3f | Acc: %.3f%% (%d/%d)'
                     % (train_loss / (batch_idx + 1), 100. * correct / total, correct, total))


def main():
    # from utils import progress_bar
    use_cuda = torch.cuda.is_available()

    best_acc = 0  # best test accuracy
    start_epoch = 0  # start from epoch 0 or last checkpoint epoch
    # Load the dataset and apply preprocessing first
    print('==> Preparing data..')

    transform_test = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
    ])

    tran = transforms.ToTensor()
    trainset = DR.customDataset('C:/Users/小z/Desktop/label_data', tran)
    trainset.xmlData_Read()
    trainloader = data.DataLoader(trainset, batch_size=5, shuffle=False, num_workers=2)

    testset = DR.customDataset('C:/Users/小z/Desktop/test', tran)
    testset.xmlData_Read()
    testloader = data.DataLoader(testset, batch_size=2, num_workers=2)

    classes = ['face_with_mask', 'face_without_mask']

    net = model.VGG_Net('VGG11')

    if use_cuda:
        # move param and buffer to GPU
        net.cuda()
        # parallel use GPU
        net = torch.nn.DataParallel(net)
        # speed up slightly
        cudnn.benchmark = True

    # Define the loss and the optimizer
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(net.parameters(), lr=0.1, momentum=0.9, weight_decay=5e-4)

    # Run the model
    for epoch in range(start_epoch, start_epoch + 200):
        train(epoch, net, trainloader, use_cuda, optimizer, criterion)
    # torch.save needs a file path, not a directory; the file name here is arbitrary
    torch.save(net.state_dict(), './checkpoint/vgg11.pth')
    # test(epoch)
    # Free some unused GPU memory
    torch.cuda.empty_cache()
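
The test(epoch) call is left commented out above. For completeness, an evaluation loop in the same style could look like this (a sketch, assuming the targets are plain class indices, which ties into the problem described below):

def test(epoch, net, testloader, use_cuda, criterion):
    net.eval()
    test_loss, correct, total = 0, 0, 0
    with torch.no_grad():                  # no gradients needed during evaluation
        for batch_idx, (inputs, targets) in enumerate(testloader):
            if use_cuda:
                inputs, targets = inputs.cuda(), targets.cuda()
            outputs = net(inputs)
            loss = criterion(outputs, targets)
            test_loss += loss.item()
            _, predicted = torch.max(outputs, 1)
            total += targets.size(0)
            correct += predicted.eq(targets).cpu().sum()
    print('Test Loss: %.3f | Acc: %.3f%% (%d/%d)'
          % (test_loss / (batch_idx + 1), 100. * correct / total, correct, total))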

I got stuck at this point; a problem showed up.

(Screenshot of the two printed tensors omitted.)

The first is the model output, and the second is the target (IsMasked, xmin, ymin, xmax, ymax). Something about these two doesn't line up; any pointers would be much appreciated.
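
For reference, nn.CrossEntropyLoss with class-index targets expects a 1-D tensor of shape (N,), while __getitem__ above returns a 5-element vector (IsMasked, xmin, ymin, xmax, ymax) per sample, so the batched target has shape (N, 5). A small illustration of the shape the loss expects (a sketch, not a fix for the detection task itself):

import torch
import torch.nn as nn

criterion = nn.CrossEntropyLoss()
outputs = torch.randn(5, 5)                    # batch of 5 samples, 5 class scores each

class_targets = torch.tensor([1, 0, 1, 1, 0])  # shape (5,): one class index per sample
print(criterion(outputs, class_targets))       # this is the form the loss expects

mixed_targets = torch.randint(0, 256, (5, 5))  # shape (5, 5): label + box per sample
# criterion(outputs, mixed_targets)            # a target shaped like this is what trips the loss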