欢迎您访问程序员文章站,本站旨在为大家分享程序员计算机编程知识!
您现在的位置是: 首页

Pytorch实现多输入图像分类

程序员文章站 2022-05-26 19:15:22
...

Pytorch实现多输入图像分类

在做图像分类实验的时候需要提取图像的传统特征与深度特征进行特征融合,即使用卷积神经网络提取图像的深度特征,使用自定义的方法来提取图像的传统特征,将两种特征进行特征融合(在这里我使用的是简单的拼接),融合后的特征向量再用分类器进行分类。
(python萌新,有写的不好的地方请见谅)

1.自定义图像传统特征的提取方法

two_feature.py:

import cv2
import numpy as np


def hsv(img):
    """Return the mean hue, saturation and brightness of an image.

    Only the non-zero pixels of each HSV channel are averaged, and each
    mean is divided by 10 as a crude normalization.

    :param img: BGR image array as returned by ``cv2.imread``.
    :return: tuple ``(average_h, average_s, average_v)`` of floats.
    """
    # BUG FIX: cv2.imread yields BGR images, so the conversion code must be
    # COLOR_BGR2HSV. The original COLOR_RGB2HSV swapped the red and blue
    # channels and produced wrong hue/saturation statistics.
    hsv_img = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
    H, S, V = cv2.split(hsv_img)

    def _channel_mean(channel):
        # Average over non-zero entries only; the +1 in the denominator
        # guards against division by zero on an all-zero channel.
        vals = channel.ravel()[np.flatnonzero(channel)]
        return (vals.sum() / (len(vals) + 1)) / 10.

    return _channel_mean(H), _channel_mean(S), _channel_mean(V)


def get_feature(img_path):
    """Load the image at *img_path* and return its hand-crafted
    (mean H, mean S, mean V) feature vector as a numpy array."""
    image = cv2.imread(img_path)
    avg_h, avg_s, avg_v = hsv(image)  # 3 traditional features
    return np.array([avg_h, avg_s, avg_v]).T


if __name__ == '__main__':
    # Quick manual check: print the feature vector of one sample image.
    sample_path = 'D:\\1AA\\Python\\weather_class\\dataset\\dataset_add\\all\\clean\\0.jpg'
    print(get_feature(sample_path))
 

2.数据处理部分

从文件夹中读取图像,返回 [图像,图像传统特征,标签] ,我的数据集存储方式如下:
Pytorch实现多输入图像分类

two_data_loader.py

import os
import numpy as np
from PIL import Image
from torchvision import transforms
from torch.utils.data import Dataset,DataLoader
import cv2
import torch
from two_feature import *

def get_path_and_label(path):
    """Collect image paths and integer class labels from a folder-per-class
    dataset layout.

    Expects *path* to contain one sub-folder per class, named 'a'..'f';
    every file inside a sub-folder is one sample of that class.

    :param path: root directory of one dataset split.
    :return: ``(paths, labels)`` — two lists shuffled in unison with a
        fixed seed; ``labels`` are ints 0..5 (index of the class folder).
    """
    class_list = ['a', 'b', 'c', 'd', 'e', 'f']
    path_list = []
    label_list = []
    # The class index doubles as the numeric label, replacing the original
    # six-branch if/elif chain.
    for label, class_name in enumerate(class_list):
        class_dir = os.path.join(path, class_name)
        for img_name in os.listdir(class_dir):
            path_list.append(os.path.join(class_dir, img_name))
            label_list.append(label)
    # Shuffle paths and labels in unison via a single index permutation.
    # (The original packed both into one numpy array, which silently
    # converted the labels to strings.) The fixed seed keeps the ordering
    # reproducible between runs.
    np.random.seed(1)
    order = np.random.permutation(len(path_list))
    paths = [path_list[i] for i in order]
    labels = [label_list[i] for i in order]
    return paths, labels

def label2tensor(num):
    """Wrap a scalar label into a 1-element torch tensor."""
    wrapped = np.asarray([num])
    return torch.from_numpy(wrapped)


def default_loader(path):
    """Read the image at *path* and resize it to AlexNet's expected
    227x227 input resolution."""
    return cv2.resize(cv2.imread(path), (227, 227))


class my_dataset(Dataset):
    """Dataset yielding ``(image, hand-crafted feature, label)`` triples
    from a folder-per-class directory tree."""

    def __init__(self, path, transform=None, target_transform=None,
                 loader=default_loader, feature=get_feature):
        paths, labels = get_path_and_label(path)
        self.imgs = [(p, int(l)) for p, l in zip(paths, labels)]
        self.transform = transform
        self.target_transform = target_transform  # kept for API parity; not used
        self.loader = loader
        self.feature = feature

    def __getitem__(self, index):
        img_path, label = self.imgs[index]
        image = self.loader(img_path)
        handcrafted = self.feature(img_path)
        if self.transform is not None:
            image = self.transform(image)
        return image, handcrafted, label

    def __len__(self):
        return len(self.imgs)


if __name__ == '__main__':
    base_path = 'D:\\Python\\dataset\\dataset\\'
    # ToTensor also rescales pixel values into the [0, 1] range.
    train_data = my_dataset(path=base_path + 'train', transform=transforms.ToTensor())
    test_data = my_dataset(path=base_path + 'test', transform=transforms.ToTensor())

    train_loader = DataLoader(dataset=train_data, batch_size=16)
    test_loader = DataLoader(dataset=test_data, batch_size=16)


    # Peek at the hand-crafted feature vector of the first test sample.
    first_batch = next(iter(test_loader))
    print(first_batch[1][0])

3.模型及训练

提取图像的深度特征采用AlexNet模型。这里需要用 if __name__ == '__main__': 将整个代码包起来,否则采用多进程(num_workers=4)加载数据时会报错——原因应该是 Windows 上的多进程以 spawn 方式启动,子进程会重新导入主模块,没有这层保护就会重复执行脚本代码。当然你也可以不加这层包裹,不过需要将 num_workers 改为 0。

AlexNet.py

import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms
from two_data_loader import *
import torch.optim as optim

if __name__ == '__main__':

    base_path = 'D:\\Python\\dataset\\dataset\\'
    # ToTensor also rescales pixel values into the [0, 1] range.
    to_tensor = transforms.ToTensor()
    train_data = my_dataset(path=base_path + 'train', transform=to_tensor)
    test_data = my_dataset(path=base_path + 'test', transform=to_tensor)
    validation_data = my_dataset(path=base_path + 'validation', transform=to_tensor)

    train_loader = DataLoader(dataset=train_data, batch_size=8, num_workers=4)
    test_loader = DataLoader(dataset=test_data, batch_size=8, num_workers=4)
    validation_loader = DataLoader(dataset=validation_data, batch_size=8, num_workers=4)

    class AlexNet(nn.Module):
        """AlexNet backbone whose 4096-d deep feature is concatenated with a
        hand-crafted feature vector before the final classifier.

        :param feature_dim: length of the hand-crafted feature vector
            appended to the deep feature (default 3: mean H, S, V).
        """

        def __init__(self, feature_dim=3):
            super(AlexNet, self).__init__()
            self.conv1 = torch.nn.Sequential(  # input_size = 227*227*3
                torch.nn.Conv2d(in_channels=3, out_channels=96, kernel_size=11, stride=4, padding=0),
                torch.nn.ReLU(),
                torch.nn.MaxPool2d(kernel_size=3, stride=2)  # output_size = 27*27*96
            )
            self.conv2 = torch.nn.Sequential(  # input_size = 27*27*96
                torch.nn.Conv2d(96, 256, 5, 1, 2),
                torch.nn.ReLU(),
                torch.nn.MaxPool2d(3, 2)  # output_size = 13*13*256
            )
            self.conv3 = torch.nn.Sequential(  # input_size = 13*13*256
                torch.nn.Conv2d(256, 384, 3, 1, 1),
                torch.nn.ReLU(),  # output_size = 13*13*384
            )
            self.conv4 = torch.nn.Sequential(  # input_size = 13*13*384
                torch.nn.Conv2d(384, 384, 3, 1, 1),
                torch.nn.ReLU(),  # output_size = 13*13*384
            )
            self.conv5 = torch.nn.Sequential(  # input_size = 13*13*384
                torch.nn.Conv2d(384, 256, 3, 1, 1),
                torch.nn.ReLU(),
                torch.nn.MaxPool2d(3, 2)  # output_size = 6*6*256
            )

            # Fully-connected head producing the 4096-d deep feature.
            self.dense1 = torch.nn.Sequential(
                torch.nn.Linear(9216, 4096),  # 9216 = 6*6*256 flattened conv5 output
                torch.nn.ReLU(),
                torch.nn.Dropout(0.5),
            )
            # BUG FIX: the classifier input must equal the width of the
            # concatenated vector (4096 deep + feature_dim hand-crafted).
            # The original Linear(4126, 4126) never matched the actual
            # 4099-wide input and crashed with a shape mismatch at runtime.
            self.dense2 = torch.nn.Sequential(
                torch.nn.Linear(4096 + feature_dim, 4096),
                torch.nn.ReLU(),
                torch.nn.Dropout(0.5),
                torch.nn.Linear(4096, 6)  # 6 output classes
            )

        def forward(self, x, f):
            """Forward pass.

            :param x: batch of images, shape (N, 3, 227, 227).
            :param f: batch of hand-crafted feature vectors, (N, feature_dim).
            :return: (N, 6) class logits.
            """
            conv1_out = self.conv1(x)
            conv2_out = self.conv2(conv1_out)
            conv3_out = self.conv3(conv2_out)
            conv4_out = self.conv4(conv3_out)
            conv5_out = self.conv5(conv4_out)
            res = conv5_out.view(conv5_out.size(0), -1)
            dense1_out = self.dense1(res)
            # Cast the hand-crafted features to float32 so they can be
            # concatenated with the float32 deep features.
            f = f.to(torch.float32)
            concat = torch.cat([dense1_out, f], 1)  # fuse the two feature vectors
            out = self.dense2(concat)
            return out


    # Use the GPU (assumes CUDA device 0 is available).
    device = torch.device("cuda:0")
    net = AlexNet().to(device)


    # CrossEntropyLoss applies log-softmax internally, so the network
    # outputs raw logits.
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(net.parameters(), lr=1e-5)
    def train():
        """Train the network for 50 epochs, validating after each epoch and
        saving the whole model whenever validation accuracy reaches a new best.

        Uses the enclosing scope's net, optimizer, criterion, device,
        train_loader and validation_loader.
        """
        print("Start Training...")
        # Best-so-far validation accuracies; seeded with 0 so the first
        # epoch always saves a checkpoint.
        val_list = [0]
        for epoch in range(50):
            # Running sum of the loss over the last 100 batches.
            loss100 = 0.0
            for i, data in enumerate(train_loader):
                inputs,feature, labels = data
                inputs,feature, labels = inputs.to(device),feature.to(device), labels.to(device)  # move the batch to the GPU
                optimizer.zero_grad()  # clear accumulated gradients
                outputs = net(inputs,feature)  # forward pass
                loss = criterion(outputs, labels)  # loss from logits vs. labels
                loss.backward()  # back-propagate gradients
                optimizer.step()  # update parameters with the computed gradients
                loss100 += loss.item()
                if i % 100 == 99:
                    print('[Epoch %d, Batch %5d] loss: %.3f' %
                          (epoch + 1, i + 1, loss100 / 100))
                    loss100 = 0.0

            # Accuracy on the validation set.
            correct = 0
            total = 0
            # torch.no_grad skips gradient bookkeeping during the forward
            # pass, saving memory.
            with torch.no_grad():
                for data in validation_loader:
                    images,feature, labels = data
                    images,feature, labels = images.to(device), feature.to(device),labels.to(device)
                    # Predict
                    outputs = net(images,feature)
                    # Outputs are class scores; take the arg-max as the prediction.
                    _, predicted = torch.max(outputs.data, 1)
                    total += labels.size(0)
                    correct += (predicted == labels).sum().item()
            val_acc=100 * correct / total

            print('Accuracy of the network on the validation images: %.2f %%' % val_acc)

            # Save the model whenever validation accuracy beats the best so far.
            if val_acc > max(val_list):
                torch.save(net, '.\\AlexNet.pth')
                val_list.append(val_acc)
                print('权重已更新...')
        print("Done Training!")

    def test():
        """Evaluate the best saved model on the held-out test set and print
        its accuracy."""
        net = torch.load('.\\AlexNet.pth')
        correct = 0
        total = 0
        # No gradients are needed for evaluation; no_grad saves memory.
        with torch.no_grad():
            for images, feature, labels in test_loader:
                images = images.to(device)
                feature = feature.to(device)
                labels = labels.to(device)
                # Outputs are class scores; the arg-max is the prediction.
                outputs = net(images, feature)
                _, predicted = torch.max(outputs.data, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()

        print('Accuracy of the network on the test images: %d %%' % (
            100 * correct / total))

    train()
    # Uncomment to evaluate the saved best model on the test set.
    # test()

Reference:
简单的图像分类方法: https://zhuanlan.zhihu.com/p/39667289
数据集加载方法: https://blog.csdn.net/shunshune/article/details/89316572
AlexNet模型: https://blog.csdn.net/Gilgame/article/details/85056344

相关标签: 笔记