Testing how batch_size affects training time: the speedup is not proportional
Example 1:
train batch_size set to 160, test batch_size set to 100 (a test batch_size of 160 runs out of GPU memory).
Training set: 4000 images; test set: 1000 images.
Example 2:
train batch_size set to 80, test batch_size set to 100.
Training set: 4000 images; test set: 1000 images.
The results show that the speedup is not proportional: doubling the batch size from 80 to 160 does not halve the training time.
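To reproduce this kind of comparison, a minimal per-epoch timing harness along these lines can be used (a sketch, not from the original post; it assumes a model with a single tensor output and a matching loss_fn, whereas the network below has two heads):

import time
import torch
from torch.utils.data import DataLoader

def time_one_epoch(net, loader, optimizer, loss_fn):
    torch.cuda.synchronize()   # drain pending GPU work before starting the clock
    start = time.time()
    for x, y in loader:
        x, y = x.cuda(), y.cuda()
        loss = loss_fn(net(x), y)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    torch.cuda.synchronize()   # kernels run asynchronously; wait before reading the clock
    return time.time() - start

# Usage sketch: compare the per-epoch time at the two batch sizes.
# for bs in (80, 160):
#     loader = DataLoader(train_dataset, batch_size=bs, shuffle=True)
#     print(bs, time_one_epoch(net, loader, optimizer, loss_fn))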
The full source code follows:
import torch
from torch.utils.data import Dataset, DataLoader
import os
from PIL import Image
import numpy as np
import torchvision.transforms as trans
import torch.nn as nn
import matplotlib.pyplot as plt
from PIL import ImageDraw
import Polygon as plg  # provided by the Polygon3 package (pip install Polygon3)
import time
def rectangle_to_polygon(rect):
    # Pack the rect [x1, y1, x2, y2] into 8 values: xs in the first row, ys in the second;
    # reshape + transpose then yields the four corners (x1,y2), (x1,y1), (x2,y1), (x2,y2).
    resBoxes = np.empty([1, 8], dtype='int32')
    resBoxes[0, 0] = int(rect[0])
    resBoxes[0, 4] = int(rect[3])
    resBoxes[0, 1] = int(rect[0])
    resBoxes[0, 5] = int(rect[1])
    resBoxes[0, 2] = int(rect[2])
    resBoxes[0, 6] = int(rect[1])
    resBoxes[0, 3] = int(rect[2])
    resBoxes[0, 7] = int(rect[3])
    pointMat = resBoxes.reshape([2, 4]).T
    return plg.Polygon(pointMat)
def get_intersection(pD, pG):
    # Polygon's & operator computes the intersection; an empty result has no contours.
    pInt = pD & pG
    if len(pInt) == 0:
        return 0
    return pInt.area()

def get_union(pD, pG):
    areaA = pD.area()
    areaB = pG.area()
    return areaA + areaB - get_intersection(pD, pG)

def get_intersection_over_union(pD, pG):
    return get_intersection(pD, pG) / get_union(pD, pG)
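As a quick sanity check of these helpers (an illustrative example, not part of the original post): two 100x100 boxes offset by 50 pixels overlap in a 50x50 region, so the IoU should be 2500 / (10000 + 10000 - 2500) ≈ 0.143.

box_a = rectangle_to_polygon([0, 0, 100, 100])    # [x1, y1, x2, y2]
box_b = rectangle_to_polygon([50, 50, 150, 150])
print(get_intersection_over_union(box_a, box_b))  # prints roughly 0.1428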
class MyDataset(Dataset):
    mean = torch.tensor([0.5708, 0.5661, 0.5395])
    std = torch.tensor([0.3128, 0.2978, 0.3172])

    def __init__(self, root=None, train=True, transforms=None):
        self.path = root
        self.transforms = transforms
        self.dataset = os.listdir(self.path)
        self.dataset.sort(key=lambda x: int(x[:x.index(".")]))  # sort files by their leading image index
        if train:
            # train == True: the 4000 training images (2000 positive and 2000 negative samples)
            self.dataset = self.dataset[:4000]
        else:
            # train == False: the 1000 test images (500 positive and 500 negative samples)
            self.dataset = self.dataset[:1000]

    def __len__(self):
        # size of the dataset
        return len(self.dataset)

    def __getitem__(self, index):
        imgpath = self.dataset[index]  # look up the image file name by index
        img = Image.open(os.path.join(self.path, imgpath))
        data = self.transforms(img)
        labels = imgpath.split(".")
        axes = np.array(labels[1:5], dtype=np.float32) / 224  # box coordinates, normalized by the 224-pixel image size
        category = np.array(labels[5:6], dtype=np.float32)  # the class label: 1 or 0
        # concatenate in a fixed order: the four coordinates first, then the label
        target = np.concatenate((axes, category))
        return data, target
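The labels are parsed directly out of the file name, so the dataset assumes a naming convention of the form index.x1.y1.x2.y2.label.ext — an inference from the split logic above, not something the post states explicitly. A hypothetical file name illustrates the parsing:

name = "12.30.40.180.200.1.jpg"  # hypothetical file following the assumed convention
labels = name.split(".")
print(labels[1:5])  # ['30', '40', '180', '200'] -> box corners, later divided by 224
print(labels[5:6])  # ['1']                      -> class label (1 = positive)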
class MyNetWork(nn.Module):
    def __init__(self):
        super(MyNetWork, self).__init__()
        self.convolution_layer = nn.Sequential(
            nn.Conv2d(3, 16, 3, 1),    # (224 - 3 + 2*0)/1 + 1 = 222
            nn.ReLU(inplace=True),
            nn.Conv2d(16, 32, 3, 1),   # (222 - 3 + 2*0)/1 + 1 = 220
            nn.ReLU(inplace=True),
            nn.MaxPool2d(2, 2),        # (220 - 2 + 2*0)/2 + 1 = 110
            nn.Conv2d(32, 128, 3, 1),  # (110 - 3 + 2*0)/1 + 1 = 108
            nn.ReLU(inplace=True),
            nn.MaxPool2d(2, 2),        # (108 - 2 + 2*0)/2 + 1 = 54
            nn.Conv2d(128, 256, 3, 1), # (54 - 3 + 2*0)/1 + 1 = 52
            nn.ReLU(inplace=True),
            nn.AvgPool2d(2, 2),        # (52 - 2 + 2*0)/2 + 1 = 26
            nn.Conv2d(256, 64, 3, 1),  # (26 - 3)/1 + 1 = 24
            nn.ReLU(inplace=True),
            nn.AvgPool2d(2, 2),        # (24 - 2)/2 + 1 = 12
            nn.Conv2d(64, 32, 3, 1)    # (12 - 3)/1 + 1 = 10
        )
        self.MLP_layer = nn.Sequential(
            nn.Linear(32 * 10 * 10, 128),  # the conv stack outputs 32 feature maps of size 10x10
            nn.ReLU(),
            nn.Linear(128, 5)
        )
        # Alternatively, a fully convolutional head with 1x1 convolutions could replace the MLP:
        # self.cnn_layer2 = nn.Sequential(
        #     nn.Conv2d(32, 128, 10, 1),  # a 10x10 kernel on the 10x10 feature maps yields 1x1 maps
        #     nn.ReLU(True),
        #     nn.Conv2d(128, 5, 1, 1)     # 1x1 convolution producing the 5 outputs
        # )

    def forward(self, x):
        x = self.convolution_layer(x)
        x = torch.reshape(x, shape=(-1, 32 * 10 * 10))
        out = self.MLP_layer(x)
        # The last output is the class score; sigmoid maps it to a probability for
        # binary classification (softmax would be the multi-class counterpart).
        category = torch.sigmoid(out[:, 4])
        # The first four outputs are the box coordinates (a regression target);
        # relu keeps them non-negative.
        axes = torch.relu(out[:, :4])
        return axes, category
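A quick way to confirm the 32 * 10 * 10 feature-map size hard-coded into the first Linear layer is to push a dummy batch through the network (a sanity check added here, not part of the original post):

net = MyNetWork()
dummy = torch.zeros(2, 3, 224, 224)   # a batch of two 224x224 RGB images
features = net.convolution_layer(dummy)
print(features.shape)                 # torch.Size([2, 32, 10, 10])
axes, category = net(dummy)
print(axes.shape, category.shape)     # torch.Size([2, 4]) torch.Size([2])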
class Trainer:
    def __init__(self):
        transforms = trans.Compose([
            trans.ToTensor(),
            trans.Normalize(MyDataset.mean, MyDataset.std)
        ])
        self.train_dataset = MyDataset(root=r"../train_picture", train=True, transforms=transforms)
        self.test_dataset = MyDataset(root=r"../test_picture", train=False, transforms=transforms)
        self.net = MyNetWork().cuda()
        # MSELoss (mean squared error) is the usual loss for the box-regression part.
        self.offset_lossfunc = nn.MSELoss().cuda()
        # BCELoss (binary cross entropy) is the binary special case of cross entropy
        # and is normally paired with a sigmoid output.
        self.category_lossfunc = nn.BCELoss().cuda()
        self.optimizer = torch.optim.Adam(self.net.parameters(), lr=1e-5)
    def train(self):
        # If a run is interrupted, a previously saved model can be loaded here to resume training:
        # if os.path.exists("models/net2.pth"):
        #     self.net = torch.load("models/net2.pth")
        #     print("exists")
        trainloader = DataLoader(dataset=self.train_dataset, batch_size=160, shuffle=True)
        losses = []
        for i in range(20):
            print("epochs:{}".format(i))
            for j, (x, y) in enumerate(trainloader):
                if torch.cuda.is_available():
                    x = x.cuda()
                    y = y.cuda()
                axes, category = self.net(x)  # forward pass: predicted box coordinates and class probability
                loss1 = self.offset_lossfunc(axes, y[:, 0:4])      # regression loss on the four coordinates
                loss2 = self.category_lossfunc(category, y[:, 4])  # classification loss on the label
                loss = loss1 + loss2
                if j % 5 == 0:
                    losses.append(loss.item())  # .item() detaches the scalar so the plot does not hold CUDA tensors
                    print("{}/{},loss:{}".format(j, len(trainloader), loss.item()))
                    plt.clf()
                    plt.plot(losses)
                    plt.pause(0.1)
                self.optimizer.zero_grad()
                loss.backward()
                self.optimizer.step()
                del x, y, category, axes, loss1, loss2, loss
        torch.save(self.net, "models/net33.pth")
    def test(self):
        testloader = DataLoader(dataset=self.test_dataset, batch_size=100, shuffle=False)
        self.net = torch.load("models/net33.pth")
        total = 0
        nnn = 0
        total_iou = 0
        for x, y in testloader:
            x, y = x.cuda(), y.cuda()
            axes, category = self.net(x)
            total += (category.round() == y[:, 4]).sum()  # count of predictions that match the label
            index = category.round() == 1
            """
            index is a boolean mask over the batch marking images predicted to contain a Minion,
            e.g. tensor([True, True, False, True, False, True, True, False, True, True])
            """
            target = y[index]  # targets (coordinates plus class label) of the predicted-positive images
            """
            To visualize the predicted-positive images, the normalization done during preprocessing
            must be inverted. Preprocessing computed: normalized = (img / 255 - mean) / std,
            so the original image is recovered as: img = (normalized * std + mean) * 255.
            """
            x = (x[index].cpu() * MyDataset.std.reshape(-1, 3, 1, 1) + MyDataset.mean.reshape(-1, 3, 1, 1))
            # No explicit * 255 needed here: trans.ToPILImage("RGB") scales automatically.
            for j, i in enumerate(axes[index]):  # j is the running index produced by enumerate
                boxes = (i.data.cpu().numpy() * 224).astype(np.int32)  # rescale the predicted box to pixels, cast to int
                target_box = (target[j, 0:4].data.cpu().numpy() * 224).astype(np.int32)  # rescale the target box likewise
                img = trans.ToPILImage()(x[j])  # convert the tensor back to a PIL image
                """
                torchvision.transforms.ToPILImage converts a tensor by:
                1. multiplying each element by 255
                2. casting from FloatTensor to uint8
                3. converting the tensor to a numpy ndarray
                4. transposing the ndarray with (1, 2, 0)
                5. building a PIL image with Image.fromarray
                6. returning the PIL image
                """
                # plt.clf()
                # plt.axis("off")
                # draw = ImageDraw.Draw(img)
                # draw.rectangle(boxes.tolist(), outline="red")          # prediction
                # draw.rectangle(target_box.tolist(), outline="yellow")  # ground truth
                # plt.imshow(img)
                # plt.savefig("./result/{}.jpg".format(nnn))
                nnn += 1
                # plt.pause(0.1)
                de = rectangle_to_polygon(boxes.tolist())
                gt = rectangle_to_polygon(target_box.tolist())
                iou = get_intersection_over_union(de, gt)
                total_iou += iou
            del x, y, category, axes, index, target
        print("accuracy:", total.item() / len(self.test_dataset), "mean IoU:", total_iou / nnn)
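A side note on checkpointing: torch.save(self.net, ...) pickles the entire module, which ties the checkpoint file to this exact class definition, and torch.load then restores it wholesale. Saving only the weights is the more portable pattern (a suggested alternative, not what this post does):

torch.save(net.state_dict(), "models/net33.pth")     # save the weights only
net = MyNetWork().cuda()                             # rebuild the architecture from code
net.load_state_dict(torch.load("models/net33.pth"))  # then load the weights into it
net.eval()                                           # switch to inference mode for testing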
if __name__ == '__main__':
    a = Trainer()
    time_start = time.time()
    # a.train()
    time_end1 = time.time()
    a.test()
    time_end2 = time.time()
    print("a.train took {:.2f}s".format(time_end1 - time_start))
    print("a.test took {:.2f}s".format(time_end2 - time_end1))
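One caveat about these wall-clock numbers: CUDA ops launch asynchronously, so time.time() can stop the clock before the GPU has actually finished. Bracketing each measured phase with torch.cuda.synchronize(), as in the harness shown earlier, gives more trustworthy timings (a suggested refinement; the original measurements do not do this):

torch.cuda.synchronize()  # make sure earlier GPU work has finished
time_start = time.time()
a.train()
torch.cuda.synchronize()  # wait for train's kernels before reading the clock
time_end1 = time.time()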