Testing how batch_size affects training time: the speedup is not proportional
Example 1:
train batch_size set to 160, test batch_size set to 100 (a test batch_size of 160 runs out of GPU memory).
Training set: 4000 images; test set: 1000 images.
Example 2:
train batch_size set to 80, test batch_size set to 100.
Training set: 4000 images; test set: 1000 images.
The results show that the speedup is not proportional: doubling the batch size from 80 to 160 does not halve the training time.
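To reproduce this kind of comparison, a minimal per-epoch timing harness along these lines can be used (a sketch, not from the original post; it assumes a model with a single tensor output and a matching loss_fn, whereas the network below has two heads):

import time
import torch
from torch.utils.data import DataLoader

def time_one_epoch(net, loader, optimizer, loss_fn):
    torch.cuda.synchronize()   # drain pending GPU work before starting the clock
    start = time.time()
    for x, y in loader:
        x, y = x.cuda(), y.cuda()
        loss = loss_fn(net(x), y)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    torch.cuda.synchronize()   # kernels run asynchronously; wait before reading the clock
    return time.time() - start

# Usage sketch: compare the per-epoch time at the two batch sizes.
# for bs in (80, 160):
#     loader = DataLoader(train_dataset, batch_size=bs, shuffle=True)
#     print(bs, time_one_epoch(net, loader, optimizer, loss_fn))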
The full source code follows:
import torch
from torch.utils.data import Dataset, DataLoader
import os
from PIL import Image
import numpy as np
import torchvision.transforms as trans
import torch.nn as nn
import matplotlib.pyplot as plt
from PIL import ImageDraw
import Polygon as plg  # provided by the Polygon3 package (pip install Polygon3)
import time
def rectangle_to_polygon(rect):
    # Pack the rect [x1, y1, x2, y2] into 8 values: xs in the first row, ys in the second;
    # reshape + transpose then yields the four corners (x1,y2), (x1,y1), (x2,y1), (x2,y2).
    resBoxes = np.empty([1, 8], dtype='int32')
    resBoxes[0, 0] = int(rect[0])
    resBoxes[0, 4] = int(rect[3])
    resBoxes[0, 1] = int(rect[0])
    resBoxes[0, 5] = int(rect[1])
    resBoxes[0, 2] = int(rect[2])
    resBoxes[0, 6] = int(rect[1])
    resBoxes[0, 3] = int(rect[2])
    resBoxes[0, 7] = int(rect[3])
    pointMat = resBoxes.reshape([2, 4]).T
    return plg.Polygon(pointMat)
def get_intersection(pD, pG):
    # Polygon's & operator computes the intersection; an empty result has no contours.
    pInt = pD & pG
    if len(pInt) == 0:
        return 0
    return pInt.area()

def get_union(pD, pG):
    areaA = pD.area()
    areaB = pG.area()
    return areaA + areaB - get_intersection(pD, pG)

def get_intersection_over_union(pD, pG):
    return get_intersection(pD, pG) / get_union(pD, pG)
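As a quick sanity check of these helpers (an illustrative example, not part of the original post): two 100x100 boxes offset by 50 pixels overlap in a 50x50 region, so the IoU should be 2500 / (10000 + 10000 - 2500) ≈ 0.143.

box_a = rectangle_to_polygon([0, 0, 100, 100])    # [x1, y1, x2, y2]
box_b = rectangle_to_polygon([50, 50, 150, 150])
print(get_intersection_over_union(box_a, box_b))  # prints roughly 0.1428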
class MyDataset(Dataset):
    mean = torch.tensor([0.5708, 0.5661, 0.5395])
    std = torch.tensor([0.3128, 0.2978, 0.3172])

    def __init__(self, root=None, train=True, transforms=None):
        self.path = root
        self.transforms = transforms
        self.dataset = os.listdir(self.path)
        self.dataset.sort(key=lambda x: int(x[:x.index(".")]))  # sort files by their leading image index
        if train:
            # train == True: the 4000 training images (2000 positive and 2000 negative samples)
            self.dataset = self.dataset[:4000]
        else:
            # train == False: the 1000 test images (500 positive and 500 negative samples)
            self.dataset = self.dataset[:1000]

    def __len__(self):
        # size of the dataset
        return len(self.dataset)

    def __getitem__(self, index):
        imgpath = self.dataset[index]  # look up the image file name by index
        img = Image.open(os.path.join(self.path, imgpath))
        data = self.transforms(img)
        labels = imgpath.split(".")
        axes = np.array(labels[1:5], dtype=np.float32) / 224  # box coordinates, normalized by the 224-pixel image size
        category = np.array(labels[5:6], dtype=np.float32)  # the class label: 1 or 0
        # concatenate in a fixed order: the four coordinates first, then the label
        target = np.concatenate((axes, category))
        return data, target
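The labels are parsed directly out of the file name, so the dataset assumes a naming convention of the form index.x1.y1.x2.y2.label.ext — an inference from the split logic above, not something the post states explicitly. A hypothetical file name illustrates the parsing:

name = "12.30.40.180.200.1.jpg"  # hypothetical file following the assumed convention
labels = name.split(".")
print(labels[1:5])  # ['30', '40', '180', '200'] -> box corners, later divided by 224
print(labels[5:6])  # ['1']                      -> class label (1 = positive)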
class MyNetWork(nn.Module):
    def __init__(self):
        super(MyNetWork, self).__init__()
        self.convolution_layer = nn.Sequential(
            nn.Conv2d(3, 16, 3, 1),    # (224 - 3 + 2*0)/1 + 1 = 222
            nn.ReLU(inplace=True),
            nn.Conv2d(16, 32, 3, 1),   # (222 - 3 + 2*0)/1 + 1 = 220
            nn.ReLU(inplace=True),
            nn.MaxPool2d(2, 2),        # (220 - 2 + 2*0)/2 + 1 = 110
            nn.Conv2d(32, 128, 3, 1),  # (110 - 3 + 2*0)/1 + 1 = 108
            nn.ReLU(inplace=True),
            nn.MaxPool2d(2, 2),        # (108 - 2 + 2*0)/2 + 1 = 54
            nn.Conv2d(128, 256, 3, 1), # (54 - 3 + 2*0)/1 + 1 = 52
            nn.ReLU(inplace=True),
            nn.AvgPool2d(2, 2),        # (52 - 2 + 2*0)/2 + 1 = 26
            nn.Conv2d(256, 64, 3, 1),  # (26 - 3)/1 + 1 = 24
            nn.ReLU(inplace=True),
            nn.AvgPool2d(2, 2),        # (24 - 2)/2 + 1 = 12
            nn.Conv2d(64, 32, 3, 1)    # (12 - 3)/1 + 1 = 10
        )
        self.MLP_layer = nn.Sequential(
            nn.Linear(32 * 10 * 10, 128),  # the conv stack outputs 32 feature maps of size 10x10
            nn.ReLU(),
            nn.Linear(128, 5)
        )
        # Alternatively, a fully convolutional head with 1x1 convolutions could replace the MLP:
        # self.cnn_layer2 = nn.Sequential(
        #     nn.Conv2d(32, 128, 10, 1),  # a 10x10 kernel on the 10x10 feature maps yields 1x1 maps
        #     nn.ReLU(True),
        #     nn.Conv2d(128, 5, 1, 1)     # 1x1 convolution producing the 5 outputs
        # )

    def forward(self, x):
        x = self.convolution_layer(x)
        x = torch.reshape(x, shape=(-1, 32 * 10 * 10))
        out = self.MLP_layer(x)
        # The last output is the class score; sigmoid maps it to a probability for
        # binary classification (softmax would be the multi-class counterpart).
        category = torch.sigmoid(out[:, 4])
        # The first four outputs are the box coordinates (a regression target);
        # relu keeps them non-negative.
        axes = torch.relu(out[:, :4])
        return axes, category
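A quick way to confirm the 32 * 10 * 10 feature-map size hard-coded into the first Linear layer is to push a dummy batch through the network (a sanity check added here, not part of the original post):

net = MyNetWork()
dummy = torch.zeros(2, 3, 224, 224)   # a batch of two 224x224 RGB images
features = net.convolution_layer(dummy)
print(features.shape)                 # torch.Size([2, 32, 10, 10])
axes, category = net(dummy)
print(axes.shape, category.shape)     # torch.Size([2, 4]) torch.Size([2])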
class Trainer:
    def __init__(self):
        transforms = trans.Compose([
            trans.ToTensor(),
            trans.Normalize(MyDataset.mean, MyDataset.std)
        ])
        self.train_dataset = MyDataset(root=r"../train_picture", train=True, transforms=transforms)
        self.test_dataset = MyDataset(root=r"../test_picture", train=False, transforms=transforms)
        self.net = MyNetWork().cuda()
        # MSELoss (mean squared error) is the usual loss for the box-regression part.
        self.offset_lossfunc = nn.MSELoss().cuda()
        # BCELoss (binary cross entropy) is the binary special case of cross entropy
        # and is normally paired with a sigmoid output.
        self.category_lossfunc = nn.BCELoss().cuda()
        self.optimizer = torch.optim.Adam(self.net.parameters(), lr=1e-5)
    def train(self):
        # If a run is interrupted, a previously saved model can be loaded here to resume training:
        # if os.path.exists("models/net2.pth"):
        #     self.net = torch.load("models/net2.pth")
        #     print("exists")
        trainloader = DataLoader(dataset=self.train_dataset, batch_size=160, shuffle=True)
        losses = []
        for i in range(20):
            print("epochs:{}".format(i))
            for j, (x, y) in enumerate(trainloader):
                if torch.cuda.is_available():
                    x = x.cuda()
                    y = y.cuda()
                axes, category = self.net(x)  # forward pass: predicted box coordinates and class probability
                loss1 = self.offset_lossfunc(axes, y[:, 0:4])      # regression loss on the four coordinates
                loss2 = self.category_lossfunc(category, y[:, 4])  # classification loss on the label
                loss = loss1 + loss2
                if j % 5 == 0:
                    losses.append(loss.item())  # .item() detaches the scalar so the plot does not hold CUDA tensors
                    print("{}/{},loss:{}".format(j, len(trainloader), loss.item()))
                    plt.clf()
                    plt.plot(losses)
                    plt.pause(0.1)
                self.optimizer.zero_grad()
                loss.backward()
                self.optimizer.step()
                del x, y, category, axes, loss1, loss2, loss
        torch.save(self.net, "models/net33.pth")
    def test(self):
        testloader = DataLoader(dataset=self.test_dataset, batch_size=100, shuffle=False)
        self.net = torch.load("models/net33.pth")
        total = 0
        nnn = 0
        total_iou = 0
        for x, y in testloader:
            x, y = x.cuda(), y.cuda()
            axes, category = self.net(x)
            total += (category.round() == y[:, 4]).sum()  # count of predictions that match the label
            index = category.round() == 1
            """
            index is a boolean mask over the batch marking images predicted to contain a Minion,
            e.g. tensor([True, True, False, True, False, True, True, False, True, True])
            """
            target = y[index]  # targets (coordinates plus class label) of the predicted-positive images
            """
            To visualize the predicted-positive images, the normalization done during preprocessing
            must be inverted. Preprocessing computed: normalized = (img / 255 - mean) / std,
            so the original image is recovered as: img = (normalized * std + mean) * 255.
            """
            x = (x[index].cpu() * MyDataset.std.reshape(-1, 3, 1, 1) + MyDataset.mean.reshape(-1, 3, 1, 1))
            # No explicit * 255 needed here: trans.ToPILImage("RGB") scales automatically.
            for j, i in enumerate(axes[index]):  # j is the running index produced by enumerate
                boxes = (i.data.cpu().numpy() * 224).astype(np.int32)  # rescale the predicted box to pixels, cast to int
                target_box = (target[j, 0:4].data.cpu().numpy() * 224).astype(np.int32)  # rescale the target box likewise
                img = trans.ToPILImage()(x[j])  # convert the tensor back to a PIL image
                """
                torchvision.transforms.ToPILImage converts a tensor by:
                1. multiplying each element by 255
                2. casting from FloatTensor to uint8
                3. converting the tensor to a numpy ndarray
                4. transposing the ndarray with (1, 2, 0)
                5. building a PIL image with Image.fromarray
                6. returning the PIL image
                """
                # plt.clf()
                # plt.axis("off")
                # draw = ImageDraw.Draw(img)
                # draw.rectangle(boxes.tolist(), outline="red")          # prediction
                # draw.rectangle(target_box.tolist(), outline="yellow")  # ground truth
                # plt.imshow(img)
                # plt.savefig("./result/{}.jpg".format(nnn))
                nnn += 1
                # plt.pause(0.1)
                de = rectangle_to_polygon(boxes.tolist())
                gt = rectangle_to_polygon(target_box.tolist())
                iou = get_intersection_over_union(de, gt)
                total_iou += iou
            del x, y, category, axes, index, target
        print("accuracy:", total.item() / len(self.test_dataset), "mean IoU:", total_iou / nnn)
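A side note on checkpointing: torch.save(self.net, ...) pickles the entire module, which ties the checkpoint file to this exact class definition, and torch.load then restores it wholesale. Saving only the weights is the more portable pattern (a suggested alternative, not what this post does):

torch.save(net.state_dict(), "models/net33.pth")     # save the weights only
net = MyNetWork().cuda()                             # rebuild the architecture from code
net.load_state_dict(torch.load("models/net33.pth"))  # then load the weights into it
net.eval()                                           # switch to inference mode for testing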
if __name__ == '__main__':
    a = Trainer()
    time_start = time.time()
    # a.train()
    time_end1 = time.time()
    a.test()
    time_end2 = time.time()
    print("a.train took {:.2f}s".format(time_end1 - time_start))
    print("a.test took {:.2f}s".format(time_end2 - time_end1))
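One caveat about these wall-clock numbers: CUDA ops launch asynchronously, so time.time() can stop the clock before the GPU has actually finished. Bracketing each measured phase with torch.cuda.synchronize(), as in the harness shown earlier, gives more trustworthy timings (a suggested refinement; the original measurements do not do this):

torch.cuda.synchronize()  # make sure earlier GPU work has finished
time_start = time.time()
a.train()
torch.cuda.synchronize()  # wait for train's kernels before reading the clock
time_end1 = time.time()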