【/108533949/】基于inceptionV3模型的ssd训练程序
程序员文章站
2024-03-16 23:09:46
...
在train.py中,把import vgg.py换成这个文件
需要注意:inception_source = [768, 2048] 是我直接写死的通道数,对应我选取的 Mixed_6e 和 Mixed_7c 这两层输出的特征图。
预训练模型下载
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
from torch.autograd import Variable
from layers import *
from data import voc, coco, person_coco
import os
class SSD(nn.Module):
    """Single Shot Multibox Detector running on an Inception v3 backbone.

    The backbone supplies two source feature maps (the ``Mixed_6e`` and
    ``Mixed_7c`` outputs returned by ``forword``); every second extra layer
    supplies one more. Each source is fed to one localisation conv and one
    confidence conv taken from ``head``.

    Args:
        phase: ``'test'`` or ``'train'``. ``'test'`` additionally builds the
            softmax and ``Detect`` post-processing used by ``forward``.
        size: input image size; only stored on the instance.
        base: backbone network, passed to ``forword`` on every forward pass.
        extras: list of extra layers applied after the backbone.
        head: pair ``(loc_layers, conf_layers)``.
        num_classes: number of scores predicted per prior box.
    """

    def __init__(self, phase, size, base, extras, head, num_classes):
        super(SSD, self).__init__()
        self.phase = phase
        self.num_classes = num_classes
        # NOTE(review): the index is a bool, so only the first two entries can
        # be selected (num_classes == 2 -> voc, anything else -> coco) and
        # person_coco is unreachable. Confirm which config is intended.
        self.cfg = (coco, voc, person_coco)[num_classes == 2]
        self.priorbox = PriorBox(self.cfg)
        # Prior boxes are built once, outside of gradient tracking.
        with torch.no_grad():
            self.priors = self.priorbox.forward()
        self.size = size

        # SSD network
        self.inception = base
        # Not used by forward() in this class; kept so the set of registered
        # attributes stays the same (presumably it carries parameters that
        # existing checkpoints contain -- confirm against `layers`).
        self.L2Norm = L2Norm(512, 20)
        self.extras = nn.ModuleList(extras)

        self.loc = nn.ModuleList(head[0])
        self.conf = nn.ModuleList(head[1])

        if phase == 'test':
            self.softmax = nn.Softmax(dim=-1)
            self.detect = Detect(num_classes, 0, 200, 0.01, 0.45)

    def forward(self, x):
        """Apply the backbone, extra layers and multibox heads to ``x``.

        Args:
            x: input image batch (the original notes give the shape as
                [batch, 3, 300, 300] -- TODO confirm against the data loader).

        Returns:
            In ``'test'`` phase, whatever ``self.detect`` returns for the
            location predictions, softmaxed confidences and prior boxes.
            Otherwise a tuple ``(loc, conf, priors)`` where ``loc`` is viewed
            as [batch, -1, 4] and ``conf`` as [batch, -1, num_classes].
        """
        sources = []
        loc = []
        conf = []

        # Backbone: xx is the Mixed_6e output, x is the Mixed_7c output.
        x, xx = forword(self.inception, x)
        sources.append(xx)
        sources.append(x)

        # Apply extra layers; every second one is cached as a source.
        for k, v in enumerate(self.extras):
            x = F.relu(v(x), inplace=True)
            if k % 2 == 1:
                sources.append(x)

        # Apply the multibox heads; channels are moved last before flattening.
        for (x, loc_layer, conf_layer) in zip(sources, self.loc, self.conf):
            loc.append(loc_layer(x).permute(0, 2, 3, 1).contiguous())
            conf.append(conf_layer(x).permute(0, 2, 3, 1).contiguous())

        loc = torch.cat([o.view(o.size(0), -1) for o in loc], 1)
        conf = torch.cat([o.view(o.size(0), -1) for o in conf], 1)
        if self.phase == "test":
            output = self.detect(
                loc.view(loc.size(0), -1, 4),                   # loc preds
                self.softmax(conf.view(conf.size(0), -1,
                                       self.num_classes)),      # conf preds
                # x is the last source here; priors are cast to its type.
                self.priors.type(type(x.data))                  # default boxes
            )
        else:
            output = (
                loc.view(loc.size(0), -1, 4),
                conf.view(conf.size(0), -1, self.num_classes),
                self.priors
            )
        return output

    def load_weights(self, base_file):
        """Load a state dict from ``base_file`` into this module.

        Only files whose extension is ``.pth`` or ``.pkl`` are loaded; for
        any other extension a message is printed and nothing is loaded.

        Args:
            base_file: path to the checkpoint file.
        """
        _, ext = os.path.splitext(base_file)
        # Membership test: the former `ext == '.pkl' or '.pth'` was always
        # true, so unsupported files were handed to torch.load as well.
        if ext in ('.pkl', '.pth'):
            print('Loading weights into state dict...')
            # NOTE: torch.load unpickles the file; only load trusted files.
            # map_location keeps every tensor on the storage it is read into.
            self.load_state_dict(torch.load(base_file,
                                 map_location=lambda storage, loc: storage))
            print('Finished!')
        else:
            print('Sorry only .pth and .pkl files supported.')
def add_extras(cfg, i, batch_norm=False):
    """Build the extra conv layers that follow the backbone.

    Kernel sizes alternate 1, 3, 1, 3, ... over the layers that are created.
    An ``'S'`` entry creates a stride-2, padding-1 conv whose output width is
    the entry that follows it; that following entry creates no layer itself.

    Args:
        cfg: layer specification, a list of channel counts and ``'S'`` markers.
        i: number of input channels of the first layer.
        batch_norm: accepted for interface compatibility; not used.

    Returns:
        List of ``nn.Conv2d`` layers in application order.
    """
    extra_convs = []
    prev = i
    use_3x3 = False
    for idx, spec in enumerate(cfg):
        if prev == 'S':
            # This entry was already consumed as the width of the 'S' conv.
            prev = spec
            continue
        ksize = 3 if use_3x3 else 1
        if spec == 'S':
            conv = nn.Conv2d(prev, cfg[idx + 1],
                             kernel_size=ksize, stride=2, padding=1)
        else:
            conv = nn.Conv2d(prev, spec, kernel_size=ksize)
        extra_convs.append(conv)
        use_3x3 = not use_3x3
        prev = spec
    return extra_convs
def multibox(inception_v3, extra_layers, cfg, num_classes):
    """Create the localisation and confidence heads for every source map.

    The first two heads take the hard-coded backbone widths 768 and 2048;
    the remaining heads take ``out_channels`` of every second extra layer
    (indices 1, 3, ...). Head ``k`` predicts ``cfg[k]`` boxes per location.

    Args:
        inception_v3: backbone; returned unchanged.
        extra_layers: list of extra layers; returned unchanged.
        cfg: number of boxes per location for each source map.
        num_classes: number of scores per box.

    Returns:
        Tuple ``(inception_v3, extra_layers, (loc_layers, conf_layers))``.
    """
    # Channel widths of the two backbone feature maps used as sources.
    source_channels = [768, 2048]
    source_channels.extend(layer.out_channels for layer in extra_layers[1::2])

    loc_layers, conf_layers = [], []
    for idx, channels in enumerate(source_channels):
        boxes = cfg[idx]
        loc_layers.append(
            nn.Conv2d(channels, boxes * 4, kernel_size=3, padding=1))
        conf_layers.append(
            nn.Conv2d(channels, boxes * num_classes, kernel_size=3, padding=1))
    return inception_v3, extra_layers, (loc_layers, conf_layers)
# Layer specifications keyed by input size (as a string). Only the '300'
# entries are populated; the '512' entries are empty placeholders.

# VGG-style layer list: channel counts with 'M' and 'C' markers.
base = {
    "300": [
        64, 64, "M",
        128, 128, "M",
        256, 256, 256, "C",
        512, 512, 512, "M",
        512, 512, 512,
    ],
    "512": [],
}

# Extra-layer specification consumed by add_extras. A longer variant,
# [256, 'S', 512, 128, 'S', 256, 128, 256, 128, 256], was used here before.
extras = {
    "300": [256, "S", 512, 128],
    "512": [],
}

# Number of boxes per feature map location, one entry per source map.
mbox = {
    "300": [4, 6, 6, 6, 4, 4],
    "512": [],
}
def forword(inception_v3, x):
    """Run the Inception v3 trunk and return the two SSD source feature maps.

    The stages are applied in order from ``Conv2d_1a_3x3`` to ``Mixed_7c``,
    with a 3x3 stride-2 max pool after ``Conv2d_2b_3x3`` and after
    ``Conv2d_4a_3x3``. The auxiliary classifier (``AuxLogits``) is not run:
    its output was never used here, so evaluating it only cost time.

    The shape comments below are the ones given for a 299x299 input.

    Args:
        inception_v3: object exposing the Inception v3 stages as attributes.
        x: input image batch.

    Returns:
        Tuple ``(x, xx)``: the ``Mixed_7c`` output and the ``Mixed_6e`` output.
    """
    # N x 3 x 299 x 299
    x = inception_v3.Conv2d_1a_3x3(x)
    # N x 32 x 149 x 149
    x = inception_v3.Conv2d_2a_3x3(x)
    # N x 32 x 147 x 147
    x = inception_v3.Conv2d_2b_3x3(x)
    # N x 64 x 147 x 147
    x = F.max_pool2d(x, kernel_size=3, stride=2)
    # N x 64 x 73 x 73
    x = inception_v3.Conv2d_3b_1x1(x)
    # N x 80 x 73 x 73
    x = inception_v3.Conv2d_4a_3x3(x)
    # N x 192 x 71 x 71
    x = F.max_pool2d(x, kernel_size=3, stride=2)
    # N x 192 x 35 x 35
    x = inception_v3.Mixed_5b(x)
    # N x 256 x 35 x 35
    x = inception_v3.Mixed_5c(x)
    # N x 288 x 35 x 35
    x = inception_v3.Mixed_5d(x)
    # N x 288 x 35 x 35
    x = inception_v3.Mixed_6a(x)
    # N x 768 x 17 x 17
    x = inception_v3.Mixed_6b(x)
    # N x 768 x 17 x 17
    x = inception_v3.Mixed_6c(x)
    # N x 768 x 17 x 17
    x = inception_v3.Mixed_6d(x)
    # N x 768 x 17 x 17
    xx = inception_v3.Mixed_6e(x)
    # N x 768 x 17 x 17 -- first source map, kept separately as xx
    x = inception_v3.Mixed_7a(xx)
    # N x 1280 x 8 x 8
    x = inception_v3.Mixed_7b(x)
    # N x 2048 x 8 x 8
    x = inception_v3.Mixed_7c(x)
    # N x 2048 x 8 x 8 -- second source map
    return x, xx
def build_ssd(phase, size=300, num_classes=21):
    """Assemble an SSD model on a freshly constructed Inception v3 backbone.

    Args:
        phase: ``"test"`` or ``"train"``.
        size: input size; only 300 is accepted.
        num_classes: number of scores predicted per prior box.

    Returns:
        The ``SSD`` instance, or ``None`` (after printing an error message)
        when ``phase`` or ``size`` is not supported.
    """
    if phase not in ("test", "train"):
        print("ERROR: Phase: " + phase + " not recognized")
        return None
    if size != 300:
        print(f"ERROR: You specified size {size!r}. However, "
              "currently only SSD300 (size=300) is supported!")
        return None

    size_key = str(size)
    backbone = torchvision.models.inception_v3()
    # 2048 is the channel width the first extra layer receives.
    extra_layers = add_extras(extras[size_key], 2048)
    base_, extras_, head_ = multibox(
        backbone, extra_layers, mbox[size_key], num_classes)
    return SSD(phase, size, base_, extras_, head_, num_classes)
下一篇: 二分法应用NUSOJ 3053集N“福”