【/108533949/】基于inceptionV3模型的ssd训练程序

程序员文章站 2024-03-16 23:09:46
...
在train.py中，把import vgg.py换成这个文件
可以注意到，inception_source = [768, 2048] 是我直接写死的：我选择 Mixed_6e 和 Mixed_7c 这两层输出的特征图，它们的通道数分别是 768 和 2048。
预训练模型下载
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
from torch.autograd import Variable
from layers import *
from data import voc, coco, person_coco
import os


class SSD(nn.Module):
    """SSD detection network that uses an Inception-v3 model as its backbone.

    Two backbone feature maps (the ``Mixed_6e`` and ``Mixed_7c`` outputs
    returned by ``forword``) plus the output of every second extra layer are
    fed to paired localization / confidence prediction layers.

    Args:
        phase: ``"test"`` or ``"train"``. ``"test"`` additionally creates a
            softmax layer and a ``Detect`` post-processing step.
        size: input image size; stored on the instance and not otherwise
            used by this class.
        base: the Inception-v3 module used as the backbone.
        extras: iterable of extra layers applied after the backbone.
        head: pair ``(loc_layers, conf_layers)`` of prediction layers, one
            entry per source feature map.
        num_classes: number of classes predicted for each prior box.
    """

    def __init__(self, phase, size, base, extras, head, num_classes):
        super().__init__()
        self.phase = phase
        self.num_classes = num_classes
        # NOTE(review): indexing the tuple with a bool can only yield index 0
        # (coco) or index 1 (voc, when num_classes == 2); person_coco is never
        # selected. Behaviour is kept as-is; confirm which config is intended.
        self.cfg = (coco, voc, person_coco)[num_classes == 2]
        self.priorbox = PriorBox(self.cfg)
        # Prior boxes are constants, so build them without autograd tracking
        # (this replaces the removed ``Variable(..., volatile=True)`` idiom).
        with torch.no_grad():
            self.priors = self.priorbox.forward()
        self.size = size

        # SSD network
        self.inception = base
        # Left over from the VGG version (scaling of conv4_3 features). It is
        # constructed here but never applied in forward(); it stays so that
        # the module's parameters and state_dict keys are unchanged.
        self.L2Norm = L2Norm(512, 20)
        self.extras = nn.ModuleList(extras)

        self.loc = nn.ModuleList(head[0])
        self.conf = nn.ModuleList(head[1])

        if phase == 'test':
            self.softmax = nn.Softmax(dim=-1)
            # presumably (num_classes, bkg_label, top_k, conf_thresh,
            # nms_thresh) -- TODO confirm against layers.Detect
            self.detect = Detect(num_classes, 0, 200, 0.01, 0.45)

    def forward(self, x):
        """Applies network layers and ops on input image(s) x.

        Args:
            x: input image or batch of images. Shape: [batch,3,300,300].

        Return:
            Depending on phase:
            test:
                the result of ``self.detect`` applied to the reshaped
                localization predictions, the softmaxed confidence
                predictions and the prior boxes.

            train:
                tuple of:
                    1: localization predictions, viewed as [batch,-1,4]
                    2: confidence predictions, viewed as
                       [batch,-1,num_classes]
                    3: the prior boxes produced by ``PriorBox``
        """
        sources = []
        loc = []
        conf = []

        # Backbone: ``xx`` is the Mixed_6e feature map, ``x`` the Mixed_7c
        # feature map; only ``x`` is pushed through the extra layers.
        x, xx = forword(self.inception, x)
        sources.append(xx)
        sources.append(x)

        # apply extra layers and cache the output of every second one
        for k, v in enumerate(self.extras):
            x = F.relu(v(x), inplace=True)
            if k % 2 == 1:
                sources.append(x)

        # apply multibox head to source layers; zip stops at the shortest of
        # the three sequences. Channels are moved last so that each prior's
        # values are contiguous after flattening.
        for source, loc_head, conf_head in zip(sources, self.loc, self.conf):
            loc.append(loc_head(source).permute(0, 2, 3, 1).contiguous())
            conf.append(conf_head(source).permute(0, 2, 3, 1).contiguous())

        loc = torch.cat([o.view(o.size(0), -1) for o in loc], 1)
        conf = torch.cat([o.view(o.size(0), -1) for o in conf], 1)
        if self.phase == "test":
            output = self.detect(
                loc.view(loc.size(0), -1, 4),  # loc preds
                self.softmax(conf.view(conf.size(0), -1,
                                       self.num_classes)),  # conf preds
                self.priors.type(type(x.data))  # default boxes
            )
        else:
            output = (
                loc.view(loc.size(0), -1, 4),
                conf.view(conf.size(0), -1, self.num_classes),
                self.priors
            )
        return output

    def load_weights(self, base_file):
        """Load a saved state dict from ``base_file`` into this module.

        Only files whose extension is ``.pkl`` or ``.pth`` are loaded; for
        any other extension a message is printed and nothing is loaded.

        Args:
            base_file: path to the checkpoint file.
        """
        _, ext = os.path.splitext(base_file)
        # The previous test ``ext == '.pkl' or '.pth'`` was always true
        # because the non-empty string '.pth' is truthy on its own.
        if ext in ('.pkl', '.pth'):
            print('Loading weights into state dict...')
            # NOTE: torch.load unpickles the file; only load checkpoints
            # from a trusted source. map_location returns the storage
            # unchanged instead of moving it to the device it was saved from.
            self.load_state_dict(torch.load(base_file,
                                            map_location=lambda storage, loc: storage))
            print('Finished!')
        else:
            print('Sorry only .pth and .pkl files supported.')




def add_extras(cfg, i, batch_norm=False):
    """Build the extra convolution layers appended after the backbone.

    ``cfg`` is read left to right. A number is the output channel count of
    the next convolution. The marker ``'S'`` means "stride-2 convolution
    whose output channel count is the entry that follows"; the entry right
    after an ``'S'`` therefore creates no layer of its own. Kernel sizes
    alternate 1, 3, 1, 3, ... over the created layers.

    Args:
        cfg: list of channel counts and ``'S'`` markers.
        i: number of input channels of the first extra layer.
        batch_norm: accepted for interface compatibility; not used.

    Returns:
        List of ``nn.Conv2d`` layers in application order.
    """
    extra_layers = []
    prev_channels = i
    use_3x3 = False  # first created layer is 1x1, then the size alternates
    for idx, entry in enumerate(cfg):
        if prev_channels == 'S':
            # This entry was already consumed as the stride-2 layer's output.
            prev_channels = entry
            continue

        kernel = 3 if use_3x3 else 1
        if entry == 'S':
            conv = nn.Conv2d(prev_channels, cfg[idx + 1],
                             kernel_size=kernel, stride=2, padding=1)
        else:
            conv = nn.Conv2d(prev_channels, entry, kernel_size=kernel)
        extra_layers.append(conv)

        use_3x3 = not use_3x3
        prev_channels = entry
    return extra_layers


def multibox(inception_v3, extra_layers, cfg, num_classes):
    """Create the localization and confidence layers for every source map.

    The first two sources are backbone feature maps with hard-coded channel
    counts 768 and 2048 (the Mixed_6e and Mixed_7c outputs per the shape
    comments in ``forword``). The remaining sources are the outputs of every
    second extra layer (indices 1, 3, ...).

    Args:
        inception_v3: backbone module; returned unchanged.
        extra_layers: list of extra layers; returned unchanged.
        cfg: number of prior boxes per feature-map location, one entry per
            source in order.
        num_classes: number of classes predicted per prior box.

    Returns:
        Tuple ``(inception_v3, extra_layers, (loc_layers, conf_layers))``.
    """
    # Input channel count of each source feature map, in source order.
    source_channels = [768, 2048]
    source_channels.extend(layer.out_channels for layer in extra_layers[1::2])

    loc_layers = []
    conf_layers = []
    for idx, channels in enumerate(source_channels):
        boxes = cfg[idx]
        # 4 box offsets per prior for localization ...
        loc_layers.append(
            nn.Conv2d(channels, boxes * 4, kernel_size=3, padding=1))
        # ... and one score per class per prior for confidence.
        conf_layers.append(
            nn.Conv2d(channels, boxes * num_classes, kernel_size=3, padding=1))

    return inception_v3, extra_layers, (loc_layers, conf_layers)


# Layer layout of the VGG base network, keyed by input size. It is not
# referenced by the code visible in this file, where Inception-v3 is the
# backbone.
base = {
    '300': [
        64, 64, 'M',
        128, 128, 'M',
        256, 256, 256, 'C',
        512, 512, 512, 'M',
        512, 512, 512,
    ],
    '512': [],
}

# Extra-layer layout consumed by add_extras(), keyed by input size.
# The VGG-based configuration this replaces was:
#     [256, 'S', 512, 128, 'S', 256, 128, 256, 128, 256]
extras = {
    '300': [256, 'S', 512, 128],
    '512': [],
}

# Number of boxes per feature map location, one entry per source map,
# keyed by input size.
mbox = {
    '300': [4, 6, 6, 6, 4, 4],
    '512': [],
}

def forword(inception_v3, x):
    """Run the Inception-v3 feature stages and return two feature maps.

    The stages are called in order from ``Conv2d_1a_3x3`` up to
    ``Mixed_7c``, with the two max-pooling steps in between. The
    classification part of the network is not run, and neither is the
    auxiliary classifier (``AuxLogits``): its result was never used, so
    calling it in training mode only cost time.

    The function name (a misspelling of "forward") is kept because
    ``SSD.forward`` calls it under this name.

    Args:
        inception_v3: module exposing the Inception-v3 stage attributes
            used below.
        x: input image batch.

    Returns:
        Tuple ``(x, xx)`` where ``x`` is the ``Mixed_7c`` output and ``xx``
        is the ``Mixed_6e`` output.
    """
    # The shape comments below are for a 299 x 299 input.
    # N x 3 x 299 x 299
    x = inception_v3.Conv2d_1a_3x3(x)
    # N x 32 x 149 x 149
    x = inception_v3.Conv2d_2a_3x3(x)
    # N x 32 x 147 x 147
    x = inception_v3.Conv2d_2b_3x3(x)
    # N x 64 x 147 x 147
    x = F.max_pool2d(x, kernel_size=3, stride=2)
    # N x 64 x 73 x 73
    x = inception_v3.Conv2d_3b_1x1(x)
    # N x 80 x 73 x 73
    x = inception_v3.Conv2d_4a_3x3(x)
    # N x 192 x 71 x 71
    x = F.max_pool2d(x, kernel_size=3, stride=2)
    # N x 192 x 35 x 35
    x = inception_v3.Mixed_5b(x)
    # N x 256 x 35 x 35
    x = inception_v3.Mixed_5c(x)
    # N x 288 x 35 x 35
    x = inception_v3.Mixed_5d(x)
    # N x 288 x 35 x 35
    x = inception_v3.Mixed_6a(x)
    # N x 768 x 17 x 17
    x = inception_v3.Mixed_6b(x)
    # N x 768 x 17 x 17
    x = inception_v3.Mixed_6c(x)
    # N x 768 x 17 x 17
    x = inception_v3.Mixed_6d(x)
    # N x 768 x 17 x 17
    xx = inception_v3.Mixed_6e(x)
    # N x 768 x 17 x 17  (first feature map handed back to the caller)
    x = inception_v3.Mixed_7a(xx)
    # N x 1280 x 8 x 8
    x = inception_v3.Mixed_7b(x)
    # N x 2048 x 8 x 8
    x = inception_v3.Mixed_7c(x)
    # N x 2048 x 8 x 8  (second feature map handed back to the caller)
    return x, xx
def build_ssd(phase, size=300, num_classes=21):
    """Assemble an SSD model with an Inception-v3 backbone.

    Args:
        phase: ``"test"`` or ``"train"``.
        size: input size; only 300 is accepted.
        num_classes: number of classes predicted per prior box.

    Returns:
        An ``SSD`` instance, or ``None`` (after printing an error message)
        when ``phase`` or ``size`` is not supported.
    """
    if phase not in ("test", "train"):
        print("ERROR: Phase: " + phase + " not recognized")
        return None
    if size != 300:
        print("ERROR: You specified size {!r}. However, "
              "currently only SSD300 (size=300) is supported!".format(size))
        return None

    size_key = str(size)
    # Created with torchvision's default arguments.
    backbone = torchvision.models.inception_v3()
    # 2048 is the input channel count given to the first extra layer; it
    # matches the last backbone entry hard-coded in multibox().
    extra_layers = add_extras(extras[size_key], 2048)
    base_, extras_, head_ = multibox(backbone, extra_layers,
                                     mbox[size_key], num_classes)

    return SSD(phase, size, base_, extras_, head_, num_classes)