【/108533949/】SSD training program based on the InceptionV3 model

In train.py, replace the vgg.py import with this file, as sketched below.
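
For example, if this file is saved as ssd_inception.py (that filename is my assumption, the post does not name it), the change in train.py would look roughly like this:

# in train.py -- a minimal sketch, not taken verbatim from the original post
# from ssd import build_ssd                  # original VGG-based network
from ssd_inception import build_ssd          # InceptionV3-based network below
ssd_net = build_ssd('train', 300, num_classes)   # num_classes as in your dataset cfg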

Note that inception_source = [768, 2048] is hard-coded; I take the feature maps coming out of the Mixed_6e and Mixed_7c layers.
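
A quick sanity check of those channel counts (a standalone sketch of mine, not part of the original code): register forward hooks on Mixed_6e and Mixed_7c of a plain torchvision InceptionV3 and confirm they output 768 and 2048 channels.

# standalone sketch: verify the 768 / 2048 channel counts used for inception_source
import torch
import torchvision

net = torchvision.models.inception_v3().eval()
shapes = {}
net.Mixed_6e.register_forward_hook(lambda m, inp, out: shapes.update(mixed_6e=tuple(out.shape)))
net.Mixed_7c.register_forward_hook(lambda m, inp, out: shapes.update(mixed_7c=tuple(out.shape)))
with torch.no_grad():
    net(torch.randn(1, 3, 299, 299))
print(shapes)  # expect (1, 768, 17, 17) and (1, 2048, 8, 8)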

Pretrained model download
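
If you prefer to pull the ImageNet weights through torchvision instead of a manual download, the backbone used in build_ssd below could be created like this (a sketch; the exact argument depends on your torchvision version):

# sketch: create the backbone with ImageNet-pretrained weights
import torchvision
inception_v3 = torchvision.models.inception_v3(pretrained=True)          # older torchvision
# inception_v3 = torchvision.models.inception_v3(
#     weights=torchvision.models.Inception_V3_Weights.IMAGENET1K_V1)     # newer torchvision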


import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
from torch.autograd import Variable
from layers import *
from data import voc, coco, person_coco
import os


class SSD(nn.Module):


    def __init__(self, phase, size, base, extras, head, num_classes):
        super(SSD, self).__init__()
        self.phase = phase
        self.num_classes = num_classes
        # a boolean index only selects coco (0) or voc (1); as written, person_coco
        # in position 2 is never chosen
        self.cfg = (coco, voc, person_coco)[num_classes == 2]
        self.priorbox = PriorBox(self.cfg)
        # the old Variable(..., volatile=True) call is replaced by torch.no_grad()
        with torch.no_grad():
            self.priors = self.priorbox.forward()
        self.size = size

        # SSD network
        self.inception = base
        # L2Norm is kept from the original VGG version (it scaled conv4_3 features);
        # it is not applied anywhere in this inception-based forward pass
        self.L2Norm = L2Norm(512, 20)
        self.extras = nn.ModuleList(extras)

        self.loc = nn.ModuleList(head[0])
        self.conf = nn.ModuleList(head[1])

        if phase == 'test':
            self.softmax = nn.Softmax(dim=-1)
            self.detect = Detect(num_classes, 0, 200, 0.01, 0.45)

    def forward(self, x):
        """Applies network layers and ops on input image(s) x.

        Args:
            x: input image or batch of images. Shape: [batch,3,300,300].

        Return:
            Depending on phase:
            test:
                tensor of detections: per-class confidence scores and box
                coordinates for the top-k detections in each image.

            train:
                tuple of:
                    1: localization preds, Shape: [batch, num_priors, 4]
                    2: confidence preds, Shape: [batch, num_priors, num_classes]
                    3: prior boxes, Shape: [num_priors, 4]
        """
        sources = list()
        loc = list()
        conf = list()

        # # apply vgg up to conv4_3 relu
        # for k in range(23):
        #     x = self.vgg[k](x)
        #
        # s = self.L2Norm(x)
        # sources.append(s)
        #
        # # apply vgg up to fc7
        # for k in range(23, len(self.vgg)):
        #     x = self.vgg[k](x)
        # run the inception backbone: xx is the Mixed_6e map (N x 768 x 17 x 17),
        # x is the Mixed_7c map (N x 2048 x 8 x 8)
        x, xx = inception_forward(self.inception, x)
        sources.append(xx)
        sources.append(x)

        # apply extra layers and cache source layer outputs
        for k, v in enumerate(self.extras):
            #print(k)
            x = F.relu(v(x), inplace=True)
            if k % 2 == 1:
                sources.append(x)

        # apply multibox head to source layers

        for (x, l, c) in zip(sources, self.loc, self.conf):
            #print("size - x :", x.size())
            loc.append(l(x).permute(0, 2, 3, 1).contiguous())
            conf.append(c(x).permute(0, 2, 3, 1).contiguous())

        loc = torch.cat([o.view(o.size(0), -1) for o in loc], 1)
        conf = torch.cat([o.view(o.size(0), -1) for o in conf], 1)
        if self.phase == "test":
            output = self.detect(
                loc.view(loc.size(0), -1, 4),  # loc preds
                self.softmax(conf.view(conf.size(0), -1,
                                       self.num_classes)),  # conf preds
                self.priors.type(type(x.data))  # default boxes
            )
        else:
            output = (
                loc.view(loc.size(0), -1, 4),
                conf.view(conf.size(0), -1, self.num_classes),
                self.priors
            )
        return output

    def load_weights(self, base_file):
        _, ext = os.path.splitext(base_file)
        if ext in ('.pkl', '.pth'):
            print('Loading weights into state dict...')
            self.load_state_dict(torch.load(base_file,
                                            map_location=lambda storage, loc: storage))
            print('Finished!')
        else:
            print('Sorry only .pth and .pkl files supported.')




def add_extras(cfg, i, batch_norm=False):
    # extra feature layers appended after the inception backbone;
    # i is the number of input channels (2048 from Mixed_7c)
    layers = []
    in_channels = i
    flag = False
    for k, v in enumerate(cfg):
        if in_channels != 'S':
            if v == 'S':
                layers += [nn.Conv2d(in_channels, cfg[k + 1],
                                     kernel_size=(1, 3)[flag], stride=2, padding=1)]
            else:
                layers += [nn.Conv2d(in_channels, v, kernel_size=(1, 3)[flag])]
            flag = not flag
        in_channels = v
    return layers


def multibox(inception_v3, extra_layers, cfg, num_classes):
    loc_layers = []
    conf_layers = []
    # channel counts of the two backbone source layers: Mixed_6e (768) and Mixed_7c (2048)
    inception_source = [768, 2048]
    for k, v in enumerate(inception_source):
        loc_layers += [nn.Conv2d(v, cfg[k] * 4, kernel_size=3, padding=1)]
        conf_layers += [nn.Conv2d(v, cfg[k] * num_classes, kernel_size=3, padding=1)]
    for k, v in enumerate(extra_layers[1::2], 2):

        loc_layers += [nn.Conv2d(v.out_channels, cfg[k]
                                 * 4, kernel_size=3, padding=1)]
        conf_layers += [nn.Conv2d(v.out_channels, cfg[k]
                                  * num_classes, kernel_size=3, padding=1)]

    return inception_v3, extra_layers, (loc_layers, conf_layers)


# the original VGG-300 layer config; kept for reference, not used by the inception build
base = {
    '300': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'C', 512, 512, 512, 'M',
            512, 512, 512],
    '512': [],
}
# extras = {
#     '300': [256, 'S', 512, 128, 'S', 256, 128, 256, 128, 256],
#     '512': [],
# }
# truncated extras config: yields one additional source layer on top of Mixed_6e/Mixed_7c
extras = {
    '300': [256, 'S', 512, 128],
    '512': [],
}
mbox = {
    # number of boxes per feature map location; with the three source layers above,
    # only the first three entries (4, 6, 6) are consumed by multibox()
    '300': [4, 6, 6, 6, 4, 4],
    '512': [],
}

def inception_forward(inception_v3, x):
    """Run the torchvision InceptionV3 trunk and return the Mixed_7c (x) and
    Mixed_6e (xx) feature maps that serve as the first two SSD source layers."""
    # N x 3 x 299 x 299 (an SSD300 input of 300 x 300 yields the same 17x17/8x8 grids)
    x = inception_v3.Conv2d_1a_3x3(x)
    # N x 32 x 149 x 149
    x = inception_v3.Conv2d_2a_3x3(x)
    # N x 32 x 147 x 147
    x = inception_v3.Conv2d_2b_3x3(x)
    # N x 64 x 147 x 147
    x = F.max_pool2d(x, kernel_size=3, stride=2)
    # N x 64 x 73 x 73
    x = inception_v3.Conv2d_3b_1x1(x)
    # N x 80 x 73 x 73
    x = inception_v3.Conv2d_4a_3x3(x)
    # N x 192 x 71 x 71
    x = F.max_pool2d(x, kernel_size=3, stride=2)
    # N x 192 x 35 x 35
    x = inception_v3.Mixed_5b(x)
    # N x 256 x 35 x 35
    x = inception_v3.Mixed_5c(x)
    # N x 288 x 35 x 35
    x = inception_v3.Mixed_5d(x)
    # N x 288 x 35 x 35
    x = inception_v3.Mixed_6a(x)
    # N x 768 x 17 x 17
    x = inception_v3.Mixed_6b(x)
    # N x 768 x 17 x 17
    x = inception_v3.Mixed_6c(x)
    # N x 768 x 17 x 17
    x = inception_v3.Mixed_6d(x)
    # N x 768 x 17 x 17
    xx = inception_v3.Mixed_6e(x)
    # N x 768 x 17 x 17 (first SSD source layer)
    if inception_v3.training and inception_v3.aux_logits:
        # the auxiliary classifier output is computed but not used by the SSD head
        aux = inception_v3.AuxLogits(xx)
    x = inception_v3.Mixed_7a(xx)
    # N x 1280 x 8 x 8
    x = inception_v3.Mixed_7b(x)
    # N x 2048 x 8 x 8
    x = inception_v3.Mixed_7c(x)
    # N x 2048 x 8 x 8 (second SSD source layer)
    return x, xx


def build_ssd(phase, size=300, num_classes=21):
    if phase != "test" and phase != "train":
        print("ERROR: Phase: " + phase + " not recognized")
        return
    if size != 300:
        print("ERROR: You specified size " + repr(size) + ". However, " +
              "currently only SSD300 (size=300) is supported!")
        return
    # no ImageNet weights are loaded here; to start from the pretrained model, pass
    # pretrained=True (older torchvision) or the weights= argument (newer releases)
    inception_v3 = torchvision.models.inception_v3()

    base_, extras_, head_ = multibox(inception_v3,
                                     add_extras(extras[str(size)], 2048),
                                     mbox[str(size)], num_classes)

    return SSD(phase, size, base_, extras_, head_, num_classes)
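
A quick smoke test of the build (my own sketch; it assumes the layers/ and data/ modules from the surrounding repo are importable, and that num_classes matches your dataset config):

# sketch: build the network and push one dummy SSD300-sized batch through it
net = build_ssd('train', 300, num_classes=2)
loc, conf, priors = net(torch.randn(1, 3, 300, 300))
print(loc.shape, conf.shape, priors.shape)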

 
