欢迎您访问程序员文章站本站旨在为大家提供分享程序员计算机编程知识!
您现在的位置是: 首页

FCNs学习笔记01-重新实现作者的结果

程序员文章站 2022-06-12 16:37:38
...

学习思路:重现作者结果,实现自己的任务。

环境:win10 + Caffe的Python接口 Anaconda3+Python3.5 CUDA8.0 Cudnn5.1

caffe编译:官方过程,需要提前安装git

>>git clone https://github.com/BVLC/caffe.git

>>cd caffe

>>git checkout windows

'''Edit any of the options inside build_win.cmd tosuit your needs'''

在scripts文件夹中的build_win.cmd右键notepad打开,修改需要的参数

>>scripts\build_win.cmd

重现作者过程

1。下载FCNs源代码

>>git clone https://github.com/shelhamer/fcn.berkeleyvision.org

得到fcn.berkeleyvision.org文件夹,文件夹内有很多模型源代码。我用的是fcn8s的,数据集用sbdd训练voc验证,对应文件夹为voc-fcn8s。

2。推导infer.py,红色的是改过的,用到的fcn8s-heavy-pascal.caffemodel文件是作者训练好的模型,下载地址在voc-fcn8文件下caffemodel-url文件内,地址为http://dl.caffe.berkeleyvision.org/fcn8s-heavy-pascal.caffemodel

import numpy as np
from PIL import Image
#添加caffe编译的目录
import sys
sys.path.append('../../caffe/python')
import caffe
import vis
# the demo image is "2007_000129" from PASCAL VOC
# load image, switch to BGR, subtract mean, and make dims C x H x W for Caffe
im = Image.open('demo/image.jpg')
in_ = np.array(im, dtype=np.float32)
in_ = in_[:,:,::-1]
in_ -= np.array((104.00698793,116.66876762,122.67891434))
in_ = in_.transpose((2,0,1))
# load net
net = caffe.Net('voc-fcn8s/deploy.prototxt', 'voc-fcn8s/fcn8s-heavy-pascal.caffemodel', caffe.TEST)
# shape for input (data blob is N x C x H x W), set data
net.blobs['data'].reshape(1, *in_.shape)
net.blobs['data'].data[...] = in_
# run net and take argmax for prediction
net.forward()
out = net.blobs['score'].data[0].argmax(axis=0)
#保存
# visualize segmentation in PASCAL VOC colors
voc_palette = vis.make_palette(21)
out_im = Image.fromarray(vis.color_seg(out, voc_palette))
out_im.save('demo/output.png')
masked_im = Image.fromarray(vis.vis_seg(im, out, voc_palette))
masked_im.save('demo/visualization.jpg')

3。训练solve.py,红色的是改过的

1)先下载sbdd和voc2012,其中sbdd用于训练,voc用于测试

sbdd是从别人那里下的,地址找不到了,需要dataset文件,没有的话可以用voc作训练集。readme有个网站,如下:

http://www.eecs.berkeley.edu/Research/Projects/CS/vision/grouping/resources.html

voc2012

https://pjreddie.com/projects/pascal-voc-dataset-mirror/

下载Train/ValidationData,解压

2)下载vgg16预训练模型

3)把voc_layers.py、surgery.py、score.py复制到voc-fcn8s文件内,训练需要加载。因为我的是python3,所以需要把print改为print()

FCNs学习笔记01-重新实现作者的结果

进入voc-fcn8s文件内,有如下几个文件(第一个是训练之后出现的)

FCNs学习笔记01-重新实现作者的结果

4)修改net.py,使用net.py生成train.prototxt和val.prototxt

import sys
sys.path.append('D:\\caffeForWINDOWS\\caffe\\python')
import caffe
from caffe import layers as L, params as P
from caffe.coord_map import crop
def conv_relu(bottom, nout, ks=3, stride=1, pad=1):
    conv = L.Convolution(bottom, kernel_size=ks, stride=stride,
        num_output=nout, pad=pad,
        param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)])
    return conv, L.ReLU(conv, in_place=True)
def max_pool(bottom, ks=2, stride=2):
    return L.Pooling(bottom, pool=P.Pooling.MAX, kernel_size=ks, stride=stride)
def fcn(split):
    n = caffe.NetSpec()
    pydata_params = dict(split=split, mean=(104.00699, 116.66877, 122.67892),
            seed=1337)
    if split == 'train':
        pydata_params['sbdd_dir'] = '../../sbdd/dataset'#下载的用于训练的数据
        pylayer = 'SBDDSegDataLayer'
    else:
        pydata_params['voc_dir'] = '../../VOC2012'#VOC2012数据
        pylayer = 'VOCSegDataLayer'
    n.data, n.label = L.Python(module='voc_layers', layer=pylayer,
            ntop=2, param_str=str(pydata_params))
    # the base net
    n.conv1_1, n.relu1_1 = conv_relu(n.data, 64, pad=100)
    n.conv1_2, n.relu1_2 = conv_relu(n.relu1_1, 64)
    n.pool1 = max_pool(n.relu1_2)
    n.conv2_1, n.relu2_1 = conv_relu(n.pool1, 128)
    n.conv2_2, n.relu2_2 = conv_relu(n.relu2_1, 128)
    n.pool2 = max_pool(n.relu2_2)
     n.conv3_1, n.relu3_1 = conv_relu(n.pool2, 256)
    n.conv3_2, n.relu3_2 = conv_relu(n.relu3_1, 256)
    n.conv3_3, n.relu3_3 = conv_relu(n.relu3_2, 256)
    n.pool3 = max_pool(n.relu3_3)
     n.conv4_1, n.relu4_1 = conv_relu(n.pool3, 512)
    n.conv4_2, n.relu4_2 = conv_relu(n.relu4_1, 512)
    n.conv4_3, n.relu4_3 = conv_relu(n.relu4_2, 512)
    n.pool4 = max_pool(n.relu4_3)
     n.conv5_1, n.relu5_1 = conv_relu(n.pool4, 512)
    n.conv5_2, n.relu5_2 = conv_relu(n.relu5_1, 512)
    n.conv5_3, n.relu5_3 = conv_relu(n.relu5_2, 512)
    n.pool5 = max_pool(n.relu5_3)
    # fully conv
    n.fc6, n.relu6 = conv_relu(n.pool5, 4096, ks=7, pad=0)
    n.drop6 = L.Dropout(n.relu6, dropout_ratio=0.5, in_place=True)
    n.fc7, n.relu7 = conv_relu(n.drop6, 4096, ks=1, pad=0)
    n.drop7 = L.Dropout(n.relu7, dropout_ratio=0.5, in_place=True)
    n.score_fr = L.Convolution(n.drop7, num_output=21, kernel_size=1, pad=0,
        param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)])
    n.upscore2 = L.Deconvolution(n.score_fr,
        convolution_param=dict(num_output=21, kernel_size=4, stride=2,
            bias_term=False),
        param=[dict(lr_mult=0)])
    n.score_pool4 = L.Convolution(n.pool4, num_output=21, kernel_size=1, pad=0,
        param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)])
    n.score_pool4c = crop(n.score_pool4, n.upscore2)
    n.fuse_pool4 = L.Eltwise(n.upscore2, n.score_pool4c,
            operation=P.Eltwise.SUM)
    n.upscore_pool4 = L.Deconvolution(n.fuse_pool4,
        convolution_param=dict(num_output=21, kernel_size=4, stride=2,
            bias_term=False),
        param=[dict(lr_mult=0)])
    n.score_pool3 = L.Convolution(n.pool3, num_output=21, kernel_size=1, pad=0,
        param=[dict(lr_mult=1, decay_mult=1), dict(lr_mult=2, decay_mult=0)])
    n.score_pool3c = crop(n.score_pool3, n.upscore_pool4)
    n.fuse_pool3 = L.Eltwise(n.upscore_pool4, n.score_pool3c,
            operation=P.Eltwise.SUM)
    n.upscore8 = L.Deconvolution(n.fuse_pool3,
        convolution_param=dict(num_output=21, kernel_size=16, stride=8,
            bias_term=False),
        param=[dict(lr_mult=0)])
    n.score = crop(n.upscore8, n.data)
    n.loss = L.SoftmaxWithLoss(n.score, n.label,
            loss_param=dict(normalize=False, ignore_label=255))
    return n.to_proto()
def make_net():
    with open('train.prototxt', 'w') as f:
        f.write(str(fcn('train')))
    with open('val.prototxt', 'w') as f:
        f.write(str(fcn('seg11valid')))
if __name__ == '__main__':
    make_net()

这会生成train.prototxt和val.prototxt,需要打开这两个文件,把fc6和fc7改成fc6_new和fc7_new(名字随意起的,和原来的不一样就行),假如红色的地方没有改,直接在这两个文件里修改param_str就可以了。

5)修改solve.py

import sys
sys.path.append('../../caffe/python')
import caffe
import surgery, score
import numpy as np
import os
import sys
try:
    import setproctitle
    setproctitle.setproctitle(os.path.basename(os.getcwd()))
except:
    pass
weights = '../../vgg16-fcn.caffemodel'#你的预训练权重地址
# init
#caffe.set_device(int(sys.argv[1]))#不知道为什么要注释
#caffe.set_mode_gpu()#显卡不行,只好使用cpu了,或者在做数据集的时候把图片调小一点
solver = caffe.SGDSolver('solver.prototxt')
solver.net.copy_from(weights)
# surgeries
interp_layers = [k for k in solver.net.params.keys() if 'up' in k]
surgery.interp(solver.net, interp_layers)
# scoring
val = np.loadtxt('../../segvalid111.txt', dtype=str)#这个文件是VOC2012/ImageSets/Segmentation文件下的val.txt文件,重命名就行,好像是用来记录测试用到的分割图的名称。
for _ in range(1):#少循环几次试试能不能成
    solver.step(5)
    score.seg_tests(solver, False, val, layer='score')#测试

6)修改solver.prototxt

train_net、test_net地址对应上train.prototxt和val.prototxt,snapshot_prefix对应训练结果保存的地址

train_net: "train.prototxt"
test_net: "val.prototxt"
test_iter: 736
# make test net, but don't invoke it from the solver itself
test_interval: 999999999
display: 1
average_loss: 20
lr_policy: "fixed"
# lr for unnormalized softmax
base_lr: 1e-14
# high momentum
momentum: 0.99
# no gradient accumulation
iter_size: 1
max_iter: 100000
weight_decay: 0.0005
snapshot: 4000
snapshot_prefix: "../../snapshot/"
test_initialization: false

7)score.py只把print改为print()

8)surgery.py不做修改

9)voc_layers.py

import caffe#训练时已经调用了caffe,不用再改
import numpy as np
from PIL import Image
import random
class VOCSegDataLayer(caffe.Layer):
    """
    Load (input image, label image) pairs from PASCAL VOC
    one-at-a-time while reshaping the net to preserve dimensions.
    Use this to feed data to a fully convolutional network.
    """
    def setup(self, bottom, top):
        """
        Setup data layer according to parameters:
        - voc_dir: path to PASCAL VOC year dir
        - split: train / val / test
        - mean: tuple of mean values to subtract
        - randomize: load in random order (default: True)
        - seed: seed for randomization (default: None / current time)
        for PASCAL VOC semantic segmentation.
        example
        params = dict(voc_dir="/path/to/PASCAL/VOC2011",
            mean=(104.00698793, 116.66876762, 122.67891434),
            split="val")
        """
        # config
        params = eval(self.param_str)
        self.voc_dir = params['voc_dir']
        self.split = params['split']
        self.mean = np.array(params['mean'])
        self.random = params.get('randomize', True)
        self.seed = params.get('seed', None)
        # two tops: data and label
        if len(top) != 2:
            raise Exception("Need to define two tops: data and label.")
        # data layers have no bottoms
        if len(bottom) != 0:
            raise Exception("Do not define a bottom.")
        # load indices for images and labels
        split_f  = '{}/ImageSets/Segmentation/{}.txt'.format(self.voc_dir,
                self.split)#这个要求有./VOC2012/ImageSets/Segmentation/seg11valid.txt文件,同样把val.txt重命名
        self.indices = open(split_f, 'r').read().splitlines()
        self.idx = 0
        # make eval deterministic
        if 'train' not in self.split:
            self.random = False
        # randomization: seed and pick
        if self.random:
            random.seed(self.seed)
            self.idx = random.randint(0, len(self.indices)-1)
    def reshape(self, bottom, top):
        # load image + label image pair
        self.data = self.load_image(self.indices[self.idx])
        self.label = self.load_label(self.indices[self.idx])
        # reshape tops to fit (leading 1 is for batch dimension)
        top[0].reshape(1, *self.data.shape)
        top[1].reshape(1, *self.label.shape)
    def forward(self, bottom, top):
        # assign output
        top[0].data[...] = self.data
        top[1].data[...] = self.label
        # pick next input
        if self.random:
            self.idx = random.randint(0, len(self.indices)-1)
        else:
            self.idx += 1
            if self.idx == len(self.indices):
                self.idx = 0
    def backward(self, top, propagate_down, bottom):
        pass
    def load_image(self, idx):
        """
        Load input image and preprocess for Caffe:
        - cast to float
        - switch channels RGB -> BGR
        - subtract mean
        - transpose to channel x height x width order
        """
        im = Image.open('{}/JPEGImages/{}.jpg'.format(self.voc_dir, idx))
        in_ = np.array(im, dtype=np.float32)
        in_ = in_[:,:,::-1]
        in_ -= self.mean
        in_ = in_.transpose((2,0,1))
        return in_
    def load_label(self, idx):
        """
        Load label image as 1 x height x width integer array of label indices.
        The leading singleton dimension is required by the loss.
        """
        im = Image.open('{}/SegmentationClass/{}.png'.format(self.voc_dir, idx))
        label = np.array(im, dtype=np.uint8)
        label = label[np.newaxis, ...]
        return label
class SBDDSegDataLayer(caffe.Layer):
    """
    Load (input image, label image) pairs from the SBDD extended labeling
    of PASCAL VOC for semantic segmentation
    one-at-a-time while reshaping the net to preserve dimensions.

    Use this to feed data to a fully convolutional network.
    """
    def setup(self, bottom, top):
        """
        Setup data layer according to parameters:
        - sbdd_dir: path to SBDD `dataset` dir
        - split: train / seg11valid
        - mean: tuple of mean values to subtract
        - randomize: load in random order (default: True)
        - seed: seed for randomization (default: None / current time)
        for SBDD semantic segmentation.
        N.B.segv11alid is the set of segval11 that does not intersect with SBDD.
        Find it here: https://gist.github.com/shelhamer/edb330760338892d511e.
        example
        params = dict(sbdd_dir="/path/to/SBDD/dataset",
            mean=(104.00698793, 116.66876762, 122.67891434),
            split="valid")
        """
        # config
        params = eval(self.param_str)
        self.sbdd_dir = params['sbdd_dir']
        self.split = params['split']
        self.mean = np.array(params['mean'])
        self.random = params.get('randomize', True)
        self.seed = params.get('seed', None)
        # two tops: data and label
        if len(top) != 2:
            raise Exception("Need to define two tops: data and label.")
        # data layers have no bottoms
        if len(bottom) != 0:
            raise Exception("Do not define a bottom.")
        # load indices for images and labels
        split_f  = '{}/{}.txt'.format(self.sbdd_dir,
                self.split)
        self.indices = open(split_f, 'r').read().splitlines()
        self.idx = 0
        # make eval deterministic
        if 'train' not in self.split:
            self.random = False
        # randomization: seed and pick
        if self.random:
            random.seed(self.seed)
            self.idx = random.randint(0, len(self.indices)-1)
    def reshape(self, bottom, top):
        # load image + label image pair
        self.data = self.load_image(self.indices[self.idx])
        self.label = self.load_label(self.indices[self.idx])
        # reshape tops to fit (leading 1 is for batch dimension)
        top[0].reshape(1, *self.data.shape)
        top[1].reshape(1, *self.label.shape)
    def forward(self, bottom, top):
        # assign output
        top[0].data[...] = self.data
        top[1].data[...] = self.label

        # pick next input
        if self.random:
            self.idx = random.randint(0, len(self.indices)-1)
        else:
            self.idx += 1
            if self.idx == len(self.indices):
                self.idx = 
    def backward(self, top, propagate_down, bottom):
        pass
    def load_image(self, idx):
        """
        Load input image and preprocess for Caffe:
        - cast to float
        - switch channels RGB -> BGR
        - subtract mean
        - transpose to channel x height x width order
        """
        im = Image.open('{}/img/{}.jpg'.format(self.sbdd_dir, idx))
        in_ = np.array(im, dtype=np.float32)
        in_ = in_[:,:,::-1]
        in_ -= self.mean
        in_ = in_.transpose((2,0,1))
        return in_
    def load_label(self, idx):
        """
        Load label image as 1 x height x width integer array of label indices.
        The leading singleton dimension is required by the loss.
        """
        import scipy.io
        mat = scipy.io.loadmat('{}/cls/{}.mat'.format(self.sbdd_dir, idx))
        label = mat['GTcls'][0]['Segmentation'][0].astype(np.uint8)
        label = label[np.newaxis, ...]
        return label
    #def load_label(self, idx):
    #    im = Image.open('{}/cls/{}.png'.format(self.sbdd_dir, idx))
    #    label = np.array(im, dtype=np.uint8)
    #    label = label[np.newaxis, ...]
    #    return label
    #这样可以直接使用分割图片而不用转换为mat图,不过目前还没整明白分割图的制作,没有测试
 

目前卡在制作自己数据这里,老是说label的number和prediction的number不匹配。

还有就是labelme制作的标签使用skimage里的函数上色永远都是按顺序上色。等做完了再更。。。

能力有限。。。

相关标签: 重现作者结果