Deeplabv3+ in the tensorflow/models Library, Source Walkthrough (Part 3): Re-splitting the VOC2012 Training and Validation Sets
1. Introduction to the PASCAL VOC2012 dataset
The previous posts just ran the code mechanically, without digging into what it actually does. Here we go back to the starting point: the dataset. There are plenty of VOC2012 introductions online; this one is quite detailed: PASCAL-VOC2012数据集(vocdevkit、Vocbenchmark_release)详细介绍.
VOCdevkit
+VOC2012
  +Annotations (17125)          per-image information saved as {id}.xml
  +ImageSets
    +Action (33)                person action annotations
    +Layout (3)                 train.txt/val.txt/trainval.txt for person layout (body parts)
    +Main (63)                  files named like {class}_val.txt, for classification/detection
    +Segmentation (3)           train.txt etc., listing the semantic segmentation images
  +JPEGImages (17125)           original images, jpg format
  +SegmentationClass (2913)     segmentation masks, png format
  +SegmentationClassRaw (2913)  single-channel masks with the color map removed (generated by deeplab's remove_gt_colormap.py, not part of the original VOC release)
  +SegmentationObject (2913)    instance segmentation masks
The tree above is the folder structure of the dataset; the number after each folder is the count of files it contains. The full PASCAL VOC2012 release has 17125 images, but only 2913 of them are used for semantic segmentation. In each image's xml file under Annotations, a segmented value of 1 marks the image as one of those used for segmentation.
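This flag is easy to verify yourself; here is a minimal sketch with xml.etree.ElementTree, assuming the standard VOC xml layout and the same dataset path used throughout this post:

import os
import xml.etree.ElementTree as ET

ann_dir = '/home/hy/document/dataset/VOCdevkit/VOC2012/Annotations'
count = 0
for name in os.listdir(ann_dir):
    node = ET.parse(os.path.join(ann_dir, name)).getroot().find('segmented')
    # <segmented>1</segmented> marks an image that has a segmentation mask
    if node is not None and node.text == '1':
        count += 1
print(count)  # should match the 2913 masks in SegmentationClass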
For semantic segmentation, the files that matter are the three txt files under ImageSets/Segmentation, which list the train, val and trainval splits. The official deeplab setup uses 1464 images for train, 1449 for val, and 2913 for trainval. To re-split the training and validation sets, you simply rewrite these files; the next section shows how.
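Those official counts can be confirmed directly from the files themselves; a quick check, assuming the dataset sits at the path used throughout this post:

import os

seg_dir = '/home/hy/document/dataset/VOCdevkit/VOC2012/ImageSets/Segmentation'
# Count the non-empty lines in each split file.
for split in ('train', 'val', 'trainval'):
    with open(os.path.join(seg_dir, split + '.txt')) as f:
        print(split, sum(1 for line in f if line.strip()))
# Expected output: train 1464, val 1449, trainval 2913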
2. Randomly splitting the training and validation sets
Here I only give the random sampling approach; k-fold cross-validation can wait for another time.
from __future__ import absolute_import, print_function
import os
import pandas as pd

path = '/home/hy/document/dataset/VOCdevkit/VOC2012/SegmentationClass'
# Image ids, taken from the mask filenames (one mask per segmentation image).
lis = [i.split('.')[0] for i in os.listdir(path)]
df = pd.DataFrame(lis, columns=['name'])

# Randomly draw 1464 images for the training set.
temp1 = df.sample(n=1464)
train = temp1['name'].values.tolist()
print(len(train))
with open('/home/hy/document/dataset/VOCdevkit/VOC2012/ImageSets/Segmentation/train.txt', 'w') as f:
    for i in train[:-1]:
        f.write(i + '\n')
    f.write(train[-1])

# Draw the validation set from the remaining images so train and val stay disjoint.
temp2 = df.drop(temp1.index).sample(n=583)
val = temp2['name'].values.tolist()
print(len(val))
with open('/home/hy/document/dataset/VOCdevkit/VOC2012/ImageSets/Segmentation/val.txt', 'w') as f:  # save as val.txt
    for i in val[:-1]:
        f.write(i + '\n')
    f.write(val[-1])

print(len(set(train) & set(val)))  # should print 0
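After the two files are written, a read-back check confirms the sizes and that the splits really are disjoint (read_split is a hypothetical helper for this snippet):

import os

seg_dir = '/home/hy/document/dataset/VOCdevkit/VOC2012/ImageSets/Segmentation'

def read_split(path):
    # One image id per line; ignore blank lines.
    with open(path) as f:
        return [line.strip() for line in f if line.strip()]

train_ids = read_split(os.path.join(seg_dir, 'train.txt'))
val_ids = read_split(os.path.join(seg_dir, 'val.txt'))
print(len(train_ids), len(val_ids), len(set(train_ids) & set(val_ids)))
# Expected: 1464 583 0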
Because I hit an out-of-memory error during eval earlier, I made the val set smaller (583 instead of 1449). Next comes generating the tfrecord files. I adapted the official build_data.py and build_voc2012_data.py; only a few paths need to be changed:
import math
import os.path
import sys
import collections
import six
import tensorflow as tf

root_path = '/home/hy/document/dataset/VOCdevkit/VOC2012/ImageSets/Segmentation'
output_dir = '/home/hy/document/dataset/tfrecord'
image_folder = '/home/hy/document/dataset/VOCdevkit/VOC2012/JPEGImages'
semantic_segmentation_folder = '/home/hy/document/dataset/VOCdevkit/VOC2012/SegmentationClassRaw'

_NUM_SHARDS = 4

FLAGS = tf.app.flags.FLAGS

tf.app.flags.DEFINE_enum('image_format', 'png', ['jpg', 'jpeg', 'png'],
                         'Image format.')
tf.app.flags.DEFINE_enum('label_format', 'png', ['png'],
                         'Segmentation label format.')

# A map from image format to expected data format.
_IMAGE_FORMAT_MAP = {
    'jpg': 'jpeg',
    'jpeg': 'jpeg',
    'png': 'png',
}


class ImageReader(object):
  """Helper class that provides TensorFlow image coding utilities."""

  def __init__(self, image_format='jpeg', channels=3):
    with tf.Graph().as_default():
      self._decode_data = tf.placeholder(dtype=tf.string)
      self._image_format = image_format
      self._session = tf.Session()
      if self._image_format in ('jpeg', 'jpg'):
        self._decode = tf.image.decode_jpeg(self._decode_data,
                                            channels=channels)
      elif self._image_format == 'png':
        self._decode = tf.image.decode_png(self._decode_data,
                                           channels=channels)

  def read_image_dims(self, image_data):
    image = self.decode_image(image_data)
    return image.shape[:2]

  def decode_image(self, image_data):
    image = self._session.run(self._decode,
                              feed_dict={self._decode_data: image_data})
    if len(image.shape) != 3 or image.shape[2] not in (1, 3):
      raise ValueError('The image channels not supported.')
    return image


def _int64_list_feature(values):
  if not isinstance(values, collections.Iterable):
    values = [values]
  return tf.train.Feature(int64_list=tf.train.Int64List(value=values))


def _bytes_list_feature(values):
  def norm2bytes(value):
    return value.encode() if isinstance(value, str) and six.PY3 else value
  return tf.train.Feature(
      bytes_list=tf.train.BytesList(value=[norm2bytes(values)]))


def image_seg_to_tfexample(image_data, filename, height, width, seg_data):
  return tf.train.Example(features=tf.train.Features(feature={
      'image/encoded': _bytes_list_feature(image_data),
      'image/filename': _bytes_list_feature(filename),
      'image/format': _bytes_list_feature(
          _IMAGE_FORMAT_MAP[FLAGS.image_format]),
      'image/height': _int64_list_feature(height),
      'image/width': _int64_list_feature(width),
      'image/channels': _int64_list_feature(3),
      'image/segmentation/class/encoded': (
          _bytes_list_feature(seg_data)),
      'image/segmentation/class/format': _bytes_list_feature(
          FLAGS.label_format),
  }))


def _convert_dataset(dataset_split):
  """Converts the specified dataset split to TFRecord format.

  Args:
    dataset_split: The dataset split (e.g., train, test).

  Raises:
    RuntimeError: If loaded image and label have different shape.
  """
  dataset = os.path.basename(dataset_split)[:-4]  # strip the '.txt' suffix
  sys.stdout.write('Processing ' + dataset)
  filenames = [x.strip() for x in open(dataset_split, 'r')]
  num_images = len(filenames)
  num_per_shard = int(math.ceil(num_images / float(_NUM_SHARDS)))

  image_reader = ImageReader('jpeg', channels=3)
  label_reader = ImageReader('png', channels=1)

  if not tf.gfile.Exists(output_dir):
    tf.gfile.MakeDirs(output_dir)

  for shard_id in range(_NUM_SHARDS):
    output_filename = os.path.join(
        output_dir,
        '%s-%05d-of-%05d.tfrecord' % (dataset, shard_id, _NUM_SHARDS))
    with tf.python_io.TFRecordWriter(output_filename) as tfrecord_writer:
      start_idx = shard_id * num_per_shard
      end_idx = min((shard_id + 1) * num_per_shard, num_images)
      for i in range(start_idx, end_idx):
        sys.stdout.write('\r>> Converting image %d/%d shard %d' % (
            i + 1, len(filenames), shard_id))
        sys.stdout.flush()
        # Read the image.
        image_filename = os.path.join(image_folder, filenames[i] + '.jpg')
        image_data = tf.gfile.FastGFile(image_filename, 'rb').read()
        height, width = image_reader.read_image_dims(image_data)
        # Read the semantic segmentation annotation.
        seg_filename = os.path.join(semantic_segmentation_folder,
                                    filenames[i] + '.' + FLAGS.label_format)
        seg_data = tf.gfile.FastGFile(seg_filename, 'rb').read()
        seg_height, seg_width = label_reader.read_image_dims(seg_data)
        if height != seg_height or width != seg_width:
          raise RuntimeError('Shape mismatched between image and label.')
        # Convert to tf example.
        example = image_seg_to_tfexample(
            image_data, filenames[i], height, width, seg_data)
        tfrecord_writer.write(example.SerializeToString())
    sys.stdout.write('\n')
    sys.stdout.flush()


def main(unused_argv):
  # Convert every split file (train.txt, val.txt, trainval.txt) found in root_path.
  dataset_splits = tf.gfile.Glob(os.path.join(root_path, '*.txt'))
  for dataset_split in dataset_splits:
    _convert_dataset(dataset_split)


if __name__ == '__main__':
  tf.app.run()
Conversion results: (the original post showed a console screenshot of the conversion log here.)
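Without the screenshot, the output can still be sanity-checked by counting the records in each shard; a quick sketch using the TF 1.x tf_record_iterator API, same output path as above:

import glob
import os
import tensorflow as tf

output_dir = '/home/hy/document/dataset/tfrecord'
for path in sorted(glob.glob(os.path.join(output_dir, '*.tfrecord'))):
    n = sum(1 for _ in tf.python_io.tf_record_iterator(path))
    print(os.path.basename(path), n)
# The per-split totals should equal the line counts of the matching txt files.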
Then replace the corresponding files in the official deeplab setup with the new tfrecord files and the Segmentation split files, and re-run train.py and eval.py to get the evaluation results: (the original post showed a screenshot of the eval output here.)
Pitfalls
For a long time I couldn't get the mIoU result from eval. It turned out the deeplab version in the tensorflow/models checkout I had downloaded differed from the latest code, which presumably simply doesn't print the mIoU value. After copying eval.py from the latest version of deeplab, the value was printed.
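If a given checkout still doesn't print it, the number can also be recovered from the event files that eval.py writes; a sketch using the TF 1.x summary iterator (eval_logdir is a hypothetical path here; point it at wherever eval.py writes its summaries, and print all tags first if the miou tag name differs in your version):

import glob
import os
import tensorflow as tf

eval_logdir = '/home/hy/document/eval_logdir'  # hypothetical: your eval.py log directory
for event_file in sorted(glob.glob(os.path.join(eval_logdir, 'events.*'))):
    for event in tf.train.summary_iterator(event_file):
        for value in event.summary.value:
            # Print any scalar whose tag mentions miou.
            if 'miou' in value.tag.lower():
                print(event.step, value.tag, value.simple_value)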