C++ 实现语义分割 deeplab_v3

程序员文章站 2024-03-15 10:35:17

...

在

https://github.com/tensorflow/models/blob/master/research/deeplab/g3doc/model_zoo.md

中有许多个模型。

先下一个试试效果如何：
http://download.tensorflow.org/models/deeplabv3_mnv2_dm05_pascal_trainaug_2018_10_01.tar.gz

由于我的 TensorFlow 是 1.0 版，不能直接运行这个模型，所以先把模型编辑一下，去掉

“Conv2D”的属性“dilations”和“data_format”

以及“Cast”的属性“Truncate”

“ArgMax”的属性“output_type”，然后就可以运行了。

模型中的空洞卷积是用 SpaceToBatchND 和 BatchToSpaceND 实现的，所以先要用 C++ 实现这两个函数。

1. SpaceToBatchND 函数：

// SpaceToBatchND for a 4-D tensor whose shape is held in value[0..3] in
// N,C,H,W order (batch, channel, height, width).
//
// Steps (the NumPy reference this was ported from is kept in the comments
// below; the reference is NHWC, this port is NCHW):
//   1. zero-pad H and W: pad1 on the left/top, pad2 on the right/bottom;
//   2. view the padded tensor as (B, C, H/block, block, W/block, block);
//   3. permute to (block, block, B, C, H/block, W/block);
//   4. view the result as (B*block*block, C, H/block, W/block).
//
// Parameters:
//   in          - input tensor; its shape and data are only read here.
//   block_shape - block size (the dilation rate), used for both H and W.
//   pad1        - padding added on the left and on the top.
//   pad2        - padding added on the right and on the bottom.
//
// Returns the tensor handed back by permute(), after view() has been applied
// to it with the final 4-D shape.
//
// NOTE(review): the padded height/width are assumed to be multiples of
// block_shape; the integer divisions below truncate otherwise - confirm that
// callers guarantee this.
// NOTE(review): `out` (the padded intermediate) is not released here. Whether
// it can be deleted after permute() depends on permute()/the tensor
// destructor, which are defined elsewhere - confirm and free it if permute()
// returns an independent copy.
张量 * SpaceToBatchND(张量 &in,int block_shape,int pad1,int pad2)
{

	// Fixed: the original read "new张量(...)" with no space after `new`.
	张量* out= new 张量(in.num);
	out->value=new int[out->num];
	// Reference (NHWC): input [b, h, w, c] becomes [b, new_h, new_w, c] after padding
	//x = np.pad(out_in, ((0,0), (4,7),(4,7), (0,0)), 'constant') // from shape (1, 2, 2, 1) to (1, 6, 6, 1)
	//x_shape=x.shape
	//b     = x_shape[0]
	//new_h = x_shape[1]
	//new_w = x_shape[2]
	//c     = x_shape[3]

	// This port uses N,C,H,W.
	// Start from the input shape, then enlarge H and W by the padding.
	for(int i=0;i<in.num;i++)
	{
		out->value[i]=in.value[i];
	}
	int in_w=in.value[3];
	int new_w=in_w+pad1+pad2;
	int new_h=in.value[2]+pad1+pad2;
	out->value[3]=new_w;
	out->value[2]=new_h;
	out->data=new float[out->size()];

	// Zero the padded buffer, then copy every input row into its interior.
	张量清零(*out);
	float *s=in.data;
	// Start of the current (n, c) plane in the padded output. Rows are
	// addressed by offset from it so that no pointer is ever advanced beyond
	// one-past-the-end of the buffer (the original running pointer was).
	float *plane=out->data;
	const int plane_size=new_h*new_w;
	for(int ni=0;ni<in.value[0];++ni)//N
	{
		for(int ci=0;ci<in.value[1];++ci)//C
		{
			// Distinct index name: the original reused `j` here and shadowed
			// the channel loop's index.
			for(int hi=0;hi<in.value[2];++hi)//H
			{
				// Skip pad1 rows on top and pad1 columns on the left.
				cblas_scopy(in_w, s, 1, plane+(pad1+hi)*new_w+pad1, 1);
				s+=in_w;
			}
			plane+=plane_size;
		}
	}

	int b=out->value[0];
	int c=out->value[1];
	int dilation=block_shape;
	//h_dilation=int(new_h / dilation)
	int h_dilation=new_h / dilation;
	//w_dilation=int(new_w / dilation)
	int w_dilation=new_w / dilation;

	// Reference (NHWC): reshape to [b, new_h / dilation_h, dilation_h, new_w / dilation_w, dilation_w, c]
	//y = np.reshape(x, (b, h_dilation, dilation, w_dilation, dilation, c))
	// Here (NCHW): B, C, h_dilation, dilation, w_dilation, dilation
	view(*out,b,c,h_dilation, dilation, w_dilation, dilation);

	// Reference (NHWC): transpose to [dilation_h, dilation_w, b, new_h / dilation_h, new_w / dilation_w, c]
	//z = np.transpose(y, (2, 4, 0, 1, 3, 5))

	// Here (NCHW):
	//   from: B, C, h_dilation, dilation, w_dilation, dilation
	//   to:   dilation_h, dilation_w, B, C, new_h / dilation_h, new_w / dilation_w
	张量 * z= permute(*out,3, 5, 0, 1, 2, 4);

	// Reference (NHWC): reshape to [b x dilation_h x dilation_w, new_h / dilation_h, new_w / dilation_w, c]
	//r = np.reshape(z, (b * dilation * dilation, h_dilation, w_dilation, c))
	// Here (NCHW): B * dilation * dilation, C, h_dilation, w_dilation
	view(*z,b * dilation * dilation, c, h_dilation, w_dilation);

	return z;
}

2. BatchToSpaceND 函数：

// BatchToSpaceND for a 4-D tensor whose shape is held in value[0..3] in
// N,C,H,W order (batch, channel, height, width).
//
// Steps (NumPy reference, NHWC, that this was ported from):
//   r = np.reshape(out_in, (dilation, dilation, b, h, w, c))
//   z = np.transpose(r, (2, 3, 0, 4, 1, 5))
//   y = np.reshape(z, (b, new_h, new_w, c))
//   x = y[:, 0:new_h-3, 0:new_w-3, :]
//
// Parameters:
//   in          - input tensor; view() is applied to it directly, so the
//                 caller's tensor is presumably left with the 6-D shape
//                 (NOTE(review): confirm callers do not rely on the old shape).
//   block_shape - block size (the dilation rate), used for both H and W.
//   crop1/crop2 - forwarded unchanged to 裁剪(); per the original comment,
//                 crop1 trims the left/top and crop2 the right/bottom.
//
// Returns whatever 裁剪() returns for the re-assembled tensor.
张量 * BatchToSpaceND(张量 &in,int block_shape,int crop1,int crop2)
{
	// Capture the incoming shape first: view(in, ...) below is applied to
	// `in` itself.
	int block=block_shape;
	int channels=in.value[1];
	int rows=in.value[2];
	int cols=in.value[3];
	// The block*block sub-batches are folded back into one batch entry.
	int batch=in.value[0]/(block * block);

	// Spatial size after the blocks are interleaved back into H and W.
	int full_rows=rows*block;
	int full_cols=cols*block;

	// Split the batch axis: (block_h, block_w, B, C, H, W).
	view(in,block, block, batch, channels, rows, cols);

	// Reorder to (B, C, H, block_h, W, block_w) so each block index sits
	// right after the spatial axis it expands.
	张量 * merged = permute(in,2, 3, 4, 0, 5, 1);

	// Collapse to (B, C, H*block, W*block).
	view(*merged,batch, channels, full_rows, full_cols);

	// Trim the borders.
	张量 * cropped=裁剪(merged,crop1,crop2);
	return cropped;
}

就差不多了。

看一下效果如何：

C++ 实现语义分割 deeplab_v3 输入图5.jpg