Pure C++ Super-Resolution Reconstruction with LapSRN -- Adaptation -- (4) Bias via Multiplication
matconvnet handles the bias with an sgemm multiplication. Is multiplication really faster than addition?

SGEMM performs the matrix operation

C := alpha * A * B + beta * C,

which for the bias step reduces to

C := A * B + C,

where:

A is a matrix block whose entries are all 1
B is a single value, or 64 values (one per channel)
C is the image matrix (1 channel or 64 channels)
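To see why this amounts to a plain bias addition: multiplying an all-ones column vector (one entry per output pixel) by a 1 x N row of biases yields a matrix whose k-th column is filled entirely with bias k, so adding it to C adds each bias to every pixel of its channel. Below is a minimal stand-alone sketch of that equivalence with tiny made-up sizes; it is only an illustration, not the layer code:

    #include <cstdio>

    int main() {
        const int pixels = 4, channels = 2;        // hypothetical sizes
        float bias[channels] = { 0.5f, -1.0f };    // B: one value per channel
        float ones[pixels];                        // A: all-ones column
        float C[pixels * channels] = { 0 };        // C: image, pixel index fastest (column-major)
        for (int i = 0; i < pixels; i++) ones[i] = 1.0f;

        // C := A * B + C, which is exactly what the SGEMM call computes
        for (int ch = 0; ch < channels; ch++)
            for (int p = 0; p < pixels; p++)
                C[ch * pixels + p] += ones[p] * bias[ch];   // ones[p] is always 1

        // Every pixel of channel 0 is now 0.5 and of channel 1 is -1.0,
        // the same result as adding bias[ch] to each pixel directly.
        for (int ch = 0; ch < channels; ch++)
            for (int p = 0; p < pixels; p++)
                printf("%g ", C[ch * pixels + p]);
        printf("\n");
        return 0;
    }

The likely point of the SGEMM form is not that multiplication beats addition, but that one optimized BLAS call covers all pixels and channels at once.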
The version used in vl_nnconv was given earlier; below is the one from vl_nnconvt:
#include <cstdio>
#include <new>

void vl_impl_nnbias_forward_blas_CPU_float(//vl::Context& context,
    卷积层 * output, double outputMult,
    double dataMult,
    层数据 * filters_biases, double biasesMult)
{
    //int numOutputPixels = output.getHeight() * output.getWidth() ;
    int numOutputPixels = output->height * output->width ;

    // new[] takes an element count, not a byte count
    float * allOnesMemory = new (std::nothrow) float[numOutputPixels] ;
    if (allOnesMemory == NULL) {
        printf("Memory allocation failed!\n") ;
        return ;
    }

    // Fill A with ones so the multiplication broadcasts each bias
    // over every output pixel of its channel.
    for (int i = 0 ; i < numOutputPixels ; i++) {
        allOnesMemory[i] = 1.0f ;
    }

    float * biases = filters_biases->偏移_数据 ;
    double alpha = outputMult ;
    if (biases) {
        gemm(//context,
             'n', 'n',
             numOutputPixels, filters_biases->偏移长度, 1, //biases.getNumElements()
             biasesMult,
             allOnesMemory, numOutputPixels,
             (float*)biases, 1,
             alpha,
             (float*)output->data /*.getMemory() + outputOffset*/, numOutputPixels) ;
        alpha = 1 ;
    }

    delete [] allOnesMemory ;
    allOnesMemory = NULL ;
}
void vl_nnbias_forward(
    卷积层 * output, double outputMult,
    /*vl::Tensor data,*/ double dataMult,
    层数据 * filters_biases, double biasesMult)
{
    vl_impl_nnbias_forward_blas_CPU_float(
        output, outputMult, /*data,*/ dataMult, filters_biases, biasesMult) ;
}
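The gemm call used above is a wrapper whose definition is not shown in this part. Assuming it follows the standard BLAS argument order and forwards to a column-major CBLAS routine such as OpenBLAS's cblas_sgemm, it might look roughly like the following sketch (an assumption for illustration, not the actual project code):

    #include <cblas.h>   // OpenBLAS or any other CBLAS implementation

    // Hypothetical wrapper matching the call signature used above:
    // C := alpha * op(A) * op(B) + beta * C, single precision, column-major.
    void gemm(char transA, char transB,
              int m, int n, int k,
              double alpha,
              const float * A, int lda,
              const float * B, int ldb,
              double beta,
              float * C, int ldc)
    {
        cblas_sgemm(CblasColMajor,
                    transA == 'n' ? CblasNoTrans : CblasTrans,
                    transB == 'n' ? CblasNoTrans : CblasTrans,
                    m, n, k,
                    (float)alpha, A, lda,
                    B, ldb,
                    (float)beta, C, ldc);
    }

With the arguments passed in the bias code, such a call computes output := biasesMult * (ones * biases) + outputMult * output, so each channel's bias is broadcast to all numOutputPixels pixels in a single BLAS call.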