欢迎您访问程序员文章站本站旨在为大家提供分享程序员计算机编程知识!
您现在的位置是: 首页

CUDA编程:SSD的priorbox层

程序员文章站 2024-03-17 11:29:04
...

SSD中priorbox层的CUDA实现:

// Stores one box as normalized (xmin, ymin, xmax, ymax) starting at out[idx]
// and returns the index just past the four values written.
static __device__ __forceinline__ int PriKernelStoreBox(
    float *out, int idx, const float center_x, const float center_y,
    const float box_width, const float box_height,
    const int img_width, const int img_height)
{
  const float half_w = box_width * 0.5f;
  const float half_h = box_height * 0.5f;
  out[idx++] = (center_x - half_w) / img_width;    // xmin
  out[idx++] = (center_y - half_h) / img_height;   // ymin
  out[idx++] = (center_x + half_w) / img_width;    // xmax
  out[idx++] = (center_y + half_h) / img_height;   // ymax
  return idx;
}

// Generates the SSD prior boxes for a feature map, one feature-map row per
// loop iteration of a thread.
//
// Launch layout: 1D grid of 1D blocks. Rows are distributed with a
// grid-stride loop, so any launch configuration is valid; threads whose row
// index is >= layer_height do nothing. No shared memory is used.
//
// Parameters:
//   top_data       output buffer; row h occupies
//                  [h * layer_width * num_priors * 4, (h + 1) * layer_width * num_priors * 4).
//   layer_height   feature-map height (number of rows to process).
//   layer_width    feature-map width (cells per row).
//   img_height     image height used to normalize y coordinates.
//   img_width      image width used to normalize x coordinates.
//   step_w/step_h  multiplier that maps a cell index to image coordinates.
//   offset         added to the cell index before scaling by the step.
//                  NOTE(review): declared int, so a fractional offset such as
//                  the 0.5 mentioned by the original author is truncated by
//                  the caller's conversion - confirm whether float was intended.
//   min_sizes      min_sizes_len box sizes (device pointer).
//   max_sizes      max_sizes_len box sizes (device pointer); may be unused
//                  when max_sizes_len == 0.
//   aspts          aspts_len aspect ratios (device pointer); ratios within
//                  1e-6 of 1 are skipped.
//   clip           non-zero to clamp every written coordinate to [0, 1].
//   num_priors     number of boxes per cell. Precondition: must equal the
//                  number of boxes the loops below emit per cell, otherwise
//                  rows overlap or leave gaps.
//
// Precondition: when max sizes are supplied, the host must ensure
// max_sizes_len == min_sizes_len (the original code asserted this with a
// host-side CHECK_EQ, which is not available in device code).
__global__ void PriKernel(float *top_data,const int layer_height,const int layer_width,const int img_height, const int img_width,const float step_w,const float step_h, const int offset, float *min_sizes,const int min_sizes_len,float*max_sizes,const int max_sizes_len,float *aspts,const int aspts_len, const int clip,const int num_priors)
{
  const int row_len = layer_width * num_priors * 4;
  const int row_stride = gridDim.x * blockDim.x;

  // Grid-stride loop over feature-map rows (replaces the CPU loop over h).
  for (int h = blockIdx.x * blockDim.x + threadIdx.x; h < layer_height; h += row_stride) {
    const int row_base = h * row_len;
    int idx = row_base;
    const float center_y = (h + offset) * step_h;

    for (int w = 0; w < layer_width; ++w) {
      // Map the feature-map cell back to image coordinates.
      const float center_x = (w + offset) * step_w;

      for (int s = 0; s < min_sizes_len; ++s) {
        // The size is truncated to int, as in the original code.
        const int min_size = static_cast<int>(min_sizes[s]);

        // First prior: aspect ratio 1, size = min_size.
        idx = PriKernelStoreBox(top_data, idx, center_x, center_y,
                                static_cast<float>(min_size), static_cast<float>(min_size),
                                img_width, img_height);

        // Second prior: aspect ratio 1, size = sqrt(min_size * max_size).
        // The s < max_sizes_len test also covers the "no max sizes" case and
        // keeps the read in bounds if the lengths were not validated.
        if (s < max_sizes_len) {
          const int max_size = static_cast<int>(max_sizes[s]);
          const float side = sqrtf(static_cast<float>(min_size * max_size));
          idx = PriKernelStoreBox(top_data, idx, center_x, center_y,
                                  side, side, img_width, img_height);
        }

        // Remaining priors: one per aspect ratio other than 1.
        for (int r = 0; r < aspts_len; ++r) {
          const float ar = aspts[r];
          if (fabsf(ar - 1.0f) < 1e-6f) {
            continue;
          }
          const float root = sqrtf(ar);
          idx = PriKernelStoreBox(top_data, idx, center_x, center_y,
                                  min_size * root, min_size / root,
                                  img_width, img_height);
        }
      }
    }

    // Clamp this row's coordinates so they lie within [0, 1].
    if (clip) {
      for (int d = 0; d < row_len; ++d) {
        top_data[row_base + d] = fminf(fmaxf(top_data[row_base + d], 0.0f), 1.0f);
      }
    }
  }
}

方差(variance)部分的并行实现:适当地修改一下参数,就可以实现GPU上的并行:

// Fills the variance values for every prior box of a feature map, one
// feature-map row per loop iteration of a thread.
//
// Launch layout: 1D grid of 1D blocks. Rows are distributed with a
// grid-stride loop, so any launch configuration is valid; threads whose row
// index is >= layer_height do nothing. No shared memory is used.
//
// Parameters:
//   top_data      output buffer; row h occupies
//                 [h * layer_width * num_priors * 4, (h + 1) * layer_width * num_priors * 4).
//                 NOTE(review): presumably the caller passes a pointer to the
//                 variance section of the layer output - confirm at the call site.
//   layer_height  feature-map height (number of rows to process).
//   layer_width   feature-map width (cells per row).
//   variance      variance_len values (device pointer).
//   variance_len  number of entries in variance. With 4 or more entries,
//                 coordinate j of every box receives variance[j]; with fewer,
//                 the index is clamped to the last entry, so a single value
//                 is broadcast to all four coordinates. Nothing is written
//                 when variance_len <= 0.
//   num_priors    number of boxes per cell.
__global__ void PriKernel2(float *top_data,const int layer_height,const int layer_width,
float *variance,const int variance_len,const int num_priors)
{
  // No variance values to read: leave the output untouched rather than
  // reading out of bounds.
  if (variance_len <= 0) {
    return;
  }

  const int row_len = layer_width * num_priors * 4;
  const int row_stride = gridDim.x * blockDim.x;

  // Grid-stride loop over feature-map rows (replaces the CPU loop over h).
  for (int h = blockIdx.x * blockDim.x + threadIdx.x; h < layer_height; h += row_stride) {
    float *row = top_data + h * row_len;
    // The row is layer_width * num_priors boxes of 4 values each, so the
    // coordinate index within a box is the flat position modulo 4.
    for (int i = 0; i < row_len; ++i) {
      const int j = i % 4;
      const int v = (j < variance_len) ? j : (variance_len - 1);
      row[i] = variance[v];
    }
  }
}

第三步,就是把本地(主机端)的数据拷贝到GPU上;在Caffe中,如果某一层实现了GPU版本的代码,则默认使用GPU版本的代码。

相关标签: ssd