x265中estIntraPredQT()分析(版本2.8)
程序员文章站
2022-07-07 11:58:42
...
一. 函数调用关系 :
二. 函数功能解析:
三. 源码注释分析:
/*
 * Analysed by: yangxin
 * Date: 2018.9
 * Function: Search::estIntraPredQT() - luma intra prediction mode estimation.
 *
 * For every PU of the CU held in intraMode, a luma intra mode is chosen and
 * the PU is coded with it:
 *   1. if cu.m_lumaIntraDir[puIdx] is not ALL_IDX, that mode is used as-is;
 *   2. otherwise all 35 modes are costed from the prediction signal only
 *      (sa8d distortion + mode bits); modes within 25% of the best cost, or
 *      equal to mpmModes[0], are offered to updateCandList(); each surviving
 *      candidate is then measured with codeIntraLumaQT() without TU splits
 *      (or codeIntraLumaTSkip() when checkTransformSkip is set);
 *   3. the winning mode is coded once more, this time allowing TU splits, and
 *      its distortion is added to the running total.
 *
 * Parameters:
 *   intraMode  - mode under evaluation; its cu, reconYuv, predYuv and fencYuv are used
 *   cuGeom     - geometry of the CU (depth, log2CUSize, numPartitions, absPartIdx are read)
 *   depthRange - forwarded unchanged to codeIntraLumaQT()
 *
 * Returns: total distortion, i.e. the sum of icosts.distortion over all PUs.
 */
sse_t Search::estIntraPredQT(Mode &intraMode, const CUGeom& cuGeom, const uint32_t depthRange[2])
{
CUData& cu = intraMode.cu;
Yuv* reconYuv = &intraMode.reconYuv;
Yuv* predYuv = &intraMode.predYuv;
const Yuv* fencYuv = intraMode.fencYuv;
uint32_t depth = cuGeom.depth;// depth of the current CU (per the original note: relative to the CTU)
uint32_t initTuDepth = cu.m_partSize[0] != SIZE_2Nx2N;// 0 for SIZE_2Nx2N, 1 for any other partition size
uint32_t numPU = 1 << (2 * initTuDepth);// number of PUs: 1 when initTuDepth is 0, 4 when it is 1
uint32_t log2TrSize = cuGeom.log2CUSize - initTuDepth;// log2 of the block size handled per PU
uint32_t tuSize = 1 << log2TrSize;
uint32_t qNumParts = cuGeom.numPartitions >> 2;// a quarter of the CU's partitions; step between consecutive PUs
uint32_t sizeIdx = log2TrSize - 2;// index into primitives.cu[] (0 corresponds to log2TrSize == 2)
uint32_t absPartIdx = 0;
sse_t totalDistortion = 0;
// Transform skip is only tried when the PPS enables it, the CU is not flagged in m_tqBypass,
// and the partition size is not SIZE_2Nx2N.
int checkTransformSkip = m_slice->m_pps->bTransformSkipEnabled && !cu.m_tqBypass[0] && cu.m_partSize[0] != SIZE_2Nx2N;
// loop over partitions (one iteration per PU; absPartIdx advances by qNumParts each time)
for (uint32_t puIdx = 0; puIdx < numPU; puIdx++, absPartIdx += qNumParts)
{
uint32_t bmode = 0;
if (intraMode.cu.m_lumaIntraDir[puIdx] != (uint8_t)ALL_IDX)// a specific mode was already set for this PU: use it and skip the search
bmode = intraMode.cu.m_lumaIntraDir[puIdx];
else
{
uint64_t candCostList[MAX_RD_INTRA_MODES];// costs of the candidate modes
uint32_t rdModeList[MAX_RD_INTRA_MODES];// candidate modes, parallel to candCostList
uint64_t bcost;// best cost found so far
int maxCandCount = 2 + m_param->rdLevel + ((depth + initTuDepth) >> 1);// max number of candidates, derived from rdLevel, depth and initial TU depth
{
ProfileCUScope(intraMode.cu, intraAnalysisElapsedTime, countIntraAnalysis);
// Reference sample smoothing
IntraNeighbors intraNeighbors;
initIntraNeighbors(cu, absPartIdx, initTuDepth, true, &intraNeighbors);
initAdiPattern(cu, cuGeom, absPartIdx, intraNeighbors, ALL_IDX);
// determine set of modes to be tested (using prediction signal only)
const pixel* fenc = fencYuv->getLumaAddr(absPartIdx);
uint32_t stride = predYuv->m_size;
int scaleTuSize = tuSize;
int scaleStride = stride;
int costShift = 0;// zero here, so the "<< costShift" below leaves the distortion unchanged
m_entropyCoder.loadIntraDirModeLuma(m_rqt[depth].cur);
/* there are three cost tiers for intra modes:
 * pred[0] - mode probable, least cost (most probable mode, cheapest)
 * pred[1], pred[2] - less probable, slightly more cost
 * non-mpm modes - all cost the same (rbits) */
uint64_t mpms;// bit mask tested below with (1 << mode) to tell whether a mode is an MPM
uint32_t mpmModes[3];// the three most probable modes
uint32_t rbits = getIntraRemModeBits(cu, absPartIdx, mpmModes, mpms);// get most probable luma modes for CU part, and bit cost of all non mpm modes
pixelcmp_t sa8d = primitives.cu[sizeIdx].sa8d;
uint64_t modeCosts[35];// one cost per intra prediction mode (35 modes)
// DC: cost of the DC mode
primitives.cu[sizeIdx].intra_pred[DC_IDX](m_intraPred, scaleStride, intraNeighbourBuf[0], 0, (scaleTuSize <= 16));
uint32_t bits = (mpms & ((uint64_t)1 << DC_IDX)) ? m_entropyCoder.bitsIntraModeMPM(mpmModes, DC_IDX) : rbits;// mode bits: MPM-specific cost if DC is in mpms, otherwise rbits
uint32_t sad = sa8d(fenc, scaleStride, m_intraPred, scaleStride) << costShift;// distortion from the sa8d primitive (the variable is merely named sad)
modeCosts[DC_IDX] = bcost = m_rdCost.calcRdSADCost(sad, bits);// cost from distortion and bits; also seeds bcost
// PLANAR: cost of the planar mode
pixel* planar = intraNeighbourBuf[0];// Unfiltered/filtered neighbours of the current partition
if (tuSize >= 8 && tuSize <= 32)
planar = intraNeighbourBuf[1];// index 1 is what the angular path picks when its filter flag is set, so presumably the filtered neighbours
primitives.cu[sizeIdx].intra_pred[PLANAR_IDX](m_intraPred, scaleStride, planar, 0, 0);
bits = (mpms & ((uint64_t)1 << PLANAR_IDX)) ? m_entropyCoder.bitsIntraModeMPM(mpmModes, PLANAR_IDX) : rbits;
sad = sa8d(fenc, scaleStride, m_intraPred, scaleStride) << costShift;
modeCosts[PLANAR_IDX] = m_rdCost.calcRdSADCost(sad, bits);
COPY1_IF_LT(bcost, modeCosts[PLANAR_IDX]);// keep the smaller of the DC and planar costs; per the original note: if (b < a) a = b;
// angular predictions: cost of the remaining (angular) modes
if (primitives.cu[sizeIdx].intra_pred_allangs)// an all-angles primitive is available: predict every angular mode in one call
{
primitives.cu[sizeIdx].transpose(m_fencTransposed, fenc, scaleStride);
primitives.cu[sizeIdx].intra_pred_allangs(m_intraPredAngs, intraNeighbourBuf[0], intraNeighbourBuf[1], (scaleTuSize <= 16));
// modes 0 and 1 are planar and DC, so the angular modes start at 2
for (int mode = 2; mode < 35; mode++)
{
bits = (mpms & ((uint64_t)1 << mode)) ? m_entropyCoder.bitsIntraModeMPM(mpmModes, mode) : rbits;
// modes below 18 are compared against the transposed source block, the others against the source block itself;
// each mode's prediction occupies scaleTuSize * scaleTuSize entries of m_intraPredAngs
if (mode < 18)
sad = sa8d(m_fencTransposed, scaleTuSize, &m_intraPredAngs[(mode - 2) * (scaleTuSize * scaleTuSize)], scaleTuSize) << costShift;
else
sad = sa8d(fenc, scaleStride, &m_intraPredAngs[(mode - 2) * (scaleTuSize * scaleTuSize)], scaleTuSize) << costShift;
modeCosts[mode] = m_rdCost.calcRdSADCost(sad, bits);
COPY1_IF_LT(bcost, modeCosts[mode]);// compare this mode's cost with the best so far and keep the smaller
}
}
else
{
// no all-angles primitive: predict and cost each angular mode separately
// modes 0 and 1 are planar and DC, so the angular modes start at 2
for (int mode = 2; mode < 35; mode++)
{
bits = (mpms & ((uint64_t)1 << mode)) ? m_entropyCoder.bitsIntraModeMPM(mpmModes, mode) : rbits;
int filter = !!(g_intraFilterFlags[mode] & scaleTuSize);// 1 when the mode's filter flags contain this block size's bit, else 0
primitives.cu[sizeIdx].intra_pred[mode](m_intraPred, scaleTuSize, intraNeighbourBuf[filter], mode, scaleTuSize <= 16);
sad = sa8d(fenc, scaleStride, m_intraPred, scaleTuSize) << costShift;
modeCosts[mode] = m_rdCost.calcRdSADCost(sad, bits);
COPY1_IF_LT(bcost, modeCosts[mode]);
}
}
/* Find the top maxCandCount candidate modes with cost within 25% of best
 * or among the most probable modes. maxCandCount is derived from the
 * rdLevel and depth. In general we want to try more modes at slower RD
 * levels and at higher depths */
// mark every candidate slot as unused
for (int i = 0; i < maxCandCount; i++)
candCostList[i] = MAX_INT64;
uint64_t paddedBcost = bcost + (bcost >> 2); // 1.25 * bcost, i.e. the best cost plus 25%
for (int mode = 0; mode < 35; mode++)
if ((modeCosts[mode] < paddedBcost) || ((uint32_t)mode == mpmModes[0]))
/* choose for R-D analysis only if this mode passes cost threshold or matches MPM[0] */
updateCandList(mode, modeCosts[mode], maxCandCount, rdModeList, candCostList);// offer the mode and its cost to the candidate lists
}
/* measure best candidates using simple RDO (no TU splits) */
bcost = MAX_INT64;
for (int i = 0; i < maxCandCount; i++)
{
// a slot still holding MAX_INT64 was never filled; stop at the first one
if (candCostList[i] == MAX_INT64)
break;
ProfileCUScope(intraMode.cu, intraRDOElapsedTime[cuGeom.depth], countIntraRDO[cuGeom.depth]);
m_entropyCoder.load(m_rqt[depth].cur);// reload the entropy coder from m_rqt[depth].cur before each candidate
cu.setLumaIntraDirSubParts(rdModeList[i], absPartIdx, depth + initTuDepth);
Cost icosts;
if (checkTransformSkip)
codeIntraLumaTSkip(intraMode, cuGeom, initTuDepth, absPartIdx, icosts);
else
codeIntraLumaQT(intraMode, cuGeom, initTuDepth, absPartIdx, false, icosts, depthRange);// "false": TU splits not allowed in this pass
COPY2_IF_LT(bcost, icosts.rdcost, bmode, rdModeList[i]);// per the original note: if (b < a) { a = b; c = d; } - remembers the cheapest candidate in bmode
}
}
ProfileCUScope(intraMode.cu, intraRDOElapsedTime[cuGeom.depth], countIntraRDO[cuGeom.depth]);
/* remeasure best mode, allowing TU splits */
cu.setLumaIntraDirSubParts(bmode, absPartIdx, depth + initTuDepth);
m_entropyCoder.load(m_rqt[depth].cur);
Cost icosts;
if (checkTransformSkip)
codeIntraLumaTSkip(intraMode, cuGeom, initTuDepth, absPartIdx, icosts);
else
// generate prediction, generate residual and recon. if bAllowSplit, find optimal RQT splits ("true" is passed here)
codeIntraLumaQT(intraMode, cuGeom, initTuDepth, absPartIdx, true, icosts, depthRange);
/* accumulate this PU's distortion into the total */
totalDistortion += icosts.distortion;
extractIntraResultQT(cu, *reconYuv, initTuDepth, absPartIdx);// extract the intra result of this PU into reconYuv
// set reconstruction for next intra prediction blocks (skipped after the last PU)
if (puIdx != numPU - 1)
{
/* This has important implications for parallelism and RDO. It is writing intermediate results into the
 * output recon picture, so it cannot proceed in parallel with anything else when doing INTRA_NXN. Also
 * it is not updating m_rdContexts[depth].cur for the later PUs which I suspect is slightly wrong. I think
 * that the contexts should be tracked through each PU */
PicYuv* reconPic = m_frame->m_reconPic;
pixel* dst = reconPic->getLumaAddr(cu.m_cuAddr, cuGeom.absPartIdx + absPartIdx);// get pointer to CU start address
uint32_t dststride = reconPic->m_stride;
const pixel* src = reconYuv->getLumaAddr(absPartIdx);
uint32_t srcstride = reconYuv->m_size;
primitives.cu[log2TrSize - 2].copy_pp(dst, dststride, src, srcstride);// copy this PU's reconstructed luma block into the recon picture
}
}
// With more than one PU, OR the luma CBFs of the four quarter partitions into cu.m_cbf[0][0].
if (numPU > 1)
{
uint32_t combCbfY = 0;
for (uint32_t qIdx = 0, qPartIdx = 0; qIdx < 4; ++qIdx, qPartIdx += qNumParts)
combCbfY |= cu.getCbf(qPartIdx, TEXT_LUMA, 1);// fetch the luma coded-block flag of this quarter
cu.m_cbf[0][0] |= combCbfY;
}
// TODO: remove this
m_entropyCoder.load(m_rqt[depth].cur);
return totalDistortion;
}
上一篇: 搭建Nginx_RTMP推流服务器