x265中estIntraPredQT()分析(版本2.8)

程序员文章站 2022-07-07 11:58:42

...

一. 函数调用关系：

二. 函数功能解析：

x265中estIntraPredQT()分析(版本2.8)

三. 源码注释分析：

/*
 =========Analysed by:     yangxin
 =========Date:            2018.9
 =========Function:        estIntraPredQT() - luma intra prediction mode estimation
 =========Returns:         total distortion (sum of icosts.distortion over all PUs)

 For every PU of the CU this function:
   1. uses the direction already stored in cu.m_lumaIntraDir[puIdx] if it is not
      ALL_IDX; otherwise
      a. computes an sa8d-based cost (calcRdSADCost) for all 35 intra modes,
      b. shortlists up to maxCandCount modes (cost below 1.25x the best cost, or
         equal to mpmModes[0]),
      c. measures each shortlisted mode with RDO without TU splits and keeps the
         mode with the lowest rdcost;
   2. re-codes the chosen mode with TU splits allowed and accumulates the distortion;
   3. for all but the last PU, copies the reconstructed luma into the frame's
      recon picture so that later PUs can predict from it.

 Parameters:
   intraMode  - supplies the CUData (cu), reconYuv, predYuv and fencYuv used here
   cuGeom     - geometry of the CU (depth, log2CUSize, numPartitions, absPartIdx)
   depthRange - forwarded unchanged to codeIntraLumaQT()
*/
sse_t Search::estIntraPredQT(Mode &intraMode, const CUGeom& cuGeom, const uint32_t depthRange[2])
{
    CUData& cu = intraMode.cu;
    Yuv* reconYuv = &intraMode.reconYuv;
    Yuv* predYuv = &intraMode.predYuv;
    const Yuv* fencYuv = intraMode.fencYuv;

    uint32_t depth        = cuGeom.depth;//--depth of the current CU (presumably relative to the CTU - confirm against CUGeom)
    uint32_t initTuDepth  = cu.m_partSize[0] != SIZE_2Nx2N;//--1 when the partition size is not 2Nx2N, otherwise 0
    uint32_t numPU        = 1 << (2 * initTuDepth);//--number of PUs: 1 or 4
    uint32_t log2TrSize   = cuGeom.log2CUSize - initTuDepth;
    uint32_t tuSize       = 1 << log2TrSize;
    uint32_t qNumParts    = cuGeom.numPartitions >> 2;//--a quarter of the CU's partitions; step between PUs
    uint32_t sizeIdx      = log2TrSize - 2;//--index into primitives.cu[]
    uint32_t absPartIdx   = 0;
    sse_t totalDistortion = 0;

    // transform-skip path is only taken when the PPS enables it, the CU is not in
    // transquant-bypass mode, and the partition size is not 2Nx2N
    int checkTransformSkip = m_slice->m_pps->bTransformSkipEnabled && !cu.m_tqBypass[0] && cu.m_partSize[0] != SIZE_2Nx2N;

    // loop over partitions//--visit every PU
    for (uint32_t puIdx = 0; puIdx < numPU; puIdx++, absPartIdx += qNumParts)
    {
        uint32_t bmode = 0;

        if (intraMode.cu.m_lumaIntraDir[puIdx] != (uint8_t)ALL_IDX)//--a specific mode is already set for this PU (original note: e.g. PLANAR, VERTICAL, HORIZONTAL, DC); use it as is
            bmode = intraMode.cu.m_lumaIntraDir[puIdx];
        else
        {
            uint64_t candCostList[MAX_RD_INTRA_MODES];//--costs of the candidate modes
            uint32_t rdModeList[MAX_RD_INTRA_MODES];//--the candidate modes matching candCostList
            uint64_t bcost;//--best cost found so far
            int maxCandCount = 2 + m_param->rdLevel + ((depth + initTuDepth) >> 1);//--maximum number of candidates, derived from the rdLevel parameter, the CU depth and the initial TU depth

            {
                ProfileCUScope(intraMode.cu, intraAnalysisElapsedTime, countIntraAnalysis);

                // Reference sample smoothing//--filtering of the reference pixels
                IntraNeighbors intraNeighbors;
                initIntraNeighbors(cu, absPartIdx, initTuDepth, true, &intraNeighbors);
                initAdiPattern(cu, cuGeom, absPartIdx, intraNeighbors, ALL_IDX);

                // determine set of modes to be tested (using prediction signal only)
                const pixel* fenc = fencYuv->getLumaAddr(absPartIdx);
                uint32_t stride = predYuv->m_size;

                int scaleTuSize = tuSize;
                int scaleStride = stride;
                int costShift = 0;

                m_entropyCoder.loadIntraDirModeLuma(m_rqt[depth].cur);

                /* there are three cost tiers for intra modes:
                *  pred[0]          - mode probable, least cost
                *  pred[1], pred[2] - less probable, slightly more cost
                *  non-mpm modes    - all cost the same (rbits) */
                uint64_t mpms;
                uint32_t mpmModes[3];//--passed to getIntraRemModeBits() below together with mpms
                uint32_t rbits = getIntraRemModeBits(cu, absPartIdx, mpmModes, mpms);//--get most probable luma modes for CU part, and bit cost of all non mpm modes

                pixelcmp_t sa8d = primitives.cu[sizeIdx].sa8d;
                uint64_t modeCosts[35];//--one cost slot per intra prediction mode (35 modes)

                // DC//--cost of the DC mode
                primitives.cu[sizeIdx].intra_pred[DC_IDX](m_intraPred, scaleStride, intraNeighbourBuf[0], 0, (scaleTuSize <= 16));
                uint32_t bits = (mpms & ((uint64_t)1 << DC_IDX)) ? m_entropyCoder.bitsIntraModeMPM(mpmModes, DC_IDX) : rbits;//==bit cost: MPM bits when the mode's bit is set in mpms, otherwise rbits
                uint32_t sad = sa8d(fenc, scaleStride, m_intraPred, scaleStride) << costShift;//==distortion from the sa8d primitive (held in 'sad'), shifted by costShift
                modeCosts[DC_IDX] = bcost = m_rdCost.calcRdSADCost(sad, bits);//--combine distortion and bits into one cost; DC seeds bcost

                // PLANAR//--cost of the planar mode
                pixel* planar = intraNeighbourBuf[0];//--Unfiltered/filtered neighbours of the current partition (presumably [0] unfiltered, [1] filtered - confirm)
                if (tuSize >= 8 && tuSize <= 32)
                    planar = intraNeighbourBuf[1];

                primitives.cu[sizeIdx].intra_pred[PLANAR_IDX](m_intraPred, scaleStride, planar, 0, 0);
                bits = (mpms & ((uint64_t)1 << PLANAR_IDX)) ? m_entropyCoder.bitsIntraModeMPM(mpmModes, PLANAR_IDX) : rbits;
                sad = sa8d(fenc, scaleStride, m_intraPred, scaleStride) << costShift;
                modeCosts[PLANAR_IDX] = m_rdCost.calcRdSADCost(sad, bits);

                COPY1_IF_LT(bcost, modeCosts[PLANAR_IDX]);//==keep the smaller of the DC and planar costs in bcost //if(b<a) a=b;

                // angular predictions//--cost of the remaining (angular) modes
                if (primitives.cu[sizeIdx].intra_pred_allangs)//---all-angles primitive available: modes below 18 and modes 18 and above use different sa8d inputs
                {
                    primitives.cu[sizeIdx].transpose(m_fencTransposed, fenc, scaleStride);
                    primitives.cu[sizeIdx].intra_pred_allangs(m_intraPredAngs, intraNeighbourBuf[0], intraNeighbourBuf[1], (scaleTuSize <= 16));
                    //--modes 0 and 1 are planar and DC, so the loop starts at 2
					for (int mode = 2; mode < 35; mode++)
                    {
                        bits = (mpms & ((uint64_t)1 << mode)) ? m_entropyCoder.bitsIntraModeMPM(mpmModes, mode) : rbits;
                        // modes below 18 are compared against the transposed source block
                        if (mode < 18)
                            sad = sa8d(m_fencTransposed, scaleTuSize, &m_intraPredAngs[(mode - 2) * (scaleTuSize * scaleTuSize)], scaleTuSize) << costShift;
                        else
                            sad = sa8d(fenc, scaleStride, &m_intraPredAngs[(mode - 2) * (scaleTuSize * scaleTuSize)], scaleTuSize) << costShift;
                        modeCosts[mode] = m_rdCost.calcRdSADCost(sad, bits);
                        COPY1_IF_LT(bcost, modeCosts[mode]);//==compare this mode's cost with the best so far and keep the smaller one
                    }
                }
                else
                {
					//--modes 0 and 1 are planar and DC, so the loop starts at 2
                    for (int mode = 2; mode < 35; mode++)
                    {
                        bits = (mpms & ((uint64_t)1 << mode)) ? m_entropyCoder.bitsIntraModeMPM(mpmModes, mode) : rbits;
                        // pick neighbour buffer 0 or 1 from the per-mode filter flags masked by the TU size
                        int filter = !!(g_intraFilterFlags[mode] & scaleTuSize);
                        primitives.cu[sizeIdx].intra_pred[mode](m_intraPred, scaleTuSize, intraNeighbourBuf[filter], mode, scaleTuSize <= 16);
                        sad = sa8d(fenc, scaleStride, m_intraPred, scaleTuSize) << costShift;
                        modeCosts[mode] = m_rdCost.calcRdSADCost(sad, bits);
                        COPY1_IF_LT(bcost, modeCosts[mode]);
                    }
                }

                /* Find the top maxCandCount candidate modes with cost within 25% of best
                * or among the most probable modes. maxCandCount is derived from the
                * rdLevel and depth. In general we want to try more modes at slower RD
                * levels and at higher depths */
                for (int i = 0; i < maxCandCount; i++)
                    candCostList[i] = MAX_INT64;

                uint64_t paddedBcost = bcost + (bcost >> 2); // 1.25x the best cost (best + 25%)
                for (int mode = 0; mode < 35; mode++)
                    if ((modeCosts[mode] < paddedBcost) || ((uint32_t)mode == mpmModes[0])) 
                        /* choose for R-D analysis only if this mode passes cost threshold or matches MPM[0] */
                        updateCandList(mode, modeCosts[mode], maxCandCount, rdModeList, candCostList);//--offer this mode and its cost to the candidate list
            }

            /* measure best candidates using simple RDO (no TU splits) */
            bcost = MAX_INT64;
            for (int i = 0; i < maxCandCount; i++)
            {
                // slots still holding the MAX_INT64 initial value were never filled; stop there
                if (candCostList[i] == MAX_INT64)
                    break;

                ProfileCUScope(intraMode.cu, intraRDOElapsedTime[cuGeom.depth], countIntraRDO[cuGeom.depth]);

                m_entropyCoder.load(m_rqt[depth].cur);
                cu.setLumaIntraDirSubParts(rdModeList[i], absPartIdx, depth + initTuDepth);

                Cost icosts;
                if (checkTransformSkip)
                    codeIntraLumaTSkip(intraMode, cuGeom, initTuDepth, absPartIdx, icosts);
                else
                    codeIntraLumaQT(intraMode, cuGeom, initTuDepth, absPartIdx, false, icosts, depthRange);//--'false': TU splits not allowed in this pass
                COPY2_IF_LT(bcost, icosts.rdcost, bmode, rdModeList[i]);//==if(b<a){a=b;c=d;}
            }
        }

        ProfileCUScope(intraMode.cu, intraRDOElapsedTime[cuGeom.depth], countIntraRDO[cuGeom.depth]);

        /* remeasure best mode, allowing TU splits */
        cu.setLumaIntraDirSubParts(bmode, absPartIdx, depth + initTuDepth);
        m_entropyCoder.load(m_rqt[depth].cur);

        Cost icosts;
        if (checkTransformSkip)
            codeIntraLumaTSkip(intraMode, cuGeom, initTuDepth, absPartIdx, icosts);
        else
			//====generate prediction, generate residual and recon. if bAllowSplit, find optimal RQT splits
            codeIntraLumaQT(intraMode, cuGeom, initTuDepth, absPartIdx, true, icosts, depthRange);


		/*=========================accumulate the total distortion=============================================*/
        totalDistortion += icosts.distortion;

        extractIntraResultQT(cu, *reconYuv, initTuDepth, absPartIdx);//--extract the intra result for this PU

        // set reconstruction for next intra prediction blocks
        if (puIdx != numPU - 1)
        {
            /* This has important implications for parallelism and RDO.  It is writing intermediate results into the
             * output recon picture, so it cannot proceed in parallel with anything else when doing INTRA_NXN. Also
             * it is not updating m_rdContexts[depth].cur for the later PUs which I suspect is slightly wrong. I think
             * that the contexts should be tracked through each PU */
            PicYuv*  reconPic = m_frame->m_reconPic;
            pixel*   dst       = reconPic->getLumaAddr(cu.m_cuAddr, cuGeom.absPartIdx + absPartIdx);//--get pointer to CU start address 
            uint32_t dststride = reconPic->m_stride;
            const pixel*   src = reconYuv->getLumaAddr(absPartIdx);
            uint32_t srcstride = reconYuv->m_size;
            primitives.cu[log2TrSize - 2].copy_pp(dst, dststride, src, srcstride);
        }
    }

    if (numPU > 1)
    {
        // OR together the luma cbf of the four quarters and merge the result into m_cbf[0][0]
        uint32_t combCbfY = 0;
        for (uint32_t qIdx = 0, qPartIdx = 0; qIdx < 4; ++qIdx, qPartIdx += qNumParts)
            combCbfY |= cu.getCbf(qPartIdx, TEXT_LUMA, 1);//==fetch the luma coded-block flag (cbf) of this quarter

        cu.m_cbf[0][0] |= combCbfY;
    }

    // TODO: remove this
    m_entropyCoder.load(m_rqt[depth].cur);

    return totalDistortion;
}

上一篇：搭建Nginx_RTMP推流服务器

下一篇： x265中checkIntra()分析(版本2.8)

x265中estIntraPredQT()分析(版本2.8)

一. 函数调用关系：

二. 函数功能解析：

三. 源码注释分析：

PostgreSQL10版本中的自增列代码实例分析

x265中compressIntraCU()分析(版本2.8)

x265中estIntraPredQT()分析(版本2.8)

x265中checkIntra()分析(版本2.8)

linux的低版本中openssh三大漏洞的分析及修复方法

微信视频小窗口在USER版本设置二级菜单中不显示问题分析

PostgreSQL10版本中的自增列代码实例分析

linux的低版本中openssh三大漏洞的分析及修复方法

微信视频小窗口在USER版本设置二级菜单中不显示问题分析

x265中estIntraPredQT()分析(版本2.8)

一. 函数调用关系：

二. 函数功能解析：

三. 源码注释分析：

PostgreSQL10版本中的自增列代码实例分析

x265中compressIntraCU()分析(版本2.8)

x265中estIntraPredQT()分析(版本2.8)

x265中checkIntra()分析(版本2.8)

linux的低版本中openssh三大漏洞的分析及修复方法

微信视频小窗口在USER版本设置二级菜单中不显示问题分析

PostgreSQL10版本中的自增列代码实例分析

linux的低版本中openssh三大漏洞的分析及修复方法

微信视频小窗口在USER版本设置二级菜单中不显示问题分析

一. 函数调用关系：