VVC/VTM:代码学习——量化的实现(普通量化和Dependent scalar Quantization)
程序员文章站
2022-07-14 20:01:25
...
关于量化原理的介绍可以参考博文:https://blog.csdn.net/liangjiubujiu/article/details/80569391
关于HEVC/H265量化的实现代码可参考博文:https://blog.csdn.net/qq_21747841/article/details/77483290
VVC中引进了Dependent Scalar Quantization(依赖性标量量化),所以代码中有三种量化函数。本文分为普通量化、率失真优化量化(RDOQ)和依赖性量化(Dependent Quantization,简称DQ)三部分进行介绍。
普通量化
实现量化的主体函数是void Quant::quant()
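可对照下述量化公式学习代码:
$$Z(u,v) = \mathrm{sign}\big(W(u,v)\big)\cdot\Big(\big(\,|W(u,v)|\cdot MF(QP \bmod 6) + f'\,\big) \gg (qbits + T)\Big)$$
其中 $W(u,v)$ 为变换后的系数,$Z(u,v)$ 为量化后的系数,$MF(\cdot)$ 为量化缩放因子(代码中的 g_quantScales),$qbits = 14 + \lfloor QP/6 \rfloor$,$T$ 为变换带来的移位量(代码中的 iTransformShift),$f' = f \ll (qbits + T)$ 为舍入偏移量(代码中的 iAdd)。
floor(QP/6):整除操作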
/**
 * Plain scalar quantisation of one transform block (the non-RDOQ path).
 *
 * For every coefficient W the quantised magnitude is
 *     ( |W| * MF * iWHScale + iAdd ) >> iQBits
 * after which the sign of W is re-applied and the value is clipped to the
 * range given by the maximum log2 transform dynamic range.
 *
 * @param tu        transform unit; its coefficient buffer for compID receives the result
 * @param compID    colour component being quantised
 * @param pSrc      transform coefficients to quantise
 * @param uiAbsSum  accumulator: the (unclipped) quantised magnitudes are added to it
 * @param cQP       quantisation parameter split into cQP.per and cQP.rem
 * @param ctx       not used in this function
 */
void Quant::quant(TransformUnit &tu, const ComponentID &compID, const CCoeffBuf &pSrc, TCoeff &uiAbsSum, const QpParam &cQP, const Ctx& ctx)
{
  const SPS      &seqParams = *tu.cs->sps;
  const CompArea &blkArea   = tu.blocks[compID];
#if HEVC_USE_SCALING_LISTS || HEVC_USE_SIGN_HIDING
  const uint32_t blkWidth  = blkArea.width;
  const uint32_t blkHeight = blkArea.height;
#endif
  const int        bitDepth        = seqParams.getBitDepth(toChannelType(compID));
  const CCoeffBuf &srcCoeffs       = pSrc;                  // input: transform coefficients
  CoeffBuf         dstCoeffs       = tu.getCoeffs(compID);  // output: quantised coefficients
  const bool       isTransformSkip = tu.mtsIdx == 1;
  const int        log2DynRange    = seqParams.getMaxLog2TrDynamicRange(toChannelType(compID));

#if HEVC_USE_SIGN_HIDING
  CoeffCodingContext cctx(tu, compID, tu.cs->slice->getSignDataHidingEnabledFlag());
#else
  CoeffCodingContext cctx(tu, compID);
#endif

  // Range the stored levels are clipped to.
  const TCoeff levelMin = -(1 << log2DynRange);
  const TCoeff levelMax = (1 << log2DynRange) - 1;

#if HEVC_USE_SIGN_HIDING
  TCoeff deltaU[MAX_TB_SIZEY * MAX_TB_SIZEY];
#endif

#if HEVC_USE_SCALING_LISTS
  int scalingListType = getScalingListType(tu.cu->predMode, compID);
  CHECK(scalingListType >= SCALING_LIST_NUM, "Invalid scaling list");
  const uint32_t log2Width      = g_aucLog2[blkWidth];
  const uint32_t log2Height     = g_aucLog2[blkHeight];
  int           *scalingCoeffs  = getQuantCoeff(scalingListType, cQP.rem, log2Width-1, log2Height-1);
  const bool     useScalingList = getUseScalingList(blkWidth, blkHeight, isTransformSkip);
#endif

  // MF(q): default scaling factor looked up by cQP.rem (QP % 6).
  // The original author quotes the table as
  //   g_quantScales = { 26214, 23302, 20560, 18396, 16384, 14564 }.
  const int defaultScale = g_quantScales[cQP.rem];

  /* for 422 chroma blocks, the effective scaling applied during transformation is not a power of 2, hence it cannot be
   * implemented as a bit-shift (the quantised result will be sqrt(2) * larger than required). Alternatively, adjust the
   * uiLog2TrSize applied in iTransformShift, such that the result is 1/sqrt(2) the required result (i.e. smaller)
   * Then a QP+3 (sqrt(2)) or QP-3 (1/sqrt(2)) method could be used to get the required result
   */

  // Represents scaling through forward transform
  int transformShift = getTransformShift(bitDepth, blkArea.size(), log2DynRange);
  if (isTransformSkip && seqParams.getSpsRangeExtension().getExtendedPrecisionProcessingFlag())
  {
    transformShift = std::max<int>(0, transformShift);
  }

  int whScale = 1;
#if HM_QTBT_AS_IN_JEM_QUANT
  if( TU::needsBlockSizeTrafoScale( tu, compID ) )
  {
    transformShift += ADJ_QUANT_SHIFT;
    whScale         = 181;
  }
#endif

  // Total right shift; cQP.per is floor(QP/6) (the original author notes QUANT_SHIFT = 14).
  // QBits will be OK for any internal bit depth as the reduction in transform shift is balanced by an increase in Qp_per due to QpBDOffset
  const int qBits = QUANT_SHIFT + cQP.per + transformShift;

  // Rounding offset: 171 for IRAP slices, 85 otherwise, shifted left by (qBits - 9).
  const int64_t roundingFactor = int64_t(tu.cs->slice->isIRAP() ? 171 : 85);
  const int64_t roundingOffset = roundingFactor << int64_t(qBits - 9);

#if HEVC_USE_SIGN_HIDING
  const int qBitsMinus8 = qBits - 8;
#endif

  for (int pos = 0; pos < dstCoeffs.area(); pos++)
  {
    const TCoeff coeff = srcCoeffs.buf[pos];  // W(u,v): the transform coefficient

#if HEVC_USE_SCALING_LISTS
    // Per-position scaling-list factor when enabled, otherwise the default factor
    // (the original author notes that this macro is disabled in VTM).
    const int64_t scaled = (int64_t)abs(coeff) * (useScalingList ? scalingCoeffs[pos] : defaultScale);
#else
    const int64_t scaled = (int64_t)abs(coeff) * defaultScale;  // |W(u,v)| * MF(q)
#endif
    const int64_t weighted = scaled * whScale;

    // Quantised magnitude (absolute value): ( |W| * MF + f' ) >> qBits
    const TCoeff magnitude = TCoeff((weighted + roundingOffset) >> qBits);

#if HEVC_USE_SIGN_HIDING
    deltaU[pos] = (TCoeff)((weighted - ((int64_t)magnitude << qBits)) >> qBitsMinus8);
#endif

    uiAbsSum += magnitude;

    // Re-apply the sign of the input coefficient, clip, and store the level.
    const TCoeff signedLevel = (coeff < 0) ? -magnitude : magnitude;
    dstCoeffs.buf[pos] = Clip3<TCoeff>( levelMin, levelMax, signedLevel );
  } // for n

#if HEVC_USE_SIGN_HIDING
  // Sign-bit hiding is only tried on blocks of at least 4x4; the uiAbsSum >= 2 test
  // prevents TUs with only one coefficient of value 1 from being tested.
  if( cctx.signHiding() && blkWidth >= 4 && blkHeight >= 4 && uiAbsSum >= 2 )
  {
    xSignBitHidingHDQ(dstCoeffs.buf, srcCoeffs.buf, deltaU, cctx, log2DynRange);
  }
#endif
}
率失真优化量化
实现量化的主体函数是void QuantRDOQ::quant()中调用的xRateDistOptQuant()
与HEVC的相似,可以参考博文:https://blog.csdn.net/qq_21747841/article/details/77483290
依赖性量化(DQ)
DQ原理详见博文VVC/VTM:变换量化——Quantization:https://blog.csdn.net/baidu_28446365/article/details/89430095
实现DQ的主体函数是void DepQuant::quant(),slice级别的m_depQuantEnabledFlag标志位标识是否使用DQ。
DQ进行量化的计算公式与普通量化相同,不同之处在于当前系数所使用的量化器由状态决定,而状态取决于之前已量化系数的取值,即当前系数的量化结果与上一个系数的量化结果有关。
/**
 * Dependent quantisation of one transform block using a trellis search.
 *
 * Steps: clear the output levels, locate the first scan position whose
 * coefficient magnitude exceeds the "last" threshold, fill the trellis from
 * that position down to scan index 0, pick the cheapest end state, and
 * back-trace through the trellis to write the signed levels.
 *
 * @param tu        transform unit; its coefficient buffer for compID receives the levels
 * @param srcCoeff  transform coefficients to quantise
 * @param compID    colour component being quantised
 * @param cQP       quantisation parameter, forwarded to m_quant.initQuantBlock
 * @param lambda    Lagrange multiplier, forwarded to m_quant.initQuantBlock
 * @param ctx       supplies the fractional-bit tables for the rate estimator
 * @param absSum    output: reset to 0, then receives the sum of the chosen absolute levels
 */
void DepQuant::quant( TransformUnit& tu, const CCoeffBuf& srcCoeff, const ComponentID compID, const QpParam& cQP, const double lambda, const Ctx& ctx, TCoeff& absSum )
{
  CHECKD( tu.cs->sps->getSpsRangeExtension().getExtendedPrecisionProcessingFlag(), "ext precision is not supported" );

  //===== reset / pre-init =====
  const TUParameters& tuPars = *g_Rom.getTUPars( tu.blocks[compID], compID );
  m_quant.initQuantBlock( tu, compID, cQP, lambda );

  TCoeff*       qCoeff   = tu.getCoeffs( compID ).buf;   // quantised levels (output)
  const TCoeff* tCoeff   = srcCoeff.buf;                 // transform coefficients (input)
  const int     numCoeff = tu.blocks[compID].area();

  ::memset( tu.getCoeffs( compID ).buf, 0x00, numCoeff*sizeof(TCoeff) );
  absSum = 0;

  //===== find first test position =====
  // Coefficients whose magnitude does not exceed this threshold are skipped here,
  // i.e. they stay at the zero level written by the memset above.
  const TCoeff lastThreshold = m_quant.getLastThreshold();

  // Walk the scan order from its last index towards index 0 and stop at the first
  // coefficient above the threshold; m_scanId2BlkPos maps a scan index to a block position.
  int firstTestPos = numCoeff - 1;
  while( firstTestPos >= 0 && abs( tCoeff[ tuPars.m_scanId2BlkPos[firstTestPos].idx ] ) <= lastThreshold )
  {
    firstTestPos--;
  }
  if( firstTestPos < 0 )
  {
    // Nothing exceeds the threshold: the block stays all-zero and absSum stays 0.
    return;
  }

  //===== real init =====
  RateEstimator::initCtx( tuPars, tu, compID, ctx.getFracBitsAcess() );
  m_commonCtx.reset( tuPars, *this );
  for( int stateIdx = 0; stateIdx < 12; stateIdx++ )
  {
    m_allStates[stateIdx].init();
  }
  m_startState.init();

  // Effective (non-zeroed-out) region of the block. For luma blocks that are not
  // transquant-bypassed and use mtsIdx > 1, or SBT with both dimensions <= 32,
  // a dimension of 32 is reduced to 16.
  int  effWidth  = tuPars.m_width;
  int  effHeight = tuPars.m_height;
  bool zeroOut   = false;

  const bool mtsAboveOne     = tu.mtsIdx > 1;
  const bool sbtUpTo32       = tu.cu->sbtInfo != 0 && tuPars.m_height <= 32 && tuPars.m_width <= 32;
  const bool restrictSupport = ( mtsAboveOne || sbtUpTo32 ) && !tu.cu->transQuantBypass && compID == COMPONENT_Y;
  if( restrictSupport )
  {
    if( tuPars.m_height == 32 )
    {
      effHeight = 16;
    }
    if( tuPars.m_width == 32 )
    {
      effWidth = 16;
    }
    zeroOut = ( effHeight < tuPars.m_height || effWidth < tuPars.m_width );
  }

  //===== populate trellis =====
  for( int pos = firstTestPos; pos >= 0; pos-- )
  {
    const ScanInfo& scanInfo     = tuPars.m_scanInfo[ pos ];
    const bool      outsideRange = zeroOut && ( scanInfo.posX >= effWidth || scanInfo.posY >= effHeight );
    // Evaluates the candidates for this coefficient (magnitude only, no sign);
    // the original author notes that the chosen absolute levels end up in m_trellis.
    xDecideAndUpdate( abs( tCoeff[ scanInfo.rasterPos ] ), scanInfo, outsideRange );
  }

  //===== find best path =====
  // Only an end state whose cost is below 0 is accepted; otherwise prevId stays -1
  // and the back-trace below writes nothing.
  Decision decision = { std::numeric_limits<int64_t>::max(), -1, -2 };
  int64_t  bestCost = 0;
  for( int8_t stateId = 0; stateId < 4; stateId++ )
  {
    const int64_t candidateCost = m_trellis[0][stateId].rdCost;
    if( candidateCost < bestCost )
    {
      decision.prevId = stateId;
      bestCost        = candidateCost;
    }
  }

  //===== backward scanning =====
  // scanIdx counts positions in scan order; blkPos is the matching block position.
  int scanIdx = 0;
  while( decision.prevId >= 0 )
  {
    decision = m_trellis[ scanIdx ][ decision.prevId ];
    const int32_t blkPos = tuPars.m_scanId2BlkPos[scanIdx].idx;
    // Give the chosen absolute level the sign of the input coefficient.
    qCoeff[ blkPos ] = ( tCoeff[ blkPos ] < 0 ? -decision.absLevel : decision.absLevel );
    absSum          += decision.absLevel;
    scanIdx++;
  }
}
函数xDecideAndUpdate()是DQ量化的主要步骤,它进一步调用了函数xDecide(),如下:
/**
 * Fills the per-state decisions for one scan position.
 *
 * @param spt         scan position type; SCAN_EOCSBB additionally triggers the skip-state checks
 * @param absCoeff    absolute value of the transform coefficient at this position
 * @param lastOffset  forwarded to the start-state cost check
 * @param decisions   output array; 8 entries are initialised from startDec,
 *                    entries 0..3 are then updated by the checks below
 * @param zeroOut     true when the position is forced to zero: only the
 *                    zero-out skip checks are run and no candidates are evaluated
 */
void DepQuant::xDecide( const ScanPosType spt, const TCoeff absCoeff, const int lastOffset, Decision* decisions, bool zeroOut)
{
  ::memcpy( decisions, startDec, 8*sizeof(Decision) );

  const bool isEndOfSubBlock = ( spt == SCAN_EOCSBB );

  if( zeroOut )
  {
    if( isEndOfSubBlock )
    {
      for( int stateId = 0; stateId < 4; stateId++ )
      {
        m_skipStates[stateId].checkRdCostSkipSbbZeroOut( decisions[stateId] );
      }
    }
    return;
  }

  // Candidate quantisation data for this coefficient.
  PQData pqData[4];
  m_quant.preQuantCoeff( absCoeff, pqData );

  // Previous states 0 and 1 are checked with pqData[0]/pqData[2] and update
  // decisions 0 and 2; previous states 2 and 3 are checked with
  // pqData[3]/pqData[1] and update decisions 1 and 3. (The original author's
  // note: from previous state 0 the next state can be 0 or 2, and the RD cost
  // of both cases is computed here.)
  m_prevStates[0].checkRdCosts( spt, pqData[0], pqData[2], decisions[0], decisions[2] );
  m_prevStates[1].checkRdCosts( spt, pqData[0], pqData[2], decisions[2], decisions[0] );
  m_prevStates[2].checkRdCosts( spt, pqData[3], pqData[1], decisions[1], decisions[3] );
  m_prevStates[3].checkRdCosts( spt, pqData[3], pqData[1], decisions[3], decisions[1] );

  if( isEndOfSubBlock )
  {
    for( int stateId = 0; stateId < 4; stateId++ )
    {
      m_skipStates[stateId].checkRdCostSkipSbb( decisions[stateId] );
    }
  }

  // Start-state checks, applied to decisions 0 and 2 only.
  m_startState.checkRdCostStart( lastOffset, pqData[0], decisions[0] );
  m_startState.checkRdCostStart( lastOffset, pqData[2], decisions[2] );
}
好无力, 细节实在看不太懂了。。。